bson 4.5.0 → 4.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
metadata.gz.sig CHANGED
Binary file
@@ -1,1344 +0,0 @@
1
- /*
2
- * Copyright (C) 2009-2019 MongoDB Inc.
3
- *
4
- * Licensed under the Apache License, Version 2.0 (the "License");
5
- * you may not use this file except in compliance with the License.
6
- * You may obtain a copy of the License at
7
- *
8
- * http://www.apache.org/licenses/LICENSE-2.0
9
- *
10
- * Unless required by applicable law or agreed to in writing, software
11
- * distributed under the License is distributed on an "AS IS" BASIS,
12
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- * See the License for the specific language governing permissions and
14
- * limitations under the License.
15
- */
16
- #include <ruby.h>
17
- #include <ruby/encoding.h>
18
- #include <stdbool.h>
19
- #include <time.h>
20
- #include <unistd.h>
21
- #include "native-endian.h"
22
-
23
- #define BSON_BYTE_BUFFER_SIZE 1024
24
-
25
- #ifndef HOST_NAME_HASH_MAX
26
- #define HOST_NAME_HASH_MAX 256
27
- #endif
28
-
29
- #define BSON_TYPE_DOUBLE 1
30
- #define BSON_TYPE_STRING 2
31
- #define BSON_TYPE_OBJECT 3
32
- #define BSON_TYPE_ARRAY 4
33
- #define BSON_TYPE_INT32 16
34
- #define BSON_TYPE_INT64 18
35
- #define BSON_TYPE_BOOLEAN 8
36
-
37
- typedef struct {
38
- size_t size;
39
- size_t write_position;
40
- size_t read_position;
41
- char buffer[BSON_BYTE_BUFFER_SIZE];
42
- char *b_ptr;
43
- } byte_buffer_t;
44
-
45
/* Pointer to the next unread byte of the buffer. */
#define READ_PTR(byte_buffer_ptr) \
  ((byte_buffer_ptr)->b_ptr + (byte_buffer_ptr)->read_position)

/* Number of bytes that have been written to the buffer but not yet read. */
#define READ_SIZE(byte_buffer_ptr) \
  ((byte_buffer_ptr)->write_position - (byte_buffer_ptr)->read_position)

/* Pointer to the position at which the next byte will be written. */
#define WRITE_PTR(byte_buffer_ptr) \
  ((byte_buffer_ptr)->b_ptr + (byte_buffer_ptr)->write_position)

/*
 * Calls rb_bson_expand_buffer() when writing `length` more bytes would go past
 * the buffer's current size.  Wrapped in do/while(0) so that it behaves as a
 * single statement; arguments are evaluated more than once.
 */
#define ENSURE_BSON_WRITE(buffer_ptr, length) \
  do { \
    if ((buffer_ptr)->write_position + (length) > (buffer_ptr)->size) \
      rb_bson_expand_buffer((buffer_ptr), (length)); \
  } while (0)

/*
 * Raises RangeError unless at least `length` unread bytes remain.  The
 * requested length is compared against the remaining size (rather than added
 * to read_position) so that a negative or huge length cannot wrap around and
 * slip past the check.  Arguments are evaluated more than once.
 */
#define ENSURE_BSON_READ(buffer_ptr, length) \
  do { \
    if ((size_t)(length) > READ_SIZE(buffer_ptr)) \
      rb_raise(rb_eRangeError, "Attempted to read %zu bytes, but only %zu bytes remain", (size_t)(length), READ_SIZE(buffer_ptr)); \
  } while (0)
60
-
61
- static VALUE rb_bson_byte_buffer_allocate(VALUE klass);
62
- static VALUE rb_bson_byte_buffer_initialize(int argc, VALUE *argv, VALUE self);
63
- static VALUE rb_bson_byte_buffer_length(VALUE self);
64
- static VALUE rb_bson_byte_buffer_get_byte(VALUE self);
65
- static VALUE rb_bson_byte_buffer_get_bytes(VALUE self, VALUE i);
66
- static VALUE rb_bson_byte_buffer_get_cstring(VALUE self);
67
- static VALUE rb_bson_byte_buffer_get_decimal128_bytes(VALUE self);
68
- static VALUE rb_bson_byte_buffer_get_double(VALUE self);
69
- static VALUE rb_bson_byte_buffer_get_int32(VALUE self);
70
- static VALUE rb_bson_byte_buffer_get_int64(VALUE self);
71
- static VALUE rb_bson_byte_buffer_get_string(VALUE self);
72
- static VALUE rb_bson_byte_buffer_get_hash(VALUE self);
73
- static VALUE rb_bson_byte_buffer_get_array(VALUE self);
74
- static VALUE rb_bson_byte_buffer_put_byte(VALUE self, VALUE byte);
75
- static VALUE rb_bson_byte_buffer_put_bytes(VALUE self, VALUE bytes);
76
- static VALUE rb_bson_byte_buffer_put_bson_partial_string(VALUE self, const char *str, int32_t length);
77
- static VALUE rb_bson_byte_buffer_put_cstring(VALUE self, VALUE string);
78
- static VALUE rb_bson_byte_buffer_put_decimal128(VALUE self, VALUE low, VALUE high);
79
- static VALUE rb_bson_byte_buffer_put_double(VALUE self, VALUE f);
80
- static VALUE rb_bson_byte_buffer_put_int32(VALUE self, VALUE i);
81
- static VALUE rb_bson_byte_buffer_put_int64(VALUE self, VALUE i);
82
- static VALUE rb_bson_byte_buffer_put_bson_string(VALUE self, const char *str, int32_t length);
83
- static VALUE rb_bson_byte_buffer_put_string(VALUE self, VALUE string);
84
- static VALUE rb_bson_byte_buffer_put_symbol(VALUE self, VALUE symbol);
85
- static VALUE rb_bson_byte_buffer_put_hash(VALUE self, VALUE hash, VALUE validating_keys);
86
- static VALUE rb_bson_byte_buffer_put_array(VALUE self, VALUE array, VALUE validating_keys);
87
- static VALUE rb_bson_byte_buffer_read_position(VALUE self);
88
- static VALUE rb_bson_byte_buffer_replace_int32(VALUE self, VALUE index, VALUE i);
89
- static VALUE rb_bson_byte_buffer_rewind(VALUE self);
90
- static VALUE rb_bson_byte_buffer_write_position(VALUE self);
91
- static VALUE rb_bson_byte_buffer_to_s(VALUE self);
92
- static VALUE rb_bson_object_id_generator_next(int argc, VALUE* args, VALUE self);
93
-
94
- static size_t rb_bson_byte_buffer_memsize(const void *ptr);
95
- static void rb_bson_byte_buffer_free(void *ptr);
96
- static void rb_bson_expand_buffer(byte_buffer_t* buffer_ptr, size_t length);
97
- static void rb_bson_generate_machine_id(VALUE rb_md5_class, char *rb_bson_machine_id);
98
- static bool rb_bson_utf8_validate(const char *utf8, size_t utf8_len, bool allow_null);
99
-
100
- static const rb_data_type_t rb_byte_buffer_data_type = {
101
- "bson/byte_buffer",
102
- { NULL, rb_bson_byte_buffer_free, rb_bson_byte_buffer_memsize }
103
- };
104
-
105
- static uint8_t pvt_get_type_byte(byte_buffer_t *b);
106
- static VALUE pvt_get_int32(byte_buffer_t *b);
107
- static VALUE pvt_get_int64(byte_buffer_t *b);
108
- static VALUE pvt_get_double(byte_buffer_t *b);
109
- static VALUE pvt_get_string(byte_buffer_t *b);
110
- static VALUE pvt_get_boolean(byte_buffer_t *b);
111
-
112
-
113
- static VALUE pvt_read_field(byte_buffer_t *b, VALUE rb_buffer, uint8_t type);
114
- static void pvt_replace_int32(byte_buffer_t *b, int32_t position, int32_t newval);
115
- static void pvt_skip_cstring(byte_buffer_t *b);
116
- static void pvt_validate_length(byte_buffer_t *b);
117
-
118
-
119
- static void pvt_put_field(byte_buffer_t *b, VALUE rb_buffer, VALUE val, VALUE validating_keys);
120
- static void pvt_put_byte(byte_buffer_t *b, const char byte);
121
- static void pvt_put_int32(byte_buffer_t *b, const int32_t i32);
122
- static void pvt_put_int64(byte_buffer_t *b, const int64_t i);
123
- static void pvt_put_double(byte_buffer_t *b, double f);
124
- static void pvt_put_cstring(byte_buffer_t *b, const char *str, int32_t length);
125
- static void pvt_put_bson_key(byte_buffer_t *b, VALUE string, VALUE validating_keys);
126
-
127
- /**
128
- * Holds the machine id hash for object id generation.
129
- */
130
- static char rb_bson_machine_id_hash[HOST_NAME_HASH_MAX];
131
-
132
- /**
133
- * The counter for incrementing object ids.
134
- */
135
- static uint32_t rb_bson_object_id_counter;
136
-
137
-
138
- static VALUE rb_bson_registry;
139
-
140
- static VALUE rb_bson_illegal_key;
141
- /**
142
- * Initialize the bson_native extension.
143
- */
144
- void Init_bson_native()
145
- {
146
- char rb_bson_machine_id[256];
147
-
148
- VALUE rb_bson_module = rb_define_module("BSON");
149
- VALUE rb_byte_buffer_class = rb_define_class_under(rb_bson_module, "ByteBuffer", rb_cObject);
150
- VALUE rb_bson_object_id_class = rb_const_get(rb_bson_module, rb_intern("ObjectId"));
151
- VALUE rb_bson_object_id_generator_class = rb_const_get(rb_bson_object_id_class, rb_intern("Generator"));
152
- VALUE rb_digest_class = rb_const_get(rb_cObject, rb_intern("Digest"));
153
- VALUE rb_md5_class = rb_const_get(rb_digest_class, rb_intern("MD5"));
154
-
155
- rb_bson_illegal_key = rb_const_get(rb_const_get(rb_bson_module, rb_intern("String")),rb_intern("IllegalKey"));
156
-
157
- rb_define_alloc_func(rb_byte_buffer_class, rb_bson_byte_buffer_allocate);
158
- rb_define_method(rb_byte_buffer_class, "initialize", rb_bson_byte_buffer_initialize, -1);
159
- rb_define_method(rb_byte_buffer_class, "length", rb_bson_byte_buffer_length, 0);
160
- rb_define_method(rb_byte_buffer_class, "get_byte", rb_bson_byte_buffer_get_byte, 0);
161
- rb_define_method(rb_byte_buffer_class, "get_bytes", rb_bson_byte_buffer_get_bytes, 1);
162
- rb_define_method(rb_byte_buffer_class, "get_cstring", rb_bson_byte_buffer_get_cstring, 0);
163
- rb_define_method(rb_byte_buffer_class, "get_decimal128_bytes", rb_bson_byte_buffer_get_decimal128_bytes, 0);
164
- rb_define_method(rb_byte_buffer_class, "get_double", rb_bson_byte_buffer_get_double, 0);
165
- rb_define_method(rb_byte_buffer_class, "get_hash", rb_bson_byte_buffer_get_hash, 0);
166
- rb_define_method(rb_byte_buffer_class, "get_array", rb_bson_byte_buffer_get_array, 0);
167
-
168
- rb_define_method(rb_byte_buffer_class, "get_int32", rb_bson_byte_buffer_get_int32, 0);
169
- rb_define_method(rb_byte_buffer_class, "get_int64", rb_bson_byte_buffer_get_int64, 0);
170
- rb_define_method(rb_byte_buffer_class, "get_string", rb_bson_byte_buffer_get_string, 0);
171
- rb_define_method(rb_byte_buffer_class, "put_byte", rb_bson_byte_buffer_put_byte, 1);
172
- rb_define_method(rb_byte_buffer_class, "put_bytes", rb_bson_byte_buffer_put_bytes, 1);
173
- rb_define_method(rb_byte_buffer_class, "put_cstring", rb_bson_byte_buffer_put_cstring, 1);
174
- rb_define_method(rb_byte_buffer_class, "put_decimal128", rb_bson_byte_buffer_put_decimal128, 2);
175
- rb_define_method(rb_byte_buffer_class, "put_double", rb_bson_byte_buffer_put_double, 1);
176
- rb_define_method(rb_byte_buffer_class, "put_int32", rb_bson_byte_buffer_put_int32, 1);
177
- rb_define_method(rb_byte_buffer_class, "put_int64", rb_bson_byte_buffer_put_int64, 1);
178
- rb_define_method(rb_byte_buffer_class, "put_string", rb_bson_byte_buffer_put_string, 1);
179
- rb_define_method(rb_byte_buffer_class, "put_symbol", rb_bson_byte_buffer_put_symbol, 1);
180
- rb_define_method(rb_byte_buffer_class, "read_position", rb_bson_byte_buffer_read_position, 0);
181
- rb_define_method(rb_byte_buffer_class, "replace_int32", rb_bson_byte_buffer_replace_int32, 2);
182
- rb_define_method(rb_byte_buffer_class, "rewind!", rb_bson_byte_buffer_rewind, 0);
183
- rb_define_method(rb_byte_buffer_class, "write_position", rb_bson_byte_buffer_write_position, 0);
184
- rb_define_method(rb_byte_buffer_class, "to_s", rb_bson_byte_buffer_to_s, 0);
185
- rb_define_method(rb_bson_object_id_generator_class, "next_object_id", rb_bson_object_id_generator_next, -1);
186
-
187
- rb_define_method(rb_byte_buffer_class, "put_hash", rb_bson_byte_buffer_put_hash, 2);
188
- rb_define_method(rb_byte_buffer_class, "put_array", rb_bson_byte_buffer_put_array, 2);
189
-
190
- // Get the object id machine id and hash it.
191
- rb_require("digest/md5");
192
- gethostname(rb_bson_machine_id, sizeof(rb_bson_machine_id));
193
- rb_bson_machine_id[255] = '\0';
194
- rb_bson_generate_machine_id(rb_md5_class, rb_bson_machine_id);
195
-
196
- // Set the object id counter to a random number
197
- rb_bson_object_id_counter = FIX2INT(rb_funcall(rb_mKernel, rb_intern("rand"), 1, INT2FIX(0x1000000)));
198
-
199
- rb_bson_registry = rb_const_get(rb_bson_module, rb_intern("Registry"));
200
- }
201
-
202
- void rb_bson_generate_machine_id(VALUE rb_md5_class, char *rb_bson_machine_id)
203
- {
204
- VALUE digest = rb_funcall(rb_md5_class, rb_intern("digest"), 1, rb_str_new2(rb_bson_machine_id));
205
- memcpy(rb_bson_machine_id_hash, RSTRING_PTR(digest), RSTRING_LEN(digest));
206
- }
207
-
208
- /**
209
- * Allocates a bson byte buffer that wraps a byte_buffer_t.
210
- */
211
- VALUE rb_bson_byte_buffer_allocate(VALUE klass)
212
- {
213
- byte_buffer_t *b;
214
- VALUE obj = TypedData_Make_Struct(klass, byte_buffer_t, &rb_byte_buffer_data_type, b);
215
- b->b_ptr = b->buffer;
216
- b->size = BSON_BYTE_BUFFER_SIZE;
217
- return obj;
218
- }
219
-
220
- /**
221
- * Initialize a byte buffer.
222
- */
223
- VALUE rb_bson_byte_buffer_initialize(int argc, VALUE *argv, VALUE self)
224
- {
225
- VALUE bytes;
226
- rb_scan_args(argc, argv, "01", &bytes);
227
-
228
- if (!NIL_P(bytes)) {
229
- rb_bson_byte_buffer_put_bytes(self, bytes);
230
- }
231
-
232
- return self;
233
- }
234
-
235
/* Returns 1 when i64 lies within [INT32_MIN, INT32_MAX], 0 otherwise. */
static int fits_int32(int64_t i64){
  if (i64 < INT32_MIN) {
    return 0;
  }
  if (i64 > INT32_MAX) {
    return 0;
  }
  return 1;
}
238
-
239
- /* write the byte denoting the BSON type for the passed object*/
240
- void pvt_put_type_byte(byte_buffer_t *b, VALUE val){
241
- switch(TYPE(val)){
242
- case T_BIGNUM:
243
- case T_FIXNUM:
244
- if(fits_int32(NUM2LL(val))){
245
- pvt_put_byte(b, BSON_TYPE_INT32);
246
- }else{
247
- pvt_put_byte(b, BSON_TYPE_INT64);
248
- }
249
- break;
250
- case T_STRING:
251
- pvt_put_byte(b, BSON_TYPE_STRING);
252
- break;
253
- case T_ARRAY:
254
- pvt_put_byte(b, BSON_TYPE_ARRAY);
255
- break;
256
- case T_TRUE:
257
- case T_FALSE:
258
- pvt_put_byte(b, BSON_TYPE_BOOLEAN);
259
- break;
260
- case T_HASH:
261
- pvt_put_byte(b, BSON_TYPE_OBJECT);
262
- break;
263
- case T_FLOAT:
264
- pvt_put_byte(b, BSON_TYPE_DOUBLE);
265
- break;
266
- default:{
267
- VALUE type = rb_funcall(val, rb_intern("bson_type"),0);
268
- pvt_put_byte(b, *RSTRING_PTR(type));
269
- break;
270
- }
271
- }
272
- }
273
-
274
- void pvt_put_field(byte_buffer_t *b, VALUE rb_buffer, VALUE val, VALUE validating_keys){
275
- switch(TYPE(val)){
276
- case T_BIGNUM:
277
- case T_FIXNUM:{
278
- int64_t i64= NUM2LL(val);
279
- if(fits_int32(i64)){
280
- pvt_put_int32(b, (int32_t)i64);
281
- }else{
282
- pvt_put_int64(b, i64);
283
- }
284
- break;
285
- }
286
- case T_FLOAT:
287
- pvt_put_double(b, NUM2DBL(val));
288
- break;
289
- case T_ARRAY:
290
- rb_bson_byte_buffer_put_array(rb_buffer, val, validating_keys);
291
- break;
292
- case T_TRUE:
293
- pvt_put_byte(b, 1);
294
- break;
295
- case T_FALSE:
296
- pvt_put_byte(b, 0);
297
- break;
298
- case T_HASH:
299
- rb_bson_byte_buffer_put_hash(rb_buffer, val, validating_keys);
300
- break;
301
- default:{
302
- rb_funcall(val, rb_intern("to_bson"), 2, rb_buffer, validating_keys);
303
- break;
304
- }
305
- }
306
- }
307
-
308
- typedef struct{
309
- byte_buffer_t *b;
310
- VALUE buffer;
311
- VALUE validating_keys;
312
- } put_hash_context;
313
-
314
- static int put_hash_callback(VALUE key, VALUE val, VALUE context){
315
- VALUE buffer = ((put_hash_context*)context)->buffer;
316
- VALUE validating_keys = ((put_hash_context*)context)->validating_keys;
317
- byte_buffer_t *b = ((put_hash_context*)context)->b;
318
-
319
- pvt_put_type_byte(b, val);
320
-
321
- switch(TYPE(key)){
322
- case T_STRING:
323
- pvt_put_bson_key(b, key, validating_keys);
324
- break;
325
- case T_SYMBOL:
326
- pvt_put_bson_key(b, rb_sym_to_s(key), validating_keys);
327
- break;
328
- default:
329
- rb_bson_byte_buffer_put_cstring(buffer, rb_funcall(key, rb_intern("to_bson_key"), 1, validating_keys));
330
- }
331
-
332
- pvt_put_field(b, buffer, val, validating_keys);
333
- return ST_CONTINUE;
334
- }
335
-
336
- /**
337
- * serializes a hash into the byte buffer
338
- */
339
- VALUE rb_bson_byte_buffer_put_hash(VALUE self, VALUE hash, VALUE validating_keys){
340
- byte_buffer_t *b = NULL;
341
- put_hash_context context = {0};
342
- size_t position = 0;
343
- size_t new_position = 0;
344
- int32_t new_length = 0;
345
-
346
- TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
347
- Check_Type(hash, T_HASH);
348
-
349
- position = READ_SIZE(b);
350
-
351
- /* insert length placeholder */
352
- pvt_put_int32(b, 0);
353
- context.buffer = self;
354
- context.validating_keys = validating_keys;
355
- context.b = b;
356
-
357
- rb_hash_foreach(hash, put_hash_callback, (VALUE)&context);
358
- pvt_put_byte(b, 0);
359
-
360
- /* update length placeholder with actual value */
361
- new_position = READ_SIZE(b);
362
- new_length = new_position - position;
363
- pvt_replace_int32(b, position, new_length);
364
-
365
- return self;
366
- }
367
-
368
- static const char *index_strings[] = {
369
- "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10",
370
- "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21",
371
- "22", "23", "24", "25", "26", "27", "28", "29", "30", "31", "32",
372
- "33", "34", "35", "36", "37", "38", "39", "40", "41", "42", "43",
373
- "44", "45", "46", "47", "48", "49", "50", "51", "52", "53", "54",
374
- "55", "56", "57", "58", "59", "60", "61", "62", "63", "64", "65",
375
- "66", "67", "68", "69", "70", "71", "72", "73", "74", "75", "76",
376
- "77", "78", "79", "80", "81", "82", "83", "84", "85", "86", "87",
377
- "88", "89", "90", "91", "92", "93", "94", "95", "96", "97", "98",
378
- "99", "100", "101", "102", "103", "104", "105", "106", "107", "108", "109",
379
- "110", "111", "112", "113", "114", "115", "116", "117", "118", "119", "120",
380
- "121", "122", "123", "124", "125", "126", "127", "128", "129", "130", "131",
381
- "132", "133", "134", "135", "136", "137", "138", "139", "140", "141", "142",
382
- "143", "144", "145", "146", "147", "148", "149", "150", "151", "152", "153",
383
- "154", "155", "156", "157", "158", "159", "160", "161", "162", "163", "164",
384
- "165", "166", "167", "168", "169", "170", "171", "172", "173", "174", "175",
385
- "176", "177", "178", "179", "180", "181", "182", "183", "184", "185", "186",
386
- "187", "188", "189", "190", "191", "192", "193", "194", "195", "196", "197",
387
- "198", "199", "200", "201", "202", "203", "204", "205", "206", "207", "208",
388
- "209", "210", "211", "212", "213", "214", "215", "216", "217", "218", "219",
389
- "220", "221", "222", "223", "224", "225", "226", "227", "228", "229", "230",
390
- "231", "232", "233", "234", "235", "236", "237", "238", "239", "240", "241",
391
- "242", "243", "244", "245", "246", "247", "248", "249", "250", "251", "252",
392
- "253", "254", "255", "256", "257", "258", "259", "260", "261", "262", "263",
393
- "264", "265", "266", "267", "268", "269", "270", "271", "272", "273", "274",
394
- "275", "276", "277", "278", "279", "280", "281", "282", "283", "284", "285",
395
- "286", "287", "288", "289", "290", "291", "292", "293", "294", "295", "296",
396
- "297", "298", "299", "300", "301", "302", "303", "304", "305", "306", "307",
397
- "308", "309", "310", "311", "312", "313", "314", "315", "316", "317", "318",
398
- "319", "320", "321", "322", "323", "324", "325", "326", "327", "328", "329",
399
- "330", "331", "332", "333", "334", "335", "336", "337", "338", "339", "340",
400
- "341", "342", "343", "344", "345", "346", "347", "348", "349", "350", "351",
401
- "352", "353", "354", "355", "356", "357", "358", "359", "360", "361", "362",
402
- "363", "364", "365", "366", "367", "368", "369", "370", "371", "372", "373",
403
- "374", "375", "376", "377", "378", "379", "380", "381", "382", "383", "384",
404
- "385", "386", "387", "388", "389", "390", "391", "392", "393", "394", "395",
405
- "396", "397", "398", "399", "400", "401", "402", "403", "404", "405", "406",
406
- "407", "408", "409", "410", "411", "412", "413", "414", "415", "416", "417",
407
- "418", "419", "420", "421", "422", "423", "424", "425", "426", "427", "428",
408
- "429", "430", "431", "432", "433", "434", "435", "436", "437", "438", "439",
409
- "440", "441", "442", "443", "444", "445", "446", "447", "448", "449", "450",
410
- "451", "452", "453", "454", "455", "456", "457", "458", "459", "460", "461",
411
- "462", "463", "464", "465", "466", "467", "468", "469", "470", "471", "472",
412
- "473", "474", "475", "476", "477", "478", "479", "480", "481", "482", "483",
413
- "484", "485", "486", "487", "488", "489", "490", "491", "492", "493", "494",
414
- "495", "496", "497", "498", "499", "500", "501", "502", "503", "504", "505",
415
- "506", "507", "508", "509", "510", "511", "512", "513", "514", "515", "516",
416
- "517", "518", "519", "520", "521", "522", "523", "524", "525", "526", "527",
417
- "528", "529", "530", "531", "532", "533", "534", "535", "536", "537", "538",
418
- "539", "540", "541", "542", "543", "544", "545", "546", "547", "548", "549",
419
- "550", "551", "552", "553", "554", "555", "556", "557", "558", "559", "560",
420
- "561", "562", "563", "564", "565", "566", "567", "568", "569", "570", "571",
421
- "572", "573", "574", "575", "576", "577", "578", "579", "580", "581", "582",
422
- "583", "584", "585", "586", "587", "588", "589", "590", "591", "592", "593",
423
- "594", "595", "596", "597", "598", "599", "600", "601", "602", "603", "604",
424
- "605", "606", "607", "608", "609", "610", "611", "612", "613", "614", "615",
425
- "616", "617", "618", "619", "620", "621", "622", "623", "624", "625", "626",
426
- "627", "628", "629", "630", "631", "632", "633", "634", "635", "636", "637",
427
- "638", "639", "640", "641", "642", "643", "644", "645", "646", "647", "648",
428
- "649", "650", "651", "652", "653", "654", "655", "656", "657", "658", "659",
429
- "660", "661", "662", "663", "664", "665", "666", "667", "668", "669", "670",
430
- "671", "672", "673", "674", "675", "676", "677", "678", "679", "680", "681",
431
- "682", "683", "684", "685", "686", "687", "688", "689", "690", "691", "692",
432
- "693", "694", "695", "696", "697", "698", "699", "700", "701", "702", "703",
433
- "704", "705", "706", "707", "708", "709", "710", "711", "712", "713", "714",
434
- "715", "716", "717", "718", "719", "720", "721", "722", "723", "724", "725",
435
- "726", "727", "728", "729", "730", "731", "732", "733", "734", "735", "736",
436
- "737", "738", "739", "740", "741", "742", "743", "744", "745", "746", "747",
437
- "748", "749", "750", "751", "752", "753", "754", "755", "756", "757", "758",
438
- "759", "760", "761", "762", "763", "764", "765", "766", "767", "768", "769",
439
- "770", "771", "772", "773", "774", "775", "776", "777", "778", "779", "780",
440
- "781", "782", "783", "784", "785", "786", "787", "788", "789", "790", "791",
441
- "792", "793", "794", "795", "796", "797", "798", "799", "800", "801", "802",
442
- "803", "804", "805", "806", "807", "808", "809", "810", "811", "812", "813",
443
- "814", "815", "816", "817", "818", "819", "820", "821", "822", "823", "824",
444
- "825", "826", "827", "828", "829", "830", "831", "832", "833", "834", "835",
445
- "836", "837", "838", "839", "840", "841", "842", "843", "844", "845", "846",
446
- "847", "848", "849", "850", "851", "852", "853", "854", "855", "856", "857",
447
- "858", "859", "860", "861", "862", "863", "864", "865", "866", "867", "868",
448
- "869", "870", "871", "872", "873", "874", "875", "876", "877", "878", "879",
449
- "880", "881", "882", "883", "884", "885", "886", "887", "888", "889", "890",
450
- "891", "892", "893", "894", "895", "896", "897", "898", "899", "900", "901",
451
- "902", "903", "904", "905", "906", "907", "908", "909", "910", "911", "912",
452
- "913", "914", "915", "916", "917", "918", "919", "920", "921", "922", "923",
453
- "924", "925", "926", "927", "928", "929", "930", "931", "932", "933", "934",
454
- "935", "936", "937", "938", "939", "940", "941", "942", "943", "944", "945",
455
- "946", "947", "948", "949", "950", "951", "952", "953", "954", "955", "956",
456
- "957", "958", "959", "960", "961", "962", "963", "964", "965", "966", "967",
457
- "968", "969", "970", "971", "972", "973", "974", "975", "976", "977", "978",
458
- "979", "980", "981", "982", "983", "984", "985", "986", "987", "988", "989",
459
- "990", "991", "992", "993", "994", "995", "996", "997", "998", "999"};
460
-
461
- /**
462
- * Writes an array index to the byte buffer.
463
- */
464
- void pvt_put_array_index(byte_buffer_t *b, int32_t index)
465
- {
466
- char buffer[16];
467
- const char *c_str = NULL;
468
- size_t length;
469
-
470
- if (index < 1000) {
471
- c_str = index_strings[index];
472
- } else {
473
- c_str = buffer;
474
- snprintf(buffer, sizeof(buffer), "%d", index);
475
- }
476
- length = strlen(c_str) + 1;
477
- ENSURE_BSON_WRITE(b, length);
478
- memcpy(WRITE_PTR(b), c_str, length);
479
- b->write_position += length;
480
- }
481
-
482
- /**
483
- * serializes an array into the byte buffer
484
- */
485
- VALUE rb_bson_byte_buffer_put_array(VALUE self, VALUE array, VALUE validating_keys){
486
- byte_buffer_t *b = NULL;
487
- size_t new_position = 0;
488
- int32_t new_length = 0;
489
- size_t position = 0;
490
- VALUE *array_element = NULL;
491
- TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
492
- Check_Type(array, T_ARRAY);
493
-
494
- position = READ_SIZE(b);
495
- /* insert length placeholder */
496
- pvt_put_int32(b, 0);
497
-
498
- array_element = RARRAY_PTR(array);
499
-
500
- for(int32_t index=0; index < RARRAY_LEN(array); index++, array_element++){
501
- pvt_put_type_byte(b, *array_element);
502
- pvt_put_array_index(b,index);
503
- pvt_put_field(b, self, *array_element, validating_keys);
504
- }
505
- pvt_put_byte(b, 0);
506
-
507
- /* update length placeholder */
508
- new_position = READ_SIZE(b);
509
- new_length = new_position - position;
510
- pvt_replace_int32(b, position, new_length);
511
-
512
- return self;
513
- }
514
-
515
- /**
516
- * Get the length of the buffer.
517
- */
518
- VALUE rb_bson_byte_buffer_length(VALUE self)
519
- {
520
- byte_buffer_t *b;
521
- TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
522
- return UINT2NUM(READ_SIZE(b));
523
- }
524
-
525
- /**
526
- * Get a single byte from the buffer.
527
- */
528
- VALUE rb_bson_byte_buffer_get_byte(VALUE self)
529
- {
530
- byte_buffer_t *b;
531
- VALUE byte;
532
-
533
- TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
534
- ENSURE_BSON_READ(b, 1);
535
- byte = rb_str_new(READ_PTR(b), 1);
536
- b->read_position += 1;
537
- return byte;
538
- }
539
-
540
- uint8_t pvt_get_type_byte(byte_buffer_t *b){
541
- int8_t byte;
542
- ENSURE_BSON_READ(b, 1);
543
- byte = *READ_PTR(b);
544
- b->read_position += 1;
545
- return (uint8_t)byte;
546
- }
547
-
548
- /**
549
- * Get bytes from the buffer.
550
- */
551
- VALUE rb_bson_byte_buffer_get_bytes(VALUE self, VALUE i)
552
- {
553
- byte_buffer_t *b;
554
- VALUE bytes;
555
- const uint32_t length = FIX2LONG(i);
556
-
557
- TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
558
- ENSURE_BSON_READ(b, length);
559
- bytes = rb_str_new(READ_PTR(b), length);
560
- b->read_position += length;
561
- return bytes;
562
- }
563
-
564
- VALUE pvt_get_boolean(byte_buffer_t *b){
565
- VALUE result = Qnil;
566
- ENSURE_BSON_READ(b, 1);
567
- result = *READ_PTR(b) == 1 ? Qtrue: Qfalse;
568
- b->read_position += 1;
569
- return result;
570
- }
571
-
572
- /**
573
- * Get a cstring from the buffer.
574
- */
575
- VALUE rb_bson_byte_buffer_get_cstring(VALUE self)
576
- {
577
- byte_buffer_t *b;
578
- VALUE string;
579
- int length;
580
-
581
- TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
582
- length = (int)strlen(READ_PTR(b));
583
- ENSURE_BSON_READ(b, length);
584
- string = rb_enc_str_new(READ_PTR(b), length, rb_utf8_encoding());
585
- b->read_position += length + 1;
586
- return string;
587
- }
588
-
589
- /**
590
- * Reads but does not return a cstring from the buffer.
591
- */
592
- void pvt_skip_cstring(byte_buffer_t *b)
593
- {
594
- int length;
595
- length = (int)strlen(READ_PTR(b));
596
- ENSURE_BSON_READ(b, length);
597
- b->read_position += length + 1;
598
- }
599
-
600
- /**
601
- * Get the 16 bytes representing the decimal128 from the buffer.
602
- */
603
- VALUE rb_bson_byte_buffer_get_decimal128_bytes(VALUE self)
604
- {
605
- byte_buffer_t *b;
606
- VALUE bytes;
607
-
608
- TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
609
- ENSURE_BSON_READ(b, 16);
610
- bytes = rb_str_new(READ_PTR(b), 16);
611
- b->read_position += 16;
612
- return bytes;
613
- }
614
-
615
- /**
616
- * Get a double from the buffer.
617
- */
618
- VALUE rb_bson_byte_buffer_get_double(VALUE self)
619
- {
620
- byte_buffer_t *b;
621
-
622
- TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
623
- return pvt_get_double(b);
624
- }
625
-
626
- VALUE pvt_get_double(byte_buffer_t *b)
627
- {
628
- double d;
629
-
630
- ENSURE_BSON_READ(b, 8);
631
- memcpy(&d, READ_PTR(b), 8);
632
- b->read_position += 8;
633
- return DBL2NUM(BSON_DOUBLE_FROM_LE(d));
634
- }
635
-
636
- /**
637
- * Get a int32 from the buffer.
638
- */
639
- VALUE rb_bson_byte_buffer_get_int32(VALUE self)
640
- {
641
- byte_buffer_t *b;
642
-
643
- TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
644
- return pvt_get_int32(b);
645
- }
646
-
647
- VALUE pvt_get_int32(byte_buffer_t *b)
648
- {
649
- int32_t i32;
650
-
651
- ENSURE_BSON_READ(b, 4);
652
- memcpy(&i32, READ_PTR(b), 4);
653
- b->read_position += 4;
654
- return INT2NUM(BSON_UINT32_FROM_LE(i32));
655
- }
656
-
657
- /**
658
- * Get a int64 from the buffer.
659
- */
660
- VALUE rb_bson_byte_buffer_get_int64(VALUE self)
661
- {
662
- byte_buffer_t *b;
663
- TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
664
- return pvt_get_int64(b);
665
- }
666
-
667
- VALUE pvt_get_int64(byte_buffer_t *b)
668
- {
669
- int64_t i64;
670
-
671
- ENSURE_BSON_READ(b, 8);
672
- memcpy(&i64, READ_PTR(b), 8);
673
- b->read_position += 8;
674
- return LL2NUM(BSON_UINT64_FROM_LE(i64));
675
- }
676
-
677
- /**
678
- * Get a string from the buffer.
679
- */
680
- VALUE rb_bson_byte_buffer_get_string(VALUE self)
681
- {
682
- byte_buffer_t *b;
683
-
684
- TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
685
- return pvt_get_string(b);
686
- }
687
-
688
- VALUE pvt_get_string(byte_buffer_t *b)
689
- {
690
- int32_t length;
691
- int32_t length_le;
692
- VALUE string;
693
-
694
- ENSURE_BSON_READ(b, 4);
695
- memcpy(&length, READ_PTR(b), 4);
696
- length_le = BSON_UINT32_FROM_LE(length);
697
- b->read_position += 4;
698
- ENSURE_BSON_READ(b, length_le);
699
- string = rb_enc_str_new(READ_PTR(b), length_le - 1, rb_utf8_encoding());
700
- b->read_position += length_le;
701
- return string;
702
- }
703
-
704
-
705
- VALUE rb_bson_byte_buffer_get_hash(VALUE self){
706
- VALUE doc = Qnil;
707
- byte_buffer_t *b=NULL;
708
- uint8_t type;
709
- VALUE cDocument = rb_const_get(rb_const_get(rb_cObject, rb_intern("BSON")), rb_intern("Document"));
710
-
711
- TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
712
-
713
- pvt_validate_length(b);
714
-
715
- doc = rb_funcall(cDocument, rb_intern("allocate"),0);
716
-
717
- while((type = pvt_get_type_byte(b)) != 0){
718
- VALUE field = rb_bson_byte_buffer_get_cstring(self);
719
- rb_hash_aset(doc, field, pvt_read_field(b, self, type));
720
- }
721
- return doc;
722
- }
723
-
724
- VALUE rb_bson_byte_buffer_get_array(VALUE self){
725
- byte_buffer_t *b;
726
- VALUE array = Qnil;
727
- uint8_t type;
728
-
729
- TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
730
-
731
- pvt_validate_length(b);
732
-
733
- array = rb_ary_new();
734
- while((type = pvt_get_type_byte(b)) != 0){
735
- pvt_skip_cstring(b);
736
- rb_ary_push(array, pvt_read_field(b, self, type));
737
- }
738
- return array;
739
- }
740
-
741
- /**
742
- * Read a single field from a hash or array
743
- */
744
- VALUE pvt_read_field(byte_buffer_t *b, VALUE rb_buffer, uint8_t type){
745
- switch(type) {
746
- case BSON_TYPE_INT32: return pvt_get_int32(b);
747
- case BSON_TYPE_INT64: return pvt_get_int64(b);
748
- case BSON_TYPE_DOUBLE: return pvt_get_double(b);
749
- case BSON_TYPE_STRING: return pvt_get_string(b);
750
- case BSON_TYPE_ARRAY: return rb_bson_byte_buffer_get_array(rb_buffer);
751
- case BSON_TYPE_OBJECT: return rb_bson_byte_buffer_get_hash(rb_buffer);
752
- case BSON_TYPE_BOOLEAN: return pvt_get_boolean(b);
753
- default:
754
- {
755
- VALUE klass = rb_funcall(rb_bson_registry,rb_intern("get"),1, INT2FIX(type));
756
- VALUE value = rb_funcall(klass, rb_intern("from_bson"),1, rb_buffer);
757
- return value;
758
- }
759
- }
760
- }
761
-
762
- /**
763
- * Writes a byte to the byte buffer.
764
- */
765
- VALUE rb_bson_byte_buffer_put_byte(VALUE self, VALUE byte)
766
- {
767
- byte_buffer_t *b;
768
- const char *str;
769
-
770
- if (!RB_TYPE_P(byte, T_STRING))
771
- rb_raise(rb_eArgError, "Invalid input");
772
-
773
- str = RSTRING_PTR(byte);
774
-
775
- TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
776
- ENSURE_BSON_WRITE(b, 1);
777
- memcpy(WRITE_PTR(b), str, 1);
778
- b->write_position += 1;
779
-
780
- return self;
781
- }
782
-
783
- void pvt_put_byte( byte_buffer_t *b, const char byte)
784
- {
785
- ENSURE_BSON_WRITE(b, 1);
786
- *WRITE_PTR(b) = byte;
787
- b->write_position += 1;
788
-
789
- }
790
- /**
791
- * Writes bytes to the byte buffer.
792
- */
793
- VALUE rb_bson_byte_buffer_put_bytes(VALUE self, VALUE bytes)
794
- {
795
- byte_buffer_t *b;
796
- const char *str;
797
- size_t length;
798
-
799
- if (!RB_TYPE_P(bytes, T_STRING) && !RB_TYPE_P(bytes, RUBY_T_DATA))
800
- rb_raise(rb_eArgError, "Invalid input");
801
-
802
- str = RSTRING_PTR(bytes);
803
- length = RSTRING_LEN(bytes);
804
-
805
- TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
806
- ENSURE_BSON_WRITE(b, length);
807
- memcpy(WRITE_PTR(b), str, length);
808
- b->write_position += length;
809
- return self;
810
- }
811
-
812
- /**
813
- * Writes a string (which may form part of a BSON object) to the byte buffer.
814
- * length does not include the null terminator.
815
- */
816
- VALUE rb_bson_byte_buffer_put_bson_partial_string(VALUE self, const char *str, int32_t length)
817
- {
818
- byte_buffer_t *b;
819
- TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
820
- pvt_put_cstring(b, str, length);
821
- return self;
822
- }
823
-
824
- /**
825
- * length does not include the null terminator.
826
- */
827
- void pvt_put_cstring(byte_buffer_t *b, const char *str, int32_t length)
828
- {
829
- int bytes_to_write;
830
- if (!rb_bson_utf8_validate(str, length, false)) {
831
- rb_raise(rb_eArgError, "String %s is not a valid UTF-8 CString.", str);
832
- }
833
- bytes_to_write = length + 1;
834
- ENSURE_BSON_WRITE(b, bytes_to_write);
835
- memcpy(WRITE_PTR(b), str, bytes_to_write);
836
- b->write_position += bytes_to_write;
837
- }
838
-
839
- /**
840
- * Write a hash key to the byte buffer, validating it if requested
841
- */
842
- void pvt_put_bson_key(byte_buffer_t *b, VALUE string, VALUE validating_keys){
843
- char *c_str = RSTRING_PTR(string);
844
- size_t length = RSTRING_LEN(string);
845
-
846
- if (RTEST(validating_keys)) {
847
- if (length > 0 && (c_str[0] == '$' || memchr(c_str, '.', length))) {
848
- rb_exc_raise(rb_funcall(rb_bson_illegal_key, rb_intern("new"), 1, string));
849
- }
850
- }
851
-
852
- pvt_put_cstring(b, c_str, length);
853
- }
854
-
855
- /**
856
- * Writes a cstring to the byte buffer.
857
- * This magically supports both Ruby symbols and strings.
858
- */
859
- VALUE rb_bson_byte_buffer_put_cstring(VALUE self, VALUE string)
860
- {
861
- int32_t length;
862
-
863
- if (TYPE(string) == T_SYMBOL) {
864
- const char *sym = rb_id2name(SYM2ID(string));
865
- length = strlen(sym);
866
-
867
- return rb_bson_byte_buffer_put_bson_partial_string(self, sym, length);
868
- } else if (TYPE(string) == T_STRING) {
869
- const char *str = RSTRING_PTR(string);
870
- length = RSTRING_LEN(string);
871
-
872
- return rb_bson_byte_buffer_put_bson_partial_string(self, str, length);
873
- } else if (TYPE(string) == T_FIXNUM) {
874
- const char *str = RSTRING_PTR(rb_fix2str(string, 10));
875
- length = strlen(str);
876
-
877
- return rb_bson_byte_buffer_put_bson_partial_string(self, str, length);
878
- } else {
879
- rb_raise(rb_eTypeError, "Invalid type for string");
880
- }
881
- }
882
-
883
- /**
884
- * Writes a 128 bit decimal to the byte buffer.
885
- */
886
- VALUE rb_bson_byte_buffer_put_decimal128(VALUE self, VALUE low, VALUE high)
887
- {
888
- byte_buffer_t *b;
889
- const int64_t low64 = BSON_UINT64_TO_LE(NUM2ULL(low));
890
- const int64_t high64 = BSON_UINT64_TO_LE(NUM2ULL(high));
891
-
892
- TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
893
- ENSURE_BSON_WRITE(b, 8);
894
- memcpy(WRITE_PTR(b), &low64, 8);
895
- b->write_position += 8;
896
-
897
- ENSURE_BSON_WRITE(b, 8);
898
- memcpy(WRITE_PTR(b), &high64, 8);
899
- b->write_position += 8;
900
-
901
- return self;
902
- }
903
-
904
- /**
905
- * Writes a 64 bit double to the buffer.
906
- */
907
- VALUE rb_bson_byte_buffer_put_double(VALUE self, VALUE f)
908
- {
909
- byte_buffer_t *b;
910
- TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
911
- pvt_put_double(b,NUM2DBL(f));
912
-
913
- return self;
914
- }
915
-
916
- void pvt_put_double(byte_buffer_t *b, double f)
917
- {
918
- const double d = BSON_DOUBLE_TO_LE(f);
919
- ENSURE_BSON_WRITE(b, 8);
920
- memcpy(WRITE_PTR(b), &d, 8);
921
- b->write_position += 8;
922
- }
923
-
924
- /**
925
- * Writes a 32 bit integer to the byte buffer.
926
- */
927
- VALUE rb_bson_byte_buffer_put_int32(VALUE self, VALUE i)
928
- {
929
- byte_buffer_t *b;
930
- const int32_t i32 = NUM2INT(i);
931
-
932
- TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
933
- pvt_put_int32(b, i32);
934
- return self;
935
- }
936
-
937
- void pvt_put_int32(byte_buffer_t *b, const int32_t i)
938
- {
939
- const int32_t i32 = BSON_UINT32_TO_LE(i);
940
- ENSURE_BSON_WRITE(b, 4);
941
- memcpy(WRITE_PTR(b), &i32, 4);
942
- b->write_position += 4;
943
- }
944
-
945
- /**
946
- * Writes a 64 bit integer to the byte buffer.
947
- */
948
- VALUE rb_bson_byte_buffer_put_int64(VALUE self, VALUE i)
949
- {
950
- byte_buffer_t *b;
951
- const int64_t i64 = NUM2LL(i);
952
-
953
- TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
954
- pvt_put_int64(b, i64);
955
-
956
- return self;
957
- }
958
-
959
- void pvt_put_int64(byte_buffer_t *b, const int64_t i)
960
- {
961
- const int64_t i64 = BSON_UINT64_TO_LE(i);
962
-
963
- ENSURE_BSON_WRITE(b, 8);
964
- memcpy(WRITE_PTR(b), &i64, 8);
965
- b->write_position += 8;
966
-
967
- }
968
-
969
- /**
970
- * validate the buffer contains the amount of bytes the array / hash claimns
971
- * and that it is null terminated
972
- */
973
- void pvt_validate_length(byte_buffer_t *b)
974
- {
975
- int32_t length;
976
-
977
- ENSURE_BSON_READ(b, 4);
978
- memcpy(&length, READ_PTR(b), 4);
979
- length = BSON_UINT32_TO_LE(length);
980
-
981
- /* minimum valid length is 4 (byte count) + 1 (terminating byte) */
982
- if(length >= 5){
983
- ENSURE_BSON_READ(b, length);
984
-
985
- /* The last byte should be a null byte: it should be at length - 1 */
986
- if( *(READ_PTR(b) + length - 1) != 0 ){
987
- rb_raise(rb_eRangeError, "Buffer should have contained null terminator at %zu but contained %d", b->read_position + (size_t)length, (int)*(READ_PTR(b) + length));
988
- }
989
- b->read_position += 4;
990
- }
991
- else{
992
- rb_raise(rb_eRangeError, "Buffer contained invalid length %d at %zu", length, b->read_position);
993
- }
994
- }
995
-
996
- /**
997
- * Write BSON string to byte buffer given a C string.
998
- * length is inclusive of the NULL terminator in the C string.
999
- */
1000
- static VALUE rb_bson_byte_buffer_put_bson_string(VALUE self, const char *str, int32_t length)
1001
- {
1002
- byte_buffer_t *b;
1003
- int32_t length_le;
1004
-
1005
- if (length <= 0) {
1006
- rb_raise(rb_eArgError, "The length must include the NULL terminator, and thus be at least 1");
1007
- }
1008
-
1009
- length_le = BSON_UINT32_TO_LE(length);
1010
-
1011
- if (!rb_bson_utf8_validate(str, length - 1, true)) {
1012
- rb_raise(rb_eArgError, "String %s is not valid UTF-8.", str);
1013
- }
1014
-
1015
- TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
1016
- ENSURE_BSON_WRITE(b, length + 4);
1017
- memcpy(WRITE_PTR(b), &length_le, 4);
1018
- b->write_position += 4;
1019
- memcpy(WRITE_PTR(b), str, length);
1020
- b->write_position += length;
1021
-
1022
- return self;
1023
- }
1024
-
1025
- /**
1026
- * Writes a string to the byte buffer.
1027
- */
1028
- VALUE rb_bson_byte_buffer_put_string(VALUE self, VALUE string)
1029
- {
1030
- const char *str = RSTRING_PTR(string);
1031
- const int32_t length = RSTRING_LEN(string) + 1;
1032
-
1033
- return rb_bson_byte_buffer_put_bson_string(self, str, length);
1034
- }
1035
-
1036
- /**
1037
- * Writes a symbol to the byte buffer.
1038
- */
1039
- VALUE rb_bson_byte_buffer_put_symbol(VALUE self, VALUE symbol)
1040
- {
1041
- const char *sym = rb_id2name(SYM2ID(symbol));
1042
- const int32_t length = strlen(sym) + 1;
1043
-
1044
- return rb_bson_byte_buffer_put_bson_string(self, sym, length);
1045
- }
1046
-
1047
- /**
1048
- * Get the read position.
1049
- */
1050
- VALUE rb_bson_byte_buffer_read_position(VALUE self)
1051
- {
1052
- byte_buffer_t *b;
1053
- TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
1054
- return INT2NUM(b->read_position);
1055
- }
1056
-
1057
- /**
1058
- * Replace a 32 bit integer int the byte buffer.
1059
- */
1060
- VALUE rb_bson_byte_buffer_replace_int32(VALUE self, VALUE index, VALUE i)
1061
- {
1062
- byte_buffer_t *b;
1063
-
1064
- TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
1065
- pvt_replace_int32(b, NUM2LONG(index), NUM2LONG(i));
1066
-
1067
- return self;
1068
- }
1069
-
1070
- void pvt_replace_int32(byte_buffer_t *b, int32_t position, int32_t newval)
1071
- {
1072
- const int32_t i32 = BSON_UINT32_TO_LE(newval);
1073
- memcpy(READ_PTR(b) + position, &i32, 4);
1074
- }
1075
-
1076
- /**
1077
- * Reset the read position to the beginning of the byte buffer.
1078
- */
1079
- VALUE rb_bson_byte_buffer_rewind(VALUE self)
1080
- {
1081
- byte_buffer_t *b;
1082
- TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
1083
- b->read_position = 0;
1084
-
1085
- return self;
1086
- }
1087
-
1088
- /**
1089
- * Get the write position.
1090
- */
1091
- VALUE rb_bson_byte_buffer_write_position(VALUE self)
1092
- {
1093
- byte_buffer_t *b;
1094
- TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
1095
- return INT2NUM(b->write_position);
1096
- }
1097
-
1098
- /**
1099
- * Convert the buffer to a string.
1100
- */
1101
- VALUE rb_bson_byte_buffer_to_s(VALUE self)
1102
- {
1103
- byte_buffer_t *b;
1104
- TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
1105
- return rb_str_new(READ_PTR(b), READ_SIZE(b));
1106
- }
1107
-
1108
- /**
1109
- * Get the size of the byte_buffer_t in memory.
1110
- */
1111
- size_t rb_bson_byte_buffer_memsize(const void *ptr)
1112
- {
1113
- return ptr ? sizeof(byte_buffer_t) : 0;
1114
- }
1115
-
1116
- /**
1117
- * Free the memory for the byte buffer.
1118
- */
1119
- void rb_bson_byte_buffer_free(void *ptr)
1120
- {
1121
- byte_buffer_t *b = ptr;
1122
- if (b->b_ptr != b->buffer) {
1123
- xfree(b->b_ptr);
1124
- }
1125
- xfree(b);
1126
- }
1127
-
1128
- /**
1129
- * Expand the byte buffer linearly.
1130
- */
1131
- void rb_bson_expand_buffer(byte_buffer_t* buffer_ptr, size_t length)
1132
- {
1133
- const size_t required_size = buffer_ptr->write_position - buffer_ptr->read_position + length;
1134
- if (required_size <= buffer_ptr->size) {
1135
- memmove(buffer_ptr->b_ptr, READ_PTR(buffer_ptr), READ_SIZE(buffer_ptr));
1136
- buffer_ptr->write_position -= buffer_ptr->read_position;
1137
- buffer_ptr->read_position = 0;
1138
- } else {
1139
- char *new_b_ptr;
1140
- const size_t new_size = required_size * 2;
1141
- new_b_ptr = ALLOC_N(char, new_size);
1142
- memcpy(new_b_ptr, READ_PTR(buffer_ptr), READ_SIZE(buffer_ptr));
1143
- if (buffer_ptr->b_ptr != buffer_ptr->buffer) {
1144
- xfree(buffer_ptr->b_ptr);
1145
- }
1146
- buffer_ptr->b_ptr = new_b_ptr;
1147
- buffer_ptr->size = new_size;
1148
- buffer_ptr->write_position -= buffer_ptr->read_position;
1149
- buffer_ptr->read_position = 0;
1150
- }
1151
- }
1152
-
1153
- /**
1154
- * Generate the next object id.
1155
- */
1156
- VALUE rb_bson_object_id_generator_next(int argc, VALUE* args, VALUE self)
1157
- {
1158
- char bytes[12];
1159
- uint32_t t;
1160
- uint32_t c;
1161
- uint16_t pid = BSON_UINT16_TO_BE(getpid());
1162
-
1163
- if (argc == 0 || (argc == 1 && *args == Qnil)) {
1164
- t = BSON_UINT32_TO_BE((int) time(NULL));
1165
- }
1166
- else {
1167
- t = BSON_UINT32_TO_BE(NUM2ULONG(rb_funcall(*args, rb_intern("to_i"), 0)));
1168
- }
1169
-
1170
- c = BSON_UINT32_TO_BE(rb_bson_object_id_counter << 8);
1171
-
1172
- memcpy(&bytes, &t, 4);
1173
- memcpy(&bytes[4], rb_bson_machine_id_hash, 3);
1174
- memcpy(&bytes[7], &pid, 2);
1175
- memcpy(&bytes[9], &c, 3);
1176
- rb_bson_object_id_counter++;
1177
- return rb_str_new(bytes, 12);
1178
- }
1179
-
1180
/**
 * Taken from libbson.
 *
 * Classifies the first byte of a UTF-8 sequence. *seq_length receives the
 * number of bytes the sequence occupies (1 to 6) and *first_mask the mask
 * that extracts the payload bits of that first byte. Both are set to 0 when
 * the byte cannot start a sequence. All subsequent bytes of a sequence are
 * masked against 0x3F by the caller.
 */
static void _bson_utf8_get_sequence(const char *utf8, uint8_t *seq_length, uint8_t *first_mask)
{
  /* One row per lead-byte pattern: (byte & select) == expect. */
  static const struct {
    uint8_t select;
    uint8_t expect;
    uint8_t length;
    uint8_t mask;
  } lead_patterns[] = {
    { 0x80, 0x00, 1, 0x7F },
    { 0xE0, 0xC0, 2, 0x1F },
    { 0xF0, 0xE0, 3, 0x0F },
    { 0xF8, 0xF0, 4, 0x07 },
    { 0xFC, 0xF8, 5, 0x03 },
    { 0xFE, 0xFC, 6, 0x01 },
  };
  const unsigned char lead = (unsigned char)*utf8;
  uint8_t found_length = 0;
  uint8_t found_mask = 0;
  size_t row;

  for (row = 0; row < sizeof(lead_patterns) / sizeof(lead_patterns[0]); row++) {
    if ((lead & lead_patterns[row].select) == lead_patterns[row].expect) {
      found_length = lead_patterns[row].length;
      found_mask = lead_patterns[row].mask;
      break;
    }
  }

  *seq_length = found_length;
  *first_mask = found_mask;
}
1224
-
1225
/**
 * Taken from libbson.
 *
 * Checks that the utf8_len bytes at utf8 form valid UTF-8. Returns false
 * for a NULL pointer, an invalid lead byte, a truncated sequence, a bad
 * continuation byte, a code point above 0x10FFFF, a surrogate code point,
 * or a non-shortest-form encoding (the two byte encoding of NULL is
 * accepted). When allow_null is false, any zero byte also makes the input
 * invalid. Returns true otherwise, including for empty input.
 */
bool rb_bson_utf8_validate(const char *utf8, size_t utf8_len, bool allow_null)
{
  uint32_t c;
  uint8_t first_mask;
  uint8_t seq_length;
  /* Indices share the type of utf8_len so they cannot wrap before
   * reaching the end of a very large input. */
  size_t i;
  size_t j;

  if (!utf8) {
    return false;
  }

  for (i = 0; i < utf8_len; i += seq_length) {
    _bson_utf8_get_sequence(&utf8[i], &seq_length, &first_mask);

    /*
     * Ensure we have a valid multi-byte sequence length.
     */
    if (!seq_length) {
      return false;
    }

    /*
     * Ensure we have enough bytes left.
     */
    if ((utf8_len - i) < seq_length) {
      return false;
    }

    /*
     * Also calculate the next char as a unichar so we can
     * check code ranges for non-shortest form.
     */
    c = (unsigned char)utf8[i] & first_mask;

    /*
     * Check the high-bits for each additional sequence byte.
     */
    for (j = i + 1; j < (i + seq_length); j++) {
      c = (c << 6) | ((unsigned char)utf8[j] & 0x3F);
      if (((unsigned char)utf8[j] & 0xC0) != 0x80) {
        return false;
      }
    }

    /*
     * Check for NULL bytes afterwards. Every index i + j is in range
     * here because the "enough bytes left" check above already passed.
     */
    if (!allow_null) {
      for (j = 0; j < seq_length; j++) {
        if (!utf8[i + j]) {
          return false;
        }
      }
    }

    /*
     * Code point wont fit in utf-16, not allowed.
     */
    if (c > 0x0010FFFF) {
      return false;
    }

    /*
     * Byte is in reserved range for UTF-16 high-marks
     * for surrogate pairs.
     */
    if ((c & 0xFFFFF800) == 0xD800) {
      return false;
    }

    /*
     * Check non-shortest form unicode.
     */
    switch (seq_length) {
    case 1:
      if (c <= 0x007F) {
        continue;
      }
      return false;

    case 2:
      if ((c >= 0x0080) && (c <= 0x07FF)) {
        continue;
      } else if (c == 0) {
        /* Two-byte representation for NULL. */
        continue;
      }
      return false;

    case 3:
      if ((c >= 0x0800) && (c <= 0xFFFF)) {
        continue;
      }
      return false;

    case 4:
      if ((c >= 0x10000) && (c <= 0x10FFFF)) {
        continue;
      }
      return false;

    default:
      return false;
    }
  }

  return true;
}