kbaum-mongo_ext 0.18.3p

Sign up to get free protection for your applications and to get access to all the features.
data/ext/cbson/cbson.c ADDED
@@ -0,0 +1,908 @@
1
+ /*
2
+ * Copyright 2009 10gen, Inc.
3
+ *
4
+ * Licensed under the Apache License, Version 2.0 (the "License");
5
+ * you may not use this file except in compliance with the License.
6
+ * You may obtain a copy of the License at
7
+ *
8
+ * http://www.apache.org/licenses/LICENSE-2.0
9
+ *
10
+ * Unless required by applicable law or agreed to in writing, software
11
+ * distributed under the License is distributed on an "AS IS" BASIS,
12
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ * See the License for the specific language governing permissions and
14
+ * limitations under the License.
15
+ */
16
+
17
+ /*
18
+ * This file contains C implementations of some of the functions needed by the
19
+ * bson module. If possible, these implementations should be used to speed up
20
+ * BSON encoding and decoding.
21
+ */
22
+
23
+ #include "ruby.h"
24
+
25
+ #if HAVE_RUBY_ST_H
26
+ #include "ruby/st.h"
27
+ #endif
28
+ #if HAVE_ST_H
29
+ #include "st.h"
30
+ #endif
31
+
32
+ #if HAVE_RUBY_REGEX_H
33
+ #include "ruby/regex.h"
34
+ #endif
35
+ #if HAVE_REGEX_H
36
+ #include "regex.h"
37
+ #endif
38
+
39
+ #include <string.h>
40
+ #include <math.h>
41
+ #include <unistd.h>
42
+ #include <time.h>
43
+
44
+ #include "version.h"
45
+ #include "buffer.h"
46
+ #include "encoding_helpers.h"
47
+
48
/*
 * Append `size` bytes starting at `data` to `buffer`; raises NoMemoryError
 * when buffer_write() returns non-zero.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to exactly
 * one statement and can never capture an `else` that follows the call site.
 */
#define SAFE_WRITE(buffer, data, size)                                          \
    do {                                                                        \
        if (buffer_write((buffer), (data), (size)) != 0) {                      \
            rb_raise(rb_eNoMemError, "failed to allocate memory in buffer.c");  \
        }                                                                       \
    } while (0)

/*
 * Overwrite `size` bytes at `position` inside `buffer` with `data`; raises
 * RuntimeError when buffer_write_at_position() returns non-zero.
 */
#define SAFE_WRITE_AT_POS(buffer, position, data, size)                             \
    do {                                                                            \
        if (buffer_write_at_position((buffer), (position), (data), (size)) != 0) {  \
            rb_raise(rb_eRuntimeError, "invalid write at position in buffer.c");    \
        }                                                                           \
    } while (0)
55
+
56
+ #define MAX_HOSTNAME_LENGTH 256
57
+
58
+ static VALUE Binary;
59
+ static VALUE Time;
60
+ static VALUE ObjectID;
61
+ static VALUE DBRef;
62
+ static VALUE Code;
63
+ static VALUE MinKey;
64
+ static VALUE MaxKey;
65
+ static VALUE Regexp;
66
+ static VALUE RegexpOfHolding;
67
+ static VALUE OrderedHash;
68
+ static VALUE InvalidName;
69
+ static VALUE InvalidStringEncoding;
70
+ static VALUE InvalidDocument;
71
+ static VALUE DigestMD5;
72
+
73
+ #if HAVE_RUBY_ENCODING_H
74
+ #include "ruby/encoding.h"
75
+ #define STR_NEW(p,n) rb_enc_str_new((p), (n), rb_utf8_encoding())
76
+ /* MUST call TO_UTF8 before calling write_utf8. */
77
+ #define TO_UTF8(string) rb_str_export_to_enc((string), rb_utf8_encoding())
78
+ static void write_utf8(buffer_t buffer, VALUE string, char check_null) {
79
+ result_t status = check_string(RSTRING_PTR(string), RSTRING_LEN(string),
80
+ 0, check_null);
81
+ if (status == HAS_NULL) {
82
+ buffer_free(buffer);
83
+ rb_raise(InvalidDocument, "Key names / regex patterns must not contain the NULL byte");
84
+ }
85
+ SAFE_WRITE(buffer, RSTRING_PTR(string), RSTRING_LEN(string));
86
+ }
87
+ #else
88
+ #define STR_NEW(p,n) rb_str_new((p), (n))
89
+ /* MUST call TO_UTF8 before calling write_utf8. */
90
+ #define TO_UTF8(string) (string)
91
+ static void write_utf8(buffer_t buffer, VALUE string, char check_null) {
92
+ result_t status = check_string(RSTRING_PTR(string), RSTRING_LEN(string),
93
+ 1, check_null);
94
+ if (status == HAS_NULL) {
95
+ buffer_free(buffer);
96
+ rb_raise(InvalidDocument, "Key names / regex patterns must not contain the NULL byte");
97
+ } else if (status == NOT_UTF_8) {
98
+ buffer_free(buffer);
99
+ rb_raise(InvalidStringEncoding, "String not valid UTF-8");
100
+ }
101
+ SAFE_WRITE(buffer, RSTRING_PTR(string), RSTRING_LEN(string));
102
+ }
103
+ #endif
104
+
105
+ // this sucks. but for some reason these moved around between 1.8 and 1.9
106
+ #ifdef ONIGURUMA_H
107
+ #define IGNORECASE ONIG_OPTION_IGNORECASE
108
+ #define MULTILINE ONIG_OPTION_MULTILINE
109
+ #define EXTENDED ONIG_OPTION_EXTEND
110
+ #else
111
+ #define IGNORECASE RE_OPTION_IGNORECASE
112
+ #define MULTILINE RE_OPTION_MULTILINE
113
+ #define EXTENDED RE_OPTION_EXTENDED
114
+ #endif
115
+
116
/*
 * INT2STRING(buffer, i): format the int `i` in decimal into a freshly
 * allocated C string and store its address in `*buffer` (a char**).
 * The caller owns the result and must free() it.
 *
 * On allocation/formatting failure `*buffer` is set to NULL, so callers can
 * detect the failure instead of using an indeterminate pointer.
 *
 * Both variants expand to a single do { } while (0) statement.
 */
/* TODO maybe we can use something more portable like vsnprintf instead
 * of this hack. And share it with the Python extension ;) */
#ifndef HAVE_ASPRINTF
#define INT2STRING(buffer, i)                                   \
    do {                                                        \
        int vslength = _scprintf("%d", (i)) + 1;                \
        *(buffer) = malloc(vslength);                           \
        if (*(buffer) != NULL) {                                \
            _snprintf(*(buffer), vslength, "%d", (i));          \
        }                                                       \
    } while (0)
#else
#define INT2STRING(buffer, i)                                   \
    do {                                                        \
        if (asprintf((buffer), "%d", (i)) < 0) {                \
            *(buffer) = NULL;                                   \
        }                                                       \
    } while (0)
#endif
130
+
131
+ // this sucks too.
132
+ #ifndef RREGEXP_SRC
133
+ #define RREGEXP_SRC(r) rb_str_new(RREGEXP((r))->str, RREGEXP((r))->len)
134
+ #endif
135
+
136
+ static char zero = 0;
137
+ static char one = 1;
138
+
139
/*
 * qsort()-style comparator for single chars: negative, zero or positive
 * according to whether *a is less than, equal to or greater than *b.
 */
static int cmp_char(const void* a, const void* b) {
    const char lhs = *(const char*)a;
    const char rhs = *(const char*)b;
    return lhs - rhs;
}
142
+
143
+ static void write_doc(buffer_t buffer, VALUE hash, VALUE check_keys, VALUE move_id);
144
+ static int write_element(VALUE key, VALUE value, VALUE extra);
145
+ static VALUE elements_to_hash(const char* buffer, int max);
146
+
147
+ static VALUE pack_extra(buffer_t buffer, VALUE check_keys) {
148
+ return rb_ary_new3(2, LL2NUM((long long)buffer), check_keys);
149
+ }
150
+
151
+ static VALUE pack_triple(buffer_t buffer, VALUE check_keys, int allow_id) {
152
+ return rb_ary_new3(3, LL2NUM((long long)buffer), check_keys, allow_id);
153
+ }
154
+
155
+ static void write_name_and_type(buffer_t buffer, VALUE name, char type) {
156
+ SAFE_WRITE(buffer, &type, 1);
157
+ name = TO_UTF8(name);
158
+ write_utf8(buffer, name, 1);
159
+ SAFE_WRITE(buffer, &zero, 1);
160
+ }
161
+
162
+ static int write_element_allow_id(VALUE key, VALUE value, VALUE extra, int allow_id) {
163
+ buffer_t buffer = (buffer_t)NUM2LL(rb_ary_entry(extra, 0));
164
+ VALUE check_keys = rb_ary_entry(extra, 1);
165
+
166
+ if (TYPE(key) == T_SYMBOL) {
167
+ // TODO better way to do this... ?
168
+ key = rb_str_new2(rb_id2name(SYM2ID(key)));
169
+ }
170
+
171
+ if (TYPE(key) != T_STRING) {
172
+ buffer_free(buffer);
173
+ rb_raise(rb_eTypeError, "keys must be strings or symbols");
174
+ }
175
+
176
+ if (!allow_id && strcmp("_id", RSTRING_PTR(key)) == 0) {
177
+ return ST_CONTINUE;
178
+ }
179
+
180
+ if (check_keys == Qtrue) {
181
+ int i;
182
+ if (RSTRING_LEN(key) > 0 && RSTRING_PTR(key)[0] == '$') {
183
+ buffer_free(buffer);
184
+ rb_raise(InvalidName, "key must not start with '$'");
185
+ }
186
+ for (i = 0; i < RSTRING_LEN(key); i++) {
187
+ if (RSTRING_PTR(key)[i] == '.') {
188
+ buffer_free(buffer);
189
+ rb_raise(InvalidName, "key must not contain '.'");
190
+ }
191
+ }
192
+ }
193
+
194
+ switch(TYPE(value)) {
195
+ case T_BIGNUM:
196
+ case T_FIXNUM:
197
+ {
198
+ if (rb_funcall(value, rb_intern(">"), 1, LL2NUM(9223372036854775807LL)) == Qtrue ||
199
+ rb_funcall(value, rb_intern("<"), 1, LL2NUM(-9223372036854775808ULL)) == Qtrue) {
200
+ buffer_free(buffer);
201
+ rb_raise(rb_eRangeError, "MongoDB can only handle 8-byte ints");
202
+ }
203
+ if (rb_funcall(value, rb_intern(">"), 1, INT2NUM(2147483647L)) == Qtrue ||
204
+ rb_funcall(value, rb_intern("<"), 1, INT2NUM(-2147483648L)) == Qtrue) {
205
+ long long ll_value;
206
+ write_name_and_type(buffer, key, 0x12);
207
+ ll_value = NUM2LL(value);
208
+ SAFE_WRITE(buffer, (char*)&ll_value, 8);
209
+ } else {
210
+ int int_value;
211
+ write_name_and_type(buffer, key, 0x10);
212
+ int_value = NUM2LL(value);
213
+ SAFE_WRITE(buffer, (char*)&int_value, 4);
214
+ }
215
+ break;
216
+ }
217
+ case T_TRUE:
218
+ {
219
+ write_name_and_type(buffer, key, 0x08);
220
+ SAFE_WRITE(buffer, &one, 1);
221
+ break;
222
+ }
223
+ case T_FALSE:
224
+ {
225
+ write_name_and_type(buffer, key, 0x08);
226
+ SAFE_WRITE(buffer, &zero, 1);
227
+ break;
228
+ }
229
+ case T_FLOAT:
230
+ {
231
+ double d = NUM2DBL(value);
232
+ write_name_and_type(buffer, key, 0x01);
233
+ SAFE_WRITE(buffer, (char*)&d, 8);
234
+ break;
235
+ }
236
+ case T_NIL:
237
+ {
238
+ write_name_and_type(buffer, key, 0x0A);
239
+ break;
240
+ }
241
+ case T_HASH:
242
+ {
243
+ write_name_and_type(buffer, key, 0x03);
244
+ write_doc(buffer, value, check_keys, Qfalse);
245
+ break;
246
+ }
247
+ case T_ARRAY:
248
+ {
249
+ buffer_position length_location, start_position, obj_length;
250
+ int items, i;
251
+ VALUE* values;
252
+
253
+ write_name_and_type(buffer, key, 0x04);
254
+ start_position = buffer_get_position(buffer);
255
+
256
+ // save space for length
257
+ length_location = buffer_save_space(buffer, 4);
258
+ if (length_location == -1) {
259
+ rb_raise(rb_eNoMemError, "failed to allocate memory in buffer.c");
260
+ }
261
+
262
+ items = RARRAY_LEN(value);
263
+ values = RARRAY_PTR(value);
264
+ for(i = 0; i < items; i++) {
265
+ char* name;
266
+ VALUE key;
267
+ INT2STRING(&name, i);
268
+ key = rb_str_new2(name);
269
+ write_element(key, values[i], pack_extra(buffer, check_keys));
270
+ free(name);
271
+ }
272
+
273
+ // write null byte and fill in length
274
+ SAFE_WRITE(buffer, &zero, 1);
275
+ obj_length = buffer_get_position(buffer) - start_position;
276
+ SAFE_WRITE_AT_POS(buffer, length_location, (const char*)&obj_length, 4);
277
+ break;
278
+ }
279
+ case T_STRING:
280
+ {
281
+ if (strcmp(rb_class2name(RBASIC(value)->klass),
282
+ "Mongo::Code") == 0) {
283
+ buffer_position length_location, start_position, total_length;
284
+ int length;
285
+ write_name_and_type(buffer, key, 0x0F);
286
+
287
+ start_position = buffer_get_position(buffer);
288
+ length_location = buffer_save_space(buffer, 4);
289
+ if (length_location == -1) {
290
+ rb_raise(rb_eNoMemError, "failed to allocate memory in buffer.c");
291
+ }
292
+
293
+ length = RSTRING_LEN(value) + 1;
294
+ SAFE_WRITE(buffer, (char*)&length, 4);
295
+ SAFE_WRITE(buffer, RSTRING_PTR(value), length - 1);
296
+ SAFE_WRITE(buffer, &zero, 1);
297
+ write_doc(buffer, rb_funcall(value, rb_intern("scope"), 0), Qfalse, Qfalse);
298
+
299
+ total_length = buffer_get_position(buffer) - start_position;
300
+ SAFE_WRITE_AT_POS(buffer, length_location, (const char*)&total_length, 4);
301
+ break;
302
+ } else {
303
+ int length;
304
+ write_name_and_type(buffer, key, 0x02);
305
+ value = TO_UTF8(value);
306
+ length = RSTRING_LEN(value) + 1;
307
+ SAFE_WRITE(buffer, (char*)&length, 4);
308
+ write_utf8(buffer, value, 0);
309
+ SAFE_WRITE(buffer, &zero, 1);
310
+ break;
311
+ }
312
+ }
313
+ case T_SYMBOL:
314
+ {
315
+ const char* str_value = rb_id2name(SYM2ID(value));
316
+ int length = strlen(str_value) + 1;
317
+ write_name_and_type(buffer, key, 0x0E);
318
+ SAFE_WRITE(buffer, (char*)&length, 4);
319
+ SAFE_WRITE(buffer, str_value, length);
320
+ break;
321
+ }
322
+ case T_OBJECT:
323
+ {
324
+ // TODO there has to be a better way to do these checks...
325
+ const char* cls = rb_class2name(RBASIC(value)->klass);
326
+ if (strcmp(cls, "Mongo::Binary") == 0 ||
327
+ strcmp(cls, "ByteBuffer") == 0) {
328
+ const char subtype = strcmp(cls, "ByteBuffer") ?
329
+ (const char)FIX2INT(rb_funcall(value, rb_intern("subtype"), 0)) : 2;
330
+ VALUE string_data = rb_funcall(value, rb_intern("to_s"), 0);
331
+ int length = RSTRING_LEN(string_data);
332
+ write_name_and_type(buffer, key, 0x05);
333
+ if (subtype == 2) {
334
+ const int other_length = length + 4;
335
+ SAFE_WRITE(buffer, (const char*)&other_length, 4);
336
+ SAFE_WRITE(buffer, &subtype, 1);
337
+ }
338
+ SAFE_WRITE(buffer, (const char*)&length, 4);
339
+ if (subtype != 2) {
340
+ SAFE_WRITE(buffer, &subtype, 1);
341
+ }
342
+ SAFE_WRITE(buffer, RSTRING_PTR(string_data), length);
343
+ break;
344
+ }
345
+ if (strcmp(cls, "Mongo::ObjectID") == 0) {
346
+ VALUE as_array = rb_funcall(value, rb_intern("to_a"), 0);
347
+ int i;
348
+ write_name_and_type(buffer, key, 0x07);
349
+ for (i = 0; i < 12; i++) {
350
+ char byte = (char)FIX2INT(RARRAY_PTR(as_array)[i]);
351
+ SAFE_WRITE(buffer, &byte, 1);
352
+ }
353
+ break;
354
+ }
355
+ if (strcmp(cls, "Mongo::DBRef") == 0) {
356
+ buffer_position length_location, start_position, obj_length;
357
+ VALUE ns, oid;
358
+ write_name_and_type(buffer, key, 0x03);
359
+
360
+ start_position = buffer_get_position(buffer);
361
+
362
+ // save space for length
363
+ length_location = buffer_save_space(buffer, 4);
364
+ if (length_location == -1) {
365
+ rb_raise(rb_eNoMemError, "failed to allocate memory in buffer.c");
366
+ }
367
+
368
+ ns = rb_funcall(value, rb_intern("namespace"), 0);
369
+ write_element(rb_str_new2("$ref"), ns, pack_extra(buffer, Qfalse));
370
+ oid = rb_funcall(value, rb_intern("object_id"), 0);
371
+ write_element(rb_str_new2("$id"), oid, pack_extra(buffer, Qfalse));
372
+
373
+ // write null byte and fill in length
374
+ SAFE_WRITE(buffer, &zero, 1);
375
+ obj_length = buffer_get_position(buffer) - start_position;
376
+ SAFE_WRITE_AT_POS(buffer, length_location, (const char*)&obj_length, 4);
377
+ break;
378
+ }
379
+ if (strcmp(cls, "Mongo::MaxKey") == 0) {
380
+ write_name_and_type(buffer, key, 0x7f);
381
+ break;
382
+ }
383
+ if (strcmp(cls, "Mongo::MinKey") == 0) {
384
+ write_name_and_type(buffer, key, 0xff);
385
+ break;
386
+ }
387
+ if (strcmp(cls, "DateTime") == 0 || strcmp(cls, "Date") == 0 || strcmp(cls, "ActiveSupport::TimeWithZone") == 0) {
388
+ buffer_free(buffer);
389
+ rb_raise(InvalidDocument, "%s is not currently supported; use a UTC Time instance instead.", cls);
390
+ break;
391
+ }
392
+ if(strcmp(cls, "Complex") == 0 || strcmp(cls, "Rational") == 0 || strcmp(cls, "BigDecimal") == 0) {
393
+ buffer_free(buffer);
394
+ rb_raise(InvalidDocument, "Cannot serialize the Numeric type %s as BSON; only Bignum, Fixnum, and Float are supported.", cls);
395
+ break;
396
+ }
397
+ buffer_free(buffer);
398
+ rb_raise(InvalidDocument, "Cannot serialize an object of class %s into BSON.", cls);
399
+ break;
400
+ }
401
+ case T_DATA:
402
+ {
403
+ const char* cls = rb_class2name(RBASIC(value)->klass);
404
+ if (strcmp(cls, "Time") == 0) {
405
+ double t = NUM2DBL(rb_funcall(value, rb_intern("to_f"), 0));
406
+ long long time_since_epoch = (long long)round(t * 1000);
407
+ write_name_and_type(buffer, key, 0x09);
408
+ SAFE_WRITE(buffer, (const char*)&time_since_epoch, 8);
409
+ break;
410
+ }
411
+ if(strcmp(cls, "BigDecimal") == 0) {
412
+ buffer_free(buffer);
413
+ rb_raise(InvalidDocument, "Cannot serialize the Numeric type %s as BSON; only Bignum, Fixnum, and Float are supported.", cls);
414
+ break;
415
+ }
416
+ buffer_free(buffer);
417
+ rb_raise(InvalidDocument, "Cannot serialize an object of class %s into BSON.", cls);
418
+ break;
419
+ }
420
+ case T_REGEXP:
421
+ {
422
+ VALUE pattern = RREGEXP_SRC(value);
423
+ long flags = RREGEXP(value)->ptr->options;
424
+ VALUE has_extra;
425
+
426
+ write_name_and_type(buffer, key, 0x0B);
427
+
428
+ pattern = TO_UTF8(pattern);
429
+ write_utf8(buffer, pattern, 1);
430
+ SAFE_WRITE(buffer, &zero, 1);
431
+
432
+ if (flags & IGNORECASE) {
433
+ char ignorecase = 'i';
434
+ SAFE_WRITE(buffer, &ignorecase, 1);
435
+ }
436
+ if (flags & MULTILINE) {
437
+ char multiline = 'm';
438
+ SAFE_WRITE(buffer, &multiline, 1);
439
+ }
440
+ if (flags & EXTENDED) {
441
+ char extended = 'x';
442
+ SAFE_WRITE(buffer, &extended, 1);
443
+ }
444
+
445
+ has_extra = rb_funcall(value, rb_intern("respond_to?"), 1, rb_str_new2("extra_options_str"));
446
+ if (TYPE(has_extra) == T_TRUE) {
447
+ VALUE extra = rb_funcall(value, rb_intern("extra_options_str"), 0);
448
+ buffer_position old_position = buffer_get_position(buffer);
449
+ SAFE_WRITE(buffer, RSTRING_PTR(extra), RSTRING_LEN(extra));
450
+ qsort(buffer_get_buffer(buffer) + old_position, RSTRING_LEN(extra), sizeof(char), cmp_char);
451
+ }
452
+ SAFE_WRITE(buffer, &zero, 1);
453
+
454
+ break;
455
+ }
456
+ default:
457
+ {
458
+ const char* cls = rb_class2name(RBASIC(value)->klass);
459
+ buffer_free(buffer);
460
+ rb_raise(InvalidDocument, "Cannot serialize an object of class %s (type %d) into BSON.", cls, TYPE(value));
461
+ break;
462
+ }
463
+ }
464
+ return ST_CONTINUE;
465
+ }
466
+
467
+ static int write_element(VALUE key, VALUE value, VALUE extra) {
468
+ return write_element_allow_id(key, value, extra, 0);
469
+ }
470
+
471
+ static void write_doc(buffer_t buffer, VALUE hash, VALUE check_keys, VALUE move_id) {
472
+ buffer_position start_position = buffer_get_position(buffer);
473
+ buffer_position length_location = buffer_save_space(buffer, 4);
474
+ buffer_position length;
475
+ int allow_id;
476
+ VALUE id_str = rb_str_new2("_id");
477
+ VALUE id_sym = ID2SYM(rb_intern("_id"));
478
+
479
+ if (length_location == -1) {
480
+ rb_raise(rb_eNoMemError, "failed to allocate memory in buffer.c");
481
+ }
482
+
483
+ // write '_id' first if move_id is true
484
+ if(move_id == Qtrue) {
485
+ allow_id = 0;
486
+ if (rb_funcall(hash, rb_intern("has_key?"), 1, id_str) == Qtrue) {
487
+ VALUE id = rb_hash_aref(hash, id_str);
488
+ write_element_allow_id(id_str, id, pack_extra(buffer, check_keys), 1);
489
+ } else if (rb_funcall(hash, rb_intern("has_key?"), 1, id_sym) == Qtrue) {
490
+ VALUE id = rb_hash_aref(hash, id_sym);
491
+ write_element_allow_id(id_sym, id, pack_extra(buffer, check_keys), 1);
492
+ }
493
+ }
494
+ else {
495
+ allow_id = 1;
496
+ if ((rb_funcall(hash, rb_intern("has_key?"), 1, id_str) == Qtrue) &&
497
+ (rb_funcall(hash, rb_intern("has_key?"), 1, id_sym) == Qtrue)) {
498
+ VALUE obj = rb_hash_delete(hash, id_str);
499
+ }
500
+ }
501
+
502
+ // we have to check for an OrderedHash and handle that specially
503
+ if (strcmp(rb_class2name(RBASIC(hash)->klass), "OrderedHash") == 0) {
504
+ VALUE keys = rb_funcall(hash, rb_intern("keys"), 0);
505
+ int i;
506
+ for(i = 0; i < RARRAY_LEN(keys); i++) {
507
+ VALUE key = RARRAY_PTR(keys)[i];
508
+ VALUE value = rb_hash_aref(hash, key);
509
+
510
+ write_element_allow_id(key, value, pack_extra(buffer, check_keys), allow_id);
511
+ }
512
+ } else {
513
+ rb_hash_foreach(hash, write_element_allow_id, pack_triple(buffer, check_keys, allow_id));
514
+ }
515
+
516
+ // write null byte and fill in length
517
+ SAFE_WRITE(buffer, &zero, 1);
518
+ length = buffer_get_position(buffer) - start_position;
519
+
520
+ // make sure that length doesn't exceed 4MB
521
+ if (length > 4 * 1024 * 1024) {
522
+ buffer_free(buffer);
523
+ rb_raise(InvalidDocument, "Document too large: BSON documents are limited to 4MB.");
524
+ return;
525
+ }
526
+ SAFE_WRITE_AT_POS(buffer, length_location, (const char*)&length, 4);
527
+ }
528
+
529
+ static VALUE method_serialize(VALUE self, VALUE doc, VALUE check_keys, VALUE move_id) {
530
+ VALUE result;
531
+ buffer_t buffer = buffer_new();
532
+ if (buffer == NULL) {
533
+ rb_raise(rb_eNoMemError, "failed to allocate memory in buffer.c");
534
+ }
535
+
536
+ write_doc(buffer, doc, check_keys, move_id);
537
+
538
+ result = rb_str_new(buffer_get_buffer(buffer), buffer_get_position(buffer));
539
+ if (buffer_free(buffer) != 0) {
540
+ rb_raise(rb_eRuntimeError, "failed to free buffer");
541
+ }
542
+ return result;
543
+ }
544
+
545
+ static VALUE get_value(const char* buffer, int* position, int type) {
546
+ VALUE value;
547
+ switch (type) {
548
+ case -1:
549
+ {
550
+ value = rb_class_new_instance(0, NULL, MinKey);
551
+ break;
552
+ }
553
+ case 1:
554
+ {
555
+ double d;
556
+ memcpy(&d, buffer + *position, 8);
557
+ value = rb_float_new(d);
558
+ *position += 8;
559
+ break;
560
+ }
561
+ case 2:
562
+ case 13:
563
+ {
564
+ int value_length;
565
+ value_length = *(int*)(buffer + *position) - 1;
566
+ *position += 4;
567
+ value = STR_NEW(buffer + *position, value_length);
568
+ *position += value_length + 1;
569
+ break;
570
+ }
571
+ case 3:
572
+ {
573
+ int size;
574
+ memcpy(&size, buffer + *position, 4);
575
+ if (strcmp(buffer + *position + 5, "$ref") == 0) { // DBRef
576
+ int offset = *position + 10;
577
+ VALUE argv[2];
578
+ int collection_length = *(int*)(buffer + offset) - 1;
579
+ char id_type;
580
+ offset += 4;
581
+
582
+ argv[0] = STR_NEW(buffer + offset, collection_length);
583
+ offset += collection_length + 1;
584
+ id_type = buffer[offset];
585
+ offset += 5;
586
+ argv[1] = get_value(buffer, &offset, (int)id_type);
587
+ value = rb_class_new_instance(2, argv, DBRef);
588
+ } else {
589
+ value = elements_to_hash(buffer + *position + 4, size - 5);
590
+ }
591
+ *position += size;
592
+ break;
593
+ }
594
+ case 4:
595
+ {
596
+ int size, end;
597
+ memcpy(&size, buffer + *position, 4);
598
+ end = *position + size - 1;
599
+ *position += 4;
600
+
601
+ value = rb_ary_new();
602
+ while (*position < end) {
603
+ int type = (int)buffer[(*position)++];
604
+ int key_size = strlen(buffer + *position);
605
+ VALUE to_append;
606
+
607
+ *position += key_size + 1; // just skip the key, they're in order.
608
+ to_append = get_value(buffer, position, type);
609
+ rb_ary_push(value, to_append);
610
+ }
611
+ (*position)++;
612
+ break;
613
+ }
614
+ case 5:
615
+ {
616
+ int length, subtype;
617
+ VALUE data, st;
618
+ VALUE argv[2];
619
+ memcpy(&length, buffer + *position, 4);
620
+ subtype = (unsigned char)buffer[*position + 4];
621
+ if (subtype == 2) {
622
+ data = rb_str_new(buffer + *position + 9, length - 4);
623
+ } else {
624
+ data = rb_str_new(buffer + *position + 5, length);
625
+ }
626
+ st = INT2FIX(subtype);
627
+ argv[0] = data;
628
+ argv[1] = st;
629
+ value = rb_class_new_instance(2, argv, Binary);
630
+ *position += length + 5;
631
+ break;
632
+ }
633
+ case 6:
634
+ {
635
+ value = Qnil;
636
+ break;
637
+ }
638
+ case 7:
639
+ {
640
+ VALUE str = rb_str_new(buffer + *position, 12);
641
+ VALUE oid = rb_funcall(str, rb_intern("unpack"), 1, rb_str_new2("C*"));
642
+ value = rb_class_new_instance(1, &oid, ObjectID);
643
+ *position += 12;
644
+ break;
645
+ }
646
+ case 8:
647
+ {
648
+ value = buffer[(*position)++] ? Qtrue : Qfalse;
649
+ break;
650
+ }
651
+ case 9:
652
+ {
653
+ long long millis;
654
+ VALUE seconds, microseconds;
655
+ memcpy(&millis, buffer + *position, 8);
656
+ seconds = LL2NUM(millis / 1000);
657
+ microseconds = INT2NUM((millis % 1000) * 1000);
658
+
659
+ value = rb_funcall(Time, rb_intern("at"), 2, seconds, microseconds);
660
+ value = rb_funcall(value, rb_intern("utc"), 0);
661
+ *position += 8;
662
+ break;
663
+ }
664
+ case 10:
665
+ {
666
+ value = Qnil;
667
+ break;
668
+ }
669
+ case 11:
670
+ {
671
+ int pattern_length = strlen(buffer + *position);
672
+ VALUE pattern = STR_NEW(buffer + *position, pattern_length);
673
+ int flags_length, flags = 0, i = 0;
674
+ char extra[10];
675
+ VALUE argv[3];
676
+ *position += pattern_length + 1;
677
+
678
+ flags_length = strlen(buffer + *position);
679
+ extra[0] = 0;
680
+ for (i = 0; i < flags_length; i++) {
681
+ char flag = buffer[*position + i];
682
+ if (flag == 'i') {
683
+ flags |= IGNORECASE;
684
+ }
685
+ else if (flag == 'm') {
686
+ flags |= MULTILINE;
687
+ }
688
+ else if (flag == 'x') {
689
+ flags |= EXTENDED;
690
+ }
691
+ else if (strlen(extra) < 9) {
692
+ strncat(extra, &flag, 1);
693
+ }
694
+ }
695
+ argv[0] = pattern;
696
+ argv[1] = INT2FIX(flags);
697
+ if(extra[0] == 0) {
698
+ value = rb_class_new_instance(2, argv, Regexp);
699
+ }
700
+ else { // Deserializing a RegexpOfHolding
701
+ argv[2] = rb_str_new2(extra);
702
+ value = rb_class_new_instance(3, argv, RegexpOfHolding);
703
+ }
704
+ *position += flags_length + 1;
705
+ break;
706
+ }
707
+ case 12:
708
+ {
709
+ int collection_length;
710
+ VALUE collection, str, oid, id, argv[2];
711
+ collection_length = *(int*)(buffer + *position) - 1;
712
+ *position += 4;
713
+ collection = STR_NEW(buffer + *position, collection_length);
714
+ *position += collection_length + 1;
715
+
716
+ str = rb_str_new(buffer + *position, 12);
717
+ oid = rb_funcall(str, rb_intern("unpack"), 1, rb_str_new2("C*"));
718
+ id = rb_class_new_instance(1, &oid, ObjectID);
719
+ *position += 12;
720
+
721
+ argv[0] = collection;
722
+ argv[1] = id;
723
+ value = rb_class_new_instance(2, argv, DBRef);
724
+ break;
725
+ }
726
+ case 14:
727
+ {
728
+ int value_length;
729
+ memcpy(&value_length, buffer + *position, 4);
730
+ value = ID2SYM(rb_intern(buffer + *position + 4));
731
+ *position += value_length + 4;
732
+ break;
733
+ }
734
+ case 15:
735
+ {
736
+ int code_length, scope_size;
737
+ VALUE code, scope, argv[2];
738
+ *position += 4;
739
+ code_length = *(int*)(buffer + *position) - 1;
740
+ *position += 4;
741
+ code = STR_NEW(buffer + *position, code_length);
742
+ *position += code_length + 1;
743
+
744
+ memcpy(&scope_size, buffer + *position, 4);
745
+ scope = elements_to_hash(buffer + *position + 4, scope_size - 5);
746
+ *position += scope_size;
747
+
748
+ argv[0] = code;
749
+ argv[1] = scope;
750
+ value = rb_class_new_instance(2, argv, Code);
751
+ break;
752
+ }
753
+ case 16:
754
+ {
755
+ int i;
756
+ memcpy(&i, buffer + *position, 4);
757
+ value = LL2NUM(i);
758
+ *position += 4;
759
+ break;
760
+ }
761
+ case 17:
762
+ {
763
+ int i;
764
+ int j;
765
+ memcpy(&i, buffer + *position, 4);
766
+ memcpy(&j, buffer + *position + 4, 4);
767
+ value = rb_ary_new3(2, LL2NUM(i), LL2NUM(j));
768
+ *position += 8;
769
+ break;
770
+ }
771
+ case 18:
772
+ {
773
+ long long ll;
774
+ memcpy(&ll, buffer + *position, 8);
775
+ value = LL2NUM(ll);
776
+ *position += 8;
777
+ break;
778
+ }
779
+ case 127:
780
+ {
781
+ value = rb_class_new_instance(0, NULL, MaxKey);
782
+ break;
783
+ }
784
+ default:
785
+ {
786
+ rb_raise(rb_eTypeError, "no c decoder for this type yet (%d)", type);
787
+ break;
788
+ }
789
+ }
790
+ return value;
791
+ }
792
+
793
+ static VALUE elements_to_hash(const char* buffer, int max) {
794
+ VALUE hash = rb_class_new_instance(0, NULL, OrderedHash);
795
+ int position = 0;
796
+ while (position < max) {
797
+ int type = (int)buffer[position++];
798
+ int name_length = strlen(buffer + position);
799
+ VALUE name = STR_NEW(buffer + position, name_length);
800
+ VALUE value;
801
+ position += name_length + 1;
802
+ value = get_value(buffer, &position, type);
803
+ rb_funcall(hash, rb_intern("[]="), 2, name, value);
804
+ }
805
+ return hash;
806
+ }
807
+
808
+ static VALUE method_deserialize(VALUE self, VALUE bson) {
809
+ const char* buffer = RSTRING_PTR(bson);
810
+ int remaining = RSTRING_LEN(bson);
811
+
812
+ // NOTE we just swallow the size and end byte here
813
+ buffer += 4;
814
+ remaining -= 5;
815
+
816
+ return elements_to_hash(buffer, remaining);
817
+ }
818
+
819
+
820
+ static VALUE fast_pack(VALUE self)
821
+ {
822
+ VALUE res;
823
+ long i;
824
+ char c;
825
+
826
+ res = rb_str_buf_new(0);
827
+
828
+ for (i = 0; i < RARRAY_LEN(self); i++) {
829
+ c = FIX2LONG(RARRAY_PTR(self)[i]);
830
+ rb_str_buf_cat(res, &c, sizeof(char));
831
+ }
832
+
833
+ return res;
834
+ }
835
+
836
+
837
+ static VALUE objectid_generate(VALUE self)
838
+ {
839
+ VALUE oid, digest;
840
+ char hostname[MAX_HOSTNAME_LENGTH];
841
+ unsigned char oid_bytes[12];
842
+ unsigned long t, inc;
843
+ unsigned short pid;
844
+ int i;
845
+
846
+ t = htonl(time(NULL));
847
+ MEMCPY(&oid_bytes, &t, unsigned char, 4);
848
+
849
+ if (gethostname(hostname, MAX_HOSTNAME_LENGTH) != 0) {
850
+ rb_raise(rb_eRuntimeError, "failed to get hostname");
851
+ }
852
+ digest = rb_funcall(DigestMD5, rb_intern("digest"), 1, rb_str_new2(hostname));
853
+ MEMCPY(&oid_bytes[4], RSTRING_PTR(digest), unsigned char, 3);
854
+
855
+ pid = htons(getpid());
856
+ MEMCPY(&oid_bytes[7], &pid, unsigned char, 2);
857
+
858
+ inc = htonl(FIX2ULONG(rb_funcall(self, rb_intern("get_inc"), 0)));
859
+ MEMCPY(&oid_bytes[9], ((unsigned char*)&inc + 1), unsigned char, 3);
860
+
861
+ oid = rb_ary_new2(12);
862
+ for(i = 0; i < 12; i++) {
863
+ rb_ary_store(oid, i, INT2FIX((unsigned int)oid_bytes[i]));
864
+ }
865
+ return oid;
866
+ }
867
+
868
+
869
+ void Init_cbson() {
870
+ VALUE mongo, CBson, Digest, ext_version;
871
+ Time = rb_const_get(rb_cObject, rb_intern("Time"));
872
+
873
+ mongo = rb_const_get(rb_cObject, rb_intern("Mongo"));
874
+ rb_require("mongo/types/binary");
875
+ Binary = rb_const_get(mongo, rb_intern("Binary"));
876
+ rb_require("mongo/types/objectid");
877
+ ObjectID = rb_const_get(mongo, rb_intern("ObjectID"));
878
+ rb_require("mongo/types/dbref");
879
+ DBRef = rb_const_get(mongo, rb_intern("DBRef"));
880
+ rb_require("mongo/types/code");
881
+ Code = rb_const_get(mongo, rb_intern("Code"));
882
+ rb_require("mongo/types/min_max_keys");
883
+ MinKey = rb_const_get(mongo, rb_intern("MinKey"));
884
+ MaxKey = rb_const_get(mongo, rb_intern("MaxKey"));
885
+ rb_require("mongo/types/regexp_of_holding");
886
+ Regexp = rb_const_get(rb_cObject, rb_intern("Regexp"));
887
+ RegexpOfHolding = rb_const_get(mongo, rb_intern("RegexpOfHolding"));
888
+ rb_require("mongo/exceptions");
889
+ InvalidName = rb_const_get(mongo, rb_intern("InvalidName"));
890
+ InvalidStringEncoding = rb_const_get(mongo, rb_intern("InvalidStringEncoding"));
891
+ InvalidDocument = rb_const_get(mongo, rb_intern("InvalidDocument"));
892
+ rb_require("mongo/util/ordered_hash");
893
+ OrderedHash = rb_const_get(rb_cObject, rb_intern("OrderedHash"));
894
+
895
+ CBson = rb_define_module("CBson");
896
+ ext_version = rb_str_new2(VERSION);
897
+ rb_define_const(CBson, "VERSION", ext_version);
898
+ rb_define_module_function(CBson, "serialize", method_serialize, 3);
899
+ rb_define_module_function(CBson, "deserialize", method_deserialize, 1);
900
+
901
+ rb_require("digest/md5");
902
+ Digest = rb_const_get(rb_cObject, rb_intern("Digest"));
903
+ DigestMD5 = rb_const_get(Digest, rb_intern("MD5"));
904
+
905
+ rb_define_method(ObjectID, "generate", objectid_generate, 0);
906
+
907
+ rb_define_method(rb_cArray, "fast_pack", fast_pack, 0);
908
+ }