kbaum-mongo_ext 0.18.3p
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +138 -0
- data/ext/cbson/buffer.c +135 -0
- data/ext/cbson/buffer.h +55 -0
- data/ext/cbson/cbson.c +908 -0
- data/ext/cbson/encoding_helpers.c +118 -0
- data/ext/cbson/encoding_helpers.h +29 -0
- data/ext/cbson/extconf.rb +10 -0
- data/ext/cbson/version.h +17 -0
- data/mongo-extensions.gemspec +23 -0
- metadata +63 -0
data/ext/cbson/cbson.c
ADDED
@@ -0,0 +1,908 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright 2009 10gen, Inc.
|
3
|
+
*
|
4
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
* you may not use this file except in compliance with the License.
|
6
|
+
* You may obtain a copy of the License at
|
7
|
+
*
|
8
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
*
|
10
|
+
* Unless required by applicable law or agreed to in writing, software
|
11
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
* See the License for the specific language governing permissions and
|
14
|
+
* limitations under the License.
|
15
|
+
*/
|
16
|
+
|
17
|
+
/*
|
18
|
+
* This file contains C implementations of some of the functions needed by the
|
19
|
+
* bson module. If possible, these implementations should be used to speed up
|
20
|
+
* BSON encoding and decoding.
|
21
|
+
*/
|
22
|
+
|
23
|
+
#include "ruby.h"
|
24
|
+
|
25
|
+
#if HAVE_RUBY_ST_H
|
26
|
+
#include "ruby/st.h"
|
27
|
+
#endif
|
28
|
+
#if HAVE_ST_H
|
29
|
+
#include "st.h"
|
30
|
+
#endif
|
31
|
+
|
32
|
+
#if HAVE_RUBY_REGEX_H
|
33
|
+
#include "ruby/regex.h"
|
34
|
+
#endif
|
35
|
+
#if HAVE_REGEX_H
|
36
|
+
#include "regex.h"
|
37
|
+
#endif
|
38
|
+
|
39
|
+
#include <string.h>
|
40
|
+
#include <math.h>
|
41
|
+
#include <unistd.h>
|
42
|
+
#include <time.h>
|
43
|
+
|
44
|
+
#include "version.h"
|
45
|
+
#include "buffer.h"
|
46
|
+
#include "encoding_helpers.h"
|
47
|
+
|
48
|
+
/*
 * Append `size` bytes from `data` to `buffer`, raising NoMemoryError when
 * buffer_write reports failure (non-zero return).
 *
 * Wrapped in do/while(0) so that the macro expands to exactly one statement
 * and cannot capture a following `else` when used in an unbraced `if`.
 */
#define SAFE_WRITE(buffer, data, size)                                         \
    do {                                                                       \
        if (buffer_write((buffer), (data), (size)) != 0) {                     \
            rb_raise(rb_eNoMemError, "failed to allocate memory in buffer.c"); \
        }                                                                      \
    } while (0)
|
51
|
+
|
52
|
+
/*
 * Overwrite `size` bytes at `position` in `buffer` with `data`, raising
 * RuntimeError when buffer_write_at_position reports failure (non-zero
 * return).
 *
 * Wrapped in do/while(0) so that the macro expands to exactly one statement
 * and cannot capture a following `else` when used in an unbraced `if`.
 */
#define SAFE_WRITE_AT_POS(buffer, position, data, size)                            \
    do {                                                                           \
        if (buffer_write_at_position((buffer), (position), (data), (size)) != 0) { \
            rb_raise(rb_eRuntimeError, "invalid write at position in buffer.c");   \
        }                                                                          \
    } while (0)
|
55
|
+
|
56
|
+
/* Size in bytes of the stack buffer that objectid_generate hands to gethostname(). */
#define MAX_HOSTNAME_LENGTH 256
|
57
|
+
|
58
|
+
/*
 * Ruby classes, exception classes and modules used by the encoder and
 * decoder. All of them are assigned once in Init_cbson.
 */
static VALUE Binary, Time, ObjectID, DBRef, Code;
static VALUE MinKey, MaxKey;
static VALUE Regexp, RegexpOfHolding;
static VALUE OrderedHash;
static VALUE InvalidName, InvalidStringEncoding, InvalidDocument;
static VALUE DigestMD5;
|
72
|
+
|
73
|
+
#if HAVE_RUBY_ENCODING_H
|
74
|
+
#include "ruby/encoding.h"
|
75
|
+
#define STR_NEW(p,n) rb_enc_str_new((p), (n), rb_utf8_encoding())
|
76
|
+
/* MUST call TO_UTF8 before calling write_utf8. */
|
77
|
+
#define TO_UTF8(string) rb_str_export_to_enc((string), rb_utf8_encoding())
|
78
|
+
static void write_utf8(buffer_t buffer, VALUE string, char check_null) {
|
79
|
+
result_t status = check_string(RSTRING_PTR(string), RSTRING_LEN(string),
|
80
|
+
0, check_null);
|
81
|
+
if (status == HAS_NULL) {
|
82
|
+
buffer_free(buffer);
|
83
|
+
rb_raise(InvalidDocument, "Key names / regex patterns must not contain the NULL byte");
|
84
|
+
}
|
85
|
+
SAFE_WRITE(buffer, RSTRING_PTR(string), RSTRING_LEN(string));
|
86
|
+
}
|
87
|
+
#else
|
88
|
+
#define STR_NEW(p,n) rb_str_new((p), (n))
|
89
|
+
/* MUST call TO_UTF8 before calling write_utf8. */
|
90
|
+
#define TO_UTF8(string) (string)
|
91
|
+
static void write_utf8(buffer_t buffer, VALUE string, char check_null) {
|
92
|
+
result_t status = check_string(RSTRING_PTR(string), RSTRING_LEN(string),
|
93
|
+
1, check_null);
|
94
|
+
if (status == HAS_NULL) {
|
95
|
+
buffer_free(buffer);
|
96
|
+
rb_raise(InvalidDocument, "Key names / regex patterns must not contain the NULL byte");
|
97
|
+
} else if (status == NOT_UTF_8) {
|
98
|
+
buffer_free(buffer);
|
99
|
+
rb_raise(InvalidStringEncoding, "String not valid UTF-8");
|
100
|
+
}
|
101
|
+
SAFE_WRITE(buffer, RSTRING_PTR(string), RSTRING_LEN(string));
|
102
|
+
}
|
103
|
+
#endif
|
104
|
+
|
105
|
+
/*
 * The regexp option constants have different names depending on which regex
 * header was pulled in (they moved around between Ruby 1.8 and 1.9), so map
 * them onto one set of names used by the rest of this file.
 */
#if defined(ONIGURUMA_H)
#define IGNORECASE ONIG_OPTION_IGNORECASE
#define MULTILINE ONIG_OPTION_MULTILINE
#define EXTENDED ONIG_OPTION_EXTEND
#else
#define IGNORECASE RE_OPTION_IGNORECASE
#define MULTILINE RE_OPTION_MULTILINE
#define EXTENDED RE_OPTION_EXTENDED
#endif
|
115
|
+
|
116
|
+
/*
 * INT2STRING(buffer, i): format the int `i` in decimal into a freshly
 * allocated, NUL-terminated string and store its address in *buffer
 * (`buffer` is a char**). The caller owns the string and must free() it.
 *
 * If the allocation fails, *buffer is set to NULL so the caller can detect
 * the failure instead of using an indeterminate pointer.
 *
 * Both variants are wrapped in do/while(0) so that the macro expands to a
 * single statement regardless of which branch is compiled.
 */
/* TODO maybe we can use something more portable like vsnprintf instead
 * of this hack. And share it with the Python extension ;) */
#ifndef HAVE_ASPRINTF
#define INT2STRING(buffer, i)                              \
    do {                                                   \
        int vslength = _scprintf("%d", (i)) + 1;           \
        *(buffer) = malloc(vslength);                      \
        if (*(buffer) != NULL) {                           \
            _snprintf(*(buffer), vslength, "%d", (i));     \
        }                                                  \
    } while (0)
#else
#define INT2STRING(buffer, i)                              \
    do {                                                   \
        if (asprintf((buffer), "%d", (i)) < 0) {           \
            *(buffer) = NULL;                              \
        }                                                  \
    } while (0)
#endif
|
130
|
+
|
131
|
+
/*
 * Fallback for Ruby headers that do not provide RREGEXP_SRC: build a new
 * String from the regexp's stored source pointer and length.
 */
#if !defined(RREGEXP_SRC)
#define RREGEXP_SRC(r) rb_str_new(RREGEXP((r))->str, RREGEXP((r))->len)
#endif
|
135
|
+
|
136
|
+
/*
 * Single bytes with the values 0 and 1. Their addresses are passed to
 * SAFE_WRITE to emit terminators and boolean payloads.
 */
static char zero = '\0';
static char one = '\1';
|
138
|
+
|
139
|
+
/*
 * qsort comparator ordering single chars ascending.
 * Returns the difference of the two pointed-to chars (negative, zero or
 * positive).
 */
static int cmp_char(const void* a, const void* b) {
    char lhs = *(char*)a;
    char rhs = *(char*)b;

    return lhs - rhs;
}
|
142
|
+
|
143
|
+
/* Forward declarations: the encoder and decoder routines below are mutually recursive. */
static void write_doc(buffer_t buffer, VALUE hash, VALUE check_keys, VALUE move_id);
static int write_element(VALUE key, VALUE value, VALUE extra);
static VALUE elements_to_hash(const char* buffer, int max);
|
146
|
+
|
147
|
+
/*
 * Bundle the state an element writer needs into one Ruby array:
 * [buffer pointer boxed as an Integer, check_keys].
 * write_element_allow_id unpacks entries 0 and 1 of this array.
 */
static VALUE pack_extra(buffer_t buffer, VALUE check_keys) {
    VALUE boxed_buffer = LL2NUM((long long)buffer);

    return rb_ary_new3(2, boxed_buffer, check_keys);
}
|
150
|
+
|
151
|
+
/*
 * Like pack_extra, with a third entry recording whether "_id" may be
 * written: [buffer pointer boxed as an Integer, check_keys, true/false].
 *
 * allow_id is converted to Qtrue/Qfalse before being stored. rb_ary_new3 is
 * variadic and reads each element as a VALUE, so passing a plain C int is
 * both the wrong width on LP64 platforms and puts a non-VALUE into a Ruby
 * array.
 */
static VALUE pack_triple(buffer_t buffer, VALUE check_keys, int allow_id) {
    VALUE allow_flag = allow_id ? Qtrue : Qfalse;

    return rb_ary_new3(3, LL2NUM((long long)buffer), check_keys, allow_flag);
}
|
154
|
+
|
155
|
+
/*
 * Emit an element header: the one-byte type tag, then the element name
 * (passed through TO_UTF8 and written with the NULL-byte check enabled),
 * then a terminating zero byte.
 */
static void write_name_and_type(buffer_t buffer, VALUE name, char type) {
    VALUE utf8_name;

    SAFE_WRITE(buffer, &type, 1);

    utf8_name = TO_UTF8(name);
    write_utf8(buffer, utf8_name, 1);

    SAFE_WRITE(buffer, &zero, 1);
}
|
161
|
+
|
162
|
+
/*
 * Serialize one key/value pair into the buffer packed inside `extra`.
 *
 * key       String or Symbol (Symbols are converted to Strings); anything
 *           else frees the buffer and raises TypeError.
 * value     The Ruby object to encode; dispatch is on TYPE(value) and, for
 *           T_OBJECT / T_DATA / T_STRING, on the class name.
 * extra     Array built by pack_extra: [boxed buffer pointer, check_keys].
 * allow_id  When zero, a key equal to "_id" is skipped silently.
 *
 * When check_keys is Qtrue, keys starting with '$' or containing '.' free
 * the buffer and raise InvalidName.
 *
 * Always returns ST_CONTINUE so it can drive a hash iteration. Unsupported
 * values free the buffer and raise (RangeError for integers outside 8
 * bytes, InvalidDocument otherwise).
 */
static int write_element_allow_id(VALUE key, VALUE value, VALUE extra, int allow_id) {
    buffer_t buffer = (buffer_t)NUM2LL(rb_ary_entry(extra, 0));
    VALUE check_keys = rb_ary_entry(extra, 1);

    if (TYPE(key) == T_SYMBOL) {
        // TODO better way to do this... ?
        key = rb_str_new2(rb_id2name(SYM2ID(key)));
    }

    if (TYPE(key) != T_STRING) {
        buffer_free(buffer);
        rb_raise(rb_eTypeError, "keys must be strings or symbols");
    }

    if (!allow_id && strcmp("_id", RSTRING_PTR(key)) == 0) {
        return ST_CONTINUE;
    }

    if (check_keys == Qtrue) {
        int i;
        if (RSTRING_LEN(key) > 0 && RSTRING_PTR(key)[0] == '$') {
            buffer_free(buffer);
            rb_raise(InvalidName, "key must not start with '$'");
        }
        for (i = 0; i < RSTRING_LEN(key); i++) {
            if (RSTRING_PTR(key)[i] == '.') {
                buffer_free(buffer);
                rb_raise(InvalidName, "key must not contain '.'");
            }
        }
    }

    switch(TYPE(value)) {
    case T_BIGNUM:
    case T_FIXNUM:
        {
            /* The minimum values are spelled as (-MAX - 1): the literal
             * 9223372036854775808 / 2147483648 does not fit the signed type
             * it is meant to be negated in. */
            if (rb_funcall(value, rb_intern(">"), 1, LL2NUM(9223372036854775807LL)) == Qtrue ||
                rb_funcall(value, rb_intern("<"), 1, LL2NUM(-9223372036854775807LL - 1)) == Qtrue) {
                buffer_free(buffer);
                rb_raise(rb_eRangeError, "MongoDB can only handle 8-byte ints");
            }
            if (rb_funcall(value, rb_intern(">"), 1, INT2NUM(2147483647L)) == Qtrue ||
                rb_funcall(value, rb_intern("<"), 1, INT2NUM(-2147483647L - 1)) == Qtrue) {
                /* does not fit in 4 bytes: write as type 0x12 with 8 bytes */
                long long ll_value;
                write_name_and_type(buffer, key, 0x12);
                ll_value = NUM2LL(value);
                SAFE_WRITE(buffer, (char*)&ll_value, 8);
            } else {
                /* fits in 4 bytes: write as type 0x10 */
                int int_value;
                write_name_and_type(buffer, key, 0x10);
                int_value = NUM2LL(value);
                SAFE_WRITE(buffer, (char*)&int_value, 4);
            }
            break;
        }
    case T_TRUE:
        {
            write_name_and_type(buffer, key, 0x08);
            SAFE_WRITE(buffer, &one, 1);
            break;
        }
    case T_FALSE:
        {
            write_name_and_type(buffer, key, 0x08);
            SAFE_WRITE(buffer, &zero, 1);
            break;
        }
    case T_FLOAT:
        {
            double d = NUM2DBL(value);
            write_name_and_type(buffer, key, 0x01);
            SAFE_WRITE(buffer, (char*)&d, 8);
            break;
        }
    case T_NIL:
        {
            write_name_and_type(buffer, key, 0x0A);
            break;
        }
    case T_HASH:
        {
            write_name_and_type(buffer, key, 0x03);
            write_doc(buffer, value, check_keys, Qfalse);
            break;
        }
    case T_ARRAY:
        {
            buffer_position length_location, start_position, obj_length;
            int items, i;
            VALUE* values;

            write_name_and_type(buffer, key, 0x04);
            start_position = buffer_get_position(buffer);

            // save space for length
            length_location = buffer_save_space(buffer, 4);
            if (length_location == -1) {
                rb_raise(rb_eNoMemError, "failed to allocate memory in buffer.c");
            }

            items = RARRAY_LEN(value);
            values = RARRAY_PTR(value);
            for(i = 0; i < items; i++) {
                char* name = NULL;
                VALUE index_key;

                /* each array element is written under its decimal index */
                INT2STRING(&name, i);
                if (name == NULL) {
                    rb_raise(rb_eNoMemError, "failed to allocate memory for array index key");
                }
                index_key = rb_str_new2(name);
                /* rb_str_new2 copied the bytes; free the C string before
                 * recursing so that a raise from write_element cannot leak it */
                free(name);
                write_element(index_key, values[i], pack_extra(buffer, check_keys));
            }

            // write null byte and fill in length
            SAFE_WRITE(buffer, &zero, 1);
            obj_length = buffer_get_position(buffer) - start_position;
            SAFE_WRITE_AT_POS(buffer, length_location, (const char*)&obj_length, 4);
            break;
        }
    case T_STRING:
        {
            if (strcmp(rb_class2name(RBASIC(value)->klass),
                       "Mongo::Code") == 0) {
                /* code with scope (0x0F): total length, code string, scope document */
                buffer_position length_location, start_position, total_length;
                int length;
                write_name_and_type(buffer, key, 0x0F);

                start_position = buffer_get_position(buffer);
                length_location = buffer_save_space(buffer, 4);
                if (length_location == -1) {
                    rb_raise(rb_eNoMemError, "failed to allocate memory in buffer.c");
                }

                length = RSTRING_LEN(value) + 1;
                SAFE_WRITE(buffer, (char*)&length, 4);
                SAFE_WRITE(buffer, RSTRING_PTR(value), length - 1);
                SAFE_WRITE(buffer, &zero, 1);
                write_doc(buffer, rb_funcall(value, rb_intern("scope"), 0), Qfalse, Qfalse);

                total_length = buffer_get_position(buffer) - start_position;
                SAFE_WRITE_AT_POS(buffer, length_location, (const char*)&total_length, 4);
                break;
            } else {
                /* plain string (0x02): length including terminator, bytes, terminator */
                int length;
                write_name_and_type(buffer, key, 0x02);
                value = TO_UTF8(value);
                length = RSTRING_LEN(value) + 1;
                SAFE_WRITE(buffer, (char*)&length, 4);
                write_utf8(buffer, value, 0);
                SAFE_WRITE(buffer, &zero, 1);
                break;
            }
        }
    case T_SYMBOL:
        {
            const char* str_value = rb_id2name(SYM2ID(value));
            int length = strlen(str_value) + 1;
            write_name_and_type(buffer, key, 0x0E);
            SAFE_WRITE(buffer, (char*)&length, 4);
            /* length includes the NUL, so the terminator is written too */
            SAFE_WRITE(buffer, str_value, length);
            break;
        }
    case T_OBJECT:
        {
            // TODO there has to be a better way to do these checks...
            const char* cls = rb_class2name(RBASIC(value)->klass);
            if (strcmp(cls, "Mongo::Binary") == 0 ||
                strcmp(cls, "ByteBuffer") == 0) {
                /* a ByteBuffer is always written with subtype 2 */
                const char subtype = strcmp(cls, "ByteBuffer") ?
                    (const char)FIX2INT(rb_funcall(value, rb_intern("subtype"), 0)) : 2;
                VALUE string_data = rb_funcall(value, rb_intern("to_s"), 0);
                int length = RSTRING_LEN(string_data);
                write_name_and_type(buffer, key, 0x05);
                if (subtype == 2) {
                    /* subtype 2 layout: outer length (data + 4), subtype, inner length, data */
                    const int other_length = length + 4;
                    SAFE_WRITE(buffer, (const char*)&other_length, 4);
                    SAFE_WRITE(buffer, &subtype, 1);
                }
                SAFE_WRITE(buffer, (const char*)&length, 4);
                if (subtype != 2) {
                    SAFE_WRITE(buffer, &subtype, 1);
                }
                SAFE_WRITE(buffer, RSTRING_PTR(string_data), length);
                break;
            }
            if (strcmp(cls, "Mongo::ObjectID") == 0) {
                VALUE as_array = rb_funcall(value, rb_intern("to_a"), 0);
                int i;
                write_name_and_type(buffer, key, 0x07);
                for (i = 0; i < 12; i++) {
                    char byte = (char)FIX2INT(RARRAY_PTR(as_array)[i]);
                    SAFE_WRITE(buffer, &byte, 1);
                }
                break;
            }
            if (strcmp(cls, "Mongo::DBRef") == 0) {
                /* written as an embedded document with "$ref" and "$id" entries */
                buffer_position length_location, start_position, obj_length;
                VALUE ns, oid;
                write_name_and_type(buffer, key, 0x03);

                start_position = buffer_get_position(buffer);

                // save space for length
                length_location = buffer_save_space(buffer, 4);
                if (length_location == -1) {
                    rb_raise(rb_eNoMemError, "failed to allocate memory in buffer.c");
                }

                ns = rb_funcall(value, rb_intern("namespace"), 0);
                write_element(rb_str_new2("$ref"), ns, pack_extra(buffer, Qfalse));
                oid = rb_funcall(value, rb_intern("object_id"), 0);
                write_element(rb_str_new2("$id"), oid, pack_extra(buffer, Qfalse));

                // write null byte and fill in length
                SAFE_WRITE(buffer, &zero, 1);
                obj_length = buffer_get_position(buffer) - start_position;
                SAFE_WRITE_AT_POS(buffer, length_location, (const char*)&obj_length, 4);
                break;
            }
            if (strcmp(cls, "Mongo::MaxKey") == 0) {
                write_name_and_type(buffer, key, 0x7f);
                break;
            }
            if (strcmp(cls, "Mongo::MinKey") == 0) {
                write_name_and_type(buffer, key, 0xff);
                break;
            }
            if (strcmp(cls, "DateTime") == 0 || strcmp(cls, "Date") == 0 || strcmp(cls, "ActiveSupport::TimeWithZone") == 0) {
                buffer_free(buffer);
                rb_raise(InvalidDocument, "%s is not currently supported; use a UTC Time instance instead.", cls);
                break;
            }
            if(strcmp(cls, "Complex") == 0 || strcmp(cls, "Rational") == 0 || strcmp(cls, "BigDecimal") == 0) {
                buffer_free(buffer);
                rb_raise(InvalidDocument, "Cannot serialize the Numeric type %s as BSON; only Bignum, Fixnum, and Float are supported.", cls);
                break;
            }
            buffer_free(buffer);
            rb_raise(InvalidDocument, "Cannot serialize an object of class %s into BSON.", cls);
            break;
        }
    case T_DATA:
        {
            const char* cls = rb_class2name(RBASIC(value)->klass);
            if (strcmp(cls, "Time") == 0) {
                /* stored as milliseconds since the epoch, rounded from to_f */
                double t = NUM2DBL(rb_funcall(value, rb_intern("to_f"), 0));
                long long time_since_epoch = (long long)round(t * 1000);
                write_name_and_type(buffer, key, 0x09);
                SAFE_WRITE(buffer, (const char*)&time_since_epoch, 8);
                break;
            }
            if(strcmp(cls, "BigDecimal") == 0) {
                buffer_free(buffer);
                rb_raise(InvalidDocument, "Cannot serialize the Numeric type %s as BSON; only Bignum, Fixnum, and Float are supported.", cls);
                break;
            }
            buffer_free(buffer);
            rb_raise(InvalidDocument, "Cannot serialize an object of class %s into BSON.", cls);
            break;
        }
    case T_REGEXP:
        {
            VALUE pattern = RREGEXP_SRC(value);
            long flags = RREGEXP(value)->ptr->options;
            VALUE has_extra;

            write_name_and_type(buffer, key, 0x0B);

            pattern = TO_UTF8(pattern);
            write_utf8(buffer, pattern, 1);
            SAFE_WRITE(buffer, &zero, 1);

            if (flags & IGNORECASE) {
                char ignorecase = 'i';
                SAFE_WRITE(buffer, &ignorecase, 1);
            }
            if (flags & MULTILINE) {
                char multiline = 'm';
                SAFE_WRITE(buffer, &multiline, 1);
            }
            if (flags & EXTENDED) {
                char extended = 'x';
                SAFE_WRITE(buffer, &extended, 1);
            }

            has_extra = rb_funcall(value, rb_intern("respond_to?"), 1, rb_str_new2("extra_options_str"));
            if (TYPE(has_extra) == T_TRUE) {
                VALUE extra = rb_funcall(value, rb_intern("extra_options_str"), 0);
                buffer_position old_position = buffer_get_position(buffer);
                SAFE_WRITE(buffer, RSTRING_PTR(extra), RSTRING_LEN(extra));
                /* sort the extra option characters in place inside the buffer */
                qsort(buffer_get_buffer(buffer) + old_position, RSTRING_LEN(extra), sizeof(char), cmp_char);
            }
            SAFE_WRITE(buffer, &zero, 1);

            break;
        }
    default:
        {
            const char* cls = rb_class2name(RBASIC(value)->klass);
            buffer_free(buffer);
            rb_raise(InvalidDocument, "Cannot serialize an object of class %s (type %d) into BSON.", cls, TYPE(value));
            break;
        }
    }
    return ST_CONTINUE;
}
|
466
|
+
|
467
|
+
/*
 * Three-argument element writer: delegates to write_element_allow_id with
 * allow_id fixed to 0, so a key named "_id" is skipped.
 * Returns whatever write_element_allow_id returns.
 */
static int write_element(VALUE key, VALUE value, VALUE extra) {
    const int skip_id = 0;

    return write_element_allow_id(key, value, extra, skip_id);
}
|
470
|
+
|
471
|
+
/*
 * rb_hash_foreach callback (three arguments) that writes every pair,
 * including one whose key is "_id".
 */
static int write_element_with_id(VALUE key, VALUE value, VALUE extra) {
    return write_element_allow_id(key, value, extra, 1);
}

/*
 * Serialize `hash` as one document: a 4-byte length placeholder, every
 * element, a terminating zero byte, then the length patched in.
 *
 * buffer      Destination buffer.
 * hash        Hash (or OrderedHash, which is iterated via its `keys` so the
 *             order is preserved).
 * check_keys  Forwarded to the element writer (Qtrue enables key checks).
 * move_id     When Qtrue, the "_id" / :_id entry is written first and then
 *             skipped during the main pass. Otherwise "_id" is written in
 *             place; if both the String and the Symbol key exist, the
 *             String entry is deleted from `hash` first.
 *
 * Raises NoMemoryError when space for the length cannot be reserved and
 * InvalidDocument (after freeing the buffer) when the document exceeds 4MB.
 */
static void write_doc(buffer_t buffer, VALUE hash, VALUE check_keys, VALUE move_id) {
    buffer_position start_position = buffer_get_position(buffer);
    buffer_position length_location = buffer_save_space(buffer, 4);
    buffer_position length;
    int allow_id;
    VALUE id_str = rb_str_new2("_id");
    VALUE id_sym = ID2SYM(rb_intern("_id"));

    if (length_location == -1) {
        rb_raise(rb_eNoMemError, "failed to allocate memory in buffer.c");
    }

    // write '_id' first if move_id is true
    if(move_id == Qtrue) {
        allow_id = 0;
        if (rb_funcall(hash, rb_intern("has_key?"), 1, id_str) == Qtrue) {
            VALUE id = rb_hash_aref(hash, id_str);
            write_element_allow_id(id_str, id, pack_extra(buffer, check_keys), 1);
        } else if (rb_funcall(hash, rb_intern("has_key?"), 1, id_sym) == Qtrue) {
            VALUE id = rb_hash_aref(hash, id_sym);
            write_element_allow_id(id_sym, id, pack_extra(buffer, check_keys), 1);
        }
    }
    else {
        allow_id = 1;
        if ((rb_funcall(hash, rb_intern("has_key?"), 1, id_str) == Qtrue) &&
            (rb_funcall(hash, rb_intern("has_key?"), 1, id_sym) == Qtrue)) {
            rb_hash_delete(hash, id_str);
        }
    }

    // we have to check for an OrderedHash and handle that specially
    if (strcmp(rb_class2name(RBASIC(hash)->klass), "OrderedHash") == 0) {
        VALUE keys = rb_funcall(hash, rb_intern("keys"), 0);
        int i;
        for(i = 0; i < RARRAY_LEN(keys); i++) {
            VALUE key = RARRAY_PTR(keys)[i];
            VALUE value = rb_hash_aref(hash, key);

            write_element_allow_id(key, value, pack_extra(buffer, check_keys), allow_id);
        }
    } else {
        /* rb_hash_foreach calls its callback with exactly (key, value, arg).
         * write_element_allow_id takes a fourth parameter, so it must not be
         * passed directly; pick the three-argument wrapper that fixes
         * allow_id to the wanted value instead. */
        rb_hash_foreach(hash,
                        allow_id ? write_element_with_id : write_element,
                        pack_extra(buffer, check_keys));
    }

    // write null byte and fill in length
    SAFE_WRITE(buffer, &zero, 1);
    length = buffer_get_position(buffer) - start_position;

    // make sure that length doesn't exceed 4MB
    if (length > 4 * 1024 * 1024) {
        buffer_free(buffer);
        rb_raise(InvalidDocument, "Document too large: BSON documents are limited to 4MB.");
    }
    SAFE_WRITE_AT_POS(buffer, length_location, (const char*)&length, 4);
}
|
528
|
+
|
529
|
+
/*
 * CBson.serialize(doc, check_keys, move_id)
 *
 * Allocates a fresh buffer, writes `doc` into it with write_doc, copies the
 * bytes written so far into a new Ruby String and frees the buffer.
 *
 * Raises NoMemoryError when the buffer cannot be created and RuntimeError
 * when buffer_free reports failure.
 */
static VALUE method_serialize(VALUE self, VALUE doc, VALUE check_keys, VALUE move_id) {
    buffer_t out = buffer_new();
    VALUE encoded;

    if (out == NULL) {
        rb_raise(rb_eNoMemError, "failed to allocate memory in buffer.c");
    }

    write_doc(out, doc, check_keys, move_id);
    encoded = rb_str_new(buffer_get_buffer(out), buffer_get_position(out));

    if (buffer_free(out) != 0) {
        rb_raise(rb_eRuntimeError, "failed to free buffer");
    }

    return encoded;
}
|
544
|
+
|
545
|
+
static VALUE get_value(const char* buffer, int* position, int type) {
|
546
|
+
VALUE value;
|
547
|
+
switch (type) {
|
548
|
+
case -1:
|
549
|
+
{
|
550
|
+
value = rb_class_new_instance(0, NULL, MinKey);
|
551
|
+
break;
|
552
|
+
}
|
553
|
+
case 1:
|
554
|
+
{
|
555
|
+
double d;
|
556
|
+
memcpy(&d, buffer + *position, 8);
|
557
|
+
value = rb_float_new(d);
|
558
|
+
*position += 8;
|
559
|
+
break;
|
560
|
+
}
|
561
|
+
case 2:
|
562
|
+
case 13:
|
563
|
+
{
|
564
|
+
int value_length;
|
565
|
+
value_length = *(int*)(buffer + *position) - 1;
|
566
|
+
*position += 4;
|
567
|
+
value = STR_NEW(buffer + *position, value_length);
|
568
|
+
*position += value_length + 1;
|
569
|
+
break;
|
570
|
+
}
|
571
|
+
case 3:
|
572
|
+
{
|
573
|
+
int size;
|
574
|
+
memcpy(&size, buffer + *position, 4);
|
575
|
+
if (strcmp(buffer + *position + 5, "$ref") == 0) { // DBRef
|
576
|
+
int offset = *position + 10;
|
577
|
+
VALUE argv[2];
|
578
|
+
int collection_length = *(int*)(buffer + offset) - 1;
|
579
|
+
char id_type;
|
580
|
+
offset += 4;
|
581
|
+
|
582
|
+
argv[0] = STR_NEW(buffer + offset, collection_length);
|
583
|
+
offset += collection_length + 1;
|
584
|
+
id_type = buffer[offset];
|
585
|
+
offset += 5;
|
586
|
+
argv[1] = get_value(buffer, &offset, (int)id_type);
|
587
|
+
value = rb_class_new_instance(2, argv, DBRef);
|
588
|
+
} else {
|
589
|
+
value = elements_to_hash(buffer + *position + 4, size - 5);
|
590
|
+
}
|
591
|
+
*position += size;
|
592
|
+
break;
|
593
|
+
}
|
594
|
+
case 4:
|
595
|
+
{
|
596
|
+
int size, end;
|
597
|
+
memcpy(&size, buffer + *position, 4);
|
598
|
+
end = *position + size - 1;
|
599
|
+
*position += 4;
|
600
|
+
|
601
|
+
value = rb_ary_new();
|
602
|
+
while (*position < end) {
|
603
|
+
int type = (int)buffer[(*position)++];
|
604
|
+
int key_size = strlen(buffer + *position);
|
605
|
+
VALUE to_append;
|
606
|
+
|
607
|
+
*position += key_size + 1; // just skip the key, they're in order.
|
608
|
+
to_append = get_value(buffer, position, type);
|
609
|
+
rb_ary_push(value, to_append);
|
610
|
+
}
|
611
|
+
(*position)++;
|
612
|
+
break;
|
613
|
+
}
|
614
|
+
case 5:
|
615
|
+
{
|
616
|
+
int length, subtype;
|
617
|
+
VALUE data, st;
|
618
|
+
VALUE argv[2];
|
619
|
+
memcpy(&length, buffer + *position, 4);
|
620
|
+
subtype = (unsigned char)buffer[*position + 4];
|
621
|
+
if (subtype == 2) {
|
622
|
+
data = rb_str_new(buffer + *position + 9, length - 4);
|
623
|
+
} else {
|
624
|
+
data = rb_str_new(buffer + *position + 5, length);
|
625
|
+
}
|
626
|
+
st = INT2FIX(subtype);
|
627
|
+
argv[0] = data;
|
628
|
+
argv[1] = st;
|
629
|
+
value = rb_class_new_instance(2, argv, Binary);
|
630
|
+
*position += length + 5;
|
631
|
+
break;
|
632
|
+
}
|
633
|
+
case 6:
|
634
|
+
{
|
635
|
+
value = Qnil;
|
636
|
+
break;
|
637
|
+
}
|
638
|
+
case 7:
|
639
|
+
{
|
640
|
+
VALUE str = rb_str_new(buffer + *position, 12);
|
641
|
+
VALUE oid = rb_funcall(str, rb_intern("unpack"), 1, rb_str_new2("C*"));
|
642
|
+
value = rb_class_new_instance(1, &oid, ObjectID);
|
643
|
+
*position += 12;
|
644
|
+
break;
|
645
|
+
}
|
646
|
+
case 8:
|
647
|
+
{
|
648
|
+
value = buffer[(*position)++] ? Qtrue : Qfalse;
|
649
|
+
break;
|
650
|
+
}
|
651
|
+
case 9:
|
652
|
+
{
|
653
|
+
long long millis;
|
654
|
+
VALUE seconds, microseconds;
|
655
|
+
memcpy(&millis, buffer + *position, 8);
|
656
|
+
seconds = LL2NUM(millis / 1000);
|
657
|
+
microseconds = INT2NUM((millis % 1000) * 1000);
|
658
|
+
|
659
|
+
value = rb_funcall(Time, rb_intern("at"), 2, seconds, microseconds);
|
660
|
+
value = rb_funcall(value, rb_intern("utc"), 0);
|
661
|
+
*position += 8;
|
662
|
+
break;
|
663
|
+
}
|
664
|
+
case 10:
|
665
|
+
{
|
666
|
+
value = Qnil;
|
667
|
+
break;
|
668
|
+
}
|
669
|
+
case 11:
|
670
|
+
{
|
671
|
+
int pattern_length = strlen(buffer + *position);
|
672
|
+
VALUE pattern = STR_NEW(buffer + *position, pattern_length);
|
673
|
+
int flags_length, flags = 0, i = 0;
|
674
|
+
char extra[10];
|
675
|
+
VALUE argv[3];
|
676
|
+
*position += pattern_length + 1;
|
677
|
+
|
678
|
+
flags_length = strlen(buffer + *position);
|
679
|
+
extra[0] = 0;
|
680
|
+
for (i = 0; i < flags_length; i++) {
|
681
|
+
char flag = buffer[*position + i];
|
682
|
+
if (flag == 'i') {
|
683
|
+
flags |= IGNORECASE;
|
684
|
+
}
|
685
|
+
else if (flag == 'm') {
|
686
|
+
flags |= MULTILINE;
|
687
|
+
}
|
688
|
+
else if (flag == 'x') {
|
689
|
+
flags |= EXTENDED;
|
690
|
+
}
|
691
|
+
else if (strlen(extra) < 9) {
|
692
|
+
strncat(extra, &flag, 1);
|
693
|
+
}
|
694
|
+
}
|
695
|
+
argv[0] = pattern;
|
696
|
+
argv[1] = INT2FIX(flags);
|
697
|
+
if(extra[0] == 0) {
|
698
|
+
value = rb_class_new_instance(2, argv, Regexp);
|
699
|
+
}
|
700
|
+
else { // Deserializing a RegexpOfHolding
|
701
|
+
argv[2] = rb_str_new2(extra);
|
702
|
+
value = rb_class_new_instance(3, argv, RegexpOfHolding);
|
703
|
+
}
|
704
|
+
*position += flags_length + 1;
|
705
|
+
break;
|
706
|
+
}
|
707
|
+
case 12:
|
708
|
+
{
|
709
|
+
int collection_length;
|
710
|
+
VALUE collection, str, oid, id, argv[2];
|
711
|
+
collection_length = *(int*)(buffer + *position) - 1;
|
712
|
+
*position += 4;
|
713
|
+
collection = STR_NEW(buffer + *position, collection_length);
|
714
|
+
*position += collection_length + 1;
|
715
|
+
|
716
|
+
str = rb_str_new(buffer + *position, 12);
|
717
|
+
oid = rb_funcall(str, rb_intern("unpack"), 1, rb_str_new2("C*"));
|
718
|
+
id = rb_class_new_instance(1, &oid, ObjectID);
|
719
|
+
*position += 12;
|
720
|
+
|
721
|
+
argv[0] = collection;
|
722
|
+
argv[1] = id;
|
723
|
+
value = rb_class_new_instance(2, argv, DBRef);
|
724
|
+
break;
|
725
|
+
}
|
726
|
+
case 14:
|
727
|
+
{
|
728
|
+
int value_length;
|
729
|
+
memcpy(&value_length, buffer + *position, 4);
|
730
|
+
value = ID2SYM(rb_intern(buffer + *position + 4));
|
731
|
+
*position += value_length + 4;
|
732
|
+
break;
|
733
|
+
}
|
734
|
+
case 15:
|
735
|
+
{
|
736
|
+
int code_length, scope_size;
|
737
|
+
VALUE code, scope, argv[2];
|
738
|
+
*position += 4;
|
739
|
+
code_length = *(int*)(buffer + *position) - 1;
|
740
|
+
*position += 4;
|
741
|
+
code = STR_NEW(buffer + *position, code_length);
|
742
|
+
*position += code_length + 1;
|
743
|
+
|
744
|
+
memcpy(&scope_size, buffer + *position, 4);
|
745
|
+
scope = elements_to_hash(buffer + *position + 4, scope_size - 5);
|
746
|
+
*position += scope_size;
|
747
|
+
|
748
|
+
argv[0] = code;
|
749
|
+
argv[1] = scope;
|
750
|
+
value = rb_class_new_instance(2, argv, Code);
|
751
|
+
break;
|
752
|
+
}
|
753
|
+
case 16:
|
754
|
+
{
|
755
|
+
int i;
|
756
|
+
memcpy(&i, buffer + *position, 4);
|
757
|
+
value = LL2NUM(i);
|
758
|
+
*position += 4;
|
759
|
+
break;
|
760
|
+
}
|
761
|
+
case 17:
|
762
|
+
{
|
763
|
+
int i;
|
764
|
+
int j;
|
765
|
+
memcpy(&i, buffer + *position, 4);
|
766
|
+
memcpy(&j, buffer + *position + 4, 4);
|
767
|
+
value = rb_ary_new3(2, LL2NUM(i), LL2NUM(j));
|
768
|
+
*position += 8;
|
769
|
+
break;
|
770
|
+
}
|
771
|
+
case 18:
|
772
|
+
{
|
773
|
+
long long ll;
|
774
|
+
memcpy(&ll, buffer + *position, 8);
|
775
|
+
value = LL2NUM(ll);
|
776
|
+
*position += 8;
|
777
|
+
break;
|
778
|
+
}
|
779
|
+
case 127:
|
780
|
+
{
|
781
|
+
value = rb_class_new_instance(0, NULL, MaxKey);
|
782
|
+
break;
|
783
|
+
}
|
784
|
+
default:
|
785
|
+
{
|
786
|
+
rb_raise(rb_eTypeError, "no c decoder for this type yet (%d)", type);
|
787
|
+
break;
|
788
|
+
}
|
789
|
+
}
|
790
|
+
return value;
|
791
|
+
}
|
792
|
+
|
793
|
+
static VALUE elements_to_hash(const char* buffer, int max) {
|
794
|
+
VALUE hash = rb_class_new_instance(0, NULL, OrderedHash);
|
795
|
+
int position = 0;
|
796
|
+
while (position < max) {
|
797
|
+
int type = (int)buffer[position++];
|
798
|
+
int name_length = strlen(buffer + position);
|
799
|
+
VALUE name = STR_NEW(buffer + position, name_length);
|
800
|
+
VALUE value;
|
801
|
+
position += name_length + 1;
|
802
|
+
value = get_value(buffer, &position, type);
|
803
|
+
rb_funcall(hash, rb_intern("[]="), 2, name, value);
|
804
|
+
}
|
805
|
+
return hash;
|
806
|
+
}
|
807
|
+
|
808
|
+
/*
 * CBson.deserialize(bson)
 *
 * Decodes the elements of the document held in the String `bson`. The
 * leading 4-byte size and the trailing end byte are not inspected; they are
 * simply excluded from the range handed to elements_to_hash.
 */
static VALUE method_deserialize(VALUE self, VALUE bson) {
    const char* elements = RSTRING_PTR(bson);
    int element_bytes = RSTRING_LEN(bson);

    // NOTE we just swallow the size and end byte here
    elements = elements + 4;
    element_bytes = element_bytes - 5;

    return elements_to_hash(elements, element_bytes);
}
|
818
|
+
|
819
|
+
|
820
|
+
/*
 * Array#fast_pack
 *
 * Builds a String by appending one byte per array element; each element is
 * read with FIX2LONG and narrowed to a char.
 */
static VALUE fast_pack(VALUE self)
{
    VALUE packed = rb_str_buf_new(0);
    long index = 0;

    while (index < RARRAY_LEN(self)) {
        char byte = FIX2LONG(RARRAY_PTR(self)[index]);
        rb_str_buf_cat(packed, &byte, sizeof(char));
        index++;
    }

    return packed;
}
|
835
|
+
|
836
|
+
|
837
|
+
/*
 * ObjectID#generate
 *
 * Returns an Array of 12 Integers (one per byte) laid out as:
 *   bytes 0-3   current time in seconds, most significant byte first
 *   bytes 4-6   first three bytes of the MD5 digest of the hostname
 *   bytes 7-8   low 16 bits of the process id, most significant byte first
 *   bytes 9-11  low 24 bits of self.get_inc, most significant byte first
 *
 * The multi-byte fields are assembled with shifts rather than
 * htonl/htons + memcpy, so the result does not depend on the host byte
 * order or on sizeof(unsigned long), and no networking header is needed.
 *
 * Raises RuntimeError when gethostname fails.
 */
static VALUE objectid_generate(VALUE self)
{
    VALUE oid, digest;
    char hostname[MAX_HOSTNAME_LENGTH];
    unsigned char oid_bytes[12];
    unsigned long t, inc, pid;
    int i;

    t = (unsigned long)time(NULL);
    oid_bytes[0] = (unsigned char)((t >> 24) & 0xff);
    oid_bytes[1] = (unsigned char)((t >> 16) & 0xff);
    oid_bytes[2] = (unsigned char)((t >> 8) & 0xff);
    oid_bytes[3] = (unsigned char)(t & 0xff);

    if (gethostname(hostname, MAX_HOSTNAME_LENGTH) != 0) {
        rb_raise(rb_eRuntimeError, "failed to get hostname");
    }
    /* a truncated name is not guaranteed to be NUL-terminated */
    hostname[MAX_HOSTNAME_LENGTH - 1] = '\0';
    digest = rb_funcall(DigestMD5, rb_intern("digest"), 1, rb_str_new2(hostname));
    MEMCPY(&oid_bytes[4], RSTRING_PTR(digest), unsigned char, 3);

    pid = (unsigned long)getpid();
    oid_bytes[7] = (unsigned char)((pid >> 8) & 0xff);
    oid_bytes[8] = (unsigned char)(pid & 0xff);

    inc = FIX2ULONG(rb_funcall(self, rb_intern("get_inc"), 0));
    oid_bytes[9] = (unsigned char)((inc >> 16) & 0xff);
    oid_bytes[10] = (unsigned char)((inc >> 8) & 0xff);
    oid_bytes[11] = (unsigned char)(inc & 0xff);

    oid = rb_ary_new2(12);
    for(i = 0; i < 12; i++) {
        rb_ary_store(oid, i, INT2FIX((unsigned int)oid_bytes[i]));
    }
    return oid;
}
|
867
|
+
|
868
|
+
|
869
|
+
void Init_cbson() {
|
870
|
+
VALUE mongo, CBson, Digest, ext_version;
|
871
|
+
Time = rb_const_get(rb_cObject, rb_intern("Time"));
|
872
|
+
|
873
|
+
mongo = rb_const_get(rb_cObject, rb_intern("Mongo"));
|
874
|
+
rb_require("mongo/types/binary");
|
875
|
+
Binary = rb_const_get(mongo, rb_intern("Binary"));
|
876
|
+
rb_require("mongo/types/objectid");
|
877
|
+
ObjectID = rb_const_get(mongo, rb_intern("ObjectID"));
|
878
|
+
rb_require("mongo/types/dbref");
|
879
|
+
DBRef = rb_const_get(mongo, rb_intern("DBRef"));
|
880
|
+
rb_require("mongo/types/code");
|
881
|
+
Code = rb_const_get(mongo, rb_intern("Code"));
|
882
|
+
rb_require("mongo/types/min_max_keys");
|
883
|
+
MinKey = rb_const_get(mongo, rb_intern("MinKey"));
|
884
|
+
MaxKey = rb_const_get(mongo, rb_intern("MaxKey"));
|
885
|
+
rb_require("mongo/types/regexp_of_holding");
|
886
|
+
Regexp = rb_const_get(rb_cObject, rb_intern("Regexp"));
|
887
|
+
RegexpOfHolding = rb_const_get(mongo, rb_intern("RegexpOfHolding"));
|
888
|
+
rb_require("mongo/exceptions");
|
889
|
+
InvalidName = rb_const_get(mongo, rb_intern("InvalidName"));
|
890
|
+
InvalidStringEncoding = rb_const_get(mongo, rb_intern("InvalidStringEncoding"));
|
891
|
+
InvalidDocument = rb_const_get(mongo, rb_intern("InvalidDocument"));
|
892
|
+
rb_require("mongo/util/ordered_hash");
|
893
|
+
OrderedHash = rb_const_get(rb_cObject, rb_intern("OrderedHash"));
|
894
|
+
|
895
|
+
CBson = rb_define_module("CBson");
|
896
|
+
ext_version = rb_str_new2(VERSION);
|
897
|
+
rb_define_const(CBson, "VERSION", ext_version);
|
898
|
+
rb_define_module_function(CBson, "serialize", method_serialize, 3);
|
899
|
+
rb_define_module_function(CBson, "deserialize", method_deserialize, 1);
|
900
|
+
|
901
|
+
rb_require("digest/md5");
|
902
|
+
Digest = rb_const_get(rb_cObject, rb_intern("Digest"));
|
903
|
+
DigestMD5 = rb_const_get(Digest, rb_intern("MD5"));
|
904
|
+
|
905
|
+
rb_define_method(ObjectID, "generate", objectid_generate, 0);
|
906
|
+
|
907
|
+
rb_define_method(rb_cArray, "fast_pack", fast_pack, 0);
|
908
|
+
}
|