bson 0.20

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of bson might be problematic. Click here for more details.

@@ -0,0 +1,595 @@
1
+ # --
2
+ # Copyright (C) 2008-2010 10gen Inc.
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify it
5
+ # under the terms of the GNU Affero General Public License, version 3, as
6
+ # published by the Free Software Foundation.
7
+ #
8
+ # This program is distributed in the hope that it will be useful, but WITHOUT
9
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License
11
+ # for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
15
+ # ++
16
+
17
+ module BSON
18
+ # A BSON serializer/deserializer in pure Ruby.
19
+ class BSON_RUBY
20
+
21
+ # why was this necessary?
22
+ #include Mongo
23
+
24
# BSON element type codes. Each serialized element begins with one of these
# bytes; the coder dispatches on them when reading and writing.
MINKEY       = -1   # also matched as 255 when decoding
EOO          = 0    # written as the last byte of a document
NUMBER       = 1    # double
STRING       = 2
OBJECT       = 3
ARRAY        = 4
BINARY       = 5
UNDEFINED    = 6    # decoded as nil
OID          = 7
BOOLEAN      = 8
DATE         = 9
NULL         = 10
REGEX        = 11
REF          = 12
CODE         = 13
SYMBOL       = 14
CODE_W_SCOPE = 15
NUMBER_INT   = 16
TIMESTAMP    = 17
NUMBER_LONG  = 18
MAXKEY       = 127
45
+
46
# Sets up the coder with a new ByteBuffer as its working buffer (@buf).
def initialize
  @buf = ByteBuffer.new
end
49
+
50
# Returns +str+ as a UTF-8 string.
#
# Raises InvalidStringEncoding when the bytes are not valid UTF-8. Two
# implementations are defined depending on the running Ruby version.
if RUBY_VERSION >= '1.9'
  def self.to_utf8(str)
    utf8 = str.encode("utf-8")
    # Transcoding a string to the encoding it already carries is a no-op, so
    # malformed bytes in a UTF-8-tagged string would otherwise slip through;
    # check validity explicitly to match what the 1.8 branch rejects.
    unless utf8.valid_encoding?
      raise InvalidStringEncoding, "String not valid utf-8: #{str}"
    end
    utf8
  end
else
  def self.to_utf8(str)
    begin
      # unpack("U*") is used purely as a validity probe; its result is discarded.
      str.unpack("U*")
    rescue
      raise InvalidStringEncoding, "String not valid utf-8: #{str}"
    end
    str
  end
end
64
+
65
# Writes +val+ (converted with to_s and to_utf8) into +buf+ as its byte
# values followed by a terminating 0 byte.
def self.serialize_cstr(buf, val)
  bytes = to_utf8(val.to_s).unpack("C*")
  bytes.push(0)
  buf.put_array(bytes)
end
68
+
69
# Writes +key+ to +buf+ as a C string after rejecting keys that contain a
# NUL byte (which would terminate the C string early).
#
# Raises InvalidDocument when +key+ includes "\x00".
def self.serialize_key(buf, key)
  if key.include?("\x00")
    raise InvalidDocument, "Key names / regex patterns must not contain the NULL byte"
  end
  serialize_cstr(buf, key)
end
73
+
74
# Array form of the working buffer; delegates to @buf.to_a.
def to_a
  @buf.to_a
end
77
+
78
+ def to_s
79
+ @buf.to_s
80
+ end
81
+
82
# Serializes +obj+ using a freshly created coder and returns whatever the
# instance-level #serialize returns.
# Implemented to ensure an API compatible with BSON extension.
def self.serialize(obj, check_keys=false, move_id=false)
  coder = new
  coder.serialize(obj, check_keys, move_id)
end
87
+
88
# Deserializes +buf+ using a freshly created coder; +buf+ is passed straight
# through to the instance-level #deserialize.
def self.deserialize(buf=nil)
  coder = new
  coder.deserialize(buf)
end
91
+
92
# Serializes the hash-like +obj+ into @buf and returns self.
#
# check_keys - when true, keys are validated by serialize_key_value.
# move_id    - when true, the _id field (String or Symbol key) is written
#              before all other fields, without key validation.
#
# Raises a RuntimeError when +obj+ is nil/false and InvalidDocument when the
# encoded size exceeds 4MB.
def serialize(obj, check_keys=false, move_id=false)
  raise "Document is null" unless obj

  @buf.rewind
  # Reserve room for the total size; it is filled in at the end.
  @buf.put_int(0)

  if move_id
    # Emit _id first, then every remaining pair.
    if obj.has_key?('_id')
      serialize_key_value('_id', obj['_id'], false)
    elsif obj.has_key?(:_id)
      serialize_key_value('_id', obj[:_id], false)
    end
    obj.each do |k, v|
      next if k == '_id' || k == :_id
      serialize_key_value(k, v, check_keys)
    end
  else
    # With both spellings present, fold the Symbol-keyed value into the
    # String key so _id is written once. Note: this mutates +obj+.
    obj['_id'] = obj.delete(:_id) if obj.has_key?('_id') && obj.has_key?(:_id)
    obj.each { |k, v| serialize_key_value(k, v, check_keys) }
  end

  serialize_eoo_element(@buf)
  if @buf.size > 4 * 1024 * 1024
    raise InvalidDocument, "Document is too large (#{@buf.size}). BSON documents are limited to 4MB (#{4 * 1024 * 1024})."
  end
  # Patch the real size into the placeholder at offset 0.
  @buf.put_int(@buf.size, 0)
  self
end
121
+
122
# Returns the array stored in the buffer; +arg+ is accepted and ignored.
# Implemented to ensure an API compatible with BSON extension.
def unpack(arg)
  @buf.to_a
end
127
+
128
# Serializes a single key/value pair into @buf, dispatching on the BSON type
# that bson_type reports for +v+.
#
# When +check_keys+ is true, raises InvalidKeyName for keys that start with
# '$' or contain '.'. Raises a RuntimeError for a type with no serializer.
def serialize_key_value(k, v, check_keys)
  k = k.to_s
  if check_keys
    raise InvalidKeyName.new("key #{k} must not start with '$'") if k[0] == ?$
    raise InvalidKeyName.new("key #{k} must not contain '.'") if k.include?(?.)
  end

  type = bson_type(v)
  case type
  when STRING, SYMBOL     then serialize_string_element(@buf, k, v, type)
  when NUMBER, NUMBER_INT then serialize_number_element(@buf, k, v, type)
  when OBJECT             then serialize_object_element(@buf, k, v, check_keys)
  when OID                then serialize_oid_element(@buf, k, v)
  when ARRAY              then serialize_array_element(@buf, k, v, check_keys)
  when REGEX              then serialize_regex_element(@buf, k, v)
  when BOOLEAN            then serialize_boolean_element(@buf, k, v)
  when DATE               then serialize_date_element(@buf, k, v)
  when NULL               then serialize_null_element(@buf, k)
  when REF                then serialize_dbref_element(@buf, k, v)
  when BINARY             then serialize_binary_element(@buf, k, v)
  when UNDEFINED          then serialize_null_element(@buf, k) # written as null
  when CODE_W_SCOPE       then serialize_code_w_scope(@buf, k, v)
  when MAXKEY             then serialize_max_key_element(@buf, k)
  when MINKEY             then serialize_min_key_element(@buf, k)
  else
    raise "unhandled type #{type}"
  end
end
174
+
175
# Decodes one BSON document into an OrderedHash.
#
# buf - a String, or an object responding to to_a, holding the serialized
#       document. If buf is nil, @buf is used and assumed to contain
#       already-serialized BSON (only true during testing).
#
# Raises a RuntimeError on an unrecognized element type byte.
def deserialize(buf=nil)
  if buf
    @buf = buf.is_a?(String) ? ByteBuffer.new(buf) : ByteBuffer.new(buf.to_a)
  end
  @buf.rewind
  @buf.get_int # eat message size
  doc = OrderedHash.new
  key = nil
  while @buf.more?
    type = @buf.get
    break if type == EOO

    # Pick the value reader first (no buffer access yet) so that an unknown
    # type fails before anything else is consumed.
    reader =
      case type
      when STRING, CODE then lambda { deserialize_string_data(@buf) }
      when SYMBOL       then lambda { deserialize_string_data(@buf).intern }
      when NUMBER       then lambda { deserialize_number_data(@buf) }
      when NUMBER_INT   then lambda { deserialize_number_int_data(@buf) }
      when NUMBER_LONG  then lambda { deserialize_number_long_data(@buf) }
      when OID          then lambda { deserialize_oid_data(@buf) }
      when ARRAY        then lambda { deserialize_array_data(@buf) }
      when REGEX        then lambda { deserialize_regex_data(@buf) }
      when OBJECT       then lambda { deserialize_object_data(@buf) }
      when BOOLEAN      then lambda { deserialize_boolean_data(@buf) }
      when DATE         then lambda { deserialize_date_data(@buf) }
      when NULL, UNDEFINED then lambda { nil }
      when REF          then lambda { deserialize_dbref_data(@buf) }
      when BINARY       then lambda { deserialize_binary_data(@buf) }
      when CODE_W_SCOPE then lambda { deserialize_code_w_scope_data(@buf) }
      when TIMESTAMP
        lambda { [deserialize_number_int_data(@buf), deserialize_number_int_data(@buf)] }
      when MAXKEY       then lambda { MaxKey.new }
      when MINKEY, 255 # This is currently easier than unpack the type byte as an unsigned char.
        lambda { MinKey.new }
      else
        # key still holds the previously decoded key (nil for the first element).
        raise "Unknown type #{type}, key = #{key}"
      end

    # The element name comes before its value in the buffer.
    key = deserialize_cstr(@buf)
    doc[key] = reader.call
  end
  @buf.rewind
  doc
end
256
+
257
# For debugging. Renders the buffer as rows of up to eight two-digit hex
# bytes, each row prefixed with the decimal offset of its first byte.
def hex_dump
  dump = ''
  @buf.to_a.each_with_index do |byte, offset|
    separator =
      if offset % 8 != 0
        ' '
      elsif offset > 0
        "\n" + ('%4d: ' % offset)
      else
        '%4d: ' % offset
      end
    dump << separator << ('%02X' % byte)
  end
  dump
end
271
+
272
# Reads a 64-bit millisecond timestamp from +buf+ and returns it as a UTC Time.
#
# The value from buf.get_long is treated as unsigned and reinterpreted as a
# signed 64-bit integer (see the note on deserialize_number_long_data).
def deserialize_date_data(buf)
  unsigned = buf.get_long()
  milliseconds = unsigned >= 2**63 ? unsigned - 2**64 : unsigned
  # Split with integer arithmetic rather than dividing a Float: going through
  # a Float can land one microsecond short of the stored millisecond.
  # divmod floors, so the remainder is non-negative even for negative input.
  seconds, remainder_ms = milliseconds.divmod(1000)
  Time.at(seconds, remainder_ms * 1000).utc # second argument is microseconds
end
278
+
279
# Reads one byte from +buf+; true only when that byte equals 1.
def deserialize_boolean_data(buf)
  flag = buf.get
  flag == 1
end
282
+
283
# Reads a NUMBER element value; returns whatever buf.get_double yields.
def deserialize_number_data(buf)
  buf.get_double
end
286
+
287
# Reads a 32-bit integer from +buf+ and returns it as a signed value.
#
# Per the original author's note, the buffer packs as signed but unpacks as
# unsigned, so values with the top bit set are shifted down by 2**32.
def deserialize_number_int_data(buf)
  raw = buf.get_int
  raw < 2**31 ? raw : raw - 2**32
end
293
+
294
# Reads a 64-bit integer from +buf+ and returns it as a signed value; the
# same unsigned-to-signed adjustment as the 32-bit reader, with 2**64.
def deserialize_number_long_data(buf)
  raw = buf.get_long
  raw < 2**63 ? raw : raw - 2**64
end
299
+
300
# Decodes an embedded document from +buf+ with a fresh BSON_CODER.
#
# Returns a DBRef when the decoded document has a "$ref" key, otherwise the
# decoded document itself.
def deserialize_object_data(buf)
  size = buf.get_int
  # Step back over the size field so the slice handed to the nested coder
  # begins with it again.
  buf.position -= 4
  doc = BSON_CODER.new().deserialize(buf.get(size))
  return doc unless doc.has_key?("$ref")
  DBRef.new(doc["$ref"], doc["$id"])
end
310
+
311
# Decodes an array element: reads it as an embedded document, then places
# each value at the index given by its key converted with to_i.
def deserialize_array_data(buf)
  indexed = deserialize_object_data(buf)
  indexed.inject([]) do |list, (index, value)|
    list[index.to_i] = value
    list
  end
end
317
+
318
# Reads a regex element (pattern C string, then options C string) and builds
# a Regexp. The option letters i, m and x map to IGNORECASE, MULTILINE and
# EXTENDED; any other letters are ignored.
def deserialize_regex_data(buf)
  pattern = deserialize_cstr(buf)
  flags = deserialize_cstr(buf)
  flag_bits = [['i', Regexp::IGNORECASE], ['m', Regexp::MULTILINE], ['x', Regexp::EXTENDED]]
  options = flag_bits.inject(0) do |bits, (letter, bit)|
    flags.include?(letter) ? bits | bit : bits
  end
  Regexp.new(pattern, options)
end
327
+
328
# Reads a length-prefixed string from +buf+, drops its final element (the
# trailing terminator) and returns it, tagged as UTF-8 on Ruby 1.9+.
def deserialize_string_data(buf)
  len = buf.get_int
  payload = buf.get(len)[0..-2]
  # buf.get may hand back an array of byte values; turn it into a String.
  payload = payload.pack("C*") if payload.respond_to?("pack")
  payload.force_encoding("utf-8") if RUBY_VERSION >= '1.9'
  payload
end
340
+
341
# Reads a code-with-scope element and returns a Code built from the source
# string and the decoded scope document.
def deserialize_code_w_scope_data(buf)
  buf.get_int # leading size field; read and discarded
  code_len = buf.get_int
  source = buf.get(code_len)[0..-2] # drop the final (terminator) element
  source = source.pack("C*") if source.respond_to?("pack")
  source.force_encoding("utf-8") if RUBY_VERSION >= '1.9'

  scope_size = buf.get_int
  # Step back so the scope slice begins with its own size field.
  buf.position -= 4
  scope = BSON_CODER.new().deserialize(buf.get(scope_size))

  Code.new(source, scope)
end
358
+
359
# Reads 12 units from +buf+ and wraps them in an ObjectID.
def deserialize_oid_data(buf)
  raw_id = buf.get(12)
  ObjectID.new(raw_id)
end
362
+
363
# Reads a REF element: a length-prefixed namespace string followed by an
# object id, returned together as a DBRef.
def deserialize_dbref_data(buf)
  namespace = deserialize_string_data(buf)
  DBRef.new(namespace, deserialize_oid_data(buf))
end
368
+
369
# Reads a BINARY element and returns a Binary carrying the payload and its
# subtype.
def deserialize_binary_data(buf)
  len = buf.get_int
  subtype = buf.get
  # For Binary::SUBTYPE_BYTES a second length follows the subtype byte and
  # replaces the first one.
  len = buf.get_int if subtype == Binary::SUBTYPE_BYTES
  Binary.new(buf.get(len), subtype)
end
375
+
376
# Writes the EOO byte that terminates a document.
def serialize_eoo_element(buf)
  buf.put(EOO)
end
379
+
380
# Writes a NULL element: the type byte followed by the key. There is no
# value payload.
def serialize_null_element(buf, key)
  buf.put(NULL)
  self.class.serialize_key(buf, key)
end
384
+
385
# Writes a DBRef as an embedded document with '$ref' (val.namespace) and
# '$id' (val.object_id), in that order and without key checking.
def serialize_dbref_element(buf, key, val)
  ref_doc = OrderedHash.new
  ref_doc['$ref'] = val.namespace
  ref_doc['$id'] = val.object_id
  serialize_object_element(buf, key, ref_doc, false)
end
391
+
392
# Writes a BINARY element. +val+ supplies the payload via to_a and, when it
# responds to :subtype, the subtype; otherwise Binary::SUBTYPE_BYTES is used.
def serialize_binary_element(buf, key, val)
  buf.put(BINARY)
  self.class.serialize_key(buf, key)

  payload = val.to_a
  count = payload.length
  subtype = val.respond_to?(:subtype) ? val.subtype : Binary::SUBTYPE_BYTES
  if subtype == Binary::SUBTYPE_BYTES
    # This subtype carries a second, inner length; the outer length counts
    # those four extra bytes as well.
    buf.put_int(count + 4)
    buf.put(subtype)
    buf.put_int(count)
  else
    buf.put_int(count)
    buf.put(subtype)
  end
  buf.put_array(payload)
end
410
+
411
# Writes a BOOLEAN element: type byte, key, then 1 for a truthy +val+ and 0
# otherwise.
def serialize_boolean_element(buf, key, val)
  buf.put(BOOLEAN)
  self.class.serialize_key(buf, key)
  bit = val ? 1 : 0
  buf.put(bit)
end
416
+
417
# Writes a DATE element: type byte, key, then the time as a 64-bit count of
# milliseconds since the epoch. Sub-millisecond precision is dropped.
#
# val - a Time-like object responding to to_i and usec.
def serialize_date_element(buf, key, val)
  buf.put(DATE)
  self.class.serialize_key(buf, key)
  # Build the count from integer seconds and microseconds. Multiplying
  # val.to_f by 1000 and truncating can lose a whole millisecond
  # (1.001 s -> 1000.999... -> 1000).
  millisecs = (val.to_i * 1000) + (val.usec / 1000)
  buf.put_long(millisecs)
end
423
+
424
# Writes a numeric element.
#
# For type NUMBER the value is written as a double. Otherwise the integer is
# written as a 32-bit int when it fits, or as NUMBER_LONG (64-bit) when it
# does not. Raises RangeError for integers outside the signed 64-bit range.
def serialize_number_element(buf, key, val, type)
  if type == NUMBER
    buf.put(type)
    self.class.serialize_key(buf, key)
    buf.put_double(val)
  elsif val > 2**63 - 1 || val < -(2**63)
    raise RangeError.new("MongoDB can only handle 8-byte ints")
  elsif val > 2**31 - 1 || val < -(2**31)
    # Too wide for 32 bits: promote to the long type.
    buf.put(NUMBER_LONG)
    self.class.serialize_key(buf, key)
    buf.put_long(val)
  else
    buf.put(type)
    self.class.serialize_key(buf, key)
    buf.put_int(val)
  end
end
444
+
445
# Writes an embedded-document element: +opcode+ (OBJECT by default), the key,
# then the bytes produced by serializing +val+ with a fresh BSON_CODER.
def serialize_object_element(buf, key, val, check_keys, opcode=OBJECT)
  buf.put(opcode)
  self.class.serialize_key(buf, key)
  encoded = BSON_CODER.new.serialize(val, check_keys)
  buf.put_array(encoded.to_a)
end
450
+
451
# Writes an ARRAY element by converting +val+ into an ordered hash keyed by
# integer position and serializing that as a document with the ARRAY opcode.
def serialize_array_element(buf, key, val, check_keys)
  indexed = OrderedHash.new
  val.each_with_index { |element, index| indexed[index] = element }
  serialize_object_element(buf, key, indexed, check_keys, ARRAY)
end
458
+
459
+ def serialize_regex_element(buf, key, val)
460
+ buf.put(REGEX)
461
+ self.class.serialize_key(buf, key)
462
+
463
+ str = val.source
464
+ # We use serialize_key here since regex patterns aren't prefixed with
465
+ # length (can't contain the NULL byte).
466
+ self.class.serialize_key(buf, str)
467
+
468
+ options = val.options
469
+ options_str = ''
470
+ options_str << 'i' if ((options & Regexp::IGNORECASE) != 0)
471
+ options_str << 'm' if ((options & Regexp::MULTILINE) != 0)
472
+ options_str << 'x' if ((options & Regexp::EXTENDED) != 0)
473
+ options_str << val.extra_options_str if val.respond_to?(:extra_options_str)
474
+ # Must store option chars in alphabetical order
475
+ self.class.serialize_cstr(buf, options_str.split(//).sort.uniq.join)
476
+ end
477
+
478
# Writes a MAXKEY element: the type byte followed by the key, no payload.
def serialize_max_key_element(buf, key)
  buf.put(MAXKEY)
  self.class.serialize_key(buf, key)
end
482
+
483
# Writes a MINKEY element: the type byte followed by the key, no payload.
def serialize_min_key_element(buf, key)
  buf.put(MINKEY)
  self.class.serialize_key(buf, key)
end
487
+
488
# Writes an OID element: type byte, key, then the id's bytes from val.to_a.
def serialize_oid_element(buf, key, val)
  buf.put(OID)
  self.class.serialize_key(buf, key)
  id_bytes = val.to_a
  buf.put_array(id_bytes)
end
494
+
495
# Writes a string-like element (+type+ is STRING or SYMBOL): type byte, key,
# a 4-byte length, then the value as a C string. The length is the number of
# buffer positions the C string occupied, terminator included.
def serialize_string_element(buf, key, val, type)
  buf.put(type)
  self.class.serialize_key(buf, key)

  # Leave a slot for the length; it is only known after writing.
  size_slot = buf.position
  buf.put_int(0)

  body_start = buf.position
  self.class.serialize_cstr(buf, val)
  body_end = buf.position

  # Back-fill the length, then restore the write position to the end.
  buf.put_int(body_end - body_start, size_slot)
  buf.position = body_end
end
514
+
515
# Writes a CODE_W_SCOPE element: type byte, key, total length, the code as a
# length-prefixed C string, then the scope document.
#
# val - the code; it is written via serialize_cstr and must respond to
#       +scope+, which is serialized with a fresh BSON_CODER.
def serialize_code_w_scope(buf, key, val)
  buf.put(CODE_W_SCOPE)
  self.class.serialize_key(buf, key)

  # Make a hole for the total length
  total_pos = buf.position
  buf.put_int(0)

  # Make a hole for the code string's length too. It is measured from what
  # is actually written: val.length counts characters on Ruby 1.9+, which is
  # smaller than the byte count for non-ASCII code.
  str_len_pos = buf.position
  buf.put_int(0)
  str_start = buf.position
  self.class.serialize_cstr(buf, val)
  str_end = buf.position
  buf.put_int(str_end - str_start, str_len_pos)
  buf.position = str_end

  buf.put_array(BSON_CODER.new.serialize(val.scope).to_a)

  # Back-fill the total length, then go back to the end.
  end_pos = buf.position
  buf.put_int(end_pos - total_pos, total_pos)
  buf.position = end_pos
end
531
+
532
# Reads bytes from +buf+ up to (and consuming) the next 0 byte and returns
# them as a String, tagged as UTF-8 on Ruby 1.9+.
def deserialize_cstr(buf)
  text = ""
  until (byte = buf.get) == 0
    text << byte.chr
  end
  text.force_encoding("utf-8") if RUBY_VERSION >= '1.9' # Mongo stores UTF-8
  text
end
544
+
545
# Maps a Ruby object to the BSON type code used to serialize it.
#
# Raises InvalidDocument for objects that cannot be serialized: Numeric
# types other than Integer and Float, Date/DateTime,
# ActiveSupport::TimeWithZone, and anything unrecognized.
def bson_type(o)
  case o
  when nil         then NULL
  when Integer     then NUMBER_INT
  when Float       then NUMBER
  when ByteBuffer  then BINARY
  when Code        then CODE_W_SCOPE # must precede String (serialized as a C string, see serialize_code_w_scope)
  when String      then STRING
  when Array       then ARRAY
  when Regexp      then REGEX
  when ObjectID    then OID
  when DBRef       then REF
  when true, false then BOOLEAN
  when Time        then DATE
  when Hash        then OBJECT
  when Symbol      then SYMBOL
  when MaxKey      then MAXKEY
  when MinKey      then MINKEY
  when Numeric
    raise InvalidDocument, "Cannot serialize the Numeric type #{o.class} as BSON; only Fixnum, Bignum, and Float are supported."
  else
    # Date and DateTime exist only once the 'date' library is loaded, so the
    # constants are probed with defined? first. Naming them in a `when`
    # would raise NameError and hide the InvalidDocument below.
    if (defined?(Date) && o.is_a?(Date)) || (defined?(DateTime) && o.is_a?(DateTime))
      raise InvalidDocument, "#{o.class} is not currently supported; " +
            "use a UTC Time instance instead."
    elsif defined?(ActiveSupport::TimeWithZone) && o.is_a?(ActiveSupport::TimeWithZone)
      raise InvalidDocument, "ActiveSupport::TimeWithZone is not currently supported; " +
            "use a UTC Time instance instead."
    else
      raise InvalidDocument, "Cannot serialize #{o.class} as a BSON type; it either isn't supported or won't translate to BSON."
    end
  end
end
593
+
594
+ end
595
+ end