bson 1.2.0-jruby

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of bson might be problematic. Click here for more details.

@@ -0,0 +1,28 @@
1
+ include Java
2
module BSON
  # BSON encoder/decoder that delegates to the Java classes under
  # Java::OrgJbson and Java::OrgBson through JRuby.
  class BSON_JAVA
    class << self
      # TODO: Pool or cache instances of RubyBSONEncoder so that
      # we don't create a new one on each call to #serialize.

      # Encodes +obj+ with a fresh RubyBSONEncoder and wraps the encoder's
      # output in a ByteBuffer.
      #
      # Raises InvalidDocument unless +obj+ is a Hash.
      def serialize(obj, check_keys=false, move_id=false)
        unless obj.is_a?(Hash)
          raise InvalidDocument, "BSON_JAVA.serialize takes a Hash"
        end
        encoder = Java::OrgJbson::RubyBSONEncoder.new(JRuby.runtime, check_keys, move_id)
        ByteBuffer.new(encoder.encode(obj))
      end

      # Decodes the bytes of +buf.to_s+ with a BSONDecoder, feeding a
      # RubyBSONCallback, and returns whatever the callback collected.
      def deserialize(buf)
        decoder = Java::OrgBson::BSONDecoder.new
        collector = Java::OrgJbson::RubyBSONCallback.new(JRuby.runtime)
        decoder.decode(buf.to_s.to_java_bytes, collector)
        collector.get
      end

      # Asks the Java encoder for the current maximum BSON size.
      def max_bson_size
        Java::OrgJbson::RubyBSONEncoder.max_bson_size(self)
      end

      # Forwards +connection+ to the Java encoder so it can refresh the
      # maximum BSON size.
      def update_max_bson_size(connection)
        Java::OrgJbson::RubyBSONEncoder.update_max_bson_size(self, connection)
      end
    end
  end
end
@@ -0,0 +1,612 @@
1
+ # encoding: UTF-8
2
+
3
+ # --
4
+ # Copyright (C) 2008-2011 10gen Inc.
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License.
8
+ # You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+ # ++
18
+
19
+ module BSON
20
+ # A BSON serializer/deserializer in pure Ruby.
21
+ class BSON_RUBY
22
+
23
+ DEFAULT_MAX_BSON_SIZE = 4 * 1024 * 1024
24
+
25
+ @@max_bson_size = DEFAULT_MAX_BSON_SIZE
26
+
27
+ MINKEY = -1
28
+ EOO = 0
29
+ NUMBER = 1
30
+ STRING = 2
31
+ OBJECT = 3
32
+ ARRAY = 4
33
+ BINARY = 5
34
+ UNDEFINED = 6
35
+ OID = 7
36
+ BOOLEAN = 8
37
+ DATE = 9
38
+ NULL = 10
39
+ REGEX = 11
40
+ REF = 12
41
+ CODE = 13
42
+ SYMBOL = 14
43
+ CODE_W_SCOPE = 15
44
+ NUMBER_INT = 16
45
+ TIMESTAMP = 17
46
+ NUMBER_LONG = 18
47
+ MAXKEY = 127
48
+
49
# Creates an encoder with an empty ByteBuffer. @encoder holds the class
# that is instantiated for nested documents.
def initialize
  @buf, @encoder = ByteBuffer.new, BSON_RUBY
end
53
+
54
if RUBY_VERSION >= '1.9'
  NULL_BYTE = "\0".force_encoding('binary').freeze
  UTF8_ENCODING = Encoding.find('utf-8')
  BINARY_ENCODING = Encoding.find('binary')

  # Returns a copy of +str+ transcoded to UTF-8 and tagged as binary.
  #
  # Raises InvalidStringEncoding when the result is not valid UTF-8.
  # String#encode does not validate a string that is already tagged
  # UTF-8, so the check is made explicitly; this matches the behaviour
  # of the pre-1.9 branch below.
  def self.to_utf8_binary(str)
    utf8 = str.encode(UTF8_ENCODING)
    unless utf8.valid_encoding?
      raise InvalidStringEncoding, "String not valid utf-8: #{str.inspect}"
    end
    utf8.force_encoding(BINARY_ENCODING)
  end
else
  NULL_BYTE = "\0"

  # Returns +str+ unchanged after checking that it unpacks as UTF-8.
  #
  # Raises InvalidStringEncoding when unpacking fails.
  def self.to_utf8_binary(str)
    begin
      str.unpack("U*")
    rescue
      raise InvalidStringEncoding, "String not valid utf-8: #{str.inspect}"
    end
    str
  end
end
74
+
75
# Replaces the class-wide size limit with the value reported by
# +connection.max_bson_size+.
def self.update_max_bson_size(connection)
  limit = connection.max_bson_size
  @@max_bson_size = limit
end

# Returns the class-wide size limit currently in effect.
def self.max_bson_size
  @@max_bson_size
end
82
+
83
# Writes +val.to_s+ to +buf+ as a NUL-terminated string: the UTF-8
# payload first, then a single NULL_BYTE.
def self.serialize_cstr(buf, val)
  payload = to_utf8_binary(val.to_s)
  buf.put_binary(payload)
  buf.put_binary(NULL_BYTE)
end
87
+
88
# Writes +key+ as a NUL-terminated string.
#
# Raises InvalidDocument when +key+ contains a NULL byte, because that
# byte would end the string early.
def self.serialize_key(buf, key)
  if key.include?("\x00")
    raise InvalidDocument, "Key names / regex patterns must not contain the NULL byte"
  end
  self.serialize_cstr(buf, key)
end
92
+
93
# Returns the internal buffer converted with its own #to_a.
def to_a
  @buf.to_a
end

# Returns the internal buffer converted with its own #to_s.
def to_s
  @buf.to_s
end
100
+
101
# Serializes an object.
# Implemented to ensure an API compatible with BSON extension.
def self.serialize(obj, check_keys=false, move_id=false)
  encoder = new
  encoder.serialize(obj, check_keys, move_id)
end

# Deserializes +buf+ using a fresh instance.
def self.deserialize(buf=nil)
  decoder = new
  decoder.deserialize(buf)
end
110
+
111
# Serializes +obj+ into this encoder's buffer and returns that buffer.
#
# obj        - the Hash to serialize.
# check_keys - forwarded to serialize_key_value for every key except a
#              relocated _id.
# move_id    - when true, the _id entry (String or Symbol key) is
#              written before all other entries.
#
# Raises InvalidDocument when +obj+ is not a Hash, or when the encoded
# size exceeds @@max_bson_size.
def serialize(obj, check_keys=false, move_id=false)
  # nil is not a Hash, so this guard also covers a nil document; the
  # separate nil check that used to follow could never fire.
  unless obj.is_a?(Hash)
    raise(InvalidDocument, "BSON.serialize takes a Hash but got a #{obj.class}")
  end

  @buf.rewind
  # put in a placeholder for the total size
  @buf.put_int(0)

  # Write key/value pairs. Always write _id first if it exists.
  if move_id
    if obj.has_key? '_id'
      serialize_key_value('_id', obj['_id'], false)
    elsif obj.has_key? :_id
      serialize_key_value('_id', obj[:_id], false)
    end
    obj.each {|k, v| serialize_key_value(k, v, check_keys) unless k == '_id' || k == :_id }
  else
    # When both spellings of the key are present, fold the Symbol entry
    # into the String one. Note that this mutates the caller's Hash.
    if obj.has_key?('_id') && obj.has_key?(:_id)
      obj['_id'] = obj.delete(:_id)
    end
    obj.each {|k, v| serialize_key_value(k, v, check_keys) }
  end

  serialize_eoo_element(@buf)
  if @buf.size > @@max_bson_size
    raise InvalidDocument, "Document is too large (#{@buf.size}). BSON documents are limited to #{@@max_bson_size} bytes."
  end
  # Backfill the size placeholder at offset 0.
  @buf.put_int(@buf.size, 0)
  @buf
end
141
+
142
# Returns the array stored in the buffer.
# Implemented to ensure an API compatible with BSON extension.
#
# arg - accepted but not used.
def unpack(arg)
  @buf.to_a
end
147
+
148
# Writes one key/value pair to @buf, dispatching on the BSON type that
# bson_type reports for +v+.
#
# k          - the key; converted with #to_s.
# v          - the value.
# check_keys - when true, keys starting with '$' or containing '.' are
#              rejected with InvalidKeyName.
def serialize_key_value(k, v, check_keys)
  k = k.to_s
  if check_keys
    raise InvalidKeyName.new("key #{k} must not start with '$'") if k[0, 1] == '$'
    raise InvalidKeyName.new("key #{k} must not contain '.'") if k.include?('.')
  end

  type = bson_type(v)
  case type
  when STRING, SYMBOL      then serialize_string_element(@buf, k, v, type)
  when NUMBER, NUMBER_INT  then serialize_number_element(@buf, k, v, type)
  when OBJECT              then serialize_object_element(@buf, k, v, check_keys)
  when OID                 then serialize_oid_element(@buf, k, v)
  when ARRAY               then serialize_array_element(@buf, k, v, check_keys)
  when REGEX               then serialize_regex_element(@buf, k, v)
  when BOOLEAN             then serialize_boolean_element(@buf, k, v)
  when DATE                then serialize_date_element(@buf, k, v)
  when NULL                then serialize_null_element(@buf, k)
  when REF                 then serialize_dbref_element(@buf, k, v)
  when BINARY              then serialize_binary_element(@buf, k, v)
  when UNDEFINED           then serialize_null_element(@buf, k) # written as null
  when CODE_W_SCOPE        then serialize_code_w_scope(@buf, k, v)
  when MAXKEY              then serialize_max_key_element(@buf, k)
  when MINKEY              then serialize_min_key_element(@buf, k)
  else
    raise "unhandled type #{type}"
  end
end
194
+
195
# Decodes one BSON document into a BSON::OrderedHash.
#
# buf - a String of raw bytes, or any object responding to #to_a.
#
# Elements are read until an EOO marker or the end of the buffer; the
# buffer is rewound before the document is returned.
def deserialize(buf=nil)
  # If buf is nil, use @buf, assumed to contain already-serialized BSON.
  # This is only true during testing.
  if buf
    raw = buf.is_a?(String) ? buf.unpack("C*") : buf.to_a
    @buf = ByteBuffer.new(raw)
  end
  @buf.rewind
  @buf.get_int # eat message size
  doc = BSON::OrderedHash.new
  while @buf.more?
    type = @buf.get
    case type
    when EOO
      break
    when STRING, CODE
      key = deserialize_cstr(@buf)
      doc[key] = deserialize_string_data(@buf)
    when SYMBOL
      key = deserialize_cstr(@buf)
      doc[key] = deserialize_string_data(@buf).intern
    when NUMBER
      key = deserialize_cstr(@buf)
      doc[key] = deserialize_number_data(@buf)
    when NUMBER_INT
      key = deserialize_cstr(@buf)
      doc[key] = deserialize_number_int_data(@buf)
    when NUMBER_LONG
      key = deserialize_cstr(@buf)
      doc[key] = deserialize_number_long_data(@buf)
    when OID
      key = deserialize_cstr(@buf)
      doc[key] = deserialize_oid_data(@buf)
    when ARRAY
      key = deserialize_cstr(@buf)
      doc[key] = deserialize_array_data(@buf)
    when REGEX
      key = deserialize_cstr(@buf)
      doc[key] = deserialize_regex_data(@buf)
    when OBJECT
      key = deserialize_cstr(@buf)
      doc[key] = deserialize_object_data(@buf)
    when BOOLEAN
      key = deserialize_cstr(@buf)
      doc[key] = deserialize_boolean_data(@buf)
    when DATE
      key = deserialize_cstr(@buf)
      doc[key] = deserialize_date_data(@buf)
    when NULL, UNDEFINED
      # Both carry no payload and decode to nil.
      key = deserialize_cstr(@buf)
      doc[key] = nil
    when REF
      key = deserialize_cstr(@buf)
      doc[key] = deserialize_dbref_data(@buf)
    when BINARY
      key = deserialize_cstr(@buf)
      doc[key] = deserialize_binary_data(@buf)
    when CODE_W_SCOPE
      key = deserialize_cstr(@buf)
      doc[key] = deserialize_code_w_scope_data(@buf)
    when TIMESTAMP
      # Decoded as a two-element array of 32-bit ints, in wire order.
      key = deserialize_cstr(@buf)
      first = deserialize_number_int_data(@buf)
      second = deserialize_number_int_data(@buf)
      doc[key] = [first, second]
    when MAXKEY
      key = deserialize_cstr(@buf)
      doc[key] = MaxKey.new
    when MINKEY, 255 # This is currently easier than unpack the type byte as an unsigned char.
      key = deserialize_cstr(@buf)
      doc[key] = MinKey.new
    else
      # key still holds the key of the previously decoded element here.
      raise "Unknown type #{type}, key = #{key}"
    end
  end
  @buf.rewind
  doc
end
276
+
277
# For debugging.
#
# Renders the buffer as rows of eight bytes. Each row starts with the
# decimal offset of its first byte, right-aligned in four columns,
# followed by the bytes as two-digit upper-case hex.
def hex_dump
  bytes = @buf.to_a
  rows = []
  0.step(bytes.length - 1, 8) do |offset|
    cells = bytes[offset, 8].map { |b| '%02X' % b }
    rows << ('%4d: ' % offset) + cells.join(' ')
  end
  rows.join("\n")
end
291
+
292
# Reads a 64-bit millisecond timestamp from +buf+ and returns it as a
# UTC Time.
#
# The value from buf.get_long is treated as unsigned and reinterpreted
# as a signed two's-complement number. The Time is built from integer
# seconds and microseconds rather than a Float, so the millisecond
# value is preserved exactly (a Float quotient such as 1.001 is not
# exactly representable and loses a microsecond).
def deserialize_date_data(buf)
  unsigned = buf.get_long
  milliseconds = unsigned >= 2**63 ? unsigned - 2**64 : unsigned
  # divmod floors, so the remainder is non-negative for negative input.
  seconds, remainder = milliseconds.divmod(1000)
  Time.at(seconds, remainder * 1000).utc
end
297
+
298
# Reads one byte from +buf+; true only when that byte equals 1.
def deserialize_boolean_data(buf)
  flag = buf.get
  flag == 1
end

# Reads a double from +buf+ via its #get_double.
def deserialize_number_data(buf)
  buf.get_double
end
305
+
306
# Reads a 32-bit value from +buf+ and reinterprets it as signed
# (two's complement).
def deserialize_number_int_data(buf)
  value = buf.get_int
  value -= 2**32 if value >= 2**31
  value
end

# Reads a 64-bit value from +buf+ and reinterprets it as signed
# (two's complement).
def deserialize_number_long_data(buf)
  value = buf.get_long
  value -= 2**64 if value >= 2**63
  value
end
315
+
316
# Decodes an embedded document from +buf+.
#
# The document's size prefix is read and then un-read, so the nested
# decoder receives the full document including that prefix. A decoded
# document that has a "$ref" key is returned as a DBRef built from its
# "$ref" and "$id" entries.
def deserialize_object_data(buf)
  size = buf.get_int
  buf.position -= 4
  nested = @encoder.new().deserialize(buf.get(size))
  return nested unless nested.has_key? "$ref"
  DBRef.new(nested["$ref"], nested["$id"])
end
326
+
327
# Decodes an array, which is read as an embedded document whose keys
# are the element indices. Each key is converted with #to_i and used
# as the position of its value in the result.
def deserialize_array_data(buf)
  indexed = deserialize_object_data(buf)
  elements = []
  indexed.each do |index, value|
    elements[index.to_i] = value
  end
  elements
end
333
+
334
# Decodes a regular expression: a pattern string followed by an
# options string. Only the 'i', 'm' and 'x' option characters are
# translated into Regexp flags; other characters are ignored.
def deserialize_regex_data(buf)
  pattern = deserialize_cstr(buf)
  options_str = deserialize_cstr(buf)
  flag_for = [
    ['i', Regexp::IGNORECASE],
    ['m', Regexp::MULTILINE],
    ['x', Regexp::EXTENDED]
  ]
  opts = 0
  flag_for.each do |char, bit|
    opts |= bit if options_str.include?(char)
  end
  Regexp.new(pattern, opts)
end
343
+
344
# Tags +str+ as UTF-8 in place and, when Encoding.default_internal is
# set, transcodes it in place to that encoding. On Ruby versions
# before 1.9 the string is returned untouched.
def encoded_str(str)
  return str unless RUBY_VERSION >= '1.9'
  str.force_encoding("utf-8")
  internal = Encoding.default_internal
  str.encode!(internal) if internal
  str
end
353
+
354
# Decodes a length-prefixed string: reads the length, reads that many
# bytes, drops the last one (the terminator) and returns the rest
# passed through encoded_str. A byte array is packed into a String
# first.
def deserialize_string_data(buf)
  len = buf.get_int
  str = buf.get(len)[0..-2]
  str = str.pack("C*") if str.respond_to? "pack"
  encoded_str(str)
end
363
+
364
# Decodes a "code with scope" value into a Code.
#
# Reads and discards the total length, reads the length-prefixed code
# string (dropping its terminator), then decodes the scope document
# with a nested decoder. The scope's size prefix is read and un-read
# so the nested decoder sees the whole document.
def deserialize_code_w_scope_data(buf)
  buf.get_int # total length, not needed
  code_len = buf.get_int
  source = buf.get(code_len)[0..-2]
  source = source.pack("C*") if source.respond_to? "pack"

  scope_size = buf.get_int
  buf.position -= 4
  scope_bytes = buf.get(scope_size)
  scope = @encoder.new().deserialize(scope_bytes)

  Code.new(encoded_str(source), scope)
end
378
+
379
# Reads 12 bytes from +buf+ and wraps them in an ObjectId.
def deserialize_oid_data(buf)
  oid_bytes = buf.get(12)
  ObjectId.new(oid_bytes)
end

# Reads a length-prefixed namespace string followed by an object id and
# returns them as a DBRef.
def deserialize_dbref_data(buf)
  namespace = deserialize_string_data(buf)
  DBRef.new(namespace, deserialize_oid_data(buf))
end
388
+
389
# Decodes a binary value into a Binary.
#
# Reads the length and the subtype byte. For Binary::SUBTYPE_BYTES a
# second length follows the subtype and replaces the first.
def deserialize_binary_data(buf)
  len = buf.get_int
  subtype = buf.get
  if subtype == Binary::SUBTYPE_BYTES
    len = buf.get_int
  end
  payload = buf.get(len)
  Binary.new(payload, subtype)
end
395
+
396
# Writes the end-of-object marker byte (EOO) to +buf+.
def serialize_eoo_element(buf)
  buf.put(EOO)
end

# Writes a null element: the NULL type byte followed by +key+.
# A null element has no payload.
def serialize_null_element(buf, key)
  buf.put(NULL)
  self.class.serialize_key(buf, key)
end
404
+
405
# Writes a DBRef as an embedded document with two entries, in order:
# "$ref" (val.namespace) and "$id" (val.object_id). Key checking is
# disabled for this document because its keys start with '$'.
def serialize_dbref_element(buf, key, val)
  reference = BSON::OrderedHash.new
  reference['$ref'] = val.namespace
  reference['$id'] = val.object_id
  serialize_object_element(buf, key, reference, false)
end
411
+
412
# Writes a binary element.
#
# The subtype is val.subtype when +val+ responds to it, otherwise
# Binary::SUBTYPE_BYTES. For SUBTYPE_BYTES the payload carries its own
# inner length, so the outer length is four bytes larger and the inner
# length is written after the subtype byte.
def serialize_binary_element(buf, key, val)
  buf.put(BINARY)
  self.class.serialize_key(buf, key)

  bytes = val.to_a
  num_bytes = bytes.length
  subtype = val.respond_to?(:subtype) ? val.subtype : Binary::SUBTYPE_BYTES
  nested_length = (subtype == Binary::SUBTYPE_BYTES)

  buf.put_int(nested_length ? num_bytes + 4 : num_bytes)
  buf.put(subtype)
  buf.put_int(num_bytes) if nested_length
  buf.put_array(bytes)
end
430
+
431
# Writes a boolean element: the BOOLEAN type byte, the key, then one
# byte that is 1 when +val+ is truthy and 0 otherwise.
def serialize_boolean_element(buf, key, val)
  buf.put(BOOLEAN)
  self.class.serialize_key(buf, key)
  flag = val ? 1 : 0
  buf.put(flag)
end
436
+
437
# Writes a date element: the DATE type byte, the key, then the time as
# a 64-bit count of milliseconds since the epoch.
#
# The millisecond count is computed from the integer seconds and
# microseconds of +val+. Going through Float (val.to_f * 1000) loses a
# millisecond for values such as 1.001 s, whose product evaluates to
# 1000.9999999999999. Sub-millisecond precision is discarded.
def serialize_date_element(buf, key, val)
  buf.put(DATE)
  self.class.serialize_key(buf, key)
  millisecs = val.to_i * 1000 + val.usec / 1000
  buf.put_long(millisecs)
end
443
+
444
# Writes a numeric element.
#
# When +type+ is NUMBER the value is written as a double. Otherwise it
# is written as a 32-bit int when it fits in the signed 32-bit range,
# and as NUMBER_LONG (64-bit) when it does not.
#
# Raises RangeError for integers outside the signed 64-bit range.
def serialize_number_element(buf, key, val, type)
  if type == NUMBER
    buf.put(type)
    self.class.serialize_key(buf, key)
    buf.put_double(val)
  else
    unless (-2**63..2**63 - 1).include?(val)
      raise RangeError.new("MongoDB can only handle 8-byte ints")
    end
    if (-2**31..2**31 - 1).include?(val)
      buf.put(type)
      self.class.serialize_key(buf, key)
      buf.put_int(val)
    else
      buf.put(NUMBER_LONG)
      self.class.serialize_key(buf, key)
      buf.put_long(val)
    end
  end
end
464
+
465
# Writes an embedded document: the type byte (+opcode+, OBJECT by
# default), the key, then the bytes produced by serializing +val+ with
# a fresh nested encoder.
def serialize_object_element(buf, key, val, check_keys, opcode=OBJECT)
  buf.put(opcode)
  self.class.serialize_key(buf, key)
  nested = @encoder.new.serialize(val, check_keys)
  buf.put_array(nested.to_a)
end
470
+
471
# Writes an array element by turning +val+ into an ordered hash keyed
# by integer index and serializing that as a document with the ARRAY
# type byte.
def serialize_array_element(buf, key, val, check_keys)
  indexed = BSON::OrderedHash.new
  val.each_with_index do |element, index|
    indexed[index] = element
  end
  serialize_object_element(buf, key, indexed, check_keys, ARRAY)
end
478
+
479
+ def serialize_regex_element(buf, key, val)
480
+ buf.put(REGEX)
481
+ self.class.serialize_key(buf, key)
482
+
483
+ str = val.source
484
+ # We use serialize_key here since regex patterns aren't prefixed with
485
+ # length (can't contain the NULL byte).
486
+ self.class.serialize_key(buf, str)
487
+
488
+ options = val.options
489
+ options_str = ''
490
+ options_str << 'i' if ((options & Regexp::IGNORECASE) != 0)
491
+ options_str << 'm' if ((options & Regexp::MULTILINE) != 0)
492
+ options_str << 'x' if ((options & Regexp::EXTENDED) != 0)
493
+ options_str << val.extra_options_str if val.respond_to?(:extra_options_str)
494
+ # Must store option chars in alphabetical order
495
+ self.class.serialize_cstr(buf, options_str.split(//).sort.uniq.join)
496
+ end
497
+
498
# Writes a max-key element: the MAXKEY type byte followed by the key.
# The element has no payload.
def serialize_max_key_element(buf, key)
  buf.put(MAXKEY)
  self.class.serialize_key(buf, key)
end

# Writes a min-key element: the MINKEY type byte followed by the key.
# The element has no payload.
def serialize_min_key_element(buf, key)
  buf.put(MINKEY)
  self.class.serialize_key(buf, key)
end
507
+
508
# Writes an object-id element: the OID type byte, the key, then the
# bytes returned by val.to_a.
def serialize_oid_element(buf, key, val)
  buf.put(OID)
  self.class.serialize_key(buf, key)

  oid_bytes = val.to_a
  buf.put_array(oid_bytes)
end
514
+
515
# Writes a length-prefixed string element (+type+ is the type byte).
#
# The length is not known until the string has been encoded, so four
# bytes are reserved, the NUL-terminated string is written, and the
# number of bytes written is then stored in the reserved slot.
def serialize_string_element(buf, key, val, type)
  buf.put(type)
  self.class.serialize_key(buf, key)

  # Reserve the length slot.
  length_slot = buf.position
  buf.put_int(0)

  # Write the string and measure how far the buffer advanced.
  body_start = buf.position
  self.class.serialize_cstr(buf, val)
  body_end = buf.position

  # Fill in the length, then restore the write position.
  buf.put_int(body_end - body_start, length_slot)
  buf.position = body_end
end
534
+
535
# Writes a "code with scope" element: the CODE_W_SCOPE type byte, the
# key, the total length, the length-prefixed code string, and the scope
# serialized as a document by a fresh nested encoder.
#
# Both lengths are backfilled after the data is written. The code
# string's length is measured from the buffer position rather than
# taken from val.code.length, because the prefix must count encoded
# bytes and #length counts characters on Ruby 1.9+, which is too small
# for multibyte code.
def serialize_code_w_scope(buf, key, val)
  buf.put(CODE_W_SCOPE)
  self.class.serialize_key(buf, key)

  # Make a hole for the total length
  len_pos = buf.position
  buf.put_int(0)

  # Make a hole for the code string's byte length
  code_len_pos = buf.position
  buf.put_int(0)

  code_start = buf.position
  self.class.serialize_cstr(buf, val.code)
  code_end = buf.position

  # Fill in the string length, then go back to where we were
  buf.put_int(code_end - code_start, code_len_pos)
  buf.position = code_end

  buf.put_array(@encoder.new.serialize(val.scope).to_a)

  # Fill in the total length, then go back to where we were
  end_pos = buf.position
  buf.put_int(end_pos - len_pos, len_pos)
  buf.position = end_pos
end
551
+
552
# Reads bytes from +buf+ up to and including the next zero byte and
# returns the bytes before it as a string passed through encoded_str.
def deserialize_cstr(buf)
  chars = ""
  until (byte = buf.get) == 0
    chars << byte.chr
  end
  encoded_str(chars)
end
561
+
562
# Returns the BSON type constant used to serialize +o+.
#
# Raises InvalidDocument for values that cannot be serialized:
# Numeric types other than Integer and Float, Date / DateTime,
# ActiveSupport::TimeWithZone, and any other unrecognized class.
#
# The order of the branches matters: Integer and Float are matched
# before the general Numeric rejection.
def bson_type(o)
  case o
  when nil
    NULL
  when Integer
    NUMBER_INT
  when Float
    NUMBER
  when ByteBuffer
    BINARY
  when Code
    CODE_W_SCOPE
  when String
    STRING
  when Array
    ARRAY
  when Regexp
    REGEX
  when ObjectId
    OID
  when DBRef
    REF
  when true, false
    BOOLEAN
  when Time
    DATE
  when Hash
    OBJECT
  when Symbol
    SYMBOL
  when MaxKey
    MAXKEY
  when MinKey
    MINKEY
  when Numeric
    raise InvalidDocument, "Cannot serialize the Numeric type #{o.class} as BSON; only Fixnum, Bignum, and Float are supported."
  else
    # Date and DateTime exist only once the 'date' library is loaded.
    # Referencing them unguarded in a `when` clause would raise
    # NameError for every unsupported object when it is not loaded.
    if (defined?(Date) && o.is_a?(Date)) || (defined?(DateTime) && o.is_a?(DateTime))
      raise InvalidDocument, "#{o.class} is not currently supported; " +
        "use a UTC Time instance instead."
    elsif defined?(ActiveSupport::TimeWithZone) && o.is_a?(ActiveSupport::TimeWithZone)
      raise InvalidDocument, "ActiveSupport::TimeWithZone is not currently supported; " +
        "use a UTC Time instance instead."
    else
      raise InvalidDocument, "Cannot serialize #{o.class} as a BSON type; it either isn't supported or won't translate to BSON."
    end
  end
end
610
+
611
+ end
612
+ end