mongo-proxy 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,526 @@
1
+ require 'bson'
2
+
3
+ # This is a set of functions for dealing with the Mongo wire protocol. It has
4
+ # methods for moving between Ruby hashes and Mongo wire segments, including
5
+ # their embedded BSON. The MongoDB wire protocol is documented at:
6
+ #   https://www.mongodb.com/docs/manual/reference/mongodb-wire-protocol/
7
module WireMongo

  # Size in bytes of the fixed message header (four little-endian int32s).
  HEADER_SIZE = 16

  OP_REPLY = :reply
  OP_MSG = :msg
  OP_UPDATE = :update
  OP_INSERT = :insert
  OP_QUERY = :query
  OP_GET_MORE = :get_more
  OP_DELETE = :delete
  OP_KILL_CURSORS = :kill_cursors

  # Numeric wire opcode => symbolic name used in parsed documents.
  OPS = {
    1 => OP_REPLY,
    1000 => OP_MSG,
    2001 => OP_UPDATE,
    2002 => OP_INSERT,
    2004 => OP_QUERY,
    2005 => OP_GET_MORE,
    2006 => OP_DELETE,
    2007 => OP_KILL_CURSORS
  }
  OPS_INVERTED = OPS.invert

  FLAG_UPDATE_UPSERT = 1
  FLAG_UPDATE_MULTIUPDATE = (1 << 1)
  FLAG_DELETE_MULTI = 1

  # Smallest possible BSON document: an int32 length plus the trailing NUL.
  MIN_BSON_SIZE = 5

  # Opcode => name of the method that parses that message body.
  # OP_MSG is deliberately absent: it is recognised but not parsed.
  BODY_PARSERS = {
    OP_REPLY => :receive_reply,
    OP_UPDATE => :receive_update,
    OP_INSERT => :receive_insert,
    OP_QUERY => :receive_query,
    OP_DELETE => :receive_delete,
    OP_GET_MORE => :receive_get_more,
    OP_KILL_CURSORS => :receive_kill_cursors
  }

  # Opcode => name of the method that serializes that message body.
  BODY_WRITERS = {
    OP_REPLY => :write_reply,
    OP_UPDATE => :write_update,
    OP_INSERT => :write_insert,
    OP_QUERY => :write_query,
    OP_DELETE => :write_delete,
    OP_GET_MORE => :write_get_more,
    OP_KILL_CURSORS => :write_kill_cursors
  }

  # Parse out an arbitrary binary mongo message, returning a hash
  # representation for easy manipulation.
  #
  # socket - An IO-like object responding to read(n), or a String holding
  #          the raw message bytes (the String itself is not modified).
  #
  # Returns [raw_message, parsed_hash]. The raw message is tagged UTF-8 and
  # parsed_hash always carries the decoded header under :header. Returns
  # [nil, nil] when a complete header cannot be read, and nil when the
  # message is malformed, truncated, or any other error occurs while reading.
  def self.receive(socket)
    if socket.is_a?(String)
      # Wrap a copy so the caller's string is never re-tagged or required
      # to be unfrozen.
      socket = StringIO.new(socket.dup)
      socket.set_encoding('UTF-8', 'UTF-8')
    end

    raw_header, header = receive_header(socket)
    return nil, nil unless raw_header && header

    body_size = header[:messageLength] - HEADER_SIZE
    raise "invalid messageLength #{header[:messageLength]}" if body_size < 0

    raw_body = socket.read(body_size)
    unless raw_body && raw_body.bytesize == body_size
      raise "truncated message body (expected #{body_size} bytes)"
    end

    parser = BODY_PARSERS[header[:opCode]]
    if parser
      parsed = send(parser, raw_body)
    else
      puts "could not parse message type :#{header[:opCode]}:"
      parsed = {}
    end

    parsed[:header] = header
    full = (raw_header + raw_body).force_encoding('UTF-8')
    return full, parsed
  rescue StandardError => e
    # StandardError only: interrupts and exit requests must not be swallowed.
    log_warning("failed to read from socket #{socket.to_s} (#{e.class}: #{e.message})")
    return nil
  end

  # Write a hash document representation into its corresponding binary form.
  # This method can be used with documents in the format that receive returns,
  # making it easy to parse a message, change it, and re-encode it.
  #
  # Returns the full wire message (header + body) tagged UTF-8, or nil when
  # the opCode in doc[:header] has no writer.
  def self.write(doc)
    op_code = doc[:header][:opCode]
    writer = BODY_WRITERS[op_code]

    unless writer
      puts "could not write message type :#{op_code}:"
      return nil
    end

    body = send(writer, doc).force_encoding('UTF-8')
    write_header(doc[:header], body)
  end

  # Receive the Mongo Wire message header from a stream.
  #
  # int32 :messageLength - Length in bytes of the whole message, header included.
  # int32 :requestID - Identifier of this message.
  # int32 :responseTo - RequestID from the original request.
  # int32 :opCode - Message type (mapped through OPS; nil when unknown).
  #
  # Returns [raw_header_bytes, header_hash], or nil when fewer than
  # HEADER_SIZE bytes could be read.
  def self.receive_header(stream)
    chunk = stream.read(HEADER_SIZE)
    return nil unless chunk && chunk.bytesize == HEADER_SIZE

    length, request_id, response_to, op_number = chunk.unpack('VVVV')
    header = {
      :messageLength => length,
      :requestID => request_id,
      :responseTo => response_to,
      :opCode => OPS[op_number]
    }
    return chunk, header
  end

  # Prefix an encoded body with its message header.
  #
  # doc  - Header hash; :requestID and :opCode are required, :responseTo
  #        defaults to 0. :messageLength is recomputed from the body.
  # body - Encoded message body.
  #
  # Returns header + body as a single String tagged UTF-8.
  # Raises RuntimeError when :requestID or :opCode is missing or :opCode is
  # not a known operation.
  def self.write_header(doc, body)
    raise 'no requestID' unless doc[:requestID]
    raise 'no opCode' unless doc[:opCode]
    op_number = OPS_INVERTED[doc[:opCode]]
    raise "unknown opCode #{doc[:opCode].inspect}" unless op_number

    response_to = doc[:responseTo] || 0
    length = body.bytesize + HEADER_SIZE
    header = [length, doc[:requestID], response_to, op_number].pack('VVVV')

    # Join as raw bytes so differently-tagged inputs can never raise an
    # encoding-compatibility error, then tag the result like write does.
    message = header + body.dup.force_encoding(Encoding::BINARY)
    message.force_encoding('UTF-8')
  end

  # Decode consecutive BSON documents embedded in a message body.
  #
  # chunk - Message body bytes.
  # start - Byte offset of the first document.
  # max   - Maximum number of documents to decode.
  #
  # Returns [documents, offset_after_last_document], or [nil, nil] when a
  # document has an impossible length or cannot be deserialized.
  def self.receive_bson(chunk, start, max = 10000)
    docs = []

    while start < chunk.bytesize && docs.size < max
      # Byte-based slicing: the body is binary data whatever its encoding tag.
      bson_length = chunk.byteslice(start, 4).unpack('V').first
      raw = chunk.byteslice(start, bson_length || 4)

      begin
        # A length below the BSON minimum would never advance the cursor;
        # one beyond the chunk means the document is cut short.
        if bson_length.nil? || bson_length < MIN_BSON_SIZE || raw.bytesize < bson_length
          raise ArgumentError, "invalid BSON length #{bson_length.inspect}"
        end
        docs << BSON.deserialize(raw)
      rescue StandardError
        puts 'could not deserialize BSON:'
        p raw
        return nil, nil
      end

      start += bson_length
    end

    return docs, start
  end

  # Serialize one document (Hash) or a list of documents into concatenated
  # BSON bytes.
  def self.write_bson(docs)
    docs = [docs] if docs.is_a?(Hash)

    docs.each_with_object(String.new) do |doc, out|
      out << BSON.serialize(doc).to_s
    end
  end

  # Smaller of two comparable values.
  def self.min(a, b)
    (a > b ? b : a)
  end

  # Split "database.collection" at the first dot. Collection names may
  # themselves contain dots and are preserved verbatim.
  #
  # Returns [database, collection]; a missing part is returned as ''.
  def self.parse_full_collection(full_collection)
    database, collection = full_collection.to_s.split('.', 2)
    return database.to_s, collection.to_s
  end

  # Join a database and collection name into the wire "database.collection" form.
  def self.build_full_collection(database, collection)
    return "#{database}.#{collection}"
  end

  # OP_REPLY: 1
  # A reply to a client request.
  #
  # header :header - Message header.
  # int32 :responseFlags - A bit vector of response flags.
  # int64 :cursorID - ID of open cursor, if there is one. 0 otherwise.
  # int32 :startingFrom - Offset in cursor of this reply message.
  # int32 :numberReturned - Number of documents in the reply.
  # document[] :documents - The returned documents (nil if they fail to parse).
  def self.receive_reply(chunk)
    flags, cursor_id, starting_from, number_returned = chunk.unpack('VQ<VV')
    # Documents follow the 4 + 8 + 4 + 4 = 20 bytes of fixed fields.
    documents, _ = receive_bson(chunk, 20, number_returned)

    return {
      :responseFlags => flags,
      :cursorID => cursor_id,
      :startingFrom => starting_from,
      :numberReturned => number_returned,
      :documents => documents
    }
  end

  # Build an OP_REPLY document suitable for write. A single Hash is accepted
  # in place of a list of documents.
  def self.build_reply(documents, request_id, response_to,
      response_flags = 0, cursor_id = 0, starting_from = 0)
    documents = [documents] if documents.is_a?(Hash)

    return {
      :responseFlags => response_flags,
      :startingFrom => starting_from,
      :numberReturned => documents.size,
      :cursorID => cursor_id,
      :documents => documents,
      :header => {
        :requestID => request_id,
        :responseTo => response_to,
        :opCode => OP_REPLY
      }
    }
  end

  # Encode the body of an OP_REPLY. :numberReturned defaults to the number
  # of documents supplied.
  #
  # Raises RuntimeError when the header has no :responseTo or :documents is missing.
  def self.write_reply(doc)
    raise 'no responseTo' unless doc[:header][:responseTo]
    raise 'no documents' unless doc[:documents]

    documents = doc[:documents]
    documents = [documents] if documents.is_a?(Hash)
    number_returned = doc[:numberReturned] || documents.size

    msg = [doc[:responseFlags] || 0, doc[:cursorID] || 0,
           doc[:startingFrom] || 0, number_returned].pack('VQ<VV')
    msg << write_bson(documents)

    return msg
  end

  # OP_UPDATE: 2001
  # A MongoDB update query message.
  #
  # header :header - Message header.
  # int32 - An empty value.
  # string :database.:collection - Database and collection name for update.
  # int32 :flags - Bit vector of update flags.
  # document :selector - BSON document representing update target.
  # document :update - BSON document representing the update to perform.
  #
  # Raises RuntimeError when the embedded documents cannot be parsed.
  def self.receive_update(chunk)
    _, full, flags = chunk.unpack('VZ*V')
    # TODO break out flags
    # Documents start after: int32 zero + name + NUL + int32 flags.
    docs, _ = receive_bson(chunk, full.bytesize + 9, 2)
    raise 'could not parse update documents' unless docs
    database, collection = parse_full_collection(full)

    return {
      :flags => flags,
      :database => database,
      :collection => collection,
      :selector => docs[0],
      :update => docs[1]
    }
  end

  # Build an OP_UPDATE document. flags is a list that may contain :upsert
  # and/or :multi.
  def self.build_update(request_id, database_name, collection_name, selector, update, flags = [])
    flag = 0
    flag |= FLAG_UPDATE_UPSERT if flags.include?(:upsert)
    flag |= FLAG_UPDATE_MULTIUPDATE if flags.include?(:multi)

    return {
      :header => {
        :opCode => OP_UPDATE,
        :requestID => request_id,
        :responseTo => 0
      },
      :database => database_name,
      :collection => collection_name,
      :selector => selector,
      :update => update,
      :flags => flag
    }
  end

  # Encode the body of an OP_UPDATE.
  #
  # Raises RuntimeError when the collection info, selector or update is missing.
  def self.write_update(doc)
    raise 'missing collection info' unless doc[:database] && doc[:collection]
    raise 'missing selector' unless doc[:selector]
    raise 'missing update' unless doc[:update]

    full = build_full_collection(doc[:database], doc[:collection])
    msg = [0, full, doc[:flags] || 0].pack('VZ*V')
    msg << write_bson(doc[:selector])
    msg << write_bson(doc[:update])

    return msg
  end

  # OP_INSERT: 2002
  #
  # header :header - Message header.
  # int32 :flags - Bit vector flags.
  # string :database.:collection - Database + collection name.
  # document[] :documents - An array of BSON documents (nil if they fail to parse).
  def self.receive_insert(chunk)
    flags, full = chunk.unpack('VZ*')
    database, collection = parse_full_collection(full)
    # Documents start after: int32 flags + name + NUL.
    documents, _ = receive_bson(chunk, full.bytesize + 5)

    return {
      :flags => flags,
      :database => database,
      :collection => collection,
      :documents => documents
    }
  end

  # Build an OP_INSERT document. A single Hash is accepted in place of a
  # list of documents.
  def self.build_insert(request_id, database_name, collection_name, documents, flags = 0)
    documents = [documents] if documents.is_a?(Hash)

    return {
      :flags => flags,
      :database => database_name,
      :collection => collection_name,
      :documents => documents,
      :header => {
        :requestID => request_id,
        :responseTo => 0,
        :opCode => OP_INSERT
      }
    }
  end

  # Encode the body of an OP_INSERT.
  #
  # Raises RuntimeError when the collection info or documents are missing.
  def self.write_insert(doc)
    raise 'missing full collection' unless doc[:database] && doc[:collection]
    raise 'missing documents' unless doc[:documents]

    full = build_full_collection(doc[:database], doc[:collection])
    msg = [doc[:flags] || 0, full].pack('VZ*')
    msg << write_bson(doc[:documents])

    return msg
  end

  # OP_QUERY: 2004
  #
  # header :header - Message header.
  # int32 :flags - A bit vector of query flags.
  # string :database.:collection - Database + collection name.
  # int32 :numberToSkip - Offset for results.
  # int32 :numberToReturn - Limit for results.
  # document :query - BSON document of query.
  # document :returnFieldSelector - Optional BSON document to select fields in response.
  #
  # Raises RuntimeError when the embedded documents cannot be parsed.
  def self.receive_query(chunk)
    flags, full, number_to_skip, number_to_return = chunk.unpack('VZ*VV')
    # Documents start after three int32 fields + name + NUL.
    offset = 3 * 4 + full.bytesize + 1
    database, collection = parse_full_collection(full)
    docs, _ = receive_bson(chunk, offset, 2)
    raise 'could not parse query documents' unless docs

    return {
      :flags => flags,
      :numberToSkip => number_to_skip,
      :numberToReturn => number_to_return,
      :database => database,
      :collection => collection,
      :query => docs[0],
      :returnFieldSelector => docs[1]
    }
  end

  # Build an OP_QUERY document. num_to_return defaults to the largest value
  # the unsigned int32 wire field can carry.
  def self.build_query(request_id, database_name, collection_name,
      query = {}, fields = nil, num_to_return = 4294967295, number_to_skip = 0, flags = 0)
    {
      :header => {
        :opCode => OP_QUERY,
        :requestID => request_id,
        :responseTo => 0
      },
      :database => database_name,
      :collection => collection_name,
      :query => query,
      :returnFieldSelector => fields,
      :numberToReturn => num_to_return,
      :flags => flags,
      :numberToSkip => number_to_skip
    }
  end

  # Encode the body of an OP_QUERY. The field selector is only written when
  # one is present.
  #
  # Raises RuntimeError when the collection info is missing.
  def self.write_query(doc)
    raise 'missing full collection name' unless doc[:database] && doc[:collection]

    full = build_full_collection(doc[:database], doc[:collection])
    field_selector = doc[:returnFieldSelector]

    msg = [doc[:flags] || 0, full, doc[:numberToSkip] || 0,
           doc[:numberToReturn] || 4294967295].pack('VZ*VV')
    msg << write_bson([doc[:query] || {}])
    msg << write_bson([field_selector]) if field_selector

    return msg
  end

  # OP_GET_MORE: 2005
  #
  # header :header - Message header.
  # int32 - Empty.
  # string :database.:collection - Database + collection name.
  # int32 :numberToReturn - Limit for next results reply.
  # int64 :cursorID - ID of cursor to consume more from.
  def self.receive_get_more(chunk)
    _, full, number_to_return, cursor_id = chunk.unpack('VZ*VQ<')
    database, collection = parse_full_collection(full)

    return {
      :numberToReturn => number_to_return,
      :cursorID => cursor_id,
      :database => database,
      :collection => collection
    }
  end

  # Build an OP_GET_MORE document.
  def self.build_get_more(request_id, response_to, database_name, collection_name, cursor_id, number_to_return = 0)
    {
      :header => {
        :opCode => OP_GET_MORE,
        :requestID => request_id,
        :responseTo => response_to
      },
      :database => database_name,
      :collection => collection_name,
      :cursorID => cursor_id,
      :numberToReturn => number_to_return
    }
  end

  # Encode the body of an OP_GET_MORE.
  #
  # Raises RuntimeError when the collection info or cursor id is missing.
  def self.write_get_more(doc)
    raise 'missing full collection name' unless doc[:database] && doc[:collection]
    raise 'missing cursorID' unless doc[:cursorID]

    full = build_full_collection(doc[:database], doc[:collection])
    return [0, full, doc[:numberToReturn] || 0, doc[:cursorID]].pack('VZ*VQ<')
  end

  # OP_DELETE: 2006
  #
  # header :header - Message header.
  # int32 - Empty.
  # string :database.:collection - Database + collection name.
  # int32 :flags - Bit vector of delete-related flags.
  # document :selector - Selector for deletion.
  #
  # Raises RuntimeError when the selector document cannot be parsed.
  def self.receive_delete(chunk)
    _, full, flags = chunk.unpack('VZ*V')
    # Selector starts after: int32 zero + name + NUL + int32 flags.
    docs, _ = receive_bson(chunk, full.bytesize + 9, 1)
    raise 'could not parse delete selector' unless docs
    database, collection = parse_full_collection(full)

    return {
      :flags => flags,
      :database => database,
      :collection => collection,
      :selector => docs[0]
    }
  end

  # Build an OP_DELETE document. opt is a list that may contain :multi.
  def self.build_delete(request_id, database_name, collection_name, selector, opt = [])
    flags = opt.include?(:multi) ? FLAG_DELETE_MULTI : 0

    {
      :header => {
        :opCode => OP_DELETE,
        :requestID => request_id,
        :responseTo => 0
      },
      :database => database_name,
      :collection => collection_name,
      :selector => selector,
      :flags => flags
    }
  end

  # Encode the body of an OP_DELETE.
  #
  # Raises RuntimeError when the collection info or selector is missing.
  def self.write_delete(doc)
    raise 'missing full collection name' unless doc[:database] && doc[:collection]
    raise 'missing selector' unless doc[:selector]

    full = build_full_collection(doc[:database], doc[:collection])
    msg = [0, full, doc[:flags] || 0].pack('VZ*V')
    msg << write_bson(doc[:selector])

    return msg
  end

  # OP_KILL_CURSORS: 2007
  # Message to explicitly delete a cursor. Only sent from the client in very
  # specific circumstances, cursors can also time out.
  #
  # header :header - Message header.
  # int32 - Empty.
  # int32 :numberOfCursorIDs - Number of cursors to kill.
  # int64[] :cursorIDs - Array of cursor ids to kill.
  def self.receive_kill_cursors(chunk)
    _, declared = chunk.unpack('VV')
    id_bytes = chunk.byteslice(8, chunk.bytesize) || ''
    # Never decode more ids than the body holds: the declared count comes
    # from the wire and unpack would pad the result with nils up to it.
    count = min(declared.to_i, id_bytes.bytesize / 8)

    return { :cursorIDs => id_bytes.unpack("Q<#{count}") }
  end

  # Build an OP_KILL_CURSORS document.
  def self.build_kill_cursors(request_id, response_to, cursor_ids)
    return {
      :cursorIDs => cursor_ids,
      :header => {
        :opCode => OP_KILL_CURSORS,
        :requestID => request_id,
        :responseTo => response_to
      }
    }
  end

  # Encode the body of an OP_KILL_CURSORS.
  #
  # Raises RuntimeError when :cursorIDs is missing.
  def self.write_kill_cursors(doc)
    raise 'missing cursorIDs' unless doc[:cursorIDs]
    return ([0, doc[:cursorIDs].size] + doc[:cursorIDs]).pack("VVQ<*")
  end

  # Per "collection + cursor" count of get-more messages hashed so far.
  @@hash_getmore_history = {}

  # SHA1 hex digest of a message with its volatile identifiers normalized.
  #
  # :requestID and :responseTo are replaced by fixed values while hashing.
  # For OP_GET_MORE the cursor id is zeroed and the request id is replaced by
  # a counter kept per collection + cursor, so successive get-mores on one
  # cursor hash differently from each other. The document is temporarily
  # mutated and restored afterwards, even when encoding fails.
  #
  # Called without an argument this falls back to the module's ordinary
  # object hash, so the module still honours the standard #hash contract.
  def self.hash(doc = nil)
    return super() if doc.nil?

    header = doc[:header]
    saved_request_id = header[:requestID]
    saved_response_to = header[:responseTo]
    saved_cursor_id = nil

    begin
      header[:requestID] = 1234 if saved_request_id
      header[:responseTo] = 4321 if saved_response_to

      if header[:opCode] == OP_GET_MORE
        key = build_full_collection(doc[:database], doc[:collection]) + doc[:cursorID].to_s
        @@hash_getmore_history[key] ||= 0
        header[:requestID] = @@hash_getmore_history[key]
        saved_cursor_id = doc[:cursorID]
        doc[:cursorID] = 0
        @@hash_getmore_history[key] += 1
      end

      Digest::SHA1.hexdigest(write(doc))
    ensure
      header[:requestID] = saved_request_id if saved_request_id
      header[:responseTo] = saved_response_to if saved_response_to
      doc[:cursorID] = saved_cursor_id if saved_cursor_id
    end
  end

  # Emit a warning through the module logger when one has been installed as
  # @@log (this file never defines it), otherwise through Kernel#warn.
  def self.log_warning(message)
    if class_variable_defined?(:@@log)
      class_variable_get(:@@log).warn(message)
    else
      warn(message)
    end
  end
  private_class_method :log_warning
end
526
+