mongo-proxy 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,526 @@
1
require 'bson'
require 'digest'
require 'stringio'
2
+
3
# This is a set of functions for dealing with the Mongo wire protocol. It has
# methods for moving between Ruby hashes and Mongo wire segments, including
# their embedded BSON. The MongoDB wire protocol is documented at:
# https://www.mongodb.com/docs/manual/reference/mongodb-wire-protocol/
#
# Only the opcodes listed in OPS are recognised, and OP_MSG has no
# parser/writer here. All integers are little-endian ('V' = uint32,
# 'Q<' = uint64) and collection names are NUL-terminated strings ('Z*').
module WireMongo

  # Size in bytes of the fixed message header (four int32 fields).
  HEADER_SIZE = 16

  # Smallest byte length accepted for an embedded BSON document: a 4-byte
  # length prefix plus the trailing NUL of an empty document.
  MIN_BSON_SIZE = 5

  OP_REPLY = :reply
  OP_MSG = :msg
  OP_UPDATE = :update
  OP_INSERT = :insert
  OP_QUERY = :query
  OP_GET_MORE = :get_more
  OP_DELETE = :delete
  OP_KILL_CURSORS = :kill_cursors

  # Numeric wire opcode => symbolic name used in parsed documents.
  OPS = {
    1 => OP_REPLY,
    1000 => OP_MSG,
    2001 => OP_UPDATE,
    2002 => OP_INSERT,
    2004 => OP_QUERY,
    2005 => OP_GET_MORE,
    2006 => OP_DELETE,
    2007 => OP_KILL_CURSORS
  }.freeze

  # Symbolic name => numeric wire opcode.
  OPS_INVERTED = OPS.invert.freeze

  FLAG_UPDATE_UPSERT = 1
  FLAG_UPDATE_MULTIUPDATE = (1 << 1)
  FLAG_DELETE_MULTI = 1

  # Parse out an arbitrary binary mongo message, returning a hash
  # representation for easy manipulation.
  #
  # socket - An IO-like object responding to #read(n), or a String holding a
  #          complete message (it is wrapped in a StringIO).
  #
  # Returns [raw_message, parsed_hash]. Returns [nil, nil] when a full header
  # could not be read, and nil when reading or parsing raised an error (the
  # error is logged, not re-raised). Unknown opcodes yield a parsed hash that
  # only contains :header.
  def self.receive(socket)
    if socket.is_a?(String)
      socket = StringIO.new(socket)
      socket.set_encoding('UTF-8', 'UTF-8')
    end

    raw_header, header = receive_header(socket)
    return nil, nil unless raw_header && header

    raw_body = socket.read(header[:messageLength] - HEADER_SIZE)

    parsed =
      case header[:opCode]
      when OP_REPLY then receive_reply(raw_body)
      when OP_UPDATE then receive_update(raw_body)
      when OP_INSERT then receive_insert(raw_body)
      when OP_QUERY then receive_query(raw_body)
      when OP_DELETE then receive_delete(raw_body)
      when OP_GET_MORE then receive_get_more(raw_body)
      when OP_KILL_CURSORS then receive_kill_cursors(raw_body)
      else
        puts "could not parse message type :#{header[:opCode]}:"
        {}
      end

    parsed[:header] = header
    full = (raw_header + raw_body).force_encoding('UTF-8')
    return full, parsed
  rescue StandardError => e
    # StandardError (not Exception) so signals and exit requests propagate.
    log_warning "failed to read from socket #{socket.to_s} (#{e.class}: #{e.message})"
    return nil
  end

  # Write a hash document representation into its corresponding binary form.
  # This method can be used with documents in the format that receive returns,
  # making it easy to parse a message, change it, and re-encode it.
  #
  # doc - Hash with a :header hash (:opCode, :requestID, optional :responseTo)
  #       plus the opcode-specific fields.
  #
  # Returns the encoded message (header + body) as a String, or nil when the
  # opcode has no writer. Raises RuntimeError when required fields are missing.
  def self.write(doc)
    opcode = doc[:header][:opCode]

    body =
      case opcode
      when OP_REPLY then write_reply(doc)
      when OP_UPDATE then write_update(doc)
      when OP_INSERT then write_insert(doc)
      when OP_QUERY then write_query(doc)
      when OP_DELETE then write_delete(doc)
      when OP_GET_MORE then write_get_more(doc)
      when OP_KILL_CURSORS then write_kill_cursors(doc)
      else
        puts "could not write message type :#{opcode}:"
        return nil
      end

    write_header(doc[:header], body.force_encoding('UTF-8'))
  end

  # Receive the Mongo Wire message header from a stream.
  #
  #   int32 :messageLength - Total message length in bytes, header included.
  #   int32 :requestID - Identifier of this message.
  #   int32 :responseTo - RequestID from the original request.
  #   int32 :opCode - Message type (mapped to a symbol via OPS; nil if unknown).
  #
  # Returns [raw_header_bytes, header_hash], or nil when fewer than
  # HEADER_SIZE bytes could be read.
  def self.receive_header(stream)
    chunk = stream.read(HEADER_SIZE)
    return nil unless chunk && chunk.bytesize == HEADER_SIZE

    x = {}
    x[:messageLength], x[:requestID], x[:responseTo], x[:opCode] = chunk.unpack('VVVV')
    x[:opCode] = OPS[x[:opCode]]
    return chunk, x
  end

  # Prefix an encoded body with its wire header.
  #
  # doc  - Header hash; :requestID and :opCode are required, :responseTo
  #        defaults to 0. :messageLength is recomputed from the body.
  # body - Encoded message body (String).
  #
  # Returns header + body. Raises RuntimeError when :requestID or :opCode is
  # missing.
  def self.write_header(doc, body)
    raise 'no requestID' unless doc[:requestID]
    raise 'no opCode' unless doc[:opCode]
    response_to = (doc[:responseTo] || 0)
    length = body.bytesize + HEADER_SIZE

    header = [length, doc[:requestID], response_to, OPS_INVERTED[doc[:opCode]]].pack('VVVV')
    header.force_encoding('UTF-8') + body
  end

  # Deserialize consecutive BSON documents embedded in a message body.
  #
  # chunk - Message body (String).
  # start - Byte offset of the first document.
  # max   - Maximum number of documents to read.
  #
  # Returns [documents, offset_after_last_document], or [nil, nil] when a
  # document has an implausible length prefix or fails to deserialize.
  def self.receive_bson(chunk, start, max = 10000)
    docs = []

    while start < chunk.bytesize && docs.size < max
      # Byte-based slicing: String#[] would index by character on a
      # multibyte-encoded chunk and misalign every offset.
      bson_length = chunk.byteslice(start, 4).unpack('V')[0]
      raw = nil

      begin
        # A length below the BSON minimum would never advance `start`, and a
        # length past the end of the chunk means the document is truncated.
        if bson_length.nil? || bson_length < MIN_BSON_SIZE || start + bson_length > chunk.bytesize
          raise ArgumentError, "invalid BSON length #{bson_length.inspect} at offset #{start}"
        end

        raw = chunk.byteslice(start, bson_length)
        docs << BSON.deserialize(raw)
      rescue StandardError
        puts 'could not deserialize BSON:'
        pp(raw || chunk.byteslice(start..-1))
        return nil, nil
      end

      start += bson_length
    end

    return docs, start
  end

  # Serialize one document (Hash) or a list of documents into concatenated
  # BSON bytes.
  def self.write_bson(docs)
    docs = [docs] if docs.is_a?(Hash)

    buffer = String.new('')
    docs.each { |doc| buffer << BSON.serialize(doc).to_s }
    buffer
  end

  # Returns the smaller of a and b (a when they compare equal).
  def self.min(a, b)
    (a > b ? b : a)
  end

  # Split "database.collection" at the first dot.
  #
  # Returns [database, collection]; collection may itself contain dots and is
  # '' when the name has no dot. The collection part is kept verbatim so that
  # build_full_collection(*parse_full_collection(name)) round-trips.
  def self.parse_full_collection(full_collection)
    database, collection = full_collection.split('.', 2)
    return database, (collection || '')
  end

  # Join a database and collection name into the wire form "database.collection".
  def self.build_full_collection(database, collection)
    "#{database}.#{collection}"
  end

  # OP_REPLY: 1
  # A reply to a client request.
  #
  #   header :header - Message header.
  #   int32 :responseFlags - A bit vector of response flags.
  #   int64 :cursorID - ID of open cursor, if there is one. 0 otherwise.
  #   int32 :startingFrom - Offset in cursor of this reply message.
  #   int32 :numberReturned - Number of documents in the reply.
  #   document[] :documents - The returned BSON documents.
  def self.receive_reply(chunk)
    x = {}
    x[:responseFlags], x[:cursorID], x[:startingFrom], x[:numberReturned] = chunk.unpack('VQ<VV')
    # Documents start after the 20 bytes of fixed fields (4 + 8 + 4 + 4).
    x[:documents], _ = receive_bson(chunk, 20, x[:numberReturned])
    x
  end

  # Build the hash form of an OP_REPLY. A single Hash is treated as a
  # one-document reply.
  def self.build_reply(documents, request_id, response_to,
                       response_flags = 0, cursor_id = 0, starting_from = 0)
    documents = [documents] if documents.is_a?(Hash)

    {
      :responseFlags => response_flags,
      :startingFrom => starting_from,
      :numberReturned => documents.size,
      :cursorID => cursor_id,
      :documents => documents,
      :header => {
        :requestID => request_id,
        :responseTo => response_to,
        :opCode => OP_REPLY
      }
    }
  end

  # Encode the body of an OP_REPLY.
  #
  # :numberReturned falls back to the number of :documents when absent.
  # Raises RuntimeError when :responseTo or :documents is missing.
  def self.write_reply(doc)
    raise 'no responseTo' unless doc[:header][:responseTo]
    raise 'no documents' unless doc[:documents]

    documents = doc[:documents]
    documents = [documents] if documents.is_a?(Hash)

    response_flags = (doc[:responseFlags] || 0)
    cursor_id = (doc[:cursorID] || 0)
    starting_from = (doc[:startingFrom] || 0)
    number_returned = (doc[:numberReturned] || documents.size)

    msg = [response_flags, cursor_id, starting_from, number_returned].pack('VQ<VV')
    msg << write_bson(documents)
    msg
  end

  # OP_UPDATE: 2001
  # A MongoDB update query message.
  #
  #   header :header - Message header.
  #   int32 - An empty value.
  #   string :database.:collection - Database and collection name for update.
  #   int32 :flags - Bit vector of update flags.
  #   document :selector - BSON document representing update target.
  #   document :update - BSON document representing the update to perform.
  def self.receive_update(chunk)
    x = {}
    _, full, x[:flags] = chunk.unpack('VZ*V')
    x[:database], x[:collection] = parse_full_collection(full)
    # TODO break out flags
    # Documents follow int32 + name + NUL + int32 = name length + 9 bytes.
    docs, _ = receive_bson(chunk, full.bytesize + 9, 2)
    x[:selector] = docs[0]
    x[:update] = docs[1]
    x
  end

  # Build the hash form of an OP_UPDATE.
  #
  # flags - Collection of option symbols; :upsert and :multi are recognised.
  def self.build_update(request_id, database_name, collection_name, selector, update, flags = [])
    flag = 0
    flag |= FLAG_UPDATE_UPSERT if flags.include?(:upsert)
    flag |= FLAG_UPDATE_MULTIUPDATE if flags.include?(:multi)

    {
      :header => {
        :opCode => OP_UPDATE,
        :requestID => request_id,
        :responseTo => 0
      },
      :database => database_name,
      :collection => collection_name,
      :selector => selector,
      :update => update,
      :flags => flag
    }
  end

  # Encode the body of an OP_UPDATE. Raises RuntimeError when the collection
  # info, :selector or :update is missing.
  def self.write_update(doc)
    raise 'missing collection info' unless doc[:database] && doc[:collection]
    raise 'missing selector' unless doc[:selector]
    raise 'missing update' unless doc[:update]
    flags = (doc[:flags] || 0)

    msg = [0, build_full_collection(doc[:database], doc[:collection]), flags].pack('VZ*V')
    msg << write_bson(doc[:selector])
    msg << write_bson(doc[:update])
    msg
  end

  # OP_INSERT: 2002
  #
  #   header :header - Message header.
  #   int32 :flags - Bit vector flags.
  #   string :database.:collection - Database + collection name.
  #   document[] :documents - An array of BSON documents.
  def self.receive_insert(chunk)
    x = {}
    x[:flags], full = chunk.unpack('VZ*')
    x[:database], x[:collection] = parse_full_collection(full)
    # Documents follow int32 + name + NUL = name length + 5 bytes.
    x[:documents], _ = receive_bson(chunk, full.bytesize + 5)
    x
  end

  # Build the hash form of an OP_INSERT. A single Hash is treated as a
  # one-document insert.
  def self.build_insert(request_id, database_name, collection_name, documents, flags = 0)
    documents = [documents] if documents.is_a?(Hash)

    {
      :flags => flags,
      :database => database_name,
      :collection => collection_name,
      :documents => documents,
      :header => {
        :requestID => request_id,
        :responseTo => 0,
        :opCode => OP_INSERT
      }
    }
  end

  # Encode the body of an OP_INSERT. Raises RuntimeError when the collection
  # info or :documents is missing.
  def self.write_insert(doc)
    raise 'missing full collection' unless doc[:database] && doc[:collection]
    raise 'missing documents' unless doc[:documents]
    flags = (doc[:flags] || 0)
    docs = doc[:documents]
    docs = [docs] if docs.is_a?(Hash)

    msg = [flags, build_full_collection(doc[:database], doc[:collection])].pack('VZ*')
    msg << write_bson(docs)
    msg
  end

  # OP_QUERY: 2004
  #
  #   header :header - Message header.
  #   int32 :flags - A bit vector of query flags.
  #   string :database.:collection - Database + collection name.
  #   int32 :numberToSkip - Offset for results.
  #   int32 :numberToReturn - Limit for results.
  #   document :query - BSON document of query.
  #   document :returnFieldSelector - Optional BSON document to select fields in response.
  def self.receive_query(chunk)
    x = {}
    x[:flags], full, x[:numberToSkip], x[:numberToReturn] = chunk.unpack('VZ*VV')
    # Three int32 fields plus the NUL-terminated collection name.
    start = 3 * 4 + full.bytesize + 1
    x[:database], x[:collection] = parse_full_collection(full)
    docs, _ = receive_bson(chunk, start, 2)
    x[:query] = docs[0]
    x[:returnFieldSelector] = (docs.size > 1 ? docs[1] : nil)
    x
  end

  # Build the hash form of an OP_QUERY. The default num_to_return is
  # 4294967295 (0xFFFFFFFF).
  def self.build_query(request_id, database_name, collection_name,
                       query = {}, fields = nil, num_to_return = 4294967295, number_to_skip = 0, flags = 0)
    {
      :header => {
        :opCode => OP_QUERY,
        :requestID => request_id,
        :responseTo => 0
      },
      :database => database_name,
      :collection => collection_name,
      :query => query,
      :returnFieldSelector => fields,
      :numberToReturn => num_to_return,
      :flags => flags,
      :numberToSkip => number_to_skip
    }
  end

  # Encode the body of an OP_QUERY. The field selector is only written when
  # present. Raises RuntimeError when the collection info is missing.
  def self.write_query(doc)
    raise 'missing full collection name' unless doc[:database] && doc[:collection]
    flags = (doc[:flags] || 0)
    number_to_skip = (doc[:numberToSkip] || 0)
    number_to_return = (doc[:numberToReturn] || 4294967295)
    query = (doc[:query] || {})
    return_field_selector = doc[:returnFieldSelector]

    full = build_full_collection(doc[:database], doc[:collection])
    msg = [flags, full, number_to_skip, number_to_return].pack('VZ*VV')
    msg << write_bson([query])
    msg << write_bson([return_field_selector]) if return_field_selector
    msg
  end

  # OP_GET_MORE: 2005
  #
  #   header :header - Message header.
  #   int32 - Empty.
  #   string :database.:collection - Database + collection name.
  #   int32 :numberToReturn - Limit for next results reply.
  #   int64 :cursorID - ID of cursor to consume more from.
  def self.receive_get_more(chunk)
    x = {}
    _, full, x[:numberToReturn], x[:cursorID] = chunk.unpack('VZ*VQ<')
    x[:database], x[:collection] = parse_full_collection(full)
    x
  end

  # Build the hash form of an OP_GET_MORE.
  def self.build_get_more(request_id, response_to, database_name, collection_name, cursor_id, number_to_return = 0)
    {
      :header => {
        :opCode => OP_GET_MORE,
        :requestID => request_id,
        :responseTo => response_to
      },
      :database => database_name,
      :collection => collection_name,
      :cursorID => cursor_id,
      :numberToReturn => number_to_return
    }
  end

  # Encode the body of an OP_GET_MORE. Raises RuntimeError when the collection
  # info or :cursorID is missing.
  def self.write_get_more(doc)
    raise 'missing full collection name' unless doc[:database] && doc[:collection]
    raise 'missing cursorID' unless doc[:cursorID]

    number_to_return = (doc[:numberToReturn] || 0)
    full = build_full_collection(doc[:database], doc[:collection])
    [0, full, number_to_return, doc[:cursorID]].pack('VZ*VQ<')
  end

  # OP_DELETE: 2006
  #
  #   header :header - Message header.
  #   int32 - Empty.
  #   string :database.:collection - Database + collection name.
  #   int32 :flags - Bit vector of delete-related flags.
  #   document :selector - Selector for deletion.
  def self.receive_delete(chunk)
    x = {}
    _, full, x[:flags] = chunk.unpack('VZ*V')
    # Selector follows int32 + name + NUL + int32 = name length + 9 bytes.
    docs, _ = receive_bson(chunk, full.bytesize + 9, 1)
    x[:database], x[:collection] = parse_full_collection(full)
    x[:selector] = docs[0]
    x
  end

  # Build the hash form of an OP_DELETE.
  #
  # opt - Collection of option symbols; :multi sets FLAG_DELETE_MULTI.
  def self.build_delete(request_id, database_name, collection_name, selector, opt = [])
    flags = opt.include?(:multi) ? FLAG_DELETE_MULTI : 0

    {
      :header => {
        :opCode => OP_DELETE,
        :requestID => request_id,
        :responseTo => 0
      },
      :database => database_name,
      :collection => collection_name,
      :selector => selector,
      :flags => flags
    }
  end

  # Encode the body of an OP_DELETE. Raises RuntimeError when the collection
  # info or :selector is missing.
  def self.write_delete(doc)
    raise 'missing full collection name' unless doc[:database] && doc[:collection]
    raise 'missing selector' unless doc[:selector]
    flags = (doc[:flags] || 0)

    msg = [0, build_full_collection(doc[:database], doc[:collection]), flags].pack('VZ*V')
    msg << write_bson(doc[:selector])
    msg
  end

  # OP_KILL_CURSORS: 2007
  # Message to explicitly delete a cursor. Only sent from the client in very
  # specific circumstances, cursors can also time out.
  #
  #   header :header - Message header.
  #   int32 - Empty.
  #   int32 :numberOfCursorIDs - Number of cursors to kill.
  #   int64[] :cursorIDs - Array of cursor ids to kill.
  def self.receive_kill_cursors(chunk)
    x = {}
    _, n = chunk.unpack('VV')
    # Cursor ids start after the two int32 fields; slice by bytes, not chars.
    x[:cursorIDs] = chunk.byteslice(8..-1).unpack("Q<#{n}")
    x
  end

  # Build the hash form of an OP_KILL_CURSORS.
  def self.build_kill_cursors(request_id, response_to, cursor_ids)
    {
      :cursorIDs => cursor_ids,
      :header => {
        :opCode => OP_KILL_CURSORS,
        :requestID => request_id,
        :responseTo => response_to
      }
    }
  end

  # Encode the body of an OP_KILL_CURSORS. Raises RuntimeError when
  # :cursorIDs is missing.
  def self.write_kill_cursors(doc)
    raise 'missing cursorIDs' unless doc[:cursorIDs]
    ([0, doc[:cursorIDs].size] + doc[:cursorIDs]).pack('VVQ<*')
  end

  # Per-cursor count of OP_GET_MORE messages hashed so far, keyed by
  # "database.collection" + cursorID.
  @@hash_getmore_history = {}

  # Compute a SHA1 hex digest of a message that ignores per-connection
  # identifiers.
  #
  # :requestID and :responseTo are replaced by fixed placeholders (only when
  # present) before encoding. For OP_GET_MORE the cursor id is zeroed and the
  # request id is replaced by how many get_more messages have been hashed for
  # that collection/cursor so far, so successive get_mores hash differently.
  #
  # The substitutions are applied to a copy, so doc itself is never modified,
  # even if encoding raises. Note this shadows Module#hash with a different
  # arity; that is kept as-is for existing callers.
  def self.hash(doc)
    header = doc[:header].dup
    normalized = doc.merge({ :header => header })

    header[:requestID] = 1234 if header[:requestID]
    header[:responseTo] = 4321 if header[:responseTo]

    if header[:opCode] == OP_GET_MORE
      key = build_full_collection(doc[:database], doc[:collection]) + doc[:cursorID].to_s
      @@hash_getmore_history[key] ||= 0
      header[:requestID] = @@hash_getmore_history[key]
      normalized[:cursorID] = 0
      @@hash_getmore_history[key] += 1
    end

    Digest::SHA1.hexdigest(write(normalized))
  end

  # Emit a warning through @@log when a logger has been installed on the
  # module, falling back to Kernel#warn (stderr) otherwise.
  def self.log_warning(message)
    if class_variable_defined?(:@@log) && @@log
      @@log.warn message
    else
      warn message
    end
  end
  private_class_method :log_warning
end
526
+