mongoriver 0.1.0 → 0.2.0

data/Rakefile CHANGED
@@ -1,2 +1,16 @@
  #!/usr/bin/env rake
- require "bundler/gem_tasks"
+ require 'bundler/setup'
+ require 'bundler/gem_tasks'
+ require 'rake/testtask'
+
+ Rake::TestTask.new do |t|
+   t.test_files = FileList['test/test_*.rb']
+ end
+
+ Rake::TestTask.new(:'test-unit') do |t|
+   t.test_files = FileList['test/test_mongoriver.rb']
+ end
+
+ Rake::TestTask.new(:'test-connected') do |t|
+   t.test_files = FileList['test/test_*_connected.rb']
+ end
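
With these tasks in place, rake test runs everything matching test/test_*.rb, rake test-unit runs only test/test_mongoriver.rb, and rake test-connected runs the test/test_*_connected.rb files, which expect a reachable mongod (see the connected test file below).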
data/bin/optail → data/bin/watch-oplog RENAMED
@@ -7,38 +7,25 @@ require 'bundler/setup'
  require 'mongoriver'

  module Mongoriver
-   class Mongocp < Streambed
+   class OplogWatcher < AbstractOutlet
      include Mongoriver::Logging

-     def initialize(upstreams, type, start_optime, pause)
-       super(upstreams, type)
-       @start_optime = start_optime
-       @pause = pause
+     def insert(db_name, collection_name, document)
+       log.info("got an insert for #{db_name}.#{collection_name}! #{document.inspect}")
      end

-     def pause
-       if @pause
-         $stderr.puts("Press enter to continue")
-         $stdin.readline
-       end
+     def remove(db_name, collection_name, document)
+       log.info("got a remove for #{db_name}.#{collection_name}! #{document.inspect}")
      end

-     def hook_optime
-       @start_optime
-     end
-
-     def hook_update_optime(ts, mandatory)
-     end
-
-     all_hooks.each do |name, _, opts|
-       next if name == :optime || name == :update_optime
-       define_method(hook_name(name)) {|*args| pause}
+     def update(db_name, collection_name, selector, updates)
+       log.info("got an update for #{db_name}.#{collection_name}! #{selector}, #{updates}")
      end
    end
  end

  def main
-   options = {:host => nil, :port => nil, :type => :slave, :optime => 0, :pause => true, :verbose => 0}
+   options = {:host => nil, :port => nil, :type => :direct, :optime => 0, :pause => true, :verbose => 0}
    optparse = OptionParser.new do |opts|
      opts.banner = "Usage: #{$0} [options]"

@@ -59,17 +46,9 @@ def main
        options[:port] = Integer(port)
      end

-     opts.on('-a', '--all', 'Allow connections even directly to a primary') do
-       options[:type] = :direct
-     end
-
      opts.on('-s OPTIME', '--start', 'Starting optime') do |optime|
        options[:optime] = Integer(optime)
      end
-
-     opts.on('-f', '--follow-automatically', "Don't prompt between ops") do
-       options[:pause] = false
-     end
    end
    optparse.parse!

@@ -86,8 +65,19 @@ def main
      log.level = Log4r::INFO
    end

-   runner = Mongoriver::Mongocp.new(["#{options[:host]}:#{options[:port]}"], options[:type], options[:optime], options[:pause])
-   runner.run
+   tailer = Mongoriver::Tailer.new(["#{options[:host]}:#{options[:port]}"], options[:type])
+   outlet = Mongoriver::OplogWatcher.new
+
+   stream = Mongoriver::Stream.new(tailer, outlet)
+
+   %w[TERM INT USR2].each do |sig|
+     Signal.trap(sig) do
+       log.info("Got SIG#{sig}. Preparing to exit...")
+       stream.stop
+     end
+   end
+
+   stream.run_forever
    return 0
  end

data/lib/mongoriver.rb CHANGED
@@ -3,10 +3,12 @@ require 'log4r'

  module Mongoriver; end

+ require 'mongoriver/version'
  require 'mongoriver/log'
+ require 'mongoriver/assertions'

- require 'mongoriver/streambed'
  require 'mongoriver/tailer'
  require 'mongoriver/abstract_persistent_tailer'
  require 'mongoriver/persistent_tailer'
- require 'mongoriver/version'
+ require 'mongoriver/abstract_outlet'
+ require 'mongoriver/stream'
data/lib/mongoriver/abstract_outlet.rb ADDED
@@ -0,0 +1,20 @@
+ module Mongoriver
+   class AbstractOutlet
+
+     # implement these methods in your subclass
+     def update_optime(timestamp); end
+
+     def insert(db_name, collection_name, document); end
+     def remove(db_name, collection_name, document); end
+     def update(db_name, collection_name, selector, update); end
+
+     def create_index(db_name, collection_name, index_key, options); end
+     def drop_index(db_name, collection_name, index_name); end
+
+     def create_collection(db_name, collection_name, options); end
+     def drop_collection(db_name, collection_name); end
+     def rename_collection(db_name, old_collection_name, new_collection_name); end
+
+     def drop_database(db_name); end
+   end
+ end
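
Every callback on AbstractOutlet is a no-op, so a consumer subclasses it and overrides only the events it cares about; Stream delivers everything else into the empty defaults. A minimal sketch of a subclass (CounterOutlet is a hypothetical name, not part of the gem):

    require 'mongoriver'

    # Hypothetical outlet: tallies inserts per namespace and ignores every other op.
    class CounterOutlet < Mongoriver::AbstractOutlet
      attr_reader :counts

      def initialize
        @counts = Hash.new(0)
      end

      def insert(db_name, collection_name, document)
        @counts["#{db_name}.#{collection_name}"] += 1
      end
    end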
data/lib/mongoriver/assertions.rb ADDED
@@ -0,0 +1,9 @@
+ module Mongoriver
+   module Assertions
+     class AssertionFailure < StandardError; end
+
+     def assert(condition, msg)
+       raise AssertionFailure.new(msg) unless condition
+     end
+   end
+ end
data/lib/mongoriver/stream.rb ADDED
@@ -0,0 +1,169 @@
+ module Mongoriver
+   class Stream
+     include Mongoriver::Logging
+     include Mongoriver::Assertions
+
+     attr_accessor :tailer, :outlet
+
+     def initialize(tailer, outlet)
+       assert(tailer.is_a?(Tailer),
+              "tailer must be a subclass/instance of Tailer")
+       assert(outlet.is_a?(AbstractOutlet),
+              "outlet must be a subclass (or instance) of AbstractOutlet")
+
+       @tailer = tailer
+       @outlet = outlet
+       @stop = false
+       @stats = {}
+     end
+
+     def stats
+       @stats
+     end
+
+     def run_forever(starting_timestamp=nil)
+       if starting_timestamp
+         @tailer.tail_from(optime_from_ts(starting_timestamp))
+       else
+         @tailer.tail_from
+       end
+
+       until @stop
+         @tailer.stream do |op|
+           handle_op(op)
+         end
+       end
+     end
+
+     def stop
+       @stop = true
+       @tailer.stop
+     end
+
+     private
+
+     def optime_from_ts(timestamp)
+       if timestamp.is_a?(Integer)
+         if timestamp >= 0
+           BSON::Timestamp.new(timestamp, 0)
+         else
+           raise "Invalid optime: #{timestamp}"
+         end
+       else
+         raise "Unrecognized type #{timestamp.class} (#{timestamp.inspect}) " \
+               "for start_timestamp"
+       end
+     end
+
+     def trigger(name, *args)
+       signature = "#{name}(" + args.map { |arg| arg.inspect }.join(', ') + ")"
+       log.debug("triggering #{signature}")
+       @stats[name] ||= 0
+       @stats[name] += 1
+
+       @outlet.send(name, *args)
+     end
+
+     def parse_ns(ns)
+       ns.split('.', 2)
+     end
+
+     def handle_op(entry)
+       op = entry['op']
+       data = entry['o']
+       ns = entry['ns']
+
+       if op == 'n'
+         # This happens for initial rs.initiate() op, maybe others.
+         log.debug("Skipping no-op #{entry.inspect}")
+         return
+       end
+
+       db_name, collection_name = parse_ns(ns)
+       assert(db_name, "nil db name #{db_name.inspect} for #{entry.inspect}")
+
+       case op
+       when 'i'
+         handle_insert(db_name, collection_name, data)
+       when 'u'
+         selector = entry['o2']
+         trigger(:update, db_name, collection_name, selector, data)
+       when 'd'
+         trigger(:remove, db_name, collection_name, data)
+       when 'c'
+         assert(collection_name == '$cmd',
+                "Command collection name is #{collection_name.inspect} for " \
+                "#{entry.inspect}, but should be '$cmd'")
+
+         handle_cmd(db_name, collection_name, data)
+       else
+         raise "Unrecognized op: #{op} (#{entry.inspect})"
+       end
+
+       optime = entry['ts']
+       trigger(:update_optime, optime.seconds)
+     end
+
+     def handle_insert(db_name, collection_name, data)
+       if collection_name == 'system.indexes'
+         handle_create_index(data)
+       else
+         trigger(:insert, db_name, collection_name, data)
+       end
+     end
+
+     def handle_create_index(spec)
+       db_name, collection_name = parse_ns(spec['ns'])
+       index_key = spec['key'].map { |field, dir| [field, dir.round] }
+       options = {}
+
+       spec.each do |key, value|
+         case key
+         when 'v'
+           unless value == 1
+             raise NotImplementedError.new("Only v=1 indexes are supported, " \
+                                           "not v=#{value.inspect}")
+           end
+         when 'ns', 'key', '_id' # do nothing
+         else
+           options[key.to_sym] = value
+         end
+       end
+
+       assert(options.include?(:name),
+              "No name defined for index spec #{spec.inspect}")
+
+       trigger(:create_index, db_name, collection_name, index_key, options)
+     end
+
+     def handle_cmd(db_name, collection_name, data)
+       if deleted_from_collection = data['deleteIndexes']
+         index_name = data['index']
+         trigger(:drop_index, db_name, deleted_from_collection, index_name)
+       elsif created_collection = data['create']
+         handle_create_collection(db_name, data)
+       elsif dropped_collection = data['drop']
+         trigger(:drop_collection, db_name, dropped_collection)
+       elsif old_collection_ns = data['renameCollection']
+         db_name, old_collection_name = parse_ns(old_collection_ns)
+         _, new_collection_name = parse_ns(data['to'])
+         trigger(:rename_collection, db_name, old_collection_name, new_collection_name)
+       elsif data['dropDatabase'] == 1
+         trigger(:drop_database, db_name)
+       else
+         raise "Unrecognized command #{data.inspect}"
+       end
+     end
+
+     def handle_create_collection(db_name, data)
+       collection_name = data.delete('create')
+
+       options = {}
+       data.each do |k, v|
+         options[k.to_sym] = (k == 'size') ? v.round : v
+       end
+
+       trigger(:create_collection, db_name, collection_name, options)
+     end
+   end
+ end
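
Per optime_from_ts above, run_forever accepts either nil (start tailing without an explicit optime) or a non-negative Integer of Unix seconds, which is promoted to a BSON::Timestamp with increment 0. A minimal wiring sketch, assuming a replica-set member on localhost:

    require 'mongoriver'

    tailer = Mongoriver::Tailer.new(['localhost:27017'], :direct)
    outlet = Mongoriver::AbstractOutlet.new  # in practice, a subclass overriding the callbacks
    stream = Mongoriver::Stream.new(tailer, outlet)

    # Replay from roughly a minute ago; the Integer becomes BSON::Timestamp.new(ts, 0).
    stream.run_forever(Time.now.to_i - 60)

Because stop only flips flags on the Stream and its Tailer, it is safe to call from another thread or from a signal handler, as bin/watch-oplog does above.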
data/lib/mongoriver/tailer.rb CHANGED
@@ -3,14 +3,17 @@ module Mongoriver
      include Mongoriver::Logging

      attr_reader :upstream_conn
+     attr_reader :oplog

-     def initialize(upstreams, type)
+     def initialize(upstreams, type, oplog = "oplog.rs")
        @upstreams = upstreams
        @type = type
+       @oplog = oplog
        # This number seems high
        @conn_opts = {:op_timeout => 86400}

        @cursor = nil
+       @stop = false

        connect_upstream
      end
@@ -29,7 +32,7 @@ module Mongoriver
          opts = @conn_opts.merge(:slave_ok => true)
          host, port = parse_direct_upstream
          @upstream_conn = Mongo::Connection.new(host, port, opts)
-         raise "Server at #{@upstream_conn.host}:#{@upstream_conn.port} is the primary -- if you're ok with that, check why your wrapper is passing :direct rather than :slave (HINT: try passing a -a to scripts like optail or mongocp)" if @type == :slave && @upstream_conn.primary?
+         raise "Server at #{@upstream_conn.host}:#{@upstream_conn.port} is the primary -- if you're ok with that, check why your wrapper is passing :direct rather than :slave" if @type == :slave && @upstream_conn.primary?
          ensure_upstream_replset!
        when :existing
          raise "Must pass in a single existing Mongo::Connection with :existing" unless @upstreams.length == 1 && @upstreams[0].respond_to?(:db)
@@ -61,7 +64,7 @@ module Mongoriver
      end

      def oplog_collection
-       @upstream_conn.db('local').collection('oplog.rs')
+       @upstream_conn.db('local').collection(oplog)
      end

      def tail_from(ts, opts = {})
@@ -81,20 +84,26 @@ module Mongoriver
        end
      end

-     def stop
-       @cursor.close if @cursor
-       @cursor = nil
-     end
-
      def stream(limit=nil)
        count = 0
-       while @cursor.has_next?
+       while !@stop && @cursor.has_next?
          count += 1
          break if limit && count >= limit
+
          yield @cursor.next
        end

        return @cursor.has_next?
      end
+
+     def stop
+       @stop = true
+     end
+
+     def close
+       @cursor.close if @cursor
+       @cursor = nil
+       @stop = false
+     end
    end
  end
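
The new constructor argument makes the oplog collection name configurable rather than hard-coding local.oplog.rs. A short sketch of both forms; 'oplog.$main' is the collection that pre-replica-set master/slave deployments write, which appears to be the motivating case:

    require 'mongoriver'

    # Replica-set member: tails the default local.oplog.rs.
    rs_tailer = Mongoriver::Tailer.new(['localhost:27017'], :direct)

    # Master/slave deployment: the oplog lives in local.oplog.$main instead.
    ms_tailer = Mongoriver::Tailer.new(['localhost:27017'], :direct, 'oplog.$main')

Note the reworked lifecycle as well: stop is now non-destructive (it only asks stream to return), while the new close method is what actually closes and clears the cursor.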
data/lib/mongoriver/version.rb CHANGED
@@ -1,3 +1,3 @@
  module Mongoriver
-   VERSION = "0.1.0"
+   VERSION = "0.2.0"
  end
data/mongoriver.gemspec CHANGED
@@ -19,4 +19,8 @@ Gem::Specification.new do |gem|
    gem.add_runtime_dependency('mongo', '>= 1.7')
    gem.add_runtime_dependency('bson_ext')
    gem.add_runtime_dependency('log4r')
+
+   gem.add_development_dependency('rake')
+   gem.add_development_dependency('minitest')
+   gem.add_development_dependency('mocha', '>= 0.13')
  end
data/test/test_mongoriver.rb ADDED
@@ -0,0 +1,65 @@
+ require 'mongoriver'
+ require 'mongo'
+ require 'minitest/autorun'
+ require 'mocha/setup'
+
+ describe 'Mongoriver::Stream' do
+   def create_op(op)
+     ts = Time.now.to_i
+     {'ts'=>BSON::Timestamp.new(ts, 0), 'h'=>1234, 'v'=>1, 'ns'=>'foo.bar'}.merge(op)
+   end
+
+   before do
+     conn = stub(:db => nil)
+     @tailer = Mongoriver::Tailer.new([conn], :existing)
+     @outlet = Mongoriver::AbstractOutlet.new
+     @stream = Mongoriver::Stream.new(@tailer, @outlet)
+
+     @outlet.expects(:update_optime).at_least_once
+   end
+
+   it 'triggers insert' do
+     @outlet.expects(:insert).once.with('foo', 'bar', {'_id' => 'baz'})
+     @stream.send(:handle_op, create_op({'op'=>'i', 'o'=>{'_id'=>'baz'}}))
+   end
+
+   it 'triggers update' do
+     @outlet.expects(:update).once.with('foo', 'bar', {'_id' => 'baz'}, {'a' => 'b'})
+     @stream.send(:handle_op, create_op({'op'=>'u', 'o2'=>{'_id'=>'baz'}, 'o'=>{'a'=>'b'}}))
+   end
+
+   it 'triggers remove' do
+     @outlet.expects(:remove).once.with('foo', 'bar', {'_id' => 'baz'})
+     @stream.send(:handle_op, create_op({'op'=>'d', 'b'=>true, 'o'=>{'_id'=>'baz'}}))
+   end
+
+   it 'triggers create_collection' do
+     @outlet.expects(:create_collection).once.with('foo', 'bar', {:capped => true, :size => 10})
+     @stream.send(:handle_op, create_op({'op'=>'c', 'ns'=>'foo.$cmd', 'o'=>{'create'=>'bar', 'capped'=>true, 'size'=>10.0}}))
+   end
+
+   it 'triggers drop_collection' do
+     @outlet.expects(:drop_collection).once.with('foo', 'bar')
+     @stream.send(:handle_op, create_op({'op'=>'c', 'ns'=>'foo.$cmd', 'o'=>{'drop'=>'bar'}}))
+   end
+
+   it 'triggers rename_collection' do
+     @outlet.expects(:rename_collection).once.with('foo', 'bar', 'bar_2')
+     @stream.send(:handle_op, create_op({'op'=>'c', 'ns'=>'admin.$cmd', 'o'=>{'renameCollection'=>'foo.bar', 'to'=>'foo.bar_2'}}))
+   end
+
+   it 'triggers create_index' do
+     @outlet.expects(:create_index).once.with('foo', 'bar', [['baz', 1]], {:name => 'baz_1'})
+     @stream.send(:handle_op, create_op({'op'=>'i', 'ns'=>'foo.system.indexes', 'o'=>{'_id'=>'index_id', 'ns'=>'foo.bar', 'key'=>{'baz'=>1.0}, 'name'=>'baz_1'}}))
+   end
+
+   it 'triggers drop_index' do
+     @outlet.expects(:drop_index).once.with('foo', 'bar', 'baz_1')
+     @stream.send(:handle_op, create_op({'op'=>'c', 'ns'=>'foo.$cmd', 'o'=>{'deleteIndexes'=>'bar', 'index'=>'baz_1'}}))
+   end
+
+   it 'triggers drop_database' do
+     @outlet.expects(:drop_database).once.with('foo')
+     @stream.send(:handle_op, create_op({'op'=>'c', 'ns'=>'foo.$cmd', 'o'=>{'dropDatabase'=>1.0}}))
+   end
+ end
data/test/test_mongoriver_connected.rb ADDED
@@ -0,0 +1,83 @@
+ require 'mongoriver'
+ require 'mongo'
+ require 'minitest/autorun'
+ require 'mocha/setup'
+
+ # Connected tests: run these with eg MONGO_SERVER=localhost:27017
+
+ MONGO_SERVER = ENV['MONGO_SERVER'] || 'localhost:27017'
+
+ def connect
+   begin
+     host, port = MONGO_SERVER.split(':', 2)
+     Mongo::Connection.new(host, port)
+   rescue Mongo::ConnectionFailure
+     nil
+   end
+ end
+
+ describe 'connected tests' do
+   before do
+     @mongo = connect
+     skip unless @mongo
+   end
+
+   describe 'Mongoriver::Stream' do
+     before do
+       @tailer = Mongoriver::Tailer.new([MONGO_SERVER], :direct)
+       @outlet = Mongoriver::AbstractOutlet.new
+
+       @stream = Mongoriver::Stream.new(@tailer, @outlet)
+
+       @tail_from = @tailer.most_recent_timestamp.seconds + 1
+       sleep(1)
+     end
+
+     it 'triggers the correct ops in the correct order' do
+       db = 'test'
+       collection = 'test'
+       doc = {'_id' => 'foo', 'bar' => 'baz'}
+       updated_doc = doc.dup.merge('bar' => 'qux')
+       index_keys = [['bar', 1]]
+
+       @outlet.expects(:update_optime).at_least_once
+
+       op_sequence = sequence('op_sequence')
+
+       @outlet.expects(:insert).once.with(db, collection, doc).in_sequence(op_sequence)
+       @outlet.expects(:update).once.with(db, collection, {'_id' => 'foo'}, updated_doc).in_sequence(op_sequence)
+       @outlet.expects(:remove).once.with(db, collection, {'_id' => 'foo'}).in_sequence(op_sequence)
+
+       @outlet.expects(:create_index).once.with(db, collection, index_keys, {:name => 'bar_1'}).in_sequence(op_sequence)
+       @outlet.expects(:drop_index).once.with(db, collection, 'bar_1').in_sequence(op_sequence)
+
+       @outlet.expects(:rename_collection).once.with(db, collection, collection+'_foo').in_sequence(op_sequence)
+       @outlet.expects(:drop_collection).once.with(db, collection+'_foo').in_sequence(op_sequence)
+       @outlet.expects(:drop_database).once.with(db) { @stream.stop }.in_sequence(op_sequence)
+
+       coll = @mongo[db][collection]
+       coll.insert(doc)
+       coll.update({'_id' => 'foo'}, doc.merge('bar' => 'qux'))
+       coll.remove({'_id' => 'foo'})
+
+       name = coll.ensure_index(index_keys)
+       coll.drop_index(name)
+
+       @mongo[db].rename_collection(collection, collection+'_foo')
+       @mongo[db].drop_collection(collection+'_foo')
+       @mongo.drop_database(db)
+
+       @stream.run_forever(@tail_from)
+     end
+
+     it 'passes options to create_collection' do
+       @outlet.expects(:create_collection).once.with('test', 'test', {:capped => true, :size => 10}) { @stream.stop }
+       @outlet.expects(:update_optime).at_least_once.with(anything) { @stream.stop }
+
+       @mongo['test'].create_collection('test', :capped => true, :size => 10)
+       @mongo.drop_database('test')
+
+       @stream.run_forever(@tail_from)
+     end
+   end
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: mongoriver
  version: !ruby/object:Gem::Version
-   version: 0.1.0
+   version: 0.2.0
  prerelease:
  platform: ruby
  authors:
@@ -9,7 +9,7 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2013-02-05 00:00:00.000000000 Z
+ date: 2013-05-07 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: mongo
@@ -59,12 +59,59 @@ dependencies:
      - - ! '>='
        - !ruby/object:Gem::Version
          version: '0'
+ - !ruby/object:Gem::Dependency
+   name: rake
+   requirement: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+ - !ruby/object:Gem::Dependency
+   name: minitest
+   requirement: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+ - !ruby/object:Gem::Dependency
+   name: mocha
+   requirement: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0.13'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0.13'
  description: Some tools and libraries to simplify tailing the mongod oplog
  email:
  - gdb@gregbrockman.com
  executables:
- - mongocp
- - optail
+ - watch-oplog
  extensions: []
  extra_rdoc_files: []
  files:
@@ -73,16 +120,19 @@ files:
  - LICENSE
  - README.md
  - Rakefile
- - bin/mongocp
- - bin/optail
+ - bin/watch-oplog
  - lib/mongoriver.rb
+ - lib/mongoriver/abstract_outlet.rb
  - lib/mongoriver/abstract_persistent_tailer.rb
+ - lib/mongoriver/assertions.rb
  - lib/mongoriver/log.rb
  - lib/mongoriver/persistent_tailer.rb
- - lib/mongoriver/streambed.rb
+ - lib/mongoriver/stream.rb
  - lib/mongoriver/tailer.rb
  - lib/mongoriver/version.rb
  - mongoriver.gemspec
+ - test/test_mongoriver.rb
+ - test/test_mongoriver_connected.rb
  homepage: ''
  licenses: []
  post_install_message:
@@ -107,4 +157,7 @@ rubygems_version: 1.8.23
  signing_key:
  specification_version: 3
  summary: monogdb oplog-tailing utilities.
- test_files: []
+ test_files:
+ - test/test_mongoriver.rb
+ - test/test_mongoriver_connected.rb
+ has_rdoc:
data/bin/mongocp DELETED
@@ -1,250 +0,0 @@
- #!/usr/bin/env ruby
- require 'logger'
- require 'optparse'
-
- require 'rubygems'
- require 'bundler/setup'
- require 'mongoriver'
-
- module Mongoriver
-   class Mongocp < Streambed
-     include Mongoriver::Logging
-
-     def initialize(upstreams, type, downstream, prefix)
-       super(upstreams, type)
-       @downstream = downstream
-       @prefix = prefix
-       connect_downstream
-     end
-
-     def hook_optime
-       if optime = optime_collection.find_one(:_id => @prefix)
-         optime['ts']
-       else
-         nil
-       end
-     end
-
-     def hook_update_optime(ts, mandatory)
-       optime_collection.update({:_id => @prefix}, {'$set' => {:ts => ts}}, :upsert => true) if mandatory || rand(20) == 0
-     end
-
-     def hook_initial_sync_index(db_name, collection_name, index_key, options)
-       collection = downstream_collection(db_name, collection_name)
-       index_hash = BSON::OrderedHash.new
-       index_key.each {|k,v| index_hash[k] = v}
-       collection.send(:generate_indexes, index_hash, nil, options)
-     end
-
-     def hook_initial_sync_record_batch(db_name, collection_name, records)
-       collection = downstream_collection(db_name, collection_name)
-       bulk_insert(collection, records)
-     end
-
-     # TODO: should probably do the same key checking nonsense as the above
-     def hook_stream_insert(db_name, collection_name, object)
-       collection = downstream_collection(db_name, collection_name)
-       wrap_errors(collection, object['_id']) do
-         # Only needed if safe mode is set in the driver. Note that the
-         # argument here for oplog idempotency in the case of unique
-         # keys is kind of interesting. I believe I can prove
-         # idempotency as long as Mongo has no insert order-dependent
-         # unique indexes (which I believe is true) and that you do all
-         # your object updates as upserts.
-         allow_dupkeys do
-           collection.insert(object)
-         end
-       end
-     end
-
-     def hook_stream_update(db_name, collection_name, selector, update)
-       collection = downstream_collection(db_name, collection_name)
-       wrap_errors(collection, selector['_id']) do
-         collection.update(selector, update, :upsert => true)
-       end
-     end
-
-     def hook_stream_remove(db_name, collection_name, object)
-       collection = downstream_collection(db_name, collection_name)
-       wrap_errors(collection, object['_id']) do
-         collection.remove(object)
-       end
-     end
-
-     def hook_stream_create_collection(db_name, create)
-       db = downstream_db(db_name)
-       wrap_errors(db, create) do
-         db.create_collection(create)
-       end
-     end
-
-     # "Error renaming collection: #<BSON::OrderedHash:0x83869e34 {\"errmsg\"=>\"exception: source namespace does not exist\", \"code\"=>10026, \"ok\"=>0.0}>"
-     #
-     # Possibly need the same thing if the destination already exists
-     def hook_stream_rename_collection(db_name, source, target)
-       db = downstream_db(db_name)
-       wrap_errors(db, "#{source} -> #{target}") do
-         begin
-           db.rename_collection(source, target)
-         rescue Mongo::MongoDBError => e
-           if e.message =~ /Error renaming collection: .*exception: source namespace does not exist"/
-             log.warn("Ignoring rename of non-existent collection #{source} -> #{target}: #{e} (expected when replaying part of the oplog)")
-           elsif e.message =~ /Error renaming collection: .*exception: target namespace exists"/
-             log.warn("Ignoring rename of #{source} to existing collection #{target}: #{e} (expected when replaying part of the oplog)")
-           else
-             raise
-           end
-         end
-       end
-     end
-
-     def hook_stream_drop_index(db_name, collection_name, index_name)
-       collection = downstream_collection(db_name, collection_name)
-       wrap_errors(collection, index_name) do
-         begin
-           collection.drop_index(index_name)
-         rescue Mongo::MongoDBError => e
-           raise
-           if e.message =~ /index not found/
-             log.warn("Ignoring drop of non-existent index #{index_name.inspect}: #{e} (expected when replaying part of the oplog)")
-           else
-             raise
-           end
-         end
-       end
-     end
-
-     def hook_stream_drop_collection(db_name, dropped)
-       db = downstream_db(db_name)
-       wrap_errors(db, dropped) do
-         db.drop_collection(dropped)
-       end
-     end
-
-     def hook_stream_drop_database(db_name)
-       db = downstream_db(db_name)
-       wrap_errors(db, db_name) do
-         db.command(:dropDatabase => 1)
-       end
-     end
-
-     private
-
-     def allow_dupkeys(&blk)
-       begin
-         blk.call
-       rescue Mongo::OperationFailure => e
-         if e.error_code == 11000
-           log.warn("Ignoring unique index violation: #{e} (expected when replaying part of the oplog)")
-         else
-           raise
-         end
-       end
-     end
-
-     def bulk_insert(collection, docs)
-       begin
-         # Use the internal insert_documents method because it lets us
-         # disable key verification
-         collection.send(:insert_documents, docs, collection.name, false)
-       rescue Mongo::MongoRubyError => e
-         log.error("#{ns}: Caught error on batch insert", e)
-         docs.each do |doc|
-           wrap_errors(collection, doc['_id']) do
-             collection.send(:insert_documents, [doc], collection.name, false)
-           end
-         end
-       end
-     end
-
-     def wrap_errors(collection_or_db, object, &blk)
-       begin
-         blk.call
-       rescue Mongo::MongoRubyError => e
-         if collecton_or_db.kind_of?(Mongo::Collection)
-           ns = "#{collection_or_db.db.name}.#{collection_or_db.name}"
-         else
-           ns = collection_or_db.db.name
-         end
-         log.error("#{ns}: Unknown error for #{object}", e)
-       end
-     end
-
-     def downstream_db(db_name)
-       prefixed = "#{@prefix}_#{db_name}"
-       @downstream_conn.db(prefixed)
-     end
-
-     def downstream_collection(db_name, collection_name)
-       downstream_db(db_name).collection(collection_name)
-     end
-
-     def optime_collection
-       @optime_collection ||= @downstream_conn.db('_mongocp').collection('optime')
-     end
-
-     def connect_downstream
-       host, port = @tailer.parse_host_spec(@downstream)
-       @downstream_conn = Mongo::Connection.new(host, port, :safe => true)
-     end
-   end
- end
-
- def main
-   options = {:host => nil, :port => nil, :type => :slave, :verbose => 0}
-   optparse = OptionParser.new do |opts|
-     opts.banner = "Usage: #{$0} [options]"
-
-     opts.on('-v', '--verbosity', 'Verbosity of debugging output') do
-       options[:verbose] += 1
-     end
-
-     opts.on('-h', '--help', 'Display this message') do
-       puts opts
-       exit(1)
-     end
-
-     opts.on('--help', 'Display this message') do
-       puts opts
-       exit(1)
-     end
-
-     opts.on('-h HOST', '--host', 'Upstream host to connect to') do |host|
-       options[:host] = host
-     end
-
-     opts.on('-p PORT', '--port', 'Upstream host to connect to') do |port|
-       options[:port] = Integer(port)
-     end
-
-     opts.on('-a', '--all', 'Allow connections even directly to a primary') do
-       options[:type] = :direct
-     end
-   end
-   optparse.parse!
-
-   if ARGV.length != 0
-     puts optparse
-     return 1
-   end
-
-   log = Log4r::Logger.new('Stripe')
-   log.outputters = Log4r::StdoutOutputter.new(STDERR)
-   if options[:verbose] >= 1
-     log.level = Log4r::DEBUG
-   else
-     log.level = Log4r::INFO
-   end
-   runner = Mongoriver::Mongocp.new(["#{options[:host]}:#{options[:port]}"], options[:type], 'localhost:5001', 'test')
-   runner.run
-   return 0
- end
-
- if $0 == __FILE__
-   ret = main
-   begin
-     exit(ret)
-   rescue TypeError
-     exit(0)
-   end
- end
data/lib/mongoriver/streambed.rb DELETED
@@ -1,299 +0,0 @@
- module Mongoriver
-   class Streambed
-     include Mongoriver::Logging
-
-     attr_reader :stats
-
-     class AssertionFailure < StandardError; end
-
-     def assert(condition, msg)
-       raise AssertionFailure.new(msg) unless condition
-     end
-
-     def initialize(upstreams, type)
-       @tailer = Mongoriver::Tailer.new(upstreams, type)
-       @record_fetch_batch_size = 1024
-       @record_sync_batch_size = 256
-       @stats = Hash.new(0)
-     end
-
-     def run
-       self.class.validate_hooks!
-
-       unless ts = starting_optime
-         ts = @tailer.most_recent_timestamp
-         initial_sync
-         hook_update_optime(ts, true)
-       end
-
-       tail_from(ts)
-     end
-
-     def self.my_hooks
-       @hooks ||= []
-     end
-
-     def self.all_hooks
-       hooks = my_hooks
-       if superclass <= Streambed
-         hooks + superclass.all_hooks
-       else
-         hooks
-       end
-     end
-
-     def self.validate_hooks!
-       errors = []
-       all_hooks.each do |name, args, opts|
-         method = self.instance_method(hook_name(name))
-         signature = "#{method.name}(#{args.join(', ')})"
-         if method.owner == Streambed && !opts[:default]
-           errors << "Must provide implementation of #{signature}"
-         end
-       end
-
-       raise "You need to fix the following hook errors:
-
-  #{errors.join("\n ")}" if errors.length > 0
-     end
-
-     def self.hook_name(name)
-       "hook_#{name}"
-     end
-
-     def self.hook(name, args=[], opts={})
-       if default = opts[:default]
-         target = hook_name(default)
-         implementation = Proc.new do |*args, &blk|
-           send(target, *args, &blk)
-         end
-       else
-         implementation = Proc.new do
-           raise NotImplementedError.new("Override in subclass")
-         end
-       end
-
-       define_method(hook_name(name), implementation)
-       my_hooks << [name, args, opts]
-     end
-
-     hook :optime
-     hook :update_optime, [:ts, :mandatory]
-     hook :initial_sync_index, [:db_name, :collection_name, :index_key, :options]
-     hook :initial_sync_record_batch, [:db_name, :collection_name, :records]
-     hook :stream_insert, [:db_name, :collection_name, :object]
-     hook :stream_update, [:db_name, :collection_name, :selector, :update]
-     hook :stream_remove, [:db_name, :collection_name, :object]
-     # Not usually a difference between the initial index creation and
-     # creating it while streaming ops.
-     hook :stream_create_index, [:db_name, :collection_name, :index_key, :options], :default => :initial_sync_index
-     # This seems to be called while doing a mapreduce.
-     hook :stream_create_collection, [:db_name, :create]
-     # This also seems to be called while doing a mapreduce. Note that
-     # I think mongo has a concept of temporary table, which I should
-     # look into, and renameCollection has some temporary table option.
-     hook :stream_rename_collection, [:db_name, :source, :target]
-     hook :stream_drop_index, [:db_name, :collection_name, :index_name]
-     hook :stream_drop_collection, [:db_name, :dropped]
-     hook :stream_drop_database, [:db_name]
-
-     private
-
-     def starting_optime
-       case time = hook_optime
-       when Integer
-         if time >= 0
-           BSON::Timestamp.new(time, 0)
-         elsif time == -1
-           @tailer.most_recent_timestamp
-         else
-           raise "Invalid optime: #{time}"
-         end
-       when BSON::Timestamp, nil
-         time
-       else
-         raise "Unrecognized type #{time.class} (#{time.inspect}) for start time"
-       end
-     end
-
-     def initial_sync
-       initial_sync_all_indexes
-       initial_sync_all_records
-     end
-
-     def initial_sync_all_indexes
-       log.info("Beginning initial sync of indexes")
-       syncable_databases.each {|db| initial_sync_indexes_for_db(db)}
-       log.info("Done initial sync of indexes")
-     end
-
-     def initial_sync_indexes_for_db(db)
-       db.collection('system.indexes').find.each do |index|
-         options = extract_options_from_index_spec(index)
-         index_key = index['key'].to_a
-
-         ns = index['ns']
-         db_name, collection_name = parse_ns(ns)
-         assert(db_name == db.name, "Index db name #{db_name.inspect} differs from current db name #{db.name.inspect}")
-
-         log.info("#{ns}: Initial sync of index #{options[:name]}")
-         hook_initial_sync_index(db_name, collection_name, index_key, options)
-       end
-     end
-
-     def initial_sync_all_records
-       log.info("Beginning initial sync of records")
-       syncable_databases.each {|db| initial_sync_records_for_db(db)}
-       log.info("Done initial sync of records")
-     end
-
-     def initial_sync_records_for_db(db)
-       syncable_collections(db).each do |collection|
-         initial_sync_records_for_collection(collection)
-       end
-     end
-
-     def initial_sync_records_for_collection(collection)
-       db_name = collection.db.name
-       collection_name = collection.name
-       ns = "#{db_name}.#{collection_name}"
-
-       log.info("#{ns}: Starting record initial sync")
-
-       records = []
-       collection.find({}, :batch_size => @record_fetch_batch_size, :timeout => false, :sort => [['$natural', 1]]) do |cursor|
-         while cursor.has_next?
-           records << cursor.next
-           if records.length > @record_sync_batch_size
-             # TODO: add better logging than this
-             log.info("#{ns}: Running sync of batch of #{records.length} records")
-             hook_initial_sync_record_batch(db_name, collection_name, records)
-             records = []
-           end
-         end
-       end
-       log.info("#{ns}: Finishing sync with a batch of #{records.length} records")
-       hook_initial_sync_record_batch(db_name, collection_name, records)
-
-       log.info("#{ns}: Finished record initial sync")
-     end
-
-     # This should be fine to instantiate all at once, since
-     # database_names returns all the dbs as strings anyway
-     def syncable_databases
-       @tailer.upstream_conn.database_names.map do |db_name|
-         next if db_name == 'local'
-         @tailer.upstream_conn.db(db_name)
-       end.compact
-     end
-
-     def syncable_collections(db)
-       db.collection_names.map do |collection_name|
-         next if collection_name.start_with?('system.')
-         db.collection(collection_name)
-       end.compact
-     end
-
-     def extract_options_from_index_spec(index)
-       options = {}
-       index.each do |key, value|
-         case key
-         when 'v'
-           raise NotImplementedError.new("Only v=1 indexes are supported at the moment, not v=#{value.inspect}") unless value == 1
-         when 'ns', 'key'
-         else
-           options[key.to_sym] = value
-         end
-       end
-
-       assert(options.include?(:name), "No name defined for index spec #{index.inspect}")
-       options
-     end
-
-     def stream_op(entry)
-       op = entry['op']
-       data = entry['o']
-       ns = entry['ns']
-
-       if op == 'n'
-         # This happens for initial rs.initiate() op, maybe others.
-         log.info("Skipping no-op #{entry.inspect}")
-         return
-       end
-
-       db_name, collection_name = parse_ns(ns)
-       assert(db_name, "Nil db name #{db_name.inspect} for #{entry.inspect}")
-
-       case op
-       when 'i'
-         if collection_name == 'system.indexes'
-           record(ns, entry, :create_index)
-           index_db_name, index_collection_name = parse_ns(data['ns'])
-           index_key = data['key'].to_a
-           options = extract_options_from_index_spec(data)
-           hook_stream_create_index(index_db_name, index_collection_name, index_key, options)
-         else
-           record(ns, entry, :insert)
-           hook_stream_insert(db_name, collection_name, data)
-         end
-       when 'u'
-         record(ns, entry, :update)
-         hook_stream_update(db_name, collection_name, entry['o2'], data)
-       when 'd'
-         record(ns, entry, :remove)
-         hook_stream_remove(db_name, collection_name, data)
-       when 'c'
-         assert(collection_name == '$cmd', "Command collection name is #{collection_name.inspect} for #{entry.inspect}")
-         if deleted_from = data['deleteIndexes']
-           record(ns, entry, :drop_index)
-           index = data['index']
-           hook_stream_drop_index(db_name, deleted_from, index)
-         elsif dropped = data['drop']
-           record(ns, entry, :drop_collection)
-           hook_stream_drop_collection(db_name, dropped)
-         elsif dropped = data['dropDatabase']
-           record(ns, entry, :drop_database)
-           hook_stream_drop_database(db_name)
-         elsif source = data['renameCollection']
-           record(ns, entry, :rename_collection)
-           target = data['to']
-           hook_stream_rename_collection(db_name, source, target)
-         elsif create = data['create']
-           record(ns, entry, :create)
-           hook_stream_create_collection(db_name, create)
-         else
-           raise "Unrecognized command #{data.inspect}"
-         end
-       else
-         raise "Unrecognized op: #{op} (#{entry.inspect})"
-       end
-
-       optime = entry['ts']
-       hook_update_optime(optime, false)
-     end
-
-     def tail_from(ts)
-       begin
-         @tailer.tail_from(ts)
-         loop do
-           @tailer.stream do |op|
-             stream_op(op)
-           end
-         end
-       ensure
-         @tailer.stop
-       end
-     end
-
-     def record(ns, entry, type)
-       stats[type] += 1
-       log.debug("#{ns}: #{type.inspect} #{entry.inspect}")
-     end
-
-     protected
-
-     def parse_ns(ns)
-       ns.split('.', 2)
-     end
-   end
- end