mongoriver 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -1,2 +1,16 @@
1
1
  #!/usr/bin/env rake
2
- require "bundler/gem_tasks"
2
+ require 'bundler/setup'
3
+ require 'bundler/gem_tasks'
4
+ require 'rake/testtask'
5
+
6
+ Rake::TestTask.new do |t|
7
+ t.test_files = FileList['test/test_*.rb']
8
+ end
9
+
10
+ Rake::TestTask.new(:'test-unit') do |t|
11
+ t.test_files = FileList['test/test_mongoriver.rb']
12
+ end
13
+
14
+ Rake::TestTask.new(:'test-connected') do |t|
15
+ t.test_files = FileList['test/test_*_connected.rb']
16
+ end
@@ -7,38 +7,25 @@ require 'bundler/setup'
7
7
  require 'mongoriver'
8
8
 
9
9
  module Mongoriver
10
- class Mongocp < Streambed
10
+ class OplogWatcher < AbstractOutlet
11
11
  include Mongoriver::Logging
12
12
 
13
- def initialize(upstreams, type, start_optime, pause)
14
- super(upstreams, type)
15
- @start_optime = start_optime
16
- @pause = pause
13
+ def insert(db_name, collection_name, document)
14
+ log.info("got an insert for #{db_name}.#{collection_name}! #{document.inspect}")
17
15
  end
18
16
 
19
- def pause
20
- if @pause
21
- $stderr.puts("Press enter to continue")
22
- $stdin.readline
23
- end
17
+ def remove(db_name, collection_name, document)
18
+ log.info("got a remove for #{db_name}.#{collection_name}! #{document.inspect}")
24
19
  end
25
20
 
26
- def hook_optime
27
- @start_optime
28
- end
29
-
30
- def hook_update_optime(ts, mandatory)
31
- end
32
-
33
- all_hooks.each do |name, _, opts|
34
- next if name == :optime || name == :update_optime
35
- define_method(hook_name(name)) {|*args| pause}
21
+ def update(db_name, collection_name, selector, updates)
22
+ log.info("got an update for #{db_name}.#{collection_name}! #{selector}, #{updates}")
36
23
  end
37
24
  end
38
25
  end
39
26
 
40
27
  def main
41
- options = {:host => nil, :port => nil, :type => :slave, :optime => 0, :pause => true, :verbose => 0}
28
+ options = {:host => nil, :port => nil, :type => :direct, :optime => 0, :pause => true, :verbose => 0}
42
29
  optparse = OptionParser.new do |opts|
43
30
  opts.banner = "Usage: #{$0} [options]"
44
31
 
@@ -59,17 +46,9 @@ def main
59
46
  options[:port] = Integer(port)
60
47
  end
61
48
 
62
- opts.on('-a', '--all', 'Allow connections even directly to a primary') do
63
- options[:type] = :direct
64
- end
65
-
66
49
  opts.on('-s OPTIME', '--start', 'Starting optime') do |optime|
67
50
  options[:optime] = Integer(optime)
68
51
  end
69
-
70
- opts.on('-f', '--follow-automatically', "Don't prompt between ops") do
71
- options[:pause] = false
72
- end
73
52
  end
74
53
  optparse.parse!
75
54
 
@@ -86,8 +65,19 @@ def main
86
65
  log.level = Log4r::INFO
87
66
  end
88
67
 
89
- runner = Mongoriver::Mongocp.new(["#{options[:host]}:#{options[:port]}"], options[:type], options[:optime], options[:pause])
90
- runner.run
68
+ tailer = Mongoriver::Tailer.new(["#{options[:host]}:#{options[:port]}"], options[:type])
69
+ outlet = Mongoriver::OplogWatcher.new
70
+
71
+ stream = Mongoriver::Stream.new(tailer, outlet)
72
+
73
+ %w[TERM INT USR2].each do |sig|
74
+ Signal.trap(sig) do
75
+ log.info("Got SIG#{sig}. Preparing to exit...")
76
+ stream.stop
77
+ end
78
+ end
79
+
80
+ stream.run_forever
91
81
  return 0
92
82
  end
93
83
 
@@ -3,10 +3,12 @@ require 'log4r'
3
3
 
4
4
  module Mongoriver; end
5
5
 
6
+ require 'mongoriver/version'
6
7
  require 'mongoriver/log'
8
+ require 'mongoriver/assertions'
7
9
 
8
- require 'mongoriver/streambed'
9
10
  require 'mongoriver/tailer'
10
11
  require 'mongoriver/abstract_persistent_tailer'
11
12
  require 'mongoriver/persistent_tailer'
12
- require 'mongoriver/version'
13
+ require 'mongoriver/abstract_outlet'
14
+ require 'mongoriver/stream'
@@ -0,0 +1,20 @@
1
+ module Mongoriver
2
+ class AbstractOutlet
3
+
4
+ # implement these methods in your subclass
5
+ def update_optime(timestamp); end
6
+
7
+ def insert(db_name, collection_name, document); end
8
+ def remove(db_name, collection_name, document); end
9
+ def update(db_name, collection_name, selector, update); end
10
+
11
+ def create_index(db_name, collection_name, index_key, options); end
12
+ def drop_index(db_name, collection_name, index_name); end
13
+
14
+ def create_collection(db_name, collection_name, options); end
15
+ def drop_collection(db_name, collection_name); end
16
+ def rename_collection(db_name, old_collection_name, new_collection_name); end
17
+
18
+ def drop_database(db_name); end
19
+ end
20
+ end
@@ -0,0 +1,9 @@
1
+ module Mongoriver
2
+ module Assertions
3
+ class AssertionFailure < StandardError; end
4
+
5
+ def assert(condition, msg)
6
+ raise AssertionFailure.new(msg) unless condition
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,169 @@
1
+ module Mongoriver
2
+ class Stream
3
+ include Mongoriver::Logging
4
+ include Mongoriver::Assertions
5
+
6
+ attr_accessor :tailer, :outlet
7
+
8
+ def initialize(tailer, outlet)
9
+ assert(tailer.is_a?(Tailer),
10
+ "tailer must be a subclass/instance of Tailer")
11
+ assert(outlet.is_a?(AbstractOutlet),
12
+ "outlet must be a subclass (or instance) of AbstractOutlet")
13
+
14
+ @tailer = tailer
15
+ @outlet = outlet
16
+ @stop = false
17
+ @stats = {}
18
+ end
19
+
20
+ def stats
21
+ @stats
22
+ end
23
+
24
+ def run_forever(starting_timestamp=nil)
25
+ if starting_timestamp
26
+ @tailer.tail_from(optime_from_ts(starting_timestamp))
27
+ else
28
+ @tailer.tail_from
29
+ end
30
+
31
+ until @stop
32
+ @tailer.stream do |op|
33
+ handle_op(op)
34
+ end
35
+ end
36
+ end
37
+
38
+ def stop
39
+ @stop = true
40
+ @tailer.stop
41
+ end
42
+
43
+ private
44
+
45
+ def optime_from_ts(timestamp)
46
+ if timestamp.is_a?(Integer)
47
+ if timestamp >= 0
48
+ BSON::Timestamp.new(timestamp, 0)
49
+ else
50
+ raise "Invalid optime: #{timestamp}"
51
+ end
52
+ else
53
+ raise "Unrecognized type #{timestamp.class} (#{timestamp.inspect}) " \
54
+ "for start_timestamp"
55
+ end
56
+ end
57
+
58
+ def trigger(name, *args)
59
+ signature = "#{name}(" + args.map { |arg| arg.inspect }.join(', ') + ")"
60
+ log.debug("triggering #{signature}")
61
+ @stats[name] ||= 0
62
+ @stats[name] += 1
63
+
64
+ @outlet.send(name, *args)
65
+ end
66
+
67
+ def parse_ns(ns)
68
+ ns.split('.', 2)
69
+ end
70
+
71
+ def handle_op(entry)
72
+ op = entry['op']
73
+ data = entry['o']
74
+ ns = entry['ns']
75
+
76
+ if op == 'n'
77
+ # This happens for initial rs.initiate() op, maybe others.
78
+ log.debug("Skipping no-op #{entry.inspect}")
79
+ return
80
+ end
81
+
82
+ db_name, collection_name = parse_ns(ns)
83
+ assert(db_name, "nil db name #{db_name.inspect} for #{entry.inspect}")
84
+
85
+ case op
86
+ when 'i'
87
+ handle_insert(db_name, collection_name, data)
88
+ when 'u'
89
+ selector = entry['o2']
90
+ trigger(:update, db_name, collection_name, selector, data)
91
+ when 'd'
92
+ trigger(:remove, db_name, collection_name, data)
93
+ when 'c'
94
+ assert(collection_name == '$cmd',
95
+ "Command collection name is #{collection_name.inspect} for " \
96
+ "#{entry.inspect}, but should be '$cmd'}")
97
+
98
+ handle_cmd(db_name, collection_name, data)
99
+ else
100
+ raise "Unrecognized op: #{op} (#{entry.inspect})"
101
+ end
102
+
103
+ optime = entry['ts']
104
+ trigger(:update_optime, optime.seconds)
105
+ end
106
+
107
+ def handle_insert(db_name, collection_name, data)
108
+ if collection_name == 'system.indexes'
109
+ handle_create_index(data)
110
+ else
111
+ trigger(:insert, db_name, collection_name, data)
112
+ end
113
+ end
114
+
115
+ def handle_create_index(spec)
116
+ db_name, collection_name = parse_ns(spec['ns'])
117
+ index_key = spec['key'].map { |field, dir| [field, dir.round] }
118
+ options = {}
119
+
120
+ spec.each do |key, value|
121
+ case key
122
+ when 'v'
123
+ unless value == 1
124
+ raise NotImplementedError.new("Only v=1 indexes are supported, " \
125
+ "not v=#{value.inspect}")
126
+ end
127
+ when 'ns', 'key', '_id' # do nothing
128
+ else
129
+ options[key.to_sym] = value
130
+ end
131
+ end
132
+
133
+ assert(options.include?(:name),
134
+ "No name defined for index spec #{spec.inspect}")
135
+
136
+ trigger(:create_index, db_name, collection_name, index_key, options)
137
+ end
138
+
139
+ def handle_cmd(db_name, collection_name, data)
140
+ if deleted_from_collection = data['deleteIndexes']
141
+ index_name = data['index']
142
+ trigger(:drop_index, db_name, deleted_from_collection, index_name)
143
+ elsif created_collection = data['create']
144
+ handle_create_collection(db_name, data)
145
+ elsif dropped_collection = data['drop']
146
+ trigger(:drop_collection, db_name, dropped_collection)
147
+ elsif old_collection_ns = data['renameCollection']
148
+ db_name, old_collection_name = parse_ns(old_collection_ns)
149
+ _, new_collection_name = parse_ns(data['to'])
150
+ trigger(:rename_collection, db_name, old_collection_name, new_collection_name)
151
+ elsif data['dropDatabase'] == 1
152
+ trigger(:drop_database, db_name)
153
+ else
154
+ raise "Unrecognized command #{data.inspect}"
155
+ end
156
+ end
157
+
158
+ def handle_create_collection(db_name, data)
159
+ collection_name = data.delete('create')
160
+
161
+ options = {}
162
+ data.each do |k, v|
163
+ options[k.to_sym] = (k == 'size') ? v.round : v
164
+ end
165
+
166
+ trigger(:create_collection, db_name, collection_name, options)
167
+ end
168
+ end
169
+ end
@@ -3,14 +3,17 @@ module Mongoriver
3
3
  include Mongoriver::Logging
4
4
 
5
5
  attr_reader :upstream_conn
6
+ attr_reader :oplog
6
7
 
7
- def initialize(upstreams, type)
8
+ def initialize(upstreams, type, oplog = "oplog.rs")
8
9
  @upstreams = upstreams
9
10
  @type = type
11
+ @oplog = oplog
10
12
  # This number seems high
11
13
  @conn_opts = {:op_timeout => 86400}
12
14
 
13
15
  @cursor = nil
16
+ @stop = false
14
17
 
15
18
  connect_upstream
16
19
  end
@@ -29,7 +32,7 @@ module Mongoriver
29
32
  opts = @conn_opts.merge(:slave_ok => true)
30
33
  host, port = parse_direct_upstream
31
34
  @upstream_conn = Mongo::Connection.new(host, port, opts)
32
- raise "Server at #{@upstream_conn.host}:#{@upstream_conn.port} is the primary -- if you're ok with that, check why your wrapper is passing :direct rather than :slave (HINT: try passing a -a to scripts like optail or mongocp)" if @type == :slave && @upstream_conn.primary?
35
+ raise "Server at #{@upstream_conn.host}:#{@upstream_conn.port} is the primary -- if you're ok with that, check why your wrapper is passing :direct rather than :slave" if @type == :slave && @upstream_conn.primary?
33
36
  ensure_upstream_replset!
34
37
  when :existing
35
38
  raise "Must pass in a single existing Mongo::Connection with :existing" unless @upstreams.length == 1 && @upstreams[0].respond_to?(:db)
@@ -61,7 +64,7 @@ module Mongoriver
61
64
  end
62
65
 
63
66
  def oplog_collection
64
- @upstream_conn.db('local').collection('oplog.rs')
67
+ @upstream_conn.db('local').collection(oplog)
65
68
  end
66
69
 
67
70
  def tail_from(ts, opts = {})
@@ -81,20 +84,26 @@ module Mongoriver
81
84
  end
82
85
  end
83
86
 
84
- def stop
85
- @cursor.close if @cursor
86
- @cursor = nil
87
- end
88
-
89
87
  def stream(limit=nil)
90
88
  count = 0
91
- while @cursor.has_next?
89
+ while !@stop && @cursor.has_next?
92
90
  count += 1
93
91
  break if limit && count >= limit
92
+
94
93
  yield @cursor.next
95
94
  end
96
95
 
97
96
  return @cursor.has_next?
98
97
  end
98
+
99
+ def stop
100
+ @stop = true
101
+ end
102
+
103
+ def close
104
+ @cursor.close if @cursor
105
+ @cursor = nil
106
+ @stop = false
107
+ end
99
108
  end
100
109
  end
@@ -1,3 +1,3 @@
1
1
  module Mongoriver
2
- VERSION = "0.1.0"
2
+ VERSION = "0.2.0"
3
3
  end
@@ -19,4 +19,8 @@ Gem::Specification.new do |gem|
19
19
  gem.add_runtime_dependency('mongo', '>= 1.7')
20
20
  gem.add_runtime_dependency('bson_ext')
21
21
  gem.add_runtime_dependency('log4r')
22
+
23
+ gem.add_development_dependency('rake')
24
+ gem.add_development_dependency('minitest')
25
+ gem.add_development_dependency('mocha', '>= 0.13')
22
26
  end
@@ -0,0 +1,65 @@
1
+ require 'mongoriver'
2
+ require 'mongo'
3
+ require 'minitest/autorun'
4
+ require 'mocha/setup'
5
+
6
+ describe 'Mongoriver::Stream' do
7
+ def create_op(op)
8
+ ts = Time.now.to_i
9
+ {'ts'=>BSON::Timestamp.new(ts, 0), 'h'=>1234, 'v'=>1, 'ns'=>'foo.bar'}.merge(op)
10
+ end
11
+
12
+ before do
13
+ conn = stub(:db => nil)
14
+ @tailer = Mongoriver::Tailer.new([conn], :existing)
15
+ @outlet = Mongoriver::AbstractOutlet.new
16
+ @stream = Mongoriver::Stream.new(@tailer, @outlet)
17
+
18
+ @outlet.expects(:update_optime).at_least_once
19
+ end
20
+
21
+ it 'triggers insert' do
22
+ @outlet.expects(:insert).once.with('foo', 'bar', {'_id' => 'baz'})
23
+ @stream.send(:handle_op, create_op({'op'=>'i', 'o'=>{'_id'=>'baz'}}))
24
+ end
25
+
26
+ it 'triggers update' do
27
+ @outlet.expects(:update).once.with('foo', 'bar', {'_id' => 'baz'}, {'a' => 'b'})
28
+ @stream.send(:handle_op, create_op({'op'=>'u', 'o2'=>{'_id'=>'baz'}, 'o'=>{'a'=>'b'}}))
29
+ end
30
+
31
+ it 'triggers remove' do
32
+ @outlet.expects(:remove).once.with('foo', 'bar', {'_id' => 'baz'})
33
+ @stream.send(:handle_op, create_op({'op'=>'d', 'b'=>true, 'o'=>{'_id'=>'baz'}}))
34
+ end
35
+
36
+ it 'triggers create_collection' do
37
+ @outlet.expects(:create_collection).once.with('foo', 'bar', {:capped => true, :size => 10})
38
+ @stream.send(:handle_op, create_op({'op'=>'c', 'ns'=>'foo.$cmd', 'o'=>{'create'=>'bar', 'capped'=>true, 'size'=>10.0}}))
39
+ end
40
+
41
+ it 'triggers drop_collection' do
42
+ @outlet.expects(:drop_collection).once.with('foo', 'bar')
43
+ @stream.send(:handle_op, create_op({'op'=>'c', 'ns'=>'foo.$cmd', 'o'=>{'drop'=>'bar'}}))
44
+ end
45
+
46
+ it 'triggers rename_collection' do
47
+ @outlet.expects(:rename_collection).once.with('foo', 'bar', 'bar_2')
48
+ @stream.send(:handle_op, create_op({'op'=>'c', 'ns'=>'admin.$cmd', 'o'=>{'renameCollection'=>'foo.bar', 'to'=>'foo.bar_2'}}))
49
+ end
50
+
51
+ it 'triggers create_index' do
52
+ @outlet.expects(:create_index).once.with('foo', 'bar', [['baz', 1]], {:name => 'baz_1'})
53
+ @stream.send(:handle_op, create_op({'op'=>'i', 'ns'=>'foo.system.indexes', 'o'=>{'_id'=>'index_id', 'ns'=>'foo.bar', 'key'=>{'baz'=>1.0}, 'name'=>'baz_1'}}))
54
+ end
55
+
56
+ it 'triggers drop_index' do
57
+ @outlet.expects(:drop_index).once.with('foo', 'bar', 'baz_1')
58
+ @stream.send(:handle_op, create_op({'op'=>'c', 'ns'=>'foo.$cmd', 'o'=>{'deleteIndexes'=>'bar', 'index'=>'baz_1'}}))
59
+ end
60
+
61
+ it 'triggers drop_database' do
62
+ @outlet.expects(:drop_database).once.with('foo')
63
+ @stream.send(:handle_op, create_op({'op'=>'c', 'ns'=>'foo.$cmd', 'o'=>{'dropDatabase'=>1.0}}))
64
+ end
65
+ end
@@ -0,0 +1,83 @@
1
+ require 'mongoriver'
2
+ require 'mongo'
3
+ require 'minitest/autorun'
4
+ require 'mocha/setup'
5
+
6
+ # Connected tests: run these with eg MONGO_SERVER=localhost:27017
7
+
8
+ MONGO_SERVER = ENV['MONGO_SERVER'] || 'localhost:27017'
9
+
10
+ def connect
11
+ begin
12
+ host, port = MONGO_SERVER.split(':', 2)
13
+ Mongo::Connection.new(host, port)
14
+ rescue Mongo::ConnectionFailure
15
+ nil
16
+ end
17
+ end
18
+
19
+ describe 'connected tests' do
20
+ before do
21
+ @mongo = connect
22
+ skip unless @mongo
23
+ end
24
+
25
+ describe 'Mongoriver::Stream' do
26
+ before do
27
+ @tailer = Mongoriver::Tailer.new([MONGO_SERVER], :direct)
28
+ @outlet = Mongoriver::AbstractOutlet.new
29
+
30
+ @stream = Mongoriver::Stream.new(@tailer, @outlet)
31
+
32
+ @tail_from = @tailer.most_recent_timestamp.seconds + 1
33
+ sleep(1)
34
+ end
35
+
36
+ it 'triggers the correct ops in the correct order' do
37
+ db = 'test'
38
+ collection = 'test'
39
+ doc = {'_id' => 'foo', 'bar' => 'baz'}
40
+ updated_doc = doc.dup.merge('bar' => 'qux')
41
+ index_keys = [['bar', 1]]
42
+
43
+ @outlet.expects(:update_optime).at_least_once
44
+
45
+ op_sequence = sequence('op_sequence')
46
+
47
+ @outlet.expects(:insert).once.with(db, collection, doc).in_sequence(op_sequence)
48
+ @outlet.expects(:update).once.with(db, collection, {'_id' => 'foo'}, updated_doc).in_sequence(op_sequence)
49
+ @outlet.expects(:remove).once.with(db, collection, {'_id' => 'foo'}).in_sequence(op_sequence)
50
+
51
+ @outlet.expects(:create_index).once.with(db, collection, index_keys, {:name => 'bar_1'}).in_sequence(op_sequence)
52
+ @outlet.expects(:drop_index).once.with(db, collection, 'bar_1').in_sequence(op_sequence)
53
+
54
+ @outlet.expects(:rename_collection).once.with(db, collection, collection+'_foo').in_sequence(op_sequence)
55
+ @outlet.expects(:drop_collection).once.with(db, collection+'_foo').in_sequence(op_sequence)
56
+ @outlet.expects(:drop_database).once.with(db) { @stream.stop }.in_sequence(op_sequence)
57
+
58
+ coll = @mongo[db][collection]
59
+ coll.insert(doc)
60
+ coll.update({'_id' => 'foo'}, doc.merge('bar' => 'qux'))
61
+ coll.remove({'_id' => 'foo'})
62
+
63
+ name = coll.ensure_index(index_keys)
64
+ coll.drop_index(name)
65
+
66
+ @mongo[db].rename_collection(collection, collection+'_foo')
67
+ @mongo[db].drop_collection(collection+'_foo')
68
+ @mongo.drop_database(db)
69
+
70
+ @stream.run_forever(@tail_from)
71
+ end
72
+
73
+ it 'passes options to create_collection' do
74
+ @outlet.expects(:create_collection).once.with('test', 'test', {:capped => true, :size => 10}) { @stream.stop }
75
+ @outlet.expects(:update_optime).at_least_once.with(anything) { @stream.stop }
76
+
77
+ @mongo['test'].create_collection('test', :capped => true, :size => 10)
78
+ @mongo.drop_database('test')
79
+
80
+ @stream.run_forever(@tail_from)
81
+ end
82
+ end
83
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mongoriver
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-02-05 00:00:00.000000000 Z
12
+ date: 2013-05-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: mongo
@@ -59,12 +59,59 @@ dependencies:
59
59
  - - ! '>='
60
60
  - !ruby/object:Gem::Version
61
61
  version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: rake
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ - !ruby/object:Gem::Dependency
79
+ name: minitest
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ type: :development
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ - !ruby/object:Gem::Dependency
95
+ name: mocha
96
+ requirement: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ! '>='
100
+ - !ruby/object:Gem::Version
101
+ version: '0.13'
102
+ type: :development
103
+ prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0.13'
62
110
  description: Some tools and libraries to simplify tailing the mongod oplog
63
111
  email:
64
112
  - gdb@gregbrockman.com
65
113
  executables:
66
- - mongocp
67
- - optail
114
+ - watch-oplog
68
115
  extensions: []
69
116
  extra_rdoc_files: []
70
117
  files:
@@ -73,16 +120,19 @@ files:
73
120
  - LICENSE
74
121
  - README.md
75
122
  - Rakefile
76
- - bin/mongocp
77
- - bin/optail
123
+ - bin/watch-oplog
78
124
  - lib/mongoriver.rb
125
+ - lib/mongoriver/abstract_outlet.rb
79
126
  - lib/mongoriver/abstract_persistent_tailer.rb
127
+ - lib/mongoriver/assertions.rb
80
128
  - lib/mongoriver/log.rb
81
129
  - lib/mongoriver/persistent_tailer.rb
82
- - lib/mongoriver/streambed.rb
130
+ - lib/mongoriver/stream.rb
83
131
  - lib/mongoriver/tailer.rb
84
132
  - lib/mongoriver/version.rb
85
133
  - mongoriver.gemspec
134
+ - test/test_mongoriver.rb
135
+ - test/test_mongoriver_connected.rb
86
136
  homepage: ''
87
137
  licenses: []
88
138
  post_install_message:
@@ -107,4 +157,7 @@ rubygems_version: 1.8.23
107
157
  signing_key:
108
158
  specification_version: 3
109
159
  summary: monogdb oplog-tailing utilities.
110
- test_files: []
160
+ test_files:
161
+ - test/test_mongoriver.rb
162
+ - test/test_mongoriver_connected.rb
163
+ has_rdoc:
@@ -1,250 +0,0 @@
1
- #!/usr/bin/env ruby
2
- require 'logger'
3
- require 'optparse'
4
-
5
- require 'rubygems'
6
- require 'bundler/setup'
7
- require 'mongoriver'
8
-
9
- module Mongoriver
10
- class Mongocp < Streambed
11
- include Mongoriver::Logging
12
-
13
- def initialize(upstreams, type, downstream, prefix)
14
- super(upstreams, type)
15
- @downstream = downstream
16
- @prefix = prefix
17
- connect_downstream
18
- end
19
-
20
- def hook_optime
21
- if optime = optime_collection.find_one(:_id => @prefix)
22
- optime['ts']
23
- else
24
- nil
25
- end
26
- end
27
-
28
- def hook_update_optime(ts, mandatory)
29
- optime_collection.update({:_id => @prefix}, {'$set' => {:ts => ts}}, :upsert => true) if mandatory || rand(20) == 0
30
- end
31
-
32
- def hook_initial_sync_index(db_name, collection_name, index_key, options)
33
- collection = downstream_collection(db_name, collection_name)
34
- index_hash = BSON::OrderedHash.new
35
- index_key.each {|k,v| index_hash[k] = v}
36
- collection.send(:generate_indexes, index_hash, nil, options)
37
- end
38
-
39
- def hook_initial_sync_record_batch(db_name, collection_name, records)
40
- collection = downstream_collection(db_name, collection_name)
41
- bulk_insert(collection, records)
42
- end
43
-
44
- # TODO: should probably do the same key checking nonsense as the above
45
- def hook_stream_insert(db_name, collection_name, object)
46
- collection = downstream_collection(db_name, collection_name)
47
- wrap_errors(collection, object['_id']) do
48
- # Only needed if safe mode is set in the driver. Note that the
49
- # argument here for oplog idempotency in the case of unique
50
- # keys is kind of interesting. I believe I can prove
51
- # idempotency as long as Mongo has no insert order-dependent
52
- # unique indexes (which I believe is true) and that you do all
53
- # your object updates as upserts.
54
- allow_dupkeys do
55
- collection.insert(object)
56
- end
57
- end
58
- end
59
-
60
- def hook_stream_update(db_name, collection_name, selector, update)
61
- collection = downstream_collection(db_name, collection_name)
62
- wrap_errors(collection, selector['_id']) do
63
- collection.update(selector, update, :upsert => true)
64
- end
65
- end
66
-
67
- def hook_stream_remove(db_name, collection_name, object)
68
- collection = downstream_collection(db_name, collection_name)
69
- wrap_errors(collection, object['_id']) do
70
- collection.remove(object)
71
- end
72
- end
73
-
74
- def hook_stream_create_collection(db_name, create)
75
- db = downstream_db(db_name)
76
- wrap_errors(db, create) do
77
- db.create_collection(create)
78
- end
79
- end
80
-
81
- # "Error renaming collection: #<BSON::OrderedHash:0x83869e34 {\"errmsg\"=>\"exception: source namespace does not exist\", \"code\"=>10026, \"ok\"=>0.0}>"
82
- #
83
- # Possibly need the same thing if the destination already exists
84
- def hook_stream_rename_collection(db_name, source, target)
85
- db = downstream_db(db_name)
86
- wrap_errors(db, "#{source} -> #{target}") do
87
- begin
88
- db.rename_collection(source, target)
89
- rescue Mongo::MongoDBError => e
90
- if e.message =~ /Error renaming collection: .*exception: source namespace does not exist"/
91
- log.warn("Ignoring rename of non-existent collection #{source} -> #{target}: #{e} (expected when replaying part of the oplog)")
92
- elsif e.message =~ /Error renaming collection: .*exception: target namespace exists"/
93
- log.warn("Ignoring rename of #{source} to existing collection #{target}: #{e} (expected when replaying part of the oplog)")
94
- else
95
- raise
96
- end
97
- end
98
- end
99
- end
100
-
101
- def hook_stream_drop_index(db_name, collection_name, index_name)
102
- collection = downstream_collection(db_name, collection_name)
103
- wrap_errors(collection, index_name) do
104
- begin
105
- collection.drop_index(index_name)
106
- rescue Mongo::MongoDBError => e
107
- raise
108
- if e.message =~ /index not found/
109
- log.warn("Ignoring drop of non-existent index #{index_name.inspect}: #{e} (expected when replaying part of the oplog)")
110
- else
111
- raise
112
- end
113
- end
114
- end
115
- end
116
-
117
- def hook_stream_drop_collection(db_name, dropped)
118
- db = downstream_db(db_name)
119
- wrap_errors(db, dropped) do
120
- db.drop_collection(dropped)
121
- end
122
- end
123
-
124
- def hook_stream_drop_database(db_name)
125
- db = downstream_db(db_name)
126
- wrap_errors(db, db_name) do
127
- db.command(:dropDatabase => 1)
128
- end
129
- end
130
-
131
- private
132
-
133
- def allow_dupkeys(&blk)
134
- begin
135
- blk.call
136
- rescue Mongo::OperationFailure => e
137
- if e.error_code == 11000
138
- log.warn("Ignoring unique index violation: #{e} (expected when replaying part of the oplog)")
139
- else
140
- raise
141
- end
142
- end
143
- end
144
-
145
- def bulk_insert(collection, docs)
146
- begin
147
- # Use the internal insert_documents method because it lets us
148
- # disable key verification
149
- collection.send(:insert_documents, docs, collection.name, false)
150
- rescue Mongo::MongoRubyError => e
151
- log.error("#{ns}: Caught error on batch insert", e)
152
- docs.each do |doc|
153
- wrap_errors(collection, doc['_id']) do
154
- collection.send(:insert_documents, [doc], collection.name, false)
155
- end
156
- end
157
- end
158
- end
159
-
160
- def wrap_errors(collection_or_db, object, &blk)
161
- begin
162
- blk.call
163
- rescue Mongo::MongoRubyError => e
164
- if collecton_or_db.kind_of?(Mongo::Collection)
165
- ns = "#{collection_or_db.db.name}.#{collection_or_db.name}"
166
- else
167
- ns = collection_or_db.db.name
168
- end
169
- log.error("#{ns}: Unknown error for #{object}", e)
170
- end
171
- end
172
-
173
- def downstream_db(db_name)
174
- prefixed = "#{@prefix}_#{db_name}"
175
- @downstream_conn.db(prefixed)
176
- end
177
-
178
- def downstream_collection(db_name, collection_name)
179
- downstream_db(db_name).collection(collection_name)
180
- end
181
-
182
- def optime_collection
183
- @optime_collection ||= @downstream_conn.db('_mongocp').collection('optime')
184
- end
185
-
186
- def connect_downstream
187
- host, port = @tailer.parse_host_spec(@downstream)
188
- @downstream_conn = Mongo::Connection.new(host, port, :safe => true)
189
- end
190
- end
191
- end
192
-
193
- def main
194
- options = {:host => nil, :port => nil, :type => :slave, :verbose => 0}
195
- optparse = OptionParser.new do |opts|
196
- opts.banner = "Usage: #{$0} [options]"
197
-
198
- opts.on('-v', '--verbosity', 'Verbosity of debugging output') do
199
- options[:verbose] += 1
200
- end
201
-
202
- opts.on('-h', '--help', 'Display this message') do
203
- puts opts
204
- exit(1)
205
- end
206
-
207
- opts.on('--help', 'Display this message') do
208
- puts opts
209
- exit(1)
210
- end
211
-
212
- opts.on('-h HOST', '--host', 'Upstream host to connect to') do |host|
213
- options[:host] = host
214
- end
215
-
216
- opts.on('-p PORT', '--port', 'Upstream host to connect to') do |port|
217
- options[:port] = Integer(port)
218
- end
219
-
220
- opts.on('-a', '--all', 'Allow connections even directly to a primary') do
221
- options[:type] = :direct
222
- end
223
- end
224
- optparse.parse!
225
-
226
- if ARGV.length != 0
227
- puts optparse
228
- return 1
229
- end
230
-
231
- log = Log4r::Logger.new('Stripe')
232
- log.outputters = Log4r::StdoutOutputter.new(STDERR)
233
- if options[:verbose] >= 1
234
- log.level = Log4r::DEBUG
235
- else
236
- log.level = Log4r::INFO
237
- end
238
- runner = Mongoriver::Mongocp.new(["#{options[:host]}:#{options[:port]}"], options[:type], 'localhost:5001', 'test')
239
- runner.run
240
- return 0
241
- end
242
-
243
- if $0 == __FILE__
244
- ret = main
245
- begin
246
- exit(ret)
247
- rescue TypeError
248
- exit(0)
249
- end
250
- end
@@ -1,299 +0,0 @@
1
- module Mongoriver
2
- class Streambed
3
- include Mongoriver::Logging
4
-
5
- attr_reader :stats
6
-
7
- class AssertionFailure < StandardError; end
8
-
9
- def assert(condition, msg)
10
- raise AssertionFailure.new(msg) unless condition
11
- end
12
-
13
- def initialize(upstreams, type)
14
- @tailer = Mongoriver::Tailer.new(upstreams, type)
15
- @record_fetch_batch_size = 1024
16
- @record_sync_batch_size = 256
17
- @stats = Hash.new(0)
18
- end
19
-
20
- def run
21
- self.class.validate_hooks!
22
-
23
- unless ts = starting_optime
24
- ts = @tailer.most_recent_timestamp
25
- initial_sync
26
- hook_update_optime(ts, true)
27
- end
28
-
29
- tail_from(ts)
30
- end
31
-
32
- def self.my_hooks
33
- @hooks ||= []
34
- end
35
-
36
- def self.all_hooks
37
- hooks = my_hooks
38
- if superclass <= Streambed
39
- hooks + superclass.all_hooks
40
- else
41
- hooks
42
- end
43
- end
44
-
45
- def self.validate_hooks!
46
- errors = []
47
- all_hooks.each do |name, args, opts|
48
- method = self.instance_method(hook_name(name))
49
- signature = "#{method.name}(#{args.join(', ')})"
50
- if method.owner == Streambed && !opts[:default]
51
- errors << "Must provide implementation of #{signature}"
52
- end
53
- end
54
-
55
- raise "You need to fix the following hook errors:
56
-
57
- #{errors.join("\n ")}" if errors.length > 0
58
- end
59
-
60
- def self.hook_name(name)
61
- "hook_#{name}"
62
- end
63
-
64
- def self.hook(name, args=[], opts={})
65
- if default = opts[:default]
66
- target = hook_name(default)
67
- implementation = Proc.new do |*args, &blk|
68
- send(target, *args, &blk)
69
- end
70
- else
71
- implementation = Proc.new do
72
- raise NotImplementedError.new("Override in subclass")
73
- end
74
- end
75
-
76
- define_method(hook_name(name), implementation)
77
- my_hooks << [name, args, opts]
78
- end
79
-
80
- hook :optime
81
- hook :update_optime, [:ts, :mandatory]
82
- hook :initial_sync_index, [:db_name, :collection_name, :index_key, :options]
83
- hook :initial_sync_record_batch, [:db_name, :collection_name, :records]
84
- hook :stream_insert, [:db_name, :collection_name, :object]
85
- hook :stream_update, [:db_name, :collection_name, :selector, :update]
86
- hook :stream_remove, [:db_name, :collection_name, :object]
87
- # Not usually a difference between the initial index creation and
88
- # creating it while streaming ops.
89
- hook :stream_create_index, [:db_name, :collection_name, :index_key, :options], :default => :initial_sync_index
90
- # This seems to be called while doing a mapreduce.
91
- hook :stream_create_collection, [:db_name, :create]
92
- # This also seems to be called while doing a mapreduce. Note that
93
- # I think mongo has a concept of temporary table, which I should
94
- # look into, and renameCollection has some temporary table option.
95
- hook :stream_rename_collection, [:db_name, :source, :target]
96
- hook :stream_drop_index, [:db_name, :collection_name, :index_name]
97
- hook :stream_drop_collection, [:db_name, :dropped]
98
- hook :stream_drop_database, [:db_name]
99
-
100
- private
101
-
102
- def starting_optime
103
- case time = hook_optime
104
- when Integer
105
- if time >= 0
106
- BSON::Timestamp.new(time, 0)
107
- elsif time == -1
108
- @tailer.most_recent_timestamp
109
- else
110
- raise "Invalid optime: #{time}"
111
- end
112
- when BSON::Timestamp, nil
113
- time
114
- else
115
- raise "Unrecognized type #{time.class} (#{time.inspect}) for start time"
116
- end
117
- end
118
-
119
- def initial_sync
120
- initial_sync_all_indexes
121
- initial_sync_all_records
122
- end
123
-
124
- def initial_sync_all_indexes
125
- log.info("Beginning initial sync of indexes")
126
- syncable_databases.each {|db| initial_sync_indexes_for_db(db)}
127
- log.info("Done initial sync of indexes")
128
- end
129
-
130
- def initial_sync_indexes_for_db(db)
131
- db.collection('system.indexes').find.each do |index|
132
- options = extract_options_from_index_spec(index)
133
- index_key = index['key'].to_a
134
-
135
- ns = index['ns']
136
- db_name, collection_name = parse_ns(ns)
137
- assert(db_name == db.name, "Index db name #{db_name.inspect} differs from current db name #{db.name.inspect}")
138
-
139
- log.info("#{ns}: Initial sync of index #{options[:name]}")
140
- hook_initial_sync_index(db_name, collection_name, index_key, options)
141
- end
142
- end
143
-
144
- def initial_sync_all_records
145
- log.info("Beginning initial sync of records")
146
- syncable_databases.each {|db| initial_sync_records_for_db(db)}
147
- log.info("Done initial sync of records")
148
- end
149
-
150
- def initial_sync_records_for_db(db)
151
- syncable_collections(db).each do |collection|
152
- initial_sync_records_for_collection(collection)
153
- end
154
- end
155
-
156
- def initial_sync_records_for_collection(collection)
157
- db_name = collection.db.name
158
- collection_name = collection.name
159
- ns = "#{db_name}.#{collection_name}"
160
-
161
- log.info("#{ns}: Starting record initial sync")
162
-
163
- records = []
164
- collection.find({}, :batch_size => @record_fetch_batch_size, :timeout => false, :sort => [['$natural', 1]]) do |cursor|
165
- while cursor.has_next?
166
- records << cursor.next
167
- if records.length > @record_sync_batch_size
168
- # TODO: add better logging than this
169
- log.info("#{ns}: Running sync of batch of #{records.length} records")
170
- hook_initial_sync_record_batch(db_name, collection_name, records)
171
- records = []
172
- end
173
- end
174
- end
175
- log.info("#{ns}: Finishing sync with a batch of #{records.length} records")
176
- hook_initial_sync_record_batch(db_name, collection_name, records)
177
-
178
- log.info("#{ns}: Finished record initial sync")
179
- end
180
-
181
- # This should be fine to instantiate all at once, since
182
- # database_names returns all the dbs as strings anyway
183
- def syncable_databases
184
- @tailer.upstream_conn.database_names.map do |db_name|
185
- next if db_name == 'local'
186
- @tailer.upstream_conn.db(db_name)
187
- end.compact
188
- end
189
-
190
- def syncable_collections(db)
191
- db.collection_names.map do |collection_name|
192
- next if collection_name.start_with?('system.')
193
- db.collection(collection_name)
194
- end.compact
195
- end
196
-
197
- def extract_options_from_index_spec(index)
198
- options = {}
199
- index.each do |key, value|
200
- case key
201
- when 'v'
202
- raise NotImplementedError.new("Only v=1 indexes are supported at the moment, not v=#{value.inspect}") unless value == 1
203
- when 'ns', 'key'
204
- else
205
- options[key.to_sym] = value
206
- end
207
- end
208
-
209
- assert(options.include?(:name), "No name defined for index spec #{index.inspect}")
210
- options
211
- end
212
-
213
- def stream_op(entry)
214
- op = entry['op']
215
- data = entry['o']
216
- ns = entry['ns']
217
-
218
- if op == 'n'
219
- # This happens for initial rs.initiate() op, maybe others.
220
- log.info("Skipping no-op #{entry.inspect}")
221
- return
222
- end
223
-
224
- db_name, collection_name = parse_ns(ns)
225
- assert(db_name, "Nil db name #{db_name.inspect} for #{entry.inspect}")
226
-
227
- case op
228
- when 'i'
229
- if collection_name == 'system.indexes'
230
- record(ns, entry, :create_index)
231
- index_db_name, index_collection_name = parse_ns(data['ns'])
232
- index_key = data['key'].to_a
233
- options = extract_options_from_index_spec(data)
234
- hook_stream_create_index(index_db_name, index_collection_name, index_key, options)
235
- else
236
- record(ns, entry, :insert)
237
- hook_stream_insert(db_name, collection_name, data)
238
- end
239
- when 'u'
240
- record(ns, entry, :update)
241
- hook_stream_update(db_name, collection_name, entry['o2'], data)
242
- when 'd'
243
- record(ns, entry, :remove)
244
- hook_stream_remove(db_name, collection_name, data)
245
- when 'c'
246
- assert(collection_name == '$cmd', "Command collection name is #{collection_name.inspect} for #{entry.inspect}")
247
- if deleted_from = data['deleteIndexes']
248
- record(ns, entry, :drop_index)
249
- index = data['index']
250
- hook_stream_drop_index(db_name, deleted_from, index)
251
- elsif dropped = data['drop']
252
- record(ns, entry, :drop_collection)
253
- hook_stream_drop_collection(db_name, dropped)
254
- elsif dropped = data['dropDatabase']
255
- record(ns, entry, :drop_database)
256
- hook_stream_drop_database(db_name)
257
- elsif source = data['renameCollection']
258
- record(ns, entry, :rename_collection)
259
- target = data['to']
260
- hook_stream_rename_collection(db_name, source, target)
261
- elsif create = data['create']
262
- record(ns, entry, :create)
263
- hook_stream_create_collection(db_name, create)
264
- else
265
- raise "Unrecognized command #{data.inspect}"
266
- end
267
- else
268
- raise "Unrecognized op: #{op} (#{entry.inspect})"
269
- end
270
-
271
- optime = entry['ts']
272
- hook_update_optime(optime, false)
273
- end
274
-
275
- def tail_from(ts)
276
- begin
277
- @tailer.tail_from(ts)
278
- loop do
279
- @tailer.stream do |op|
280
- stream_op(op)
281
- end
282
- end
283
- ensure
284
- @tailer.stop
285
- end
286
- end
287
-
288
- def record(ns, entry, type)
289
- stats[type] += 1
290
- log.debug("#{ns}: #{type.inspect} #{entry.inspect}")
291
- end
292
-
293
- protected
294
-
295
- def parse_ns(ns)
296
- ns.split('.', 2)
297
- end
298
- end
299
- end