mongoriver 0.1.0

data/.gitignore ADDED
@@ -0,0 +1,17 @@
+ *.gem
+ *.rbc
+ .bundle
+ .config
+ .yardoc
+ Gemfile.lock
+ InstalledFiles
+ _yardoc
+ coverage
+ doc/
+ lib/bundler/man
+ pkg
+ rdoc
+ spec/reports
+ test/tmp
+ test/version_tmp
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,3 @@
+ source 'https://rubygems.org'
+
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
+ Copyright (c) 2012 Greg Brockman
+
+ MIT License
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,40 @@
+ # Mongoriver
+
+ TODO: Write a gem description
+
+ ## Installation
+
+ Add this line to your application's Gemfile:
+
+     gem 'mongoriver'
+
+ And then execute:
+
+     $ bundle
+
+ Or install it yourself as:
+
+     $ gem install mongoriver
+
+ ## Usage
+
+ TODO: Write usage instructions here
+
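+ In the meantime, here is a minimal sketch (an illustration, not official usage docs -- it assumes a replica-set member running at localhost:27017):
+
+     require 'mongoriver'
+
+     # Illustrative only: tail the oplog from "now" and print each op
+     tailer = Mongoriver::Tailer.new(['localhost:27017'], :direct)
+     tailer.tail_from(tailer.most_recent_timestamp)
+     tailer.stream do |op|
+       puts op.inspect
+     end
+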
+ ## Contributing
+
+ 1. Fork it
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
+ 4. Push to the branch (`git push origin my-new-feature`)
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,2 @@
+ #!/usr/bin/env rake
+ require "bundler/gem_tasks"
data/bin/mongocp ADDED
@@ -0,0 +1,244 @@
+ #!/usr/bin/env ruby
+ require 'logger'
+ require 'optparse'
+
+ require 'rubygems'
+ require 'bundler/setup'
+ require 'mongoriver'
+
+ module Mongoriver
+   class Mongocp < Streambed
+     include Mongoriver::Logging
+
+     def initialize(upstreams, type, downstream, prefix)
+       super(upstreams, type)
+       @downstream = downstream
+       @prefix = prefix
+       connect_downstream
+     end
+
+     def hook_optime
+       if optime = optime_collection.find_one(:_id => @prefix)
+         optime['ts']
+       else
+         nil
+       end
+     end
+
+     def hook_update_optime(ts, mandatory)
+       optime_collection.update({:_id => @prefix}, {'$set' => {:ts => ts}}, :upsert => true) if mandatory || rand(20) == 0
+     end
+
+     def hook_initial_sync_index(db_name, collection_name, index_key, options)
+       collection = downstream_collection(db_name, collection_name)
+       index_hash = BSON::OrderedHash.new
+       index_key.each {|k,v| index_hash[k] = v}
+       collection.send(:generate_indexes, index_hash, nil, options)
+     end
+
+     def hook_initial_sync_record_batch(db_name, collection_name, records)
+       collection = downstream_collection(db_name, collection_name)
+       bulk_insert(collection, records)
+     end
+
+     # TODO: should probably do the same key checking nonsense as the above
+     def hook_stream_insert(db_name, collection_name, object)
+       collection = downstream_collection(db_name, collection_name)
+       wrap_errors(collection, object['_id']) do
+         # Only needed if safe mode is set in the driver. Note that the
+         # argument here for oplog idempotency in the case of unique
+         # keys is kind of interesting. I believe I can prove
+         # idempotency as long as Mongo has no insert order-dependent
+         # unique indexes (which I believe is true) and that you do all
+         # your object updates as upserts.
+         allow_dupkeys do
+           collection.insert(object)
+         end
+       end
+     end
+
+     def hook_stream_update(db_name, collection_name, selector, update)
+       collection = downstream_collection(db_name, collection_name)
+       wrap_errors(collection, selector['_id']) do
+         collection.update(selector, update, :upsert => true)
+       end
+     end
+
+     def hook_stream_remove(db_name, collection_name, object)
+       collection = downstream_collection(db_name, collection_name)
+       wrap_errors(collection, object['_id']) do
+         collection.remove(object)
+       end
+     end
+
+     def hook_stream_create_collection(db_name, create)
+       db = downstream_db(db_name)
+       wrap_errors(db, create) do
+         db.create_collection(create)
+       end
+     end
+
+     # "Error renaming collection: #<BSON::OrderedHash:0x83869e34 {\"errmsg\"=>\"exception: source namespace does not exist\", \"code\"=>10026, \"ok\"=>0.0}>"
+     #
+     # Possibly need the same thing if the destination already exists
+     def hook_stream_rename_collection(db_name, source, target)
+       db = downstream_db(db_name)
+       wrap_errors(db, "#{source} -> #{target}") do
+         begin
+           db.rename_collection(source, target)
+         rescue Mongo::MongoDBError => e
+           if e.message =~ /Error renaming collection: .*exception: source namespace does not exist"/
+             log.warn("Ignoring rename of non-existent collection #{source} -> #{target}: #{e} (expected when replaying part of the oplog)")
+           elsif e.message =~ /Error renaming collection: .*exception: target namespace exists"/
+             log.warn("Ignoring rename of #{source} to existing collection #{target}: #{e} (expected when replaying part of the oplog)")
+           else
+             raise
+           end
+         end
+       end
+     end
+
+     def hook_stream_drop_index(db_name, collection_name, index_name)
+       collection = downstream_collection(db_name, collection_name)
+       wrap_errors(collection, index_name) do
+         begin
+           collection.drop_index(index_name)
+         rescue Mongo::MongoDBError => e
+           if e.message =~ /index not found/
+             log.warn("Ignoring drop of non-existent index #{index_name.inspect}: #{e} (expected when replaying part of the oplog)")
+           else
+             raise
+           end
+         end
+       end
+     end
+
+     def hook_stream_drop_collection(db_name, dropped)
+       db = downstream_db(db_name)
+       wrap_errors(db, dropped) do
+         db.drop_collection(dropped)
+       end
+     end
+
+     def hook_stream_drop_database(db_name)
+       db = downstream_db(db_name)
+       wrap_errors(db, db_name) do
+         db.command(:dropDatabase => 1)
+       end
+     end
+
+     private
+
+     def allow_dupkeys(&blk)
+       begin
+         blk.call
+       rescue Mongo::OperationFailure => e
+         if e.error_code == 11000
+           log.warn("Ignoring unique index violation: #{e} (expected when replaying part of the oplog)")
+         else
+           raise
+         end
+       end
+     end
+
+     def bulk_insert(collection, docs)
+       begin
+         # Use the internal insert_documents method because it lets us
+         # disable key verification
+         collection.send(:insert_documents, docs, collection.name, false)
+       rescue Mongo::MongoRubyError => e
+         log.error("#{collection.db.name}.#{collection.name}: Caught error on batch insert: #{e}")
+         docs.each do |doc|
+           wrap_errors(collection, doc['_id']) do
+             collection.send(:insert_documents, [doc], collection.name, false)
+           end
+         end
+       end
+     end
+
+     def wrap_errors(collection_or_db, object, &blk)
+       begin
+         blk.call
+       rescue Mongo::MongoRubyError => e
+         if collection_or_db.kind_of?(Mongo::Collection)
+           ns = "#{collection_or_db.db.name}.#{collection_or_db.name}"
+         else
+           ns = collection_or_db.name
+         end
+         log.error("#{ns}: Unknown error for #{object}: #{e}")
+       end
+     end
+
+     def downstream_db(db_name)
+       prefixed = "#{@prefix}_#{db_name}"
+       @downstream_conn.db(prefixed)
+     end
+
+     def downstream_collection(db_name, collection_name)
+       downstream_db(db_name).collection(collection_name)
+     end
+
+     def optime_collection
+       @optime_collection ||= @downstream_conn.db('_mongocp').collection('optime')
+     end
+
+     def connect_downstream
+       host, port = @tailer.parse_host_spec(@downstream)
+       @downstream_conn = Mongo::Connection.new(host, port, :safe => true)
+     end
+   end
+ end
+
+ def main
+   options = {:host => nil, :port => nil, :type => :slave, :verbose => 0}
+   optparse = OptionParser.new do |opts|
+     opts.banner = "Usage: #{$0} [options]"
+
+     opts.on('-v', '--verbosity', 'Verbosity of debugging output') do
+       options[:verbose] += 1
+     end
+
+     opts.on('--help', 'Display this message') do
+       puts opts
+       exit(1)
+     end
+
+     opts.on('-h HOST', '--host', 'Upstream host to connect to') do |host|
+       options[:host] = host
+     end
+
+     opts.on('-p PORT', '--port', 'Upstream port to connect to') do |port|
+       options[:port] = Integer(port)
+     end
+
+     opts.on('-a', '--all', 'Allow connections even directly to a primary') do
+       options[:type] = :direct
+     end
+   end
+   optparse.parse!
+
+   if ARGV.length != 0
+     puts optparse
+     return 1
+   end
+
+   log = Log4r::Logger.new('Stripe')
+   log.outputters = Log4r::StdoutOutputter.new(STDERR)
+   if options[:verbose] >= 1
+     log.level = Log4r::DEBUG
+   else
+     log.level = Log4r::INFO
+   end
+   runner = Mongoriver::Mongocp.new(["#{options[:host]}:#{options[:port]}"], options[:type], 'localhost:5001', 'test')
+   runner.run
+   return 0
+ end
+
+ if $0 == __FILE__
+   ret = main
+   begin
+     exit(ret)
+   rescue TypeError
+     exit(0)
+   end
+ end
data/bin/optail ADDED
@@ -0,0 +1,101 @@
+ #!/usr/bin/env ruby
+ require 'logger'
+ require 'optparse'
+
+ require 'rubygems'
+ require 'bundler/setup'
+ require 'mongoriver'
+
+ module Mongoriver
+   class Mongocp < Streambed
+     include Mongoriver::Logging
+
+     def initialize(upstreams, type, start_optime, pause)
+       super(upstreams, type)
+       @start_optime = start_optime
+       @pause = pause
+     end
+
+     def pause
+       if @pause
+         $stderr.puts("Press enter to continue")
+         $stdin.readline
+       end
+     end
+
+     def hook_optime
+       @start_optime
+     end
+
+     def hook_update_optime(ts, mandatory)
+     end
+
+     all_hooks.each do |name, _, opts|
+       next if name == :optime || name == :update_optime
+       define_method(hook_name(name)) {|*args| pause}
+     end
+   end
+ end
+
+ def main
+   options = {:host => nil, :port => nil, :type => :slave, :optime => 0, :pause => true, :verbose => 0}
+   optparse = OptionParser.new do |opts|
+     opts.banner = "Usage: #{$0} [options]"
+
+     opts.on('-v', '--verbosity', 'Verbosity of debugging output') do
+       options[:verbose] += 1
+     end
+
+     opts.on('--help', 'Display this message') do
+       puts opts
+       exit(1)
+     end
+
+     opts.on('-h HOST', '--host', 'Upstream host to connect to') do |host|
+       options[:host] = host
+     end
+
+     opts.on('-p PORT', '--port', 'Upstream port to connect to') do |port|
+       options[:port] = Integer(port)
+     end
+
+     opts.on('-a', '--all', 'Allow connections even directly to a primary') do
+       options[:type] = :direct
+     end
+
+     opts.on('-s OPTIME', '--start', 'Starting optime') do |optime|
+       options[:optime] = Integer(optime)
+     end
+
+     opts.on('-f', '--follow-automatically', "Don't prompt between ops") do
+       options[:pause] = false
+     end
+   end
+   optparse.parse!
+
+   if ARGV.length != 0
+     puts optparse
+     return 1
+   end
+
+   log = Log4r::Logger.new('Stripe')
+   log.outputters = Log4r::StdoutOutputter.new(STDERR)
+   if options[:verbose] >= 1
+     log.level = Log4r::DEBUG
+   else
+     log.level = Log4r::INFO
+   end
+
+   runner = Mongoriver::Mongocp.new(["#{options[:host]}:#{options[:port]}"], options[:type], options[:optime], options[:pause])
+   runner.run
+   return 0
+ end
+
+ if $0 == __FILE__
+   ret = main
+   begin
+     exit(ret)
+   rescue TypeError
+     exit(0)
+   end
+ end
data/lib/mongoriver/abstract_persistent_tailer.rb ADDED
@@ -0,0 +1,69 @@
+ module Mongoriver
+
+   # A variant of Tailer that automatically loads and persists the
+   # "last timestamp processed" state. See PersistentTailer for a
+   # concrete subclass that uses the same mongod you are already
+   # tailing.
+
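+   # A minimal subclass sketch (hypothetical, for illustration only) that
+   # persists the optime to a local file instead of mongo:
+   #
+   #   class FileTailer < AbstractPersistentTailer
+   #     def read_timestamp
+   #       BSON::Timestamp.new(File.read('optime.txt').to_i, 0)
+   #     end
+   #     def write_timestamp(ts)
+   #       File.open('optime.txt', 'w') {|f| f.write(ts.seconds.to_s)}
+   #     end
+   #   end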
+   class AbstractPersistentTailer < Tailer
+     def initialize(upstream, type, opts={})
+       raise "You can't instantiate an AbstractPersistentTailer -- did you want PersistentTailer? " if self.class == AbstractPersistentTailer
+       super(upstream, type)
+
+       @last_saved = nil
+       @batch = opts[:batch]
+       @last_read = nil
+     end
+
+     def tail_from(ts, opts={})
+       if ts.nil?
+         ts = read_timestamp
+       end
+       super(ts, opts)
+     end
+
+     def stream(limit=nil)
+       super(limit) do |entry|
+         yield entry
+         @last_read = entry['ts']
+         maybe_save_timestamp unless @batch
+       end
+     end
+
+     def batch_done
+       raise "You must specify :batch => true to use the batch-processing interface." unless @batch
+       maybe_save_timestamp
+     end
+
+     def read_timestamp
+       raise "read_timestamp unimplemented!"
+     end
+
+     def write_timestamp(ts)
+       raise "write_timestamp unimplemented!"
+     end
+
+     def save_timestamp
+       write_timestamp(@last_read)
+       @last_saved = @last_read
+       log.info("Saved timestamp: #{@last_saved} (#{Time.at(@last_saved.seconds)})")
+     end
+
+     def maybe_save_timestamp
+       # Write timestamps once a minute
+       return unless @last_read
+       save_timestamp if @last_saved.nil? || (@last_read.seconds - @last_saved.seconds) > 60
+     end
+   end
+ end
data/lib/mongoriver/log.rb ADDED
@@ -0,0 +1,7 @@
+ module Mongoriver
+   module Logging
+     def log
+       @@logger ||= Log4r::Logger.new("Stripe::Mongoriver")
+     end
+   end
+ end
data/lib/mongoriver/persistent_tailer.rb ADDED
@@ -0,0 +1,37 @@
+ module Mongoriver
+   # A variant of AbstractPersistentTailer that automatically persists
+   # the "last timestamp processed" state into the database we are
+   # tailing.
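+   #
+   # Example sketch (illustrative -- 'my_service' is a hypothetical name,
+   # and the upstream must be a replica-set member):
+   #
+   #   tailer = Mongoriver::PersistentTailer.new(['localhost:27017'], :direct, 'my_service')
+   #   tailer.tail_from(nil)  # nil means "resume from the saved timestamp"
+   #   tailer.stream {|entry| puts entry.inspect}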
+   class PersistentTailer < AbstractPersistentTailer
+     def initialize(upstream, type, service, opts={})
+       raise "You can't use PersistentTailer against only a slave. How am I supposed to write state? " if type == :slave
+       super(upstream, type, opts)
+
+       db = opts[:db] || "_mongoriver"
+       collection = opts[:collection] || 'oplog-tailers'
+       @service = service
+       @state_collection = @upstream_conn.db(db).collection(collection)
+     end
+
+     def read_timestamp
+       row = @state_collection.find_one(:service => @service)
+       row ? row['timestamp'] : BSON::Timestamp.new(0, 0)
+     end
+
+     def write_timestamp(ts)
+       row = @state_collection.find_one(:service => @service)
+       if row
+         @state_collection.update({'_id' => row['_id']}, '$set' => { 'timestamp' => ts })
+       else
+         @state_collection.insert('service' => @service, 'timestamp' => ts)
+       end
+     end
+   end
+ end
data/lib/mongoriver/streambed.rb ADDED
@@ -0,0 +1,305 @@
+ module Mongoriver
+   class Streambed
+     include Mongoriver::Logging
+
+     attr_reader :stats
+
+     class AssertionFailure < StandardError; end
+
+     def assert(condition, msg)
+       raise AssertionFailure.new(msg) unless condition
+     end
+
+     def initialize(upstreams, type)
+       @tailer = Mongoriver::Tailer.new(upstreams, type)
+       @record_fetch_batch_size = 1024
+       @record_sync_batch_size = 256
+       @stats = Hash.new(0)
+     end
+
+     def run
+       self.class.validate_hooks!
+
+       unless ts = starting_optime
+         ts = @tailer.most_recent_timestamp
+         initial_sync
+         hook_update_optime(ts, true)
+       end
+
+       tail_from(ts)
+     end
+
+     def self.my_hooks
+       @hooks ||= []
+     end
+
+     def self.all_hooks
+       hooks = my_hooks
+       if superclass <= Streambed
+         hooks + superclass.all_hooks
+       else
+         hooks
+       end
+     end
+
+     def self.validate_hooks!
+       errors = []
+       all_hooks.each do |name, args, opts|
+         method = self.instance_method(hook_name(name))
+         signature = "#{method.name}(#{args.join(', ')})"
+         if method.owner == Streambed && !opts[:default]
+           errors << "Must provide implementation of #{signature}"
+         end
+       end
+
+       raise "You need to fix the following hook errors:
+
+   #{errors.join("\n  ")}" if errors.length > 0
+     end
+
+     def self.hook_name(name)
+       "hook_#{name}"
+     end
+
+     def self.hook(name, args=[], opts={})
+       if default = opts[:default]
+         target = hook_name(default)
+         implementation = Proc.new do |*args, &blk|
+           send(target, *args, &blk)
+         end
+       else
+         implementation = Proc.new do
+           raise NotImplementedError.new("Override in subclass")
+         end
+       end
+
+       define_method(hook_name(name), implementation)
+       my_hooks << [name, args, opts]
+     end
+
+     hook :optime
+     hook :update_optime, [:ts, :mandatory]
+     hook :initial_sync_index, [:db_name, :collection_name, :index_key, :options]
+     hook :initial_sync_record_batch, [:db_name, :collection_name, :records]
+     hook :stream_insert, [:db_name, :collection_name, :object]
+     hook :stream_update, [:db_name, :collection_name, :selector, :update]
+     hook :stream_remove, [:db_name, :collection_name, :object]
+     # Not usually a difference between the initial index creation and
+     # creating it while streaming ops.
+     hook :stream_create_index, [:db_name, :collection_name, :index_key, :options], :default => :initial_sync_index
+     # This seems to be called while doing a mapreduce.
+     hook :stream_create_collection, [:db_name, :create]
+     # This also seems to be called while doing a mapreduce. Note that
+     # I think mongo has a concept of temporary table, which I should
+     # look into, and renameCollection has some temporary table option.
+     hook :stream_rename_collection, [:db_name, :source, :target]
+     hook :stream_drop_index, [:db_name, :collection_name, :index_name]
+     hook :stream_drop_collection, [:db_name, :dropped]
+     hook :stream_drop_database, [:db_name]
+
+     private
+
+     def starting_optime
+       case time = hook_optime
+       when Integer
+         if time >= 0
+           BSON::Timestamp.new(time, 0)
+         elsif time == -1
+           @tailer.most_recent_timestamp
+         else
+           raise "Invalid optime: #{time}"
+         end
+       when BSON::Timestamp, nil
+         time
+       else
+         raise "Unrecognized type #{time.class} (#{time.inspect}) for start time"
+       end
+     end
+
+     def initial_sync
+       initial_sync_all_indexes
+       initial_sync_all_records
+     end
+
+     def initial_sync_all_indexes
+       log.info("Beginning initial sync of indexes")
+       syncable_databases.each {|db| initial_sync_indexes_for_db(db)}
+       log.info("Done initial sync of indexes")
+     end
+
+     def initial_sync_indexes_for_db(db)
+       db.collection('system.indexes').find.each do |index|
+         options = extract_options_from_index_spec(index)
+         index_key = index['key'].to_a
+
+         ns = index['ns']
+         db_name, collection_name = parse_ns(ns)
+         assert(db_name == db.name, "Index db name #{db_name.inspect} differs from current db name #{db.name.inspect}")
+
+         log.info("#{ns}: Initial sync of index #{options[:name]}")
+         hook_initial_sync_index(db_name, collection_name, index_key, options)
+       end
+     end
+
+     def initial_sync_all_records
+       log.info("Beginning initial sync of records")
+       syncable_databases.each {|db| initial_sync_records_for_db(db)}
+       log.info("Done initial sync of records")
+     end
+
+     def initial_sync_records_for_db(db)
+       syncable_collections(db).each do |collection|
+         initial_sync_records_for_collection(collection)
+       end
+     end
+
+     def initial_sync_records_for_collection(collection)
+       db_name = collection.db.name
+       collection_name = collection.name
+       ns = "#{db_name}.#{collection_name}"
+
+       log.info("#{ns}: Starting record initial sync")
+
+       records = []
+       collection.find({}, :batch_size => @record_fetch_batch_size, :timeout => false, :sort => [['$natural', 1]]) do |cursor|
+         while cursor.has_next?
+           records << cursor.next
+           if records.length > @record_sync_batch_size
+             # TODO: add better logging than this
+             log.info("#{ns}: Running sync of batch of #{records.length} records")
+             hook_initial_sync_record_batch(db_name, collection_name, records)
+             records = []
+           end
+         end
+       end
+       log.info("#{ns}: Finishing sync with a batch of #{records.length} records")
+       hook_initial_sync_record_batch(db_name, collection_name, records)
+
+       log.info("#{ns}: Finished record initial sync")
+     end
+
+     # This should be fine to instantiate all at once, since
+     # database_names returns all the dbs as strings anyway
+     def syncable_databases
+       @tailer.upstream_conn.database_names.map do |db_name|
+         next if db_name == 'local'
+         @tailer.upstream_conn.db(db_name)
+       end.compact
+     end
+
+     def syncable_collections(db)
+       db.collection_names.map do |collection_name|
+         next if collection_name.start_with?('system.')
+         db.collection(collection_name)
+       end.compact
+     end
+
+     def extract_options_from_index_spec(index)
+       options = {}
+       index.each do |key, value|
+         case key
+         when 'v'
+           raise NotImplementedError.new("Only v=1 indexes are supported at the moment, not v=#{value.inspect}") unless value == 1
+         when 'ns', 'key'
+         else
+           options[key.to_sym] = value
+         end
+       end
+
+       assert(options.include?(:name), "No name defined for index spec #{index.inspect}")
+       options
+     end
+
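+     # For reference, oplog entries look roughly like this (an illustrative
+     # sketch, not an exhaustive schema):
+     #   {'ts' => <BSON::Timestamp>, 'op' => 'i', 'ns' => 'db.collection',
+     #    'o' => <document>, 'o2' => <selector, updates only>}
+     # where 'op' is one of i (insert), u (update), d (delete), c (command),
+     # or n (no-op), as dispatched below.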
+     def stream_op(entry)
+       op = entry['op']
+       data = entry['o']
+       ns = entry['ns']
+
+       if op == 'n'
+         # This happens for initial rs.initiate() op, maybe others.
+         log.info("Skipping no-op #{entry.inspect}")
+         return
+       end
+
+       db_name, collection_name = parse_ns(ns)
+       assert(db_name, "Nil db name #{db_name.inspect} for #{entry.inspect}")
+
+       case op
+       when 'i'
+         if collection_name == 'system.indexes'
+           record(ns, entry, :create_index)
+           index_db_name, index_collection_name = parse_ns(data['ns'])
+           index_key = data['key'].to_a
+           options = extract_options_from_index_spec(data)
+           hook_stream_create_index(index_db_name, index_collection_name, index_key, options)
+         else
+           record(ns, entry, :insert)
+           hook_stream_insert(db_name, collection_name, data)
+         end
+       when 'u'
+         record(ns, entry, :update)
+         hook_stream_update(db_name, collection_name, entry['o2'], data)
+       when 'd'
+         record(ns, entry, :remove)
+         hook_stream_remove(db_name, collection_name, data)
+       when 'c'
+         assert(collection_name == '$cmd', "Command collection name is #{collection_name.inspect} for #{entry.inspect}")
+         if deleted_from = data['deleteIndexes']
+           record(ns, entry, :drop_index)
+           index = data['index']
+           hook_stream_drop_index(db_name, deleted_from, index)
+         elsif dropped = data['drop']
+           record(ns, entry, :drop_collection)
+           hook_stream_drop_collection(db_name, dropped)
+         elsif dropped = data['dropDatabase']
+           record(ns, entry, :drop_database)
+           hook_stream_drop_database(db_name)
+         elsif source = data['renameCollection']
+           record(ns, entry, :rename_collection)
+           target = data['to']
+           hook_stream_rename_collection(db_name, source, target)
+         elsif create = data['create']
+           record(ns, entry, :create)
+           hook_stream_create_collection(db_name, create)
+         else
+           raise "Unrecognized command #{data.inspect}"
+         end
+       else
+         raise "Unrecognized op: #{op} (#{entry.inspect})"
+       end
+
+       optime = entry['ts']
+       hook_update_optime(optime, false)
+     end
+
+     def tail_from(ts)
+       begin
+         @tailer.tail_from(ts)
+         loop do
+           @tailer.stream do |op|
+             stream_op(op)
+           end
+         end
+       ensure
+         @tailer.stop
+       end
+     end
+
+     def record(ns, entry, type)
+       stats[type] += 1
+       log.debug("#{ns}: #{type.inspect} #{entry.inspect}")
+     end
+
+     protected
+
+     def parse_ns(ns)
+       ns.split('.', 2)
+     end
+   end
+ end
data/lib/mongoriver/tailer.rb ADDED
@@ -0,0 +1,100 @@
+ module Mongoriver
+   class Tailer
+     include Mongoriver::Logging
+
+     attr_reader :upstream_conn
+
+     def initialize(upstreams, type)
+       @upstreams = upstreams
+       @type = type
+       # This number seems high
+       @conn_opts = {:op_timeout => 86400}
+
+       @cursor = nil
+
+       connect_upstream
+     end
+
+     def most_recent_timestamp
+       record = oplog_collection.find_one({}, :sort => [['$natural', -1]])
+       record['ts']
+     end
+
+     def connect_upstream
+       case @type
+       when :replset
+         opts = @conn_opts.merge(:read => :secondary)
+         @upstream_conn = Mongo::ReplSetConnection.new(@upstreams, opts)
+       when :slave, :direct
+         opts = @conn_opts.merge(:slave_ok => true)
+         host, port = parse_direct_upstream
+         @upstream_conn = Mongo::Connection.new(host, port, opts)
+         raise "Server at #{@upstream_conn.host}:#{@upstream_conn.port} is the primary -- if you're ok with that, check why your wrapper is passing :slave rather than :direct (HINT: try passing a -a to scripts like optail or mongocp)" if @type == :slave && @upstream_conn.primary?
+         ensure_upstream_replset!
+       when :existing
+         raise "Must pass in a single existing Mongo::Connection with :existing" unless @upstreams.length == 1 && @upstreams[0].respond_to?(:db)
+         @upstream_conn = @upstreams[0]
+       else
+         raise "Invalid connection type: #{@type.inspect}"
+       end
+     end
+
+     def ensure_upstream_replset!
+       # Might be a better way to do this, but not seeing one.
+       config = @upstream_conn['admin'].command(:ismaster => 1)
+       unless config['setName']
+         raise "Server at #{@upstream_conn.host}:#{@upstream_conn.port} is not running as a replica set"
+       end
+     end
+
+     def parse_direct_upstream
+       raise "When connecting directly to a mongo instance, must provide a single upstream" unless @upstreams.length == 1
+       upstream = @upstreams[0]
+       parse_host_spec(upstream)
+     end
+
+     def parse_host_spec(host_spec)
+       host, port = host_spec.split(':')
+       host = '127.0.0.1' if host.to_s.length == 0
+       port = '27017' if port.to_s.length == 0
+       [host, port.to_i]
+     end
+
+     def oplog_collection
+       @upstream_conn.db('local').collection('oplog.rs')
+     end
+
+     def tail_from(ts, opts = {})
+       raise "Already tailing the oplog!" if @cursor
+
+       # Maybe if ts is old enough, just start from the beginning?
+       query = (opts[:filter] || {}).merge({ 'ts' => { '$gte' => ts } })
+
+       oplog_collection.find(query, :timeout => false) do |oplog|
+         oplog.add_option(Mongo::Constants::OP_QUERY_TAILABLE)
+         oplog.add_option(Mongo::Constants::OP_QUERY_OPLOG_REPLAY)
+
+         oplog.add_option(Mongo::Constants::OP_QUERY_AWAIT_DATA) unless opts[:dont_wait]
+
+         log.info("Starting oplog stream from #{ts}")
+         @cursor = oplog
+       end
+     end
+
+     def stop
+       @cursor.close if @cursor
+       @cursor = nil
+     end
+
+     def stream(limit=nil)
+       count = 0
+       while @cursor.has_next?
+         count += 1
+         break if limit && count > limit
+         yield @cursor.next
+       end
+
+       return @cursor.has_next?
+     end
+   end
+ end
data/lib/mongoriver/version.rb ADDED
@@ -0,0 +1,3 @@
+ module Mongoriver
+   VERSION = "0.1.0"
+ end
data/lib/mongoriver.rb ADDED
@@ -0,0 +1,12 @@
+ require 'mongo'
+ require 'log4r'
+
+ module Mongoriver; end
+
+ require 'mongoriver/log'
+
+ require 'mongoriver/streambed'
+ require 'mongoriver/tailer'
+ require 'mongoriver/abstract_persistent_tailer'
+ require 'mongoriver/persistent_tailer'
+ require 'mongoriver/version'
data/mongoriver.gemspec ADDED
@@ -0,0 +1,22 @@
+ # -*- coding: utf-8 -*-
+ $:.unshift(File.expand_path("lib", File.dirname(__FILE__)))
+ require 'mongoriver/version'
+
+ Gem::Specification.new do |gem|
+   gem.authors       = ["Greg Brockman"]
+   gem.email         = ["gdb@gregbrockman.com"]
+   gem.description   = %q{Some tools and libraries to simplify tailing the mongod oplog}
+   gem.summary       = %q{mongodb oplog-tailing utilities.}
+   gem.homepage      = ""
+
+   gem.files         = `git ls-files`.split($\)
+   gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
+   gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
+   gem.name          = "mongoriver"
+   gem.require_paths = ["lib"]
+   gem.version       = Mongoriver::VERSION
+
+   gem.add_runtime_dependency('mongo', '>= 1.7')
+   gem.add_runtime_dependency('bson_ext')
+   gem.add_runtime_dependency('log4r')
+ end
metadata ADDED
@@ -0,0 +1,110 @@
+ --- !ruby/object:Gem::Specification
+ name: mongoriver
+ version: !ruby/object:Gem::Version
+   version: 0.1.0
+   prerelease:
+ platform: ruby
+ authors:
+ - Greg Brockman
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2013-02-05 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: mongo
+   requirement: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '1.7'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '1.7'
+ - !ruby/object:Gem::Dependency
+   name: bson_ext
+   requirement: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+ - !ruby/object:Gem::Dependency
+   name: log4r
+   requirement: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+ description: Some tools and libraries to simplify tailing the mongod oplog
+ email:
+ - gdb@gregbrockman.com
+ executables:
+ - mongocp
+ - optail
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - .gitignore
+ - Gemfile
+ - LICENSE
+ - README.md
+ - Rakefile
+ - bin/mongocp
+ - bin/optail
+ - lib/mongoriver.rb
+ - lib/mongoriver/abstract_persistent_tailer.rb
+ - lib/mongoriver/log.rb
+ - lib/mongoriver/persistent_tailer.rb
+ - lib/mongoriver/streambed.rb
+ - lib/mongoriver/tailer.rb
+ - lib/mongoriver/version.rb
+ - mongoriver.gemspec
+ homepage: ''
+ licenses: []
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   none: false
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   none: false
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 1.8.23
+ signing_key:
+ specification_version: 3
+ summary: mongodb oplog-tailing utilities.
+ test_files: []