mongoriver 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
+ *.gem
+ *.rbc
+ .bundle
+ .config
+ .yardoc
+ Gemfile.lock
+ InstalledFiles
+ _yardoc
+ coverage
+ doc/
+ lib/bundler/man
+ pkg
+ rdoc
+ spec/reports
+ test/tmp
+ test/version_tmp
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,3 @@
+ source 'https://rubygems.org'
+
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
+ Copyright (c) 2012 Greg Brockman
+
+ MIT License
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
+ # Mongoriver
+
+ TODO: Write a gem description
+
+ ## Installation
+
+ Add this line to your application's Gemfile:
+
+     gem 'mongoriver'
+
+ And then execute:
+
+     $ bundle
+
+ Or install it yourself as:
+
+     $ gem install mongoriver
+
+ ## Usage
+
+ TODO: Write usage instructions here
+
+ ## Contributing
+
+ 1. Fork it
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
+ 4. Push to the branch (`git push origin my-new-feature`)
+ 5. Create a new Pull Request
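Since the usage section above is still a TODO, here is a rough, unofficial sketch of tailing an oplog with the gem's PersistentTailer (defined in lib/mongoriver/persistent_tailer.rb below). It assumes a replica-set member reachable on localhost:27017 and an arbitrary service name; nothing here is an example from the author.

    require 'mongoriver'

    # Resume from the timestamp saved under this service name (or from the
    # epoch on the first run), then follow the oplog.
    tailer = Mongoriver::PersistentTailer.new(['localhost:27017'], :direct, 'example_service')
    tailer.tail_from(nil)   # nil => use the persisted timestamp
    loop do
      tailer.stream do |entry|
        puts entry.inspect   # raw oplog entries with 'ts', 'op', 'ns', 'o' keys
      end
    end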
data/Rakefile ADDED
@@ -0,0 +1,2 @@
+ #!/usr/bin/env rake
+ require "bundler/gem_tasks"
data/bin/mongocp ADDED
@@ -0,0 +1,250 @@
+ #!/usr/bin/env ruby
+ require 'logger'
+ require 'optparse'
+
+ require 'rubygems'
+ require 'bundler/setup'
+ require 'mongoriver'
+
+ module Mongoriver
+   class Mongocp < Streambed
+     include Mongoriver::Logging
+
+     def initialize(upstreams, type, downstream, prefix)
+       super(upstreams, type)
+       @downstream = downstream
+       @prefix = prefix
+       connect_downstream
+     end
+
+     def hook_optime
+       if optime = optime_collection.find_one(:_id => @prefix)
+         optime['ts']
+       else
+         nil
+       end
+     end
+
+     def hook_update_optime(ts, mandatory)
+       optime_collection.update({:_id => @prefix}, {'$set' => {:ts => ts}}, :upsert => true) if mandatory || rand(20) == 0
+     end
+
+     def hook_initial_sync_index(db_name, collection_name, index_key, options)
+       collection = downstream_collection(db_name, collection_name)
+       index_hash = BSON::OrderedHash.new
+       index_key.each {|k,v| index_hash[k] = v}
+       collection.send(:generate_indexes, index_hash, nil, options)
+     end
+
+     def hook_initial_sync_record_batch(db_name, collection_name, records)
+       collection = downstream_collection(db_name, collection_name)
+       bulk_insert(collection, records)
+     end
+
+     # TODO: should probably do the same key checking nonsense as the above
+     def hook_stream_insert(db_name, collection_name, object)
+       collection = downstream_collection(db_name, collection_name)
+       wrap_errors(collection, object['_id']) do
+         # Only needed if safe mode is set in the driver. Note that the
+         # argument here for oplog idempotency in the case of unique
+         # keys is kind of interesting. I believe I can prove
+         # idempotency as long as Mongo has no insert order-dependent
+         # unique indexes (which I believe is true) and that you do all
+         # your object updates as upserts.
+         allow_dupkeys do
+           collection.insert(object)
+         end
+       end
+     end
+
+     def hook_stream_update(db_name, collection_name, selector, update)
+       collection = downstream_collection(db_name, collection_name)
+       wrap_errors(collection, selector['_id']) do
+         collection.update(selector, update, :upsert => true)
+       end
+     end
+
+     def hook_stream_remove(db_name, collection_name, object)
+       collection = downstream_collection(db_name, collection_name)
+       wrap_errors(collection, object['_id']) do
+         collection.remove(object)
+       end
+     end
+
+     def hook_stream_create_collection(db_name, create)
+       db = downstream_db(db_name)
+       wrap_errors(db, create) do
+         db.create_collection(create)
+       end
+     end
+
+     # "Error renaming collection: #<BSON::OrderedHash:0x83869e34 {\"errmsg\"=>\"exception: source namespace does not exist\", \"code\"=>10026, \"ok\"=>0.0}>"
+     #
+     # Possibly need the same thing if the destination already exists
+     def hook_stream_rename_collection(db_name, source, target)
+       db = downstream_db(db_name)
+       wrap_errors(db, "#{source} -> #{target}") do
+         begin
+           db.rename_collection(source, target)
+         rescue Mongo::MongoDBError => e
+           if e.message =~ /Error renaming collection: .*exception: source namespace does not exist"/
+             log.warn("Ignoring rename of non-existent collection #{source} -> #{target}: #{e} (expected when replaying part of the oplog)")
+           elsif e.message =~ /Error renaming collection: .*exception: target namespace exists"/
+             log.warn("Ignoring rename of #{source} to existing collection #{target}: #{e} (expected when replaying part of the oplog)")
+           else
+             raise
+           end
+         end
+       end
+     end
+
+     def hook_stream_drop_index(db_name, collection_name, index_name)
+       collection = downstream_collection(db_name, collection_name)
+       wrap_errors(collection, index_name) do
+         begin
+           collection.drop_index(index_name)
+         rescue Mongo::MongoDBError => e
+           if e.message =~ /index not found/
+             log.warn("Ignoring drop of non-existent index #{index_name.inspect}: #{e} (expected when replaying part of the oplog)")
+           else
+             raise
+           end
+         end
+       end
+     end
+
+     def hook_stream_drop_collection(db_name, dropped)
+       db = downstream_db(db_name)
+       wrap_errors(db, dropped) do
+         db.drop_collection(dropped)
+       end
+     end
+
+     def hook_stream_drop_database(db_name)
+       db = downstream_db(db_name)
+       wrap_errors(db, db_name) do
+         db.command(:dropDatabase => 1)
+       end
+     end
+
+     private
+
+     def allow_dupkeys(&blk)
+       begin
+         blk.call
+       rescue Mongo::OperationFailure => e
+         if e.error_code == 11000
+           log.warn("Ignoring unique index violation: #{e} (expected when replaying part of the oplog)")
+         else
+           raise
+         end
+       end
+     end
+
+     def bulk_insert(collection, docs)
+       begin
+         # Use the internal insert_documents method because it lets us
+         # disable key verification
+         collection.send(:insert_documents, docs, collection.name, false)
+       rescue Mongo::MongoRubyError => e
+         ns = "#{collection.db.name}.#{collection.name}"
+         log.error("#{ns}: Caught error on batch insert", e)
+         docs.each do |doc|
+           wrap_errors(collection, doc['_id']) do
+             collection.send(:insert_documents, [doc], collection.name, false)
+           end
+         end
+       end
+     end
+
+     def wrap_errors(collection_or_db, object, &blk)
+       begin
+         blk.call
+       rescue Mongo::MongoRubyError => e
+         if collection_or_db.kind_of?(Mongo::Collection)
+           ns = "#{collection_or_db.db.name}.#{collection_or_db.name}"
+         else
+           ns = collection_or_db.name
+         end
+         log.error("#{ns}: Unknown error for #{object}", e)
+       end
+     end
+
+     def downstream_db(db_name)
+       prefixed = "#{@prefix}_#{db_name}"
+       @downstream_conn.db(prefixed)
+     end
+
+     def downstream_collection(db_name, collection_name)
+       downstream_db(db_name).collection(collection_name)
+     end
+
+     def optime_collection
+       @optime_collection ||= @downstream_conn.db('_mongocp').collection('optime')
+     end
+
+     def connect_downstream
+       host, port = @tailer.parse_host_spec(@downstream)
+       @downstream_conn = Mongo::Connection.new(host, port, :safe => true)
+     end
+   end
+ end
+
+ def main
+   options = {:host => nil, :port => nil, :type => :slave, :verbose => 0}
+   optparse = OptionParser.new do |opts|
+     opts.banner = "Usage: #{$0} [options]"
+
+     opts.on('-v', '--verbosity', 'Verbosity of debugging output') do
+       options[:verbose] += 1
+     end
+
+     opts.on('--help', 'Display this message') do
+       puts opts
+       exit(1)
+     end
+
+     opts.on('-h HOST', '--host', 'Upstream host to connect to') do |host|
+       options[:host] = host
+     end
+
+     opts.on('-p PORT', '--port', 'Upstream port to connect to') do |port|
+       options[:port] = Integer(port)
+     end
+
+     opts.on('-a', '--all', 'Allow connections even directly to a primary') do
+       options[:type] = :direct
+     end
+   end
+   optparse.parse!
+
+   if ARGV.length != 0
+     puts optparse
+     return 1
+   end
+
+   log = Log4r::Logger.new('Stripe')
+   log.outputters = Log4r::StdoutOutputter.new(STDERR)
+   if options[:verbose] >= 1
+     log.level = Log4r::DEBUG
+   else
+     log.level = Log4r::INFO
+   end
+   runner = Mongoriver::Mongocp.new(["#{options[:host]}:#{options[:port]}"], options[:type], 'localhost:5001', 'test')
+   runner.run
+   return 0
+ end
+
+ if $0 == __FILE__
+   ret = main
+   begin
+     exit(ret)
+   rescue TypeError
+     exit(0)
+   end
+ end
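The long comment in hook_stream_insert above rests on an idempotency argument: as long as every update is applied as an upsert, replaying a stretch of the oplog converges to the same downstream state. A small hand-rolled illustration of that property using the same 1.x driver API (the mongod address and demo database name are assumptions, not from the package):

    require 'mongo'

    conn = Mongo::Connection.new('localhost', 27017)
    coll = conn.db('idempotency_demo').collection('docs')

    # Applying the same oplog-style upsert twice leaves the document
    # unchanged, which is what makes replaying part of the oplog safe.
    2.times do
      coll.update({'_id' => 1}, {'$set' => {'n' => 5}}, :upsert => true)
    end
    puts coll.find_one('_id' => 1).inspect   # => {"_id"=>1, "n"=>5}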
data/bin/optail ADDED
@@ -0,0 +1,101 @@
+ #!/usr/bin/env ruby
+ require 'logger'
+ require 'optparse'
+
+ require 'rubygems'
+ require 'bundler/setup'
+ require 'mongoriver'
+
+ module Mongoriver
+   class Mongocp < Streambed
+     include Mongoriver::Logging
+
+     def initialize(upstreams, type, start_optime, pause)
+       super(upstreams, type)
+       @start_optime = start_optime
+       @pause = pause
+     end
+
+     def pause
+       if @pause
+         $stderr.puts("Press enter to continue")
+         $stdin.readline
+       end
+     end
+
+     def hook_optime
+       @start_optime
+     end
+
+     def hook_update_optime(ts, mandatory)
+     end
+
+     all_hooks.each do |name, _, opts|
+       next if name == :optime || name == :update_optime
+       define_method(hook_name(name)) {|*args| pause}
+     end
+   end
+ end
+
+ def main
+   options = {:host => nil, :port => nil, :type => :slave, :optime => 0, :pause => true, :verbose => 0}
+   optparse = OptionParser.new do |opts|
+     opts.banner = "Usage: #{$0} [options]"
+
+     opts.on('-v', '--verbosity', 'Verbosity of debugging output') do
+       options[:verbose] += 1
+     end
+
+     opts.on('--help', 'Display this message') do
+       puts opts
+       exit(1)
+     end
+
+     opts.on('-h HOST', '--host', 'Upstream host to connect to') do |host|
+       options[:host] = host
+     end
+
+     opts.on('-p PORT', '--port', 'Upstream port to connect to') do |port|
+       options[:port] = Integer(port)
+     end
+
+     opts.on('-a', '--all', 'Allow connections even directly to a primary') do
+       options[:type] = :direct
+     end
+
+     opts.on('-s OPTIME', '--start', 'Starting optime') do |optime|
+       options[:optime] = Integer(optime)
+     end
+
+     opts.on('-f', '--follow-automatically', "Don't prompt between ops") do
+       options[:pause] = false
+     end
+   end
+   optparse.parse!
+
+   if ARGV.length != 0
+     puts optparse
+     return 1
+   end
+
+   log = Log4r::Logger.new('Stripe')
+   log.outputters = Log4r::StdoutOutputter.new(STDERR)
+   if options[:verbose] >= 1
+     log.level = Log4r::DEBUG
+   else
+     log.level = Log4r::INFO
+   end
+   runner = Mongoriver::Mongocp.new(["#{options[:host]}:#{options[:port]}"], options[:type], options[:optime], options[:pause])
+   runner.run
+   return 0
+ end
+
+ if $0 == __FILE__
+   ret = main
+   begin
+     exit(ret)
+   rescue TypeError
+     exit(0)
+   end
+ end
data/lib/mongoriver/abstract_persistent_tailer.rb ADDED
@@ -0,0 +1,58 @@
+ module Mongoriver
+
+   # A variant of Tailer that automatically loads and persists the
+   # "last timestamp processed" state. See PersistentTailer for a
+   # concrete subclass that uses the same mongod you are already
+   # tailing.
+
+   class AbstractPersistentTailer < Tailer
+     def initialize(upstream, type, opts={})
+       raise "You can't instantiate an AbstractPersistentTailer -- did you want PersistentTailer?" if self.class == AbstractPersistentTailer
+       super(upstream, type)
+
+       @last_saved = nil
+       @batch = opts[:batch]
+       @last_read = nil
+     end
+
+     def tail_from(ts, opts={})
+       if ts.nil?
+         ts = read_timestamp
+       end
+       super(ts, opts)
+     end
+
+     def stream(limit=nil)
+       super(limit) do |entry|
+         yield entry
+         @last_read = entry['ts']
+         maybe_save_timestamp unless @batch
+       end
+     end
+
+     def batch_done
+       raise "You must specify :batch => true to use the batch-processing interface." unless @batch
+       maybe_save_timestamp
+     end
+
+     def read_timestamp
+       raise "read_timestamp unimplemented!"
+     end
+
+     def write_timestamp(ts)
+       raise "write_timestamp unimplemented!"
+     end
+
+     def save_timestamp
+       write_timestamp(@last_read)
+       @last_saved = @last_read
+       log.info("Saved timestamp: #{@last_saved} (#{Time.at(@last_saved.seconds)})")
+     end
+
+     def maybe_save_timestamp
+       # Write timestamps once a minute
+       return unless @last_read
+       save_timestamp if @last_saved.nil? || (@last_read.seconds - @last_saved.seconds) > 60
+     end
+   end
+ end
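The class comment spells out the contract: a subclass supplies read_timestamp and write_timestamp and inherits the resume/save plumbing. A hypothetical file-backed variant (not part of the gem) could persist the BSON::Timestamp as two integers:

    module Mongoriver
      # Hypothetical subclass: keeps the last-processed timestamp in a
      # local file instead of a mongod.
      class FilePersistentTailer < AbstractPersistentTailer
        def initialize(upstream, type, path, opts={})
          super(upstream, type, opts)
          @path = path
        end

        def read_timestamp
          return BSON::Timestamp.new(0, 0) unless File.exist?(@path)
          seconds, increment = File.read(@path).split.map {|s| s.to_i}
          BSON::Timestamp.new(seconds, increment)
        end

        def write_timestamp(ts)
          File.open(@path, 'w') {|f| f.write("#{ts.seconds} #{ts.increment}")}
        end
      end
    end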
data/lib/mongoriver/log.rb ADDED
@@ -0,0 +1,7 @@
+ module Mongoriver
+   module Logging
+     def log
+       @@logger ||= Log4r::Logger.new("Stripe::Mongoriver")
+     end
+   end
+ end
data/lib/mongoriver/persistent_tailer.rb ADDED
@@ -0,0 +1,30 @@
+ module Mongoriver
+   # A variant of AbstractPersistentTailer that automatically persists
+   # the "last timestamp processed" state into the database we are
+   # tailing.
+   class PersistentTailer < AbstractPersistentTailer
+     def initialize(upstream, type, service, opts={})
+       raise "You can't use PersistentTailer against only a slave. How am I supposed to write state?" if type == :slave
+       super(upstream, type, opts)
+
+       db = opts[:db] || "_mongoriver"
+       collection = opts[:collection] || 'oplog-tailers'
+       @service = service
+       @state_collection = @upstream_conn.db(db).collection(collection)
+     end
+
+     def read_timestamp
+       row = @state_collection.find_one(:service => @service)
+       row ? row['timestamp'] : BSON::Timestamp.new(0, 0)
+     end
+
+     def write_timestamp(ts)
+       row = @state_collection.find_one(:service => @service)
+       if row
+         @state_collection.update({'_id' => row['_id']}, '$set' => {'timestamp' => ts})
+       else
+         @state_collection.insert('service' => @service, 'timestamp' => ts)
+       end
+     end
+   end
+ end
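State lives upstream in the _mongoriver.oplog-tailers collection (both names overridable via opts[:db] and opts[:collection]). A quick way to inspect what has been saved for a service, assuming the connection details and service name from the earlier sketch:

    require 'mongo'

    conn = Mongo::Connection.new('localhost', 27017)
    row = conn.db('_mongoriver').collection('oplog-tailers').find_one('service' => 'example_service')
    puts(row ? row['timestamp'].inspect : 'no saved state yet')   # a BSON::Timestamp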
data/lib/mongoriver/streambed.rb ADDED
@@ -0,0 +1,299 @@
+ module Mongoriver
+   class Streambed
+     include Mongoriver::Logging
+
+     attr_reader :stats
+
+     class AssertionFailure < StandardError; end
+
+     def assert(condition, msg)
+       raise AssertionFailure.new(msg) unless condition
+     end
+
+     def initialize(upstreams, type)
+       @tailer = Mongoriver::Tailer.new(upstreams, type)
+       @record_fetch_batch_size = 1024
+       @record_sync_batch_size = 256
+       @stats = Hash.new(0)
+     end
+
+     def run
+       self.class.validate_hooks!
+
+       unless ts = starting_optime
+         ts = @tailer.most_recent_timestamp
+         initial_sync
+         hook_update_optime(ts, true)
+       end
+
+       tail_from(ts)
+     end
+
+     def self.my_hooks
+       @hooks ||= []
+     end
+
+     def self.all_hooks
+       hooks = my_hooks
+       if superclass <= Streambed
+         hooks + superclass.all_hooks
+       else
+         hooks
+       end
+     end
+
+     def self.validate_hooks!
+       errors = []
+       all_hooks.each do |name, args, opts|
+         method = self.instance_method(hook_name(name))
+         signature = "#{method.name}(#{args.join(', ')})"
+         if method.owner == Streambed && !opts[:default]
+           errors << "Must provide implementation of #{signature}"
+         end
+       end
+
+       raise "You need to fix the following hook errors:
+
+   #{errors.join("\n  ")}" if errors.length > 0
+     end
+
+     def self.hook_name(name)
+       "hook_#{name}"
+     end
+
+     def self.hook(name, args=[], opts={})
+       if default = opts[:default]
+         target = hook_name(default)
+         implementation = Proc.new do |*args, &blk|
+           send(target, *args, &blk)
+         end
+       else
+         implementation = Proc.new do
+           raise NotImplementedError.new("Override in subclass")
+         end
+       end
+
+       define_method(hook_name(name), implementation)
+       my_hooks << [name, args, opts]
+     end
+
+     hook :optime
+     hook :update_optime, [:ts, :mandatory]
+     hook :initial_sync_index, [:db_name, :collection_name, :index_key, :options]
+     hook :initial_sync_record_batch, [:db_name, :collection_name, :records]
+     hook :stream_insert, [:db_name, :collection_name, :object]
+     hook :stream_update, [:db_name, :collection_name, :selector, :update]
+     hook :stream_remove, [:db_name, :collection_name, :object]
+     # There's not usually a difference between the initial index
+     # creation and creating it while streaming ops.
+     hook :stream_create_index, [:db_name, :collection_name, :index_key, :options], :default => :initial_sync_index
+     # This seems to be called while doing a mapreduce.
+     hook :stream_create_collection, [:db_name, :create]
+     # This also seems to be called while doing a mapreduce. Note that
+     # I think mongo has a concept of temporary table, which I should
+     # look into, and renameCollection has some temporary table option.
+     hook :stream_rename_collection, [:db_name, :source, :target]
+     hook :stream_drop_index, [:db_name, :collection_name, :index_name]
+     hook :stream_drop_collection, [:db_name, :dropped]
+     hook :stream_drop_database, [:db_name]
+
+     private
+
+     def starting_optime
+       case time = hook_optime
+       when Integer
+         if time >= 0
+           BSON::Timestamp.new(time, 0)
+         elsif time == -1
+           @tailer.most_recent_timestamp
+         else
+           raise "Invalid optime: #{time}"
+         end
+       when BSON::Timestamp, nil
+         time
+       else
+         raise "Unrecognized type #{time.class} (#{time.inspect}) for start time"
+       end
+     end
+
+     def initial_sync
+       initial_sync_all_indexes
+       initial_sync_all_records
+     end
+
+     def initial_sync_all_indexes
+       log.info("Beginning initial sync of indexes")
+       syncable_databases.each {|db| initial_sync_indexes_for_db(db)}
+       log.info("Done initial sync of indexes")
+     end
+
+     def initial_sync_indexes_for_db(db)
+       db.collection('system.indexes').find.each do |index|
+         options = extract_options_from_index_spec(index)
+         index_key = index['key'].to_a
+
+         ns = index['ns']
+         db_name, collection_name = parse_ns(ns)
+         assert(db_name == db.name, "Index db name #{db_name.inspect} differs from current db name #{db.name.inspect}")
+
+         log.info("#{ns}: Initial sync of index #{options[:name]}")
+         hook_initial_sync_index(db_name, collection_name, index_key, options)
+       end
+     end
+
+     def initial_sync_all_records
+       log.info("Beginning initial sync of records")
+       syncable_databases.each {|db| initial_sync_records_for_db(db)}
+       log.info("Done initial sync of records")
+     end
+
+     def initial_sync_records_for_db(db)
+       syncable_collections(db).each do |collection|
+         initial_sync_records_for_collection(collection)
+       end
+     end
+
+     def initial_sync_records_for_collection(collection)
+       db_name = collection.db.name
+       collection_name = collection.name
+       ns = "#{db_name}.#{collection_name}"
+
+       log.info("#{ns}: Starting record initial sync")
+
+       records = []
+       collection.find({}, :batch_size => @record_fetch_batch_size, :timeout => false, :sort => [['$natural', 1]]) do |cursor|
+         while cursor.has_next?
+           records << cursor.next
+           if records.length > @record_sync_batch_size
+             # TODO: add better logging than this
+             log.info("#{ns}: Running sync of batch of #{records.length} records")
+             hook_initial_sync_record_batch(db_name, collection_name, records)
+             records = []
+           end
+         end
+       end
+       log.info("#{ns}: Finishing sync with a batch of #{records.length} records")
+       hook_initial_sync_record_batch(db_name, collection_name, records)
+
+       log.info("#{ns}: Finished record initial sync")
+     end
+
+     # This should be fine to instantiate all at once, since
+     # database_names returns all the dbs as strings anyway
+     def syncable_databases
+       @tailer.upstream_conn.database_names.map do |db_name|
+         next if db_name == 'local'
+         @tailer.upstream_conn.db(db_name)
+       end.compact
+     end
+
+     def syncable_collections(db)
+       db.collection_names.map do |collection_name|
+         next if collection_name.start_with?('system.')
+         db.collection(collection_name)
+       end.compact
+     end
+
+     def extract_options_from_index_spec(index)
+       options = {}
+       index.each do |key, value|
+         case key
+         when 'v'
+           raise NotImplementedError.new("Only v=1 indexes are supported at the moment, not v=#{value.inspect}") unless value == 1
+         when 'ns', 'key'
+         else
+           options[key.to_sym] = value
+         end
+       end
+
+       assert(options.include?(:name), "No name defined for index spec #{index.inspect}")
+       options
+     end
+
+     def stream_op(entry)
+       op = entry['op']
+       data = entry['o']
+       ns = entry['ns']
+
+       if op == 'n'
+         # This happens for initial rs.initiate() op, maybe others.
+         log.info("Skipping no-op #{entry.inspect}")
+         return
+       end
+
+       db_name, collection_name = parse_ns(ns)
+       assert(db_name, "Nil db name #{db_name.inspect} for #{entry.inspect}")
+
+       case op
+       when 'i'
+         if collection_name == 'system.indexes'
+           record(ns, entry, :create_index)
+           index_db_name, index_collection_name = parse_ns(data['ns'])
+           index_key = data['key'].to_a
+           options = extract_options_from_index_spec(data)
+           hook_stream_create_index(index_db_name, index_collection_name, index_key, options)
+         else
+           record(ns, entry, :insert)
+           hook_stream_insert(db_name, collection_name, data)
+         end
+       when 'u'
+         record(ns, entry, :update)
+         hook_stream_update(db_name, collection_name, entry['o2'], data)
+       when 'd'
+         record(ns, entry, :remove)
+         hook_stream_remove(db_name, collection_name, data)
+       when 'c'
+         assert(collection_name == '$cmd', "Command collection name is #{collection_name.inspect} for #{entry.inspect}")
+         if deleted_from = data['deleteIndexes']
+           record(ns, entry, :drop_index)
+           index = data['index']
+           hook_stream_drop_index(db_name, deleted_from, index)
+         elsif dropped = data['drop']
+           record(ns, entry, :drop_collection)
+           hook_stream_drop_collection(db_name, dropped)
+         elsif data['dropDatabase']
+           record(ns, entry, :drop_database)
+           hook_stream_drop_database(db_name)
+         elsif source = data['renameCollection']
+           record(ns, entry, :rename_collection)
+           target = data['to']
+           hook_stream_rename_collection(db_name, source, target)
+         elsif create = data['create']
+           record(ns, entry, :create)
+           hook_stream_create_collection(db_name, create)
+         else
+           raise "Unrecognized command #{data.inspect}"
+         end
+       else
+         raise "Unrecognized op: #{op} (#{entry.inspect})"
+       end
+
+       optime = entry['ts']
+       hook_update_optime(optime, false)
+     end
+
+     def tail_from(ts)
+       begin
+         @tailer.tail_from(ts)
+         loop do
+           @tailer.stream do |op|
+             stream_op(op)
+           end
+         end
+       ensure
+         @tailer.stop
+       end
+     end
+
+     def record(ns, entry, type)
+       stats[type] += 1
+       log.debug("#{ns}: #{type.inspect} #{entry.inspect}")
+     end
+
+     protected
+
+     def parse_ns(ns)
+       ns.split('.', 2)
+     end
+   end
+ end
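The hook DSL above generates one hook_-prefixed method per table entry, and validate_hooks! refuses to run until every hook without a :default is overridden. A skeletal subclass showing the minimum surface a consumer must implement (the class and its bodies are placeholders, not part of the gem):

    module Mongoriver
      # Hypothetical skeleton: satisfies validate_hooks! and logs inserts.
      class PrintingStreambed < Streambed
        def hook_optime; nil; end                     # nil => do a full initial sync
        def hook_update_optime(ts, mandatory); end    # nowhere to persist it
        def hook_initial_sync_index(db_name, collection_name, index_key, options); end
        def hook_initial_sync_record_batch(db_name, collection_name, records); end
        def hook_stream_insert(db_name, collection_name, object)
          log.info("#{db_name}.#{collection_name}: insert #{object.inspect}")
        end
        def hook_stream_update(db_name, collection_name, selector, update); end
        def hook_stream_remove(db_name, collection_name, object); end
        def hook_stream_create_collection(db_name, create); end
        def hook_stream_rename_collection(db_name, source, target); end
        def hook_stream_drop_index(db_name, collection_name, index_name); end
        def hook_stream_drop_collection(db_name, dropped); end
        def hook_stream_drop_database(db_name); end
      end
    end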
data/lib/mongoriver/tailer.rb ADDED
@@ -0,0 +1,100 @@
+ module Mongoriver
+   class Tailer
+     include Mongoriver::Logging
+
+     attr_reader :upstream_conn
+
+     def initialize(upstreams, type)
+       @upstreams = upstreams
+       @type = type
+       # This number seems high
+       @conn_opts = {:op_timeout => 86400}
+
+       @cursor = nil
+
+       connect_upstream
+     end
+
+     def most_recent_timestamp
+       record = oplog_collection.find_one({}, :sort => [['$natural', -1]])
+       record['ts']
+     end
+
+     def connect_upstream
+       case @type
+       when :replset
+         opts = @conn_opts.merge(:read => :secondary)
+         @upstream_conn = Mongo::ReplSetConnection.new(@upstreams, opts)
+       when :slave, :direct
+         opts = @conn_opts.merge(:slave_ok => true)
+         host, port = parse_direct_upstream
+         @upstream_conn = Mongo::Connection.new(host, port, opts)
+         raise "Server at #{@upstream_conn.host}:#{@upstream_conn.port} is the primary -- if you're ok with that, check why your wrapper is passing :direct rather than :slave (HINT: try passing a -a to scripts like optail or mongocp)" if @type == :slave && @upstream_conn.primary?
+         ensure_upstream_replset!
+       when :existing
+         raise "Must pass in a single existing Mongo::Connection with :existing" unless @upstreams.length == 1 && @upstreams[0].respond_to?(:db)
+         @upstream_conn = @upstreams[0]
+       else
+         raise "Invalid connection type: #{@type.inspect}"
+       end
+     end
+
+     def ensure_upstream_replset!
+       # Might be a better way to do this, but not seeing one.
+       config = @upstream_conn['admin'].command(:ismaster => 1)
+       unless config['setName']
+         raise "Server at #{@upstream_conn.host}:#{@upstream_conn.port} is not running as a replica set"
+       end
+     end
+
+     def parse_direct_upstream
+       raise "When connecting directly to a mongo instance, must provide a single upstream" unless @upstreams.length == 1
+       upstream = @upstreams[0]
+       parse_host_spec(upstream)
+     end
+
+     def parse_host_spec(host_spec)
+       host, port = host_spec.split(':')
+       host = '127.0.0.1' if host.to_s.length == 0
+       port = '27017' if port.to_s.length == 0
+       [host, port.to_i]
+     end
+
+     def oplog_collection
+       @upstream_conn.db('local').collection('oplog.rs')
+     end
+
+     def tail_from(ts, opts = {})
+       raise "Already tailing the oplog!" if @cursor
+
+       # Maybe if ts is old enough, just start from the beginning?
+       query = (opts[:filter] || {}).merge({ 'ts' => { '$gte' => ts } })
+
+       oplog_collection.find(query, :timeout => false) do |oplog|
+         oplog.add_option(Mongo::Constants::OP_QUERY_TAILABLE)
+         oplog.add_option(Mongo::Constants::OP_QUERY_OPLOG_REPLAY)
+
+         oplog.add_option(Mongo::Constants::OP_QUERY_AWAIT_DATA) unless opts[:dont_wait]
+
+         log.info("Starting oplog stream from #{ts}")
+         @cursor = oplog
+       end
+     end
+
+     def stop
+       @cursor.close if @cursor
+       @cursor = nil
+     end
+
+     def stream(limit=nil)
+       count = 0
+       while @cursor.has_next?
+         # Check the limit before yielding so a limit of N yields N entries.
+         break if limit && count >= limit
+         count += 1
+         yield @cursor.next
+       end
+
+       return @cursor.has_next?
+     end
+   end
+ end
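Tailer also works on its own when persistence isn't needed: tail_from positions a tailable OPLOG_REPLAY cursor and stream drains whatever has arrived. A minimal loop against a local replica-set member (connection details are assumptions):

    require 'mongoriver'

    tailer = Mongoriver::Tailer.new(['localhost:27017'], :direct)
    tailer.tail_from(tailer.most_recent_timestamp)   # start at the newest entry
    begin
      loop do
        tailer.stream(100) {|entry| puts entry['op']}   # up to 100 ops per drain
      end
    ensure
      tailer.stop
    end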
data/lib/mongoriver/version.rb ADDED
@@ -0,0 +1,3 @@
+ module Mongoriver
+   VERSION = "0.1.0"
+ end
data/lib/mongoriver.rb ADDED
@@ -0,0 +1,12 @@
+ require 'mongo'
+ require 'log4r'
+
+ module Mongoriver; end
+
+ require 'mongoriver/log'
+
+ require 'mongoriver/streambed'
+ require 'mongoriver/tailer'
+ require 'mongoriver/abstract_persistent_tailer'
+ require 'mongoriver/persistent_tailer'
+ require 'mongoriver/version'
data/mongoriver.gemspec ADDED
@@ -0,0 +1,22 @@
+ # -*- coding: utf-8 -*-
+ $:.unshift(File.expand_path("lib", File.dirname(__FILE__)))
+ require 'mongoriver/version'
+
+ Gem::Specification.new do |gem|
+   gem.authors       = ["Greg Brockman"]
+   gem.email         = ["gdb@gregbrockman.com"]
+   gem.description   = %q{Some tools and libraries to simplify tailing the mongod oplog}
+   gem.summary       = %q{mongodb oplog-tailing utilities.}
+   gem.homepage      = ""
+
+   gem.files         = `git ls-files`.split($\)
+   gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
+   gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
+   gem.name          = "mongoriver"
+   gem.require_paths = ["lib"]
+   gem.version       = Mongoriver::VERSION
+
+   gem.add_runtime_dependency('mongo', '>= 1.7')
+   gem.add_runtime_dependency('bson_ext')
+   gem.add_runtime_dependency('log4r')
+ end
metadata ADDED
@@ -0,0 +1,110 @@
+ --- !ruby/object:Gem::Specification
+ name: mongoriver
+ version: !ruby/object:Gem::Version
+   version: 0.1.0
+   prerelease:
+ platform: ruby
+ authors:
+ - Greg Brockman
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2013-02-05 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: mongo
+   requirement: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '1.7'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '1.7'
+ - !ruby/object:Gem::Dependency
+   name: bson_ext
+   requirement: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+ - !ruby/object:Gem::Dependency
+   name: log4r
+   requirement: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+ description: Some tools and libraries to simplify tailing the mongod oplog
+ email:
+ - gdb@gregbrockman.com
+ executables:
+ - mongocp
+ - optail
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - .gitignore
+ - Gemfile
+ - LICENSE
+ - README.md
+ - Rakefile
+ - bin/mongocp
+ - bin/optail
+ - lib/mongoriver.rb
+ - lib/mongoriver/abstract_persistent_tailer.rb
+ - lib/mongoriver/log.rb
+ - lib/mongoriver/persistent_tailer.rb
+ - lib/mongoriver/streambed.rb
+ - lib/mongoriver/tailer.rb
+ - lib/mongoriver/version.rb
+ - mongoriver.gemspec
+ homepage: ''
+ licenses: []
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   none: false
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   none: false
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 1.8.23
+ signing_key:
+ specification_version: 3
+ summary: mongodb oplog-tailing utilities.
+ test_files: []