mongoriver 0.1.0
- data/.gitignore +17 -0
- data/Gemfile +3 -0
- data/LICENSE +22 -0
- data/README.md +29 -0
- data/Rakefile +2 -0
- data/bin/mongocp +250 -0
- data/bin/optail +101 -0
- data/lib/mongoriver/abstract_persistent_tailer.rb +58 -0
- data/lib/mongoriver/log.rb +7 -0
- data/lib/mongoriver/persistent_tailer.rb +30 -0
- data/lib/mongoriver/streambed.rb +299 -0
- data/lib/mongoriver/tailer.rb +100 -0
- data/lib/mongoriver/version.rb +3 -0
- data/lib/mongoriver.rb +12 -0
- data/mongoriver.gemspec +22 -0
- metadata +110 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
+Copyright (c) 2012 Greg Brockman
+
+MIT License
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md
ADDED
@@ -0,0 +1,29 @@
+# Mongoriver
+
+TODO: Write a gem description
+
+## Installation
+
+Add this line to your application's Gemfile:
+
+    gem 'mongoriver'
+
+And then execute:
+
+    $ bundle
+
+Or install it yourself as:
+
+    $ gem install mongoriver
+
+## Usage
+
+TODO: Write usage instructions here
+
+## Contributing
+
+1. Fork it
+2. Create your feature branch (`git checkout -b my-new-feature`)
+3. Commit your changes (`git commit -am 'Added some feature'`)
+4. Push to the branch (`git push origin my-new-feature`)
+5. Create new Pull Request
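Since the Usage section above is still a TODO, here is a rough sketch of the core flow, based on the Tailer class shipped in data/lib/mongoriver/tailer.rb below; the address and :slave connection type are placeholder values, not documented defaults:

require 'mongoriver'

# Placeholder upstream ('127.0.0.1:27017') and connection type (:slave).
tailer = Mongoriver::Tailer.new(['127.0.0.1:27017'], :slave)

# Start from the newest oplog entry (a BSON::Timestamp) and print each op.
tailer.tail_from(tailer.most_recent_timestamp)
loop do
  tailer.stream do |entry|
    puts entry.inspect  # oplog entry hash: 'ts', 'op', 'ns', 'o', ...
  end
end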
data/Rakefile
ADDED
data/bin/mongocp
ADDED
@@ -0,0 +1,250 @@
+#!/usr/bin/env ruby
+require 'logger'
+require 'optparse'
+
+require 'rubygems'
+require 'bundler/setup'
+require 'mongoriver'
+
+module Mongoriver
+  class Mongocp < Streambed
+    include Mongoriver::Logging
+
+    def initialize(upstreams, type, downstream, prefix)
+      super(upstreams, type)
+      @downstream = downstream
+      @prefix = prefix
+      connect_downstream
+    end
+
+    def hook_optime
+      if optime = optime_collection.find_one(:_id => @prefix)
+        optime['ts']
+      else
+        nil
+      end
+    end
+
+    def hook_update_optime(ts, mandatory)
+      optime_collection.update({:_id => @prefix}, {'$set' => {:ts => ts}}, :upsert => true) if mandatory || rand(20) == 0
+    end
+
+    def hook_initial_sync_index(db_name, collection_name, index_key, options)
+      collection = downstream_collection(db_name, collection_name)
+      index_hash = BSON::OrderedHash.new
+      index_key.each {|k,v| index_hash[k] = v}
+      collection.send(:generate_indexes, index_hash, nil, options)
+    end
+
+    def hook_initial_sync_record_batch(db_name, collection_name, records)
+      collection = downstream_collection(db_name, collection_name)
+      bulk_insert(collection, records)
+    end
+
+    # TODO: should probably do the same key checking nonsense as the above
+    def hook_stream_insert(db_name, collection_name, object)
+      collection = downstream_collection(db_name, collection_name)
+      wrap_errors(collection, object['_id']) do
+        # Only needed if safe mode is set in the driver. Note that the
+        # argument here for oplog idempotency in the case of unique
+        # keys is kind of interesting. I believe I can prove
+        # idempotency as long as Mongo has no insert order-dependent
+        # unique indexes (which I believe is true) and that you do all
+        # your object updates as upserts.
+        allow_dupkeys do
+          collection.insert(object)
+        end
+      end
+    end
+
+    def hook_stream_update(db_name, collection_name, selector, update)
+      collection = downstream_collection(db_name, collection_name)
+      wrap_errors(collection, selector['_id']) do
+        collection.update(selector, update, :upsert => true)
+      end
+    end
+
+    def hook_stream_remove(db_name, collection_name, object)
+      collection = downstream_collection(db_name, collection_name)
+      wrap_errors(collection, object['_id']) do
+        collection.remove(object)
+      end
+    end
+
+    def hook_stream_create_collection(db_name, create)
+      db = downstream_db(db_name)
+      wrap_errors(db, create) do
+        db.create_collection(create)
+      end
+    end
+
+    # "Error renaming collection: #<BSON::OrderedHash:0x83869e34 {\"errmsg\"=>\"exception: source namespace does not exist\", \"code\"=>10026, \"ok\"=>0.0}>"
+    #
+    # Possibly need the same thing if the destination already exists
+    def hook_stream_rename_collection(db_name, source, target)
+      db = downstream_db(db_name)
+      wrap_errors(db, "#{source} -> #{target}") do
+        begin
+          db.rename_collection(source, target)
+        rescue Mongo::MongoDBError => e
+          if e.message =~ /Error renaming collection: .*exception: source namespace does not exist"/
+            log.warn("Ignoring rename of non-existent collection #{source} -> #{target}: #{e} (expected when replaying part of the oplog)")
+          elsif e.message =~ /Error renaming collection: .*exception: target namespace exists"/
+            log.warn("Ignoring rename of #{source} to existing collection #{target}: #{e} (expected when replaying part of the oplog)")
+          else
+            raise
+          end
+        end
+      end
+    end
+
+    def hook_stream_drop_index(db_name, collection_name, index_name)
+      collection = downstream_collection(db_name, collection_name)
+      wrap_errors(collection, index_name) do
+        begin
+          collection.drop_index(index_name)
+        rescue Mongo::MongoDBError => e
+          if e.message =~ /index not found/
+            log.warn("Ignoring drop of non-existent index #{index_name.inspect}: #{e} (expected when replaying part of the oplog)")
+          else
+            raise
+          end
+        end
+      end
+    end
+
+    def hook_stream_drop_collection(db_name, dropped)
+      db = downstream_db(db_name)
+      wrap_errors(db, dropped) do
+        db.drop_collection(dropped)
+      end
+    end
+
+    def hook_stream_drop_database(db_name)
+      db = downstream_db(db_name)
+      wrap_errors(db, db_name) do
+        db.command(:dropDatabase => 1)
+      end
+    end
+
+    private
+
+    def allow_dupkeys(&blk)
+      begin
+        blk.call
+      rescue Mongo::OperationFailure => e
+        if e.error_code == 11000
+          log.warn("Ignoring unique index violation: #{e} (expected when replaying part of the oplog)")
+        else
+          raise
+        end
+      end
+    end
+
+    def bulk_insert(collection, docs)
+      begin
+        # Use the internal insert_documents method because it lets us
+        # disable key verification
+        collection.send(:insert_documents, docs, collection.name, false)
+      rescue Mongo::MongoRubyError => e
+        ns = "#{collection.db.name}.#{collection.name}"
+        log.error("#{ns}: Caught error on batch insert", e)
+        docs.each do |doc|
+          wrap_errors(collection, doc['_id']) do
+            collection.send(:insert_documents, [doc], collection.name, false)
+          end
+        end
+      end
+    end
+
+    def wrap_errors(collection_or_db, object, &blk)
+      begin
+        blk.call
+      rescue Mongo::MongoRubyError => e
+        if collection_or_db.kind_of?(Mongo::Collection)
+          ns = "#{collection_or_db.db.name}.#{collection_or_db.name}"
+        else
+          ns = collection_or_db.name
+        end
+        log.error("#{ns}: Unknown error for #{object}", e)
+      end
+    end
+
+    def downstream_db(db_name)
+      prefixed = "#{@prefix}_#{db_name}"
+      @downstream_conn.db(prefixed)
+    end
+
+    def downstream_collection(db_name, collection_name)
+      downstream_db(db_name).collection(collection_name)
+    end
+
+    def optime_collection
+      @optime_collection ||= @downstream_conn.db('_mongocp').collection('optime')
+    end
+
+    def connect_downstream
+      host, port = @tailer.parse_host_spec(@downstream)
+      @downstream_conn = Mongo::Connection.new(host, port, :safe => true)
+    end
+  end
+end
+
+def main
+  options = {:host => nil, :port => nil, :type => :slave, :verbose => 0}
+  optparse = OptionParser.new do |opts|
+    opts.banner = "Usage: #{$0} [options]"
+
+    opts.on('-v', '--verbosity', 'Verbosity of debugging output') do
+      options[:verbose] += 1
+    end
+
+    opts.on('--help', 'Display this message') do
+      puts opts
+      exit(1)
+    end
+
+    opts.on('-h HOST', '--host', 'Upstream host to connect to') do |host|
+      options[:host] = host
+    end
+
+    opts.on('-p PORT', '--port', 'Upstream port to connect to') do |port|
+      options[:port] = Integer(port)
+    end
+
+    opts.on('-a', '--all', 'Allow connections even directly to a primary') do
+      options[:type] = :direct
+    end
+  end
+  optparse.parse!
+
+  if ARGV.length != 0
+    puts optparse
+    return 1
+  end
+
+  log = Log4r::Logger.new('Stripe')
+  log.outputters = Log4r::StdoutOutputter.new(STDERR)
+  if options[:verbose] >= 1
+    log.level = Log4r::DEBUG
+  else
+    log.level = Log4r::INFO
+  end
+  runner = Mongoriver::Mongocp.new(["#{options[:host]}:#{options[:port]}"], options[:type], 'localhost:5001', 'test')
+  runner.run
+  return 0
+end
+
+if $0 == __FILE__
+  ret = main
+  begin
+    exit(ret)
+  rescue TypeError
+    exit(0)
+  end
+end
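mongocp's main above hardcodes its downstream ('localhost:5001') and prefix ('test'); as a hedged sketch, the equivalent programmatic call looks like this, with placeholder addresses:

require 'mongoriver'

# Mirrors main above: copy everything from the upstream into databases
# named "test_<db>" on the downstream. Addresses are placeholder values.
runner = Mongoriver::Mongocp.new(['127.0.0.1:27017'], :slave,
                                 'localhost:5001', 'test')
runner.run  # initial sync if no saved optime, then stream ops forever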
data/bin/optail
ADDED
@@ -0,0 +1,101 @@
+#!/usr/bin/env ruby
+require 'logger'
+require 'optparse'
+
+require 'rubygems'
+require 'bundler/setup'
+require 'mongoriver'
+
+module Mongoriver
+  class Mongocp < Streambed
+    include Mongoriver::Logging
+
+    def initialize(upstreams, type, start_optime, pause)
+      super(upstreams, type)
+      @start_optime = start_optime
+      @pause = pause
+    end
+
+    def pause
+      if @pause
+        $stderr.puts("Press enter to continue")
+        $stdin.readline
+      end
+    end
+
+    def hook_optime
+      @start_optime
+    end
+
+    def hook_update_optime(ts, mandatory)
+    end
+
+    all_hooks.each do |name, _, opts|
+      next if name == :optime || name == :update_optime
+      define_method(hook_name(name)) {|*args| pause}
+    end
+  end
+end
+
+def main
+  options = {:host => nil, :port => nil, :type => :slave, :optime => 0, :pause => true, :verbose => 0}
+  optparse = OptionParser.new do |opts|
+    opts.banner = "Usage: #{$0} [options]"
+
+    opts.on('-v', '--verbosity', 'Verbosity of debugging output') do
+      options[:verbose] += 1
+    end
+
+    opts.on('--help', 'Display this message') do
+      puts opts
+      exit(1)
+    end
+
+    opts.on('-h HOST', '--host', 'Upstream host to connect to') do |host|
+      options[:host] = host
+    end
+
+    opts.on('-p PORT', '--port', 'Upstream port to connect to') do |port|
+      options[:port] = Integer(port)
+    end
+
+    opts.on('-a', '--all', 'Allow connections even directly to a primary') do
+      options[:type] = :direct
+    end
+
+    opts.on('-s OPTIME', '--start', 'Starting optime') do |optime|
+      options[:optime] = Integer(optime)
+    end
+
+    opts.on('-f', '--follow-automatically', "Don't prompt between ops") do
+      options[:pause] = false
+    end
+  end
+  optparse.parse!
+
+  if ARGV.length != 0
+    puts optparse
+    return 1
+  end
+
+  log = Log4r::Logger.new('Stripe')
+  log.outputters = Log4r::StdoutOutputter.new(STDERR)
+  if options[:verbose] >= 1
+    log.level = Log4r::DEBUG
+  else
+    log.level = Log4r::INFO
+  end
+
+  runner = Mongoriver::Mongocp.new(["#{options[:host]}:#{options[:port]}"], options[:type], options[:optime], options[:pause])
+  runner.run
+  return 0
+end
+
+if $0 == __FILE__
+  ret = main
+  begin
+    exit(ret)
+  rescue TypeError
+    exit(0)
+  end
+end
data/lib/mongoriver/abstract_persistent_tailer.rb
ADDED
@@ -0,0 +1,58 @@
+module Mongoriver
+
+  # A variant of Tailer that automatically loads and persists the
+  # "last timestamp processed" state. See PersistentTailer for a
+  # concrete subclass that uses the same mongod you are already
+  # tailing.
+
+  class AbstractPersistentTailer < Tailer
+    def initialize(upstream, type, opts={})
+      raise "You can't instantiate an AbstractPersistentTailer -- did you want PersistentTailer?" if self.class == AbstractPersistentTailer
+      super(upstream, type)
+
+      @last_saved = nil
+      @batch = opts[:batch]
+      @last_read = nil
+    end
+
+    def tail_from(ts, opts={})
+      if ts.nil?
+        ts = read_timestamp
+      end
+      super(ts, opts)
+    end
+
+    def stream(limit=nil)
+      super(limit) do |entry|
+        yield entry
+        @last_read = entry['ts']
+        maybe_save_timestamp unless @batch
+      end
+    end
+
+    def batch_done
+      raise "You must specify :batch => true to use the batch-processing interface." unless @batch
+      maybe_save_timestamp
+    end
+
+    def read_timestamp
+      raise "read_timestamp unimplemented!"
+    end
+
+    def write_timestamp(ts)
+      raise "write_timestamp unimplemented!"
+    end
+
+    def save_timestamp
+      write_timestamp(@last_read)
+      @last_saved = @last_read
+      log.info("Saved timestamp: #{@last_saved} (#{Time.at(@last_saved.seconds)})")
+    end
+
+    def maybe_save_timestamp
+      # Write timestamps once a minute
+      return unless @last_read
+      save_timestamp if @last_saved.nil? || (@last_read.seconds - @last_saved.seconds) > 60
+    end
+  end
+end
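AbstractPersistentTailer leaves only read_timestamp and write_timestamp to subclasses. As a hedged sketch of that contract, a hypothetical subclass (not part of the gem) that keeps the optime in a local file:

require 'mongoriver'

# Hypothetical example subclass: persist the last-processed timestamp
# in a local file instead of a mongod collection.
class FileTailer < Mongoriver::AbstractPersistentTailer
  STATE_FILE = '/tmp/optime.txt'  # assumed location, for illustration

  def read_timestamp
    return BSON::Timestamp.new(0, 0) unless File.exist?(STATE_FILE)
    seconds, increment = File.read(STATE_FILE).split(',').map(&:to_i)
    BSON::Timestamp.new(seconds, increment)
  end

  def write_timestamp(ts)
    File.write(STATE_FILE, "#{ts.seconds},#{ts.increment}")
  end
end

# FileTailer.new(['127.0.0.1:27017'], :direct) would then tail and
# resume exactly like the PersistentTailer below.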
data/lib/mongoriver/persistent_tailer.rb
ADDED
@@ -0,0 +1,30 @@
+module Mongoriver
+  # A variant of AbstractPersistentTailer that automatically persists
+  # the "last timestamp processed" state into the database we are
+  # tailing.
+  class PersistentTailer < AbstractPersistentTailer
+    def initialize(upstream, type, service, opts={})
+      raise "You can't use PersistentTailer against only a slave. How am I supposed to write state?" if type == :slave
+      super(upstream, type, opts)
+
+      db         = opts[:db] || "_mongoriver"
+      collection = opts[:collection] || 'oplog-tailers'
+      @service = service
+      @state_collection = @upstream_conn.db(db).collection(collection)
+    end
+
+    def read_timestamp
+      row = @state_collection.find_one(:service => @service)
+      row ? row['timestamp'] : BSON::Timestamp.new(0, 0)
+    end
+
+    def write_timestamp(ts)
+      row = @state_collection.find_one(:service => @service)
+      if row
+        @state_collection.update({'_id' => row['_id']}, '$set' => { 'timestamp' => ts })
+      else
+        @state_collection.insert('service' => @service, 'timestamp' => ts)
+      end
+    end
+  end
+end
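A hedged usage sketch of PersistentTailer; the service name and address are placeholder values, and the type can't be :slave since state is written back upstream:

require 'mongoriver'

tailer = Mongoriver::PersistentTailer.new(['127.0.0.1:27017'], :direct,
                                          'my-service')  # assumed values
tailer.tail_from(nil)  # nil => resume from the timestamp saved for 'my-service'
loop do
  tailer.stream do |entry|
    puts entry['ns']  # process the op; progress is saved about once a minute
  end
end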
data/lib/mongoriver/streambed.rb
ADDED
@@ -0,0 +1,299 @@
+module Mongoriver
+  class Streambed
+    include Mongoriver::Logging
+
+    attr_reader :stats
+
+    class AssertionFailure < StandardError; end
+
+    def assert(condition, msg)
+      raise AssertionFailure.new(msg) unless condition
+    end
+
+    def initialize(upstreams, type)
+      @tailer = Mongoriver::Tailer.new(upstreams, type)
+      @record_fetch_batch_size = 1024
+      @record_sync_batch_size = 256
+      @stats = Hash.new(0)
+    end
+
+    def run
+      self.class.validate_hooks!
+
+      unless ts = starting_optime
+        ts = @tailer.most_recent_timestamp
+        initial_sync
+        hook_update_optime(ts, true)
+      end
+
+      tail_from(ts)
+    end
+
+    def self.my_hooks
+      @hooks ||= []
+    end
+
+    def self.all_hooks
+      hooks = my_hooks
+      if superclass <= Streambed
+        hooks + superclass.all_hooks
+      else
+        hooks
+      end
+    end
+
+    def self.validate_hooks!
+      errors = []
+      all_hooks.each do |name, args, opts|
+        method = self.instance_method(hook_name(name))
+        signature = "#{method.name}(#{args.join(', ')})"
+        if method.owner == Streambed && !opts[:default]
+          errors << "Must provide implementation of #{signature}"
+        end
+      end
+
+      raise "You need to fix the following hook errors:
+
+  #{errors.join("\n  ")}" if errors.length > 0
+    end
+
+    def self.hook_name(name)
+      "hook_#{name}"
+    end
+
+    def self.hook(name, args=[], opts={})
+      if default = opts[:default]
+        target = hook_name(default)
+        implementation = Proc.new do |*args, &blk|
+          send(target, *args, &blk)
+        end
+      else
+        implementation = Proc.new do
+          raise NotImplementedError.new("Override in subclass")
+        end
+      end
+
+      define_method(hook_name(name), implementation)
+      my_hooks << [name, args, opts]
+    end
+
+    hook :optime
+    hook :update_optime, [:ts, :mandatory]
+    hook :initial_sync_index, [:db_name, :collection_name, :index_key, :options]
+    hook :initial_sync_record_batch, [:db_name, :collection_name, :records]
+    hook :stream_insert, [:db_name, :collection_name, :object]
+    hook :stream_update, [:db_name, :collection_name, :selector, :update]
+    hook :stream_remove, [:db_name, :collection_name, :object]
+    # There's usually no difference between the initial index creation
+    # and creating it while streaming ops.
+    hook :stream_create_index, [:db_name, :collection_name, :index_key, :options], :default => :initial_sync_index
+    # This seems to be called while doing a mapreduce.
+    hook :stream_create_collection, [:db_name, :create]
+    # This also seems to be called while doing a mapreduce. Note that
+    # I think mongo has a concept of temporary table, which I should
+    # look into, and renameCollection has some temporary table option.
+    hook :stream_rename_collection, [:db_name, :source, :target]
+    hook :stream_drop_index, [:db_name, :collection_name, :index_name]
+    hook :stream_drop_collection, [:db_name, :dropped]
+    hook :stream_drop_database, [:db_name]
+
+    private
+
+    def starting_optime
+      case time = hook_optime
+      when Integer
+        if time >= 0
+          BSON::Timestamp.new(time, 0)
+        elsif time == -1
+          @tailer.most_recent_timestamp
+        else
+          raise "Invalid optime: #{time}"
+        end
+      when BSON::Timestamp, nil
+        time
+      else
+        raise "Unrecognized type #{time.class} (#{time.inspect}) for start time"
+      end
+    end
+
+    def initial_sync
+      initial_sync_all_indexes
+      initial_sync_all_records
+    end
+
+    def initial_sync_all_indexes
+      log.info("Beginning initial sync of indexes")
+      syncable_databases.each {|db| initial_sync_indexes_for_db(db)}
+      log.info("Done initial sync of indexes")
+    end
+
+    def initial_sync_indexes_for_db(db)
+      db.collection('system.indexes').find.each do |index|
+        options = extract_options_from_index_spec(index)
+        index_key = index['key'].to_a
+
+        ns = index['ns']
+        db_name, collection_name = parse_ns(ns)
+        assert(db_name == db.name, "Index db name #{db_name.inspect} differs from current db name #{db.name.inspect}")
+
+        log.info("#{ns}: Initial sync of index #{options[:name]}")
+        hook_initial_sync_index(db_name, collection_name, index_key, options)
+      end
+    end
+
+    def initial_sync_all_records
+      log.info("Beginning initial sync of records")
+      syncable_databases.each {|db| initial_sync_records_for_db(db)}
+      log.info("Done initial sync of records")
+    end
+
+    def initial_sync_records_for_db(db)
+      syncable_collections(db).each do |collection|
+        initial_sync_records_for_collection(collection)
+      end
+    end
+
+    def initial_sync_records_for_collection(collection)
+      db_name = collection.db.name
+      collection_name = collection.name
+      ns = "#{db_name}.#{collection_name}"
+
+      log.info("#{ns}: Starting record initial sync")
+
+      records = []
+      collection.find({}, :batch_size => @record_fetch_batch_size, :timeout => false, :sort => [['$natural', 1]]) do |cursor|
+        while cursor.has_next?
+          records << cursor.next
+          if records.length > @record_sync_batch_size
+            # TODO: add better logging than this
+            log.info("#{ns}: Running sync of batch of #{records.length} records")
+            hook_initial_sync_record_batch(db_name, collection_name, records)
+            records = []
+          end
+        end
+      end
+      log.info("#{ns}: Finishing sync with a batch of #{records.length} records")
+      hook_initial_sync_record_batch(db_name, collection_name, records)
+
+      log.info("#{ns}: Finished record initial sync")
+    end
+
+    # This should be fine to instantiate all at once, since
+    # database_names returns all the dbs as strings anyway
+    def syncable_databases
+      @tailer.upstream_conn.database_names.map do |db_name|
+        next if db_name == 'local'
+        @tailer.upstream_conn.db(db_name)
+      end.compact
+    end
+
+    def syncable_collections(db)
+      db.collection_names.map do |collection_name|
+        next if collection_name.start_with?('system.')
+        db.collection(collection_name)
+      end.compact
+    end
+
+    def extract_options_from_index_spec(index)
+      options = {}
+      index.each do |key, value|
+        case key
+        when 'v'
+          raise NotImplementedError.new("Only v=1 indexes are supported at the moment, not v=#{value.inspect}") unless value == 1
+        when 'ns', 'key'
+        else
+          options[key.to_sym] = value
+        end
+      end
+
+      assert(options.include?(:name), "No name defined for index spec #{index.inspect}")
+      options
+    end
+
+    def stream_op(entry)
+      op = entry['op']
+      data = entry['o']
+      ns = entry['ns']
+
+      if op == 'n'
+        # This happens for initial rs.initiate() op, maybe others.
+        log.info("Skipping no-op #{entry.inspect}")
+        return
+      end
+
+      db_name, collection_name = parse_ns(ns)
+      assert(db_name, "Nil db name #{db_name.inspect} for #{entry.inspect}")
+
+      case op
+      when 'i'
+        if collection_name == 'system.indexes'
+          record(ns, entry, :create_index)
+          index_db_name, index_collection_name = parse_ns(data['ns'])
+          index_key = data['key'].to_a
+          options = extract_options_from_index_spec(data)
+          hook_stream_create_index(index_db_name, index_collection_name, index_key, options)
+        else
+          record(ns, entry, :insert)
+          hook_stream_insert(db_name, collection_name, data)
+        end
+      when 'u'
+        record(ns, entry, :update)
+        hook_stream_update(db_name, collection_name, entry['o2'], data)
+      when 'd'
+        record(ns, entry, :remove)
+        hook_stream_remove(db_name, collection_name, data)
+      when 'c'
+        assert(collection_name == '$cmd', "Command collection name is #{collection_name.inspect} for #{entry.inspect}")
+        if deleted_from = data['deleteIndexes']
+          record(ns, entry, :drop_index)
+          index = data['index']
+          hook_stream_drop_index(db_name, deleted_from, index)
+        elsif dropped = data['drop']
+          record(ns, entry, :drop_collection)
+          hook_stream_drop_collection(db_name, dropped)
+        elsif dropped = data['dropDatabase']
+          record(ns, entry, :drop_database)
+          hook_stream_drop_database(db_name)
+        elsif source = data['renameCollection']
+          record(ns, entry, :rename_collection)
+          target = data['to']
+          hook_stream_rename_collection(db_name, source, target)
+        elsif create = data['create']
+          record(ns, entry, :create)
+          hook_stream_create_collection(db_name, create)
+        else
+          raise "Unrecognized command #{data.inspect}"
+        end
+      else
+        raise "Unrecognized op: #{op} (#{entry.inspect})"
+      end
+
+      optime = entry['ts']
+      hook_update_optime(optime, false)
+    end
+
+    def tail_from(ts)
+      begin
+        @tailer.tail_from(ts)
+        loop do
+          @tailer.stream do |op|
+            stream_op(op)
+          end
+        end
+      ensure
+        @tailer.stop
+      end
+    end
+
+    def record(ns, entry, type)
+      stats[type] += 1
+      log.debug("#{ns}: #{type.inspect} #{entry.inspect}")
+    end
+
+    protected
+
+    def parse_ns(ns)
+      ns.split('.', 2)
+    end
+  end
+end
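Streambed's hook system is easiest to see in a minimal subclass: validate_hooks! forces every hook without a :default to be overridden. A hedged sketch (PrintingStreambed is hypothetical, not part of the gem) using the same metaprogramming trick as bin/optail above:

require 'mongoriver'

# Hypothetical minimal Streambed subclass that just prints every event.
class PrintingStreambed < Mongoriver::Streambed
  def hook_optime
    nil  # nil => run an initial sync, then tail from the current optime
  end

  def hook_update_optime(ts, mandatory); end  # don't persist progress

  # Same trick as bin/optail: define every remaining required hook at once.
  all_hooks.each do |name, _, opts|
    next if [:optime, :update_optime].include?(name) || opts[:default]
    define_method(hook_name(name)) { |*args| puts "#{name}: #{args.inspect}" }
  end
end

# Placeholder address and connection type.
PrintingStreambed.new(['127.0.0.1:27017'], :slave).run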
data/lib/mongoriver/tailer.rb
ADDED
@@ -0,0 +1,100 @@
+module Mongoriver
+  class Tailer
+    include Mongoriver::Logging
+
+    attr_reader :upstream_conn
+
+    def initialize(upstreams, type)
+      @upstreams = upstreams
+      @type = type
+      # This number seems high
+      @conn_opts = {:op_timeout => 86400}
+
+      @cursor = nil
+
+      connect_upstream
+    end
+
+    def most_recent_timestamp
+      record = oplog_collection.find_one({}, :sort => [['$natural', -1]])
+      record['ts']
+    end
+
+    def connect_upstream
+      case @type
+      when :replset
+        opts = @conn_opts.merge(:read => :secondary)
+        @upstream_conn = Mongo::ReplSetConnection.new(@upstreams, opts)
+      when :slave, :direct
+        opts = @conn_opts.merge(:slave_ok => true)
+        host, port = parse_direct_upstream
+        @upstream_conn = Mongo::Connection.new(host, port, opts)
+        raise "Server at #{@upstream_conn.host}:#{@upstream_conn.port} is the primary -- if you're ok with that, check why your wrapper is passing :slave rather than :direct (HINT: try passing a -a to scripts like optail or mongocp)" if @type == :slave && @upstream_conn.primary?
+        ensure_upstream_replset!
+      when :existing
+        raise "Must pass in a single existing Mongo::Connection with :existing" unless @upstreams.length == 1 && @upstreams[0].respond_to?(:db)
+        @upstream_conn = @upstreams[0]
+      else
+        raise "Invalid connection type: #{@type.inspect}"
+      end
+    end
+
+    def ensure_upstream_replset!
+      # Might be a better way to do this, but not seeing one.
+      config = @upstream_conn['admin'].command(:ismaster => 1)
+      unless config['setName']
+        raise "Server at #{@upstream_conn.host}:#{@upstream_conn.port} is not running as a replica set"
+      end
+    end
+
+    def parse_direct_upstream
+      raise "When connecting directly to a mongo instance, must provide a single upstream" unless @upstreams.length == 1
+      upstream = @upstreams[0]
+      parse_host_spec(upstream)
+    end
+
+    def parse_host_spec(host_spec)
+      host, port = host_spec.split(':')
+      host = '127.0.0.1' if host.to_s.length == 0
+      port = '27017' if port.to_s.length == 0
+      [host, port.to_i]
+    end
+
+    def oplog_collection
+      @upstream_conn.db('local').collection('oplog.rs')
+    end
+
+    def tail_from(ts, opts = {})
+      raise "Already tailing the oplog!" if @cursor
+
+      # Maybe if ts is old enough, just start from the beginning?
+      query = (opts[:filter] || {}).merge({ 'ts' => { '$gte' => ts } })
+
+      oplog_collection.find(query, :timeout => false) do |oplog|
+        oplog.add_option(Mongo::Constants::OP_QUERY_TAILABLE)
+        oplog.add_option(Mongo::Constants::OP_QUERY_OPLOG_REPLAY)
+
+        oplog.add_option(Mongo::Constants::OP_QUERY_AWAIT_DATA) unless opts[:dont_wait]
+
+        log.info("Starting oplog stream from #{ts}")
+        @cursor = oplog
+      end
+    end
+
+    def stop
+      @cursor.close if @cursor
+      @cursor = nil
+    end
+
+    def stream(limit=nil)
+      count = 0
+      while @cursor.has_next?
+        count += 1
+        yield @cursor.next
+        break if limit && count >= limit
+      end
+
+      return @cursor.has_next?
+    end
+  end
+end
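One detail worth noting from connect_upstream: besides :replset, :slave, and :direct, the :existing type lets you hand the Tailer a connection you already hold; it must be a single object responding to #db. A sketch with placeholder addresses:

require 'mongoriver'

# Reuse an already-open connection; :existing expects exactly one
# element that responds to #db (e.g. a Mongo::ReplSetConnection).
conn = Mongo::ReplSetConnection.new(['db-a.example.com:27017',
                                     'db-b.example.com:27017'])
tailer = Mongoriver::Tailer.new([conn], :existing)
tailer.tail_from(tailer.most_recent_timestamp)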
data/lib/mongoriver.rb
ADDED
@@ -0,0 +1,12 @@
+require 'mongo'
+require 'log4r'
+
+module Mongoriver; end
+
+require 'mongoriver/log'
+
+require 'mongoriver/streambed'
+require 'mongoriver/tailer'
+require 'mongoriver/abstract_persistent_tailer'
+require 'mongoriver/persistent_tailer'
+require 'mongoriver/version'
data/mongoriver.gemspec
ADDED
@@ -0,0 +1,22 @@
+# -*- coding: utf-8 -*-
+$:.unshift(File.expand_path("lib", File.dirname(__FILE__)))
+require 'mongoriver/version'
+
+Gem::Specification.new do |gem|
+  gem.authors       = ["Greg Brockman"]
+  gem.email         = ["gdb@gregbrockman.com"]
+  gem.description   = %q{Some tools and libraries to simplify tailing the mongod oplog}
+  gem.summary       = %q{mongodb oplog-tailing utilities.}
+  gem.homepage      = ""
+
+  gem.files         = `git ls-files`.split($\)
+  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
+  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
+  gem.name          = "mongoriver"
+  gem.require_paths = ["lib"]
+  gem.version       = Mongoriver::VERSION
+
+  gem.add_runtime_dependency('mongo', '>= 1.7')
+  gem.add_runtime_dependency('bson_ext')
+  gem.add_runtime_dependency('log4r')
+end
metadata
ADDED
@@ -0,0 +1,110 @@
+--- !ruby/object:Gem::Specification
+name: mongoriver
+version: !ruby/object:Gem::Version
+  version: 0.1.0
+  prerelease:
+platform: ruby
+authors:
+- Greg Brockman
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2013-02-05 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: mongo
+  requirement: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '1.7'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '1.7'
+- !ruby/object:Gem::Dependency
+  name: bson_ext
+  requirement: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: log4r
+  requirement: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+description: Some tools and libraries to simplify tailing the mongod oplog
+email:
+- gdb@gregbrockman.com
+executables:
+- mongocp
+- optail
+extensions: []
+extra_rdoc_files: []
+files:
+- .gitignore
+- Gemfile
+- LICENSE
+- README.md
+- Rakefile
+- bin/mongocp
+- bin/optail
+- lib/mongoriver.rb
+- lib/mongoriver/abstract_persistent_tailer.rb
+- lib/mongoriver/log.rb
+- lib/mongoriver/persistent_tailer.rb
+- lib/mongoriver/streambed.rb
+- lib/mongoriver/tailer.rb
+- lib/mongoriver/version.rb
+- mongoriver.gemspec
+homepage: ''
+licenses: []
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 1.8.23
+signing_key:
+specification_version: 3
+summary: mongodb oplog-tailing utilities.
+test_files: []