mongoriver 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +17 -0
- data/Gemfile +3 -0
- data/LICENSE +22 -0
- data/README.md +29 -0
- data/Rakefile +2 -0
- data/bin/mongocp +250 -0
- data/bin/optail +101 -0
- data/lib/mongoriver/abstract_persistent_tailer.rb +58 -0
- data/lib/mongoriver/log.rb +7 -0
- data/lib/mongoriver/persistent_tailer.rb +30 -0
- data/lib/mongoriver/streambed.rb +299 -0
- data/lib/mongoriver/tailer.rb +100 -0
- data/lib/mongoriver/version.rb +3 -0
- data/lib/mongoriver.rb +12 -0
- data/mongoriver.gemspec +22 -0
- metadata +110 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
+Copyright (c) 2012 Greg Brockman
+
+MIT License
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md
ADDED
@@ -0,0 +1,29 @@
+# Mongoriver
+
+TODO: Write a gem description
+
+## Installation
+
+Add this line to your application's Gemfile:
+
+    gem 'mongoriver'
+
+And then execute:
+
+    $ bundle
+
+Or install it yourself as:
+
+    $ gem install mongoriver
+
+## Usage
+
+TODO: Write usage instructions here
+
+## Contributing
+
+1. Fork it
+2. Create your feature branch (`git checkout -b my-new-feature`)
+3. Commit your changes (`git commit -am 'Added some feature'`)
+4. Push to the branch (`git push origin my-new-feature`)
+5. Create new Pull Request
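The Usage section above is still a TODO in this release. As a hedged sketch of what basic usage might look like, based only on the Tailer API added in this diff (the hostname and the :direct type are placeholder choices):

    require 'mongoriver'

    # Minimal oplog-tailing sketch; assumes a replica-set member reachable at
    # localhost:27017 (:direct skips Tailer's primary check, like the -a flag).
    tailer = Mongoriver::Tailer.new(['localhost:27017'], :direct)
    tailer.tail_from(tailer.most_recent_timestamp)
    tailer.stream do |op|
      puts op.inspect   # each op is a raw oplog entry (a BSON document)
    end
    tailer.stop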
data/Rakefile
ADDED
data/bin/mongocp
ADDED
@@ -0,0 +1,250 @@
+#!/usr/bin/env ruby
+require 'logger'
+require 'optparse'
+
+require 'rubygems'
+require 'bundler/setup'
+require 'mongoriver'
+
+module Mongoriver
+  class Mongocp < Streambed
+    include Mongoriver::Logging
+
+    def initialize(upstreams, type, downstream, prefix)
+      super(upstreams, type)
+      @downstream = downstream
+      @prefix = prefix
+      connect_downstream
+    end
+
+    def hook_optime
+      if optime = optime_collection.find_one(:_id => @prefix)
+        optime['ts']
+      else
+        nil
+      end
+    end
+
+    def hook_update_optime(ts, mandatory)
+      optime_collection.update({:_id => @prefix}, {'$set' => {:ts => ts}}, :upsert => true) if mandatory || rand(20) == 0
+    end
+
+    def hook_initial_sync_index(db_name, collection_name, index_key, options)
+      collection = downstream_collection(db_name, collection_name)
+      index_hash = BSON::OrderedHash.new
+      index_key.each {|k,v| index_hash[k] = v}
+      collection.send(:generate_indexes, index_hash, nil, options)
+    end
+
+    def hook_initial_sync_record_batch(db_name, collection_name, records)
+      collection = downstream_collection(db_name, collection_name)
+      bulk_insert(collection, records)
+    end
+
+    # TODO: should probably do the same key checking nonsense as the above
+    def hook_stream_insert(db_name, collection_name, object)
+      collection = downstream_collection(db_name, collection_name)
+      wrap_errors(collection, object['_id']) do
+        # Only needed if safe mode is set in the driver. Note that the
+        # argument here for oplog idempotency in the case of unique
+        # keys is kind of interesting. I believe I can prove
+        # idempotency as long as Mongo has no insert order-dependent
+        # unique indexes (which I believe is true) and that you do all
+        # your object updates as upserts.
+        allow_dupkeys do
+          collection.insert(object)
+        end
+      end
+    end
+
+    def hook_stream_update(db_name, collection_name, selector, update)
+      collection = downstream_collection(db_name, collection_name)
+      wrap_errors(collection, selector['_id']) do
+        collection.update(selector, update, :upsert => true)
+      end
+    end
+
+    def hook_stream_remove(db_name, collection_name, object)
+      collection = downstream_collection(db_name, collection_name)
+      wrap_errors(collection, object['_id']) do
+        collection.remove(object)
+      end
+    end
+
+    def hook_stream_create_collection(db_name, create)
+      db = downstream_db(db_name)
+      wrap_errors(db, create) do
+        db.create_collection(create)
+      end
+    end
+
+    # "Error renaming collection: #<BSON::OrderedHash:0x83869e34 {\"errmsg\"=>\"exception: source namespace does not exist\", \"code\"=>10026, \"ok\"=>0.0}>"
+    #
+    # Possibly need the same thing if the destination already exists
+    def hook_stream_rename_collection(db_name, source, target)
+      db = downstream_db(db_name)
+      wrap_errors(db, "#{source} -> #{target}") do
+        begin
+          db.rename_collection(source, target)
+        rescue Mongo::MongoDBError => e
+          if e.message =~ /Error renaming collection: .*exception: source namespace does not exist"/
+            log.warn("Ignoring rename of non-existent collection #{source} -> #{target}: #{e} (expected when replaying part of the oplog)")
+          elsif e.message =~ /Error renaming collection: .*exception: target namespace exists"/
+            log.warn("Ignoring rename of #{source} to existing collection #{target}: #{e} (expected when replaying part of the oplog)")
+          else
+            raise
+          end
+        end
+      end
+    end
+
+    def hook_stream_drop_index(db_name, collection_name, index_name)
+      collection = downstream_collection(db_name, collection_name)
+      wrap_errors(collection, index_name) do
+        begin
+          collection.drop_index(index_name)
+        rescue Mongo::MongoDBError => e
+          if e.message =~ /index not found/
+            log.warn("Ignoring drop of non-existent index #{index_name.inspect}: #{e} (expected when replaying part of the oplog)")
+          else
+            raise
+          end
+        end
+      end
+    end
+
+    def hook_stream_drop_collection(db_name, dropped)
+      db = downstream_db(db_name)
+      wrap_errors(db, dropped) do
+        db.drop_collection(dropped)
+      end
+    end
+
+    def hook_stream_drop_database(db_name)
+      db = downstream_db(db_name)
+      wrap_errors(db, db_name) do
+        db.command(:dropDatabase => 1)
+      end
+    end
+
+    private
+
+    def allow_dupkeys(&blk)
+      begin
+        blk.call
+      rescue Mongo::OperationFailure => e
+        if e.error_code == 11000
+          log.warn("Ignoring unique index violation: #{e} (expected when replaying part of the oplog)")
+        else
+          raise
+        end
+      end
+    end
+
+    def bulk_insert(collection, docs)
+      begin
+        # Use the internal insert_documents method because it lets us
+        # disable key verification
+        collection.send(:insert_documents, docs, collection.name, false)
+      rescue Mongo::MongoRubyError => e
+        log.error("#{collection.db.name}.#{collection.name}: Caught error on batch insert", e)
+        docs.each do |doc|
+          wrap_errors(collection, doc['_id']) do
+            collection.send(:insert_documents, [doc], collection.name, false)
+          end
+        end
+      end
+    end
+
+    def wrap_errors(collection_or_db, object, &blk)
+      begin
+        blk.call
+      rescue Mongo::MongoRubyError => e
+        if collection_or_db.kind_of?(Mongo::Collection)
+          ns = "#{collection_or_db.db.name}.#{collection_or_db.name}"
+        else
+          ns = collection_or_db.name
+        end
+        log.error("#{ns}: Unknown error for #{object}", e)
+      end
+    end
+
+    def downstream_db(db_name)
+      prefixed = "#{@prefix}_#{db_name}"
+      @downstream_conn.db(prefixed)
+    end
+
+    def downstream_collection(db_name, collection_name)
+      downstream_db(db_name).collection(collection_name)
+    end
+
+    def optime_collection
+      @optime_collection ||= @downstream_conn.db('_mongocp').collection('optime')
+    end
+
+    def connect_downstream
+      host, port = @tailer.parse_host_spec(@downstream)
+      @downstream_conn = Mongo::Connection.new(host, port, :safe => true)
+    end
+  end
+end
+
+def main
+  options = {:host => nil, :port => nil, :type => :slave, :verbose => 0}
+  optparse = OptionParser.new do |opts|
+    opts.banner = "Usage: #{$0} [options]"
+
+    opts.on('-v', '--verbosity', 'Verbosity of debugging output') do
+      options[:verbose] += 1
+    end
+
+    opts.on('-h', '--help', 'Display this message') do
+      puts opts
+      exit(1)
+    end
+
+    opts.on('--help', 'Display this message') do
+      puts opts
+      exit(1)
+    end
+
+    opts.on('-h HOST', '--host', 'Upstream host to connect to') do |host|
+      options[:host] = host
+    end
+
+    opts.on('-p PORT', '--port', 'Upstream port to connect to') do |port|
+      options[:port] = Integer(port)
+    end
+
+    opts.on('-a', '--all', 'Allow connections even directly to a primary') do
+      options[:type] = :direct
+    end
+  end
+  optparse.parse!
+
+  if ARGV.length != 0
+    puts optparse
+    return 1
+  end
+
+  log = Log4r::Logger.new('Stripe')
+  log.outputters = Log4r::StdoutOutputter.new(STDERR)
+  if options[:verbose] >= 1
+    log.level = Log4r::DEBUG
+  else
+    log.level = Log4r::INFO
+  end
+  runner = Mongoriver::Mongocp.new(["#{options[:host]}:#{options[:port]}"], options[:type], 'localhost:5001', 'test')
+  runner.run
+  return 0
+end
+
+if $0 == __FILE__
+  ret = main
+  begin
+    exit(ret)
+  rescue TypeError
+    exit(0)
+  end
+end
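Note the checkpointing strategy above: hook_update_optime persists the optime on roughly one op in twenty (rand(20) == 0) unless the write is mandatory, trading some durability of the cursor position for far fewer state writes. A hedged sketch of driving Mongocp directly rather than via this script (the upstream address is a placeholder; main above hardcodes the 'localhost:5001' downstream and 'test' prefix shown here):

    # Assumes the Mongocp class defined above has been loaded.
    # Copies everything from an upstream secondary into prefixed downstream dbs.
    runner = Mongoriver::Mongocp.new(['upstream-host:27017'], :slave,
                                     'localhost:5001', 'test')
    runner.run   # initial sync if no saved optime, then tail the oplog forever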
data/bin/optail
ADDED
@@ -0,0 +1,101 @@
+#!/usr/bin/env ruby
+require 'logger'
+require 'optparse'
+
+require 'rubygems'
+require 'bundler/setup'
+require 'mongoriver'
+
+module Mongoriver
+  class Mongocp < Streambed
+    include Mongoriver::Logging
+
+    def initialize(upstreams, type, start_optime, pause)
+      super(upstreams, type)
+      @start_optime = start_optime
+      @pause = pause
+    end
+
+    def pause
+      if @pause
+        $stderr.puts("Press enter to continue")
+        $stdin.readline
+      end
+    end
+
+    def hook_optime
+      @start_optime
+    end
+
+    def hook_update_optime(ts, mandatory)
+    end
+
+    all_hooks.each do |name, _, opts|
+      next if name == :optime || name == :update_optime
+      define_method(hook_name(name)) {|*args| pause}
+    end
+  end
+end
+
+def main
+  options = {:host => nil, :port => nil, :type => :slave, :optime => 0, :pause => true, :verbose => 0}
+  optparse = OptionParser.new do |opts|
+    opts.banner = "Usage: #{$0} [options]"
+
+    opts.on('-v', '--verbosity', 'Verbosity of debugging output') do
+      options[:verbose] += 1
+    end
+
+    opts.on('--help', 'Display this message') do
+      puts opts
+      exit(1)
+    end
+
+    opts.on('-h HOST', '--host', 'Upstream host to connect to') do |host|
+      options[:host] = host
+    end
+
+    opts.on('-p PORT', '--port', 'Upstream port to connect to') do |port|
+      options[:port] = Integer(port)
+    end
+
+    opts.on('-a', '--all', 'Allow connections even directly to a primary') do
+      options[:type] = :direct
+    end
+
+    opts.on('-s OPTIME', '--start', 'Starting optime') do |optime|
+      options[:optime] = Integer(optime)
+    end
+
+    opts.on('-f', '--follow-automatically', "Don't prompt between ops") do
+      options[:pause] = false
+    end
+  end
+  optparse.parse!
+
+  if ARGV.length != 0
+    puts optparse
+    return 1
+  end
+
+  log = Log4r::Logger.new('Stripe')
+  log.outputters = Log4r::StdoutOutputter.new(STDERR)
+  if options[:verbose] >= 1
+    log.level = Log4r::DEBUG
+  else
+    log.level = Log4r::INFO
+  end
+
+  runner = Mongoriver::Mongocp.new(["#{options[:host]}:#{options[:port]}"], options[:type], options[:optime], options[:pause])
+  runner.run
+  return 0
+end
+
+if $0 == __FILE__
+  ret = main
+  begin
+    exit(ret)
+  rescue TypeError
+    exit(0)
+  end
+end
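optail reuses the Streambed hook machinery as an interactive oplog debugger: the all_hooks loop above generates every hook except optime/update_optime as a call to pause. A hedged sketch of the code equivalent of running this script with -s 0 (the host is a placeholder):

    # Replay the oplog from optime 0, prompting as each op is handled.
    stepper = Mongoriver::Mongocp.new(['localhost:27017'], :direct, 0, true)
    stepper.run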
data/lib/mongoriver/abstract_persistent_tailer.rb
ADDED
@@ -0,0 +1,58 @@
+module Mongoriver
+
+  # A variant of Tailer that automatically loads and persists the
+  # "last timestamp processed" state. See PersistentTailer for a
+  # concrete subclass that uses the same mongod you are already
+  # tailing.
+
+  class AbstractPersistentTailer < Tailer
+    def initialize(upstream, type, opts={})
+      raise "You can't instantiate an AbstractPersistentTailer -- did you want PersistentTailer?" if self.class == AbstractPersistentTailer
+      super(upstream, type)
+
+      @last_saved = nil
+      @batch = opts[:batch]
+      @last_read = nil
+    end
+
+    def tail_from(ts, opts={})
+      if ts.nil?
+        ts = read_timestamp
+      end
+      super(ts, opts)
+    end
+
+    def stream(limit=nil)
+      super(limit) do |entry|
+        yield entry
+        @last_read = entry['ts']
+        maybe_save_timestamp unless @batch
+      end
+    end
+
+    def batch_done
+      raise "You must specify :batch => true to use the batch-processing interface." unless @batch
+      maybe_save_timestamp
+    end
+
+    def read_timestamp
+      raise "read_timestamp unimplemented!"
+    end
+
+    def write_timestamp(ts)
+      raise "write_timestamp unimplemented!"
+    end
+
+    def save_timestamp
+      write_timestamp(@last_read)
+      @last_saved = @last_read
+      log.info("Saved timestamp: #{@last_saved} (#{Time.at(@last_saved.seconds)})")
+    end
+
+    def maybe_save_timestamp
+      # Write timestamps once a minute
+      return unless @last_read
+      save_timestamp if @last_saved.nil? || (@last_read.seconds - @last_saved.seconds) > 60
+    end
+  end
+end
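The two unimplemented methods above, read_timestamp and write_timestamp, are the whole persistence contract. A hedged sketch of a concrete subclass that checkpoints to a local file instead of mongod (the class name and path handling are illustrative; see PersistentTailer below for the in-database version):

    class FileTailer < Mongoriver::AbstractPersistentTailer
      def initialize(upstream, type, path, opts={})
        super(upstream, type, opts)
        @path = path
      end

      def read_timestamp
        return BSON::Timestamp.new(0, 0) unless File.exist?(@path)
        seconds, increment = File.read(@path).split(':').map { |part| part.to_i }
        BSON::Timestamp.new(seconds, increment)
      end

      def write_timestamp(ts)
        File.write(@path, "#{ts.seconds}:#{ts.increment}")
      end
    end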
data/lib/mongoriver/persistent_tailer.rb
ADDED
@@ -0,0 +1,30 @@
+module Mongoriver
+  # A variant of AbstractPersistentTailer that automatically persists
+  # the "last timestamp processed" state into the database we are
+  # tailing.
+  class PersistentTailer < AbstractPersistentTailer
+    def initialize(upstream, type, service, opts={})
+      raise "You can't use PersistentTailer against only a slave. How am I supposed to write state?" if type == :slave
+      super(upstream, type, opts)
+
+      db = opts[:db] || "_mongoriver"
+      collection = opts[:collection] || 'oplog-tailers'
+      @service = service
+      @state_collection = @upstream_conn.db(db).collection(collection)
+    end
+
+    def read_timestamp
+      row = @state_collection.find_one(:service => @service)
+      row ? row['timestamp'] : BSON::Timestamp.new(0, 0)
+    end
+
+    def write_timestamp(ts)
+      row = @state_collection.find_one(:service => @service)
+      if row
+        @state_collection.update({'_id' => row['_id']}, '$set' => { 'timestamp' => ts })
+      else
+        @state_collection.insert('service' => @service, 'timestamp' => ts)
+      end
+    end
+  end
+end
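A hedged usage sketch of the batch-processing interface, combining PersistentTailer with the :batch option from AbstractPersistentTailer (the service name and host are placeholder choices):

    require 'mongoriver'

    tailer = Mongoriver::PersistentTailer.new(['localhost:27017'], :direct,
                                              'my-service', :batch => true)
    tailer.tail_from(nil)   # nil falls back to the timestamp saved upstream
    tailer.stream(100) do |entry|
      # process entry...
    end
    tailer.batch_done       # checkpoint (throttled to about once a minute)
    tailer.stop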
data/lib/mongoriver/streambed.rb
ADDED
@@ -0,0 +1,299 @@
+module Mongoriver
+  class Streambed
+    include Mongoriver::Logging
+
+    attr_reader :stats
+
+    class AssertionFailure < StandardError; end
+
+    def assert(condition, msg)
+      raise AssertionFailure.new(msg) unless condition
+    end
+
+    def initialize(upstreams, type)
+      @tailer = Mongoriver::Tailer.new(upstreams, type)
+      @record_fetch_batch_size = 1024
+      @record_sync_batch_size = 256
+      @stats = Hash.new(0)
+    end
+
+    def run
+      self.class.validate_hooks!
+
+      unless ts = starting_optime
+        ts = @tailer.most_recent_timestamp
+        initial_sync
+        hook_update_optime(ts, true)
+      end
+
+      tail_from(ts)
+    end
+
+    def self.my_hooks
+      @hooks ||= []
+    end
+
+    def self.all_hooks
+      hooks = my_hooks
+      if superclass <= Streambed
+        hooks + superclass.all_hooks
+      else
+        hooks
+      end
+    end
+
+    def self.validate_hooks!
+      errors = []
+      all_hooks.each do |name, args, opts|
+        method = self.instance_method(hook_name(name))
+        signature = "#{method.name}(#{args.join(', ')})"
+        if method.owner == Streambed && !opts[:default]
+          errors << "Must provide implementation of #{signature}"
+        end
+      end
+
+      raise "You need to fix the following hook errors:
+
+  #{errors.join("\n  ")}" if errors.length > 0
+    end
+
+    def self.hook_name(name)
+      "hook_#{name}"
+    end
+
+    def self.hook(name, args=[], opts={})
+      if default = opts[:default]
+        target = hook_name(default)
+        implementation = Proc.new do |*args, &blk|
+          send(target, *args, &blk)
+        end
+      else
+        implementation = Proc.new do
+          raise NotImplementedError.new("Override in subclass")
+        end
+      end
+
+      define_method(hook_name(name), implementation)
+      my_hooks << [name, args, opts]
+    end
+
+    hook :optime
+    hook :update_optime, [:ts, :mandatory]
+    hook :initial_sync_index, [:db_name, :collection_name, :index_key, :options]
+    hook :initial_sync_record_batch, [:db_name, :collection_name, :records]
+    hook :stream_insert, [:db_name, :collection_name, :object]
+    hook :stream_update, [:db_name, :collection_name, :selector, :update]
+    hook :stream_remove, [:db_name, :collection_name, :object]
+    # Not usually a difference between the initial index creation and
+    # creating it while streaming ops.
+    hook :stream_create_index, [:db_name, :collection_name, :index_key, :options], :default => :initial_sync_index
+    # This seems to be called while doing a mapreduce.
+    hook :stream_create_collection, [:db_name, :create]
+    # This also seems to be called while doing a mapreduce. Note that
+    # I think mongo has a concept of temporary table, which I should
+    # look into, and renameCollection has some temporary table option.
+    hook :stream_rename_collection, [:db_name, :source, :target]
+    hook :stream_drop_index, [:db_name, :collection_name, :index_name]
+    hook :stream_drop_collection, [:db_name, :dropped]
+    hook :stream_drop_database, [:db_name]
+
+    private
+
+    def starting_optime
+      case time = hook_optime
+      when Integer
+        if time >= 0
+          BSON::Timestamp.new(time, 0)
+        elsif time == -1
+          @tailer.most_recent_timestamp
+        else
+          raise "Invalid optime: #{time}"
+        end
+      when BSON::Timestamp, nil
+        time
+      else
+        raise "Unrecognized type #{time.class} (#{time.inspect}) for start time"
+      end
+    end
+
+    def initial_sync
+      initial_sync_all_indexes
+      initial_sync_all_records
+    end
+
+    def initial_sync_all_indexes
+      log.info("Beginning initial sync of indexes")
+      syncable_databases.each {|db| initial_sync_indexes_for_db(db)}
+      log.info("Done initial sync of indexes")
+    end
+
+    def initial_sync_indexes_for_db(db)
+      db.collection('system.indexes').find.each do |index|
+        options = extract_options_from_index_spec(index)
+        index_key = index['key'].to_a
+
+        ns = index['ns']
+        db_name, collection_name = parse_ns(ns)
+        assert(db_name == db.name, "Index db name #{db_name.inspect} differs from current db name #{db.name.inspect}")
+
+        log.info("#{ns}: Initial sync of index #{options[:name]}")
+        hook_initial_sync_index(db_name, collection_name, index_key, options)
+      end
+    end
+
+    def initial_sync_all_records
+      log.info("Beginning initial sync of records")
+      syncable_databases.each {|db| initial_sync_records_for_db(db)}
+      log.info("Done initial sync of records")
+    end
+
+    def initial_sync_records_for_db(db)
+      syncable_collections(db).each do |collection|
+        initial_sync_records_for_collection(collection)
+      end
+    end
+
+    def initial_sync_records_for_collection(collection)
+      db_name = collection.db.name
+      collection_name = collection.name
+      ns = "#{db_name}.#{collection_name}"
+
+      log.info("#{ns}: Starting record initial sync")
+
+      records = []
+      collection.find({}, :batch_size => @record_fetch_batch_size, :timeout => false, :sort => [['$natural', 1]]) do |cursor|
+        while cursor.has_next?
+          records << cursor.next
+          if records.length > @record_sync_batch_size
+            # TODO: add better logging than this
+            log.info("#{ns}: Running sync of batch of #{records.length} records")
+            hook_initial_sync_record_batch(db_name, collection_name, records)
+            records = []
+          end
+        end
+      end
+      log.info("#{ns}: Finishing sync with a batch of #{records.length} records")
+      hook_initial_sync_record_batch(db_name, collection_name, records)
+
+      log.info("#{ns}: Finished record initial sync")
+    end
+
+    # This should be fine to instantiate all at once, since
+    # database_names returns all the dbs as strings anyway
+    def syncable_databases
+      @tailer.upstream_conn.database_names.map do |db_name|
+        next if db_name == 'local'
+        @tailer.upstream_conn.db(db_name)
+      end.compact
+    end
+
+    def syncable_collections(db)
+      db.collection_names.map do |collection_name|
+        next if collection_name.start_with?('system.')
+        db.collection(collection_name)
+      end.compact
+    end
+
+    def extract_options_from_index_spec(index)
+      options = {}
+      index.each do |key, value|
+        case key
+        when 'v'
+          raise NotImplementedError.new("Only v=1 indexes are supported at the moment, not v=#{value.inspect}") unless value == 1
+        when 'ns', 'key'
+        else
+          options[key.to_sym] = value
+        end
+      end
+
+      assert(options.include?(:name), "No name defined for index spec #{index.inspect}")
+      options
+    end
+
+    def stream_op(entry)
+      op = entry['op']
+      data = entry['o']
+      ns = entry['ns']
+
+      if op == 'n'
+        # This happens for initial rs.initiate() op, maybe others.
+        log.info("Skipping no-op #{entry.inspect}")
+        return
+      end
+
+      db_name, collection_name = parse_ns(ns)
+      assert(db_name, "Nil db name #{db_name.inspect} for #{entry.inspect}")
+
+      case op
+      when 'i'
+        if collection_name == 'system.indexes'
+          record(ns, entry, :create_index)
+          index_db_name, index_collection_name = parse_ns(data['ns'])
+          index_key = data['key'].to_a
+          options = extract_options_from_index_spec(data)
+          hook_stream_create_index(index_db_name, index_collection_name, index_key, options)
+        else
+          record(ns, entry, :insert)
+          hook_stream_insert(db_name, collection_name, data)
+        end
+      when 'u'
+        record(ns, entry, :update)
+        hook_stream_update(db_name, collection_name, entry['o2'], data)
+      when 'd'
+        record(ns, entry, :remove)
+        hook_stream_remove(db_name, collection_name, data)
+      when 'c'
+        assert(collection_name == '$cmd', "Command collection name is #{collection_name.inspect} for #{entry.inspect}")
+        if deleted_from = data['deleteIndexes']
+          record(ns, entry, :drop_index)
+          index = data['index']
+          hook_stream_drop_index(db_name, deleted_from, index)
+        elsif dropped = data['drop']
+          record(ns, entry, :drop_collection)
+          hook_stream_drop_collection(db_name, dropped)
+        elsif dropped = data['dropDatabase']
+          record(ns, entry, :drop_database)
+          hook_stream_drop_database(db_name)
+        elsif source = data['renameCollection']
+          record(ns, entry, :rename_collection)
+          target = data['to']
+          hook_stream_rename_collection(db_name, source, target)
+        elsif create = data['create']
+          record(ns, entry, :create)
+          hook_stream_create_collection(db_name, create)
+        else
+          raise "Unrecognized command #{data.inspect}"
+        end
+      else
+        raise "Unrecognized op: #{op} (#{entry.inspect})"
+      end
+
+      optime = entry['ts']
+      hook_update_optime(optime, false)
+    end
+
+    def tail_from(ts)
+      begin
+        @tailer.tail_from(ts)
+        loop do
+          @tailer.stream do |op|
+            stream_op(op)
+          end
+        end
+      ensure
+        @tailer.stop
+      end
+    end
+
+    def record(ns, entry, type)
+      stats[type] += 1
+      log.debug("#{ns}: #{type.inspect} #{entry.inspect}")
+    end
+
+    protected
+
+    def parse_ns(ns)
+      ns.split('.', 2)
+    end
+  end
+end
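Streambed's hook DSL above is the backbone of both bin scripts: hooks declared without a :default must be overridden, and validate_hooks! enforces that at run time. A hedged sketch of the smallest subclass that passes validation (the class name and the empty method bodies are assumptions about what a no-op consumer would do):

    class NullStreambed < Mongoriver::Streambed
      def hook_optime; -1; end   # -1 asks for the most recent optime
      def hook_update_optime(ts, mandatory); end
      def hook_initial_sync_index(db_name, collection_name, index_key, options); end
      def hook_initial_sync_record_batch(db_name, collection_name, records); end
      def hook_stream_insert(db_name, collection_name, object); end
      def hook_stream_update(db_name, collection_name, selector, update); end
      def hook_stream_remove(db_name, collection_name, object); end
      def hook_stream_create_collection(db_name, create); end
      def hook_stream_rename_collection(db_name, source, target); end
      def hook_stream_drop_index(db_name, collection_name, index_name); end
      def hook_stream_drop_collection(db_name, dropped); end
      def hook_stream_drop_database(db_name); end
    end

(:stream_create_index is the one hook that can be skipped here, since it defaults to :initial_sync_index.)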
data/lib/mongoriver/tailer.rb
ADDED
@@ -0,0 +1,100 @@
+module Mongoriver
+  class Tailer
+    include Mongoriver::Logging
+
+    attr_reader :upstream_conn
+
+    def initialize(upstreams, type)
+      @upstreams = upstreams
+      @type = type
+      # This number seems high
+      @conn_opts = {:op_timeout => 86400}
+
+      @cursor = nil
+
+      connect_upstream
+    end
+
+    def most_recent_timestamp
+      record = oplog_collection.find_one({}, :sort => [['$natural', -1]])
+      record['ts']
+    end
+
+    def connect_upstream
+      case @type
+      when :replset
+        opts = @conn_opts.merge(:read => :secondary)
+        @upstream_conn = Mongo::ReplSetConnection.new(@upstreams, opts)
+      when :slave, :direct
+        opts = @conn_opts.merge(:slave_ok => true)
+        host, port = parse_direct_upstream
+        @upstream_conn = Mongo::Connection.new(host, port, opts)
+        raise "Server at #{@upstream_conn.host}:#{@upstream_conn.port} is the primary -- if you're ok with that, check why your wrapper is passing :direct rather than :slave (HINT: try passing a -a to scripts like optail or mongocp)" if @type == :slave && @upstream_conn.primary?
+        ensure_upstream_replset!
+      when :existing
+        raise "Must pass in a single existing Mongo::Connection with :existing" unless @upstreams.length == 1 && @upstreams[0].respond_to?(:db)
+        @upstream_conn = @upstreams[0]
+      else
+        raise "Invalid connection type: #{@type.inspect}"
+      end
+    end
+
+    def ensure_upstream_replset!
+      # Might be a better way to do this, but not seeing one.
+      config = @upstream_conn['admin'].command(:ismaster => 1)
+      unless config['setName']
+        raise "Server at #{@upstream_conn.host}:#{@upstream_conn.port} is not running as a replica set"
+      end
+    end
+
+    def parse_direct_upstream
+      raise "When connecting directly to a mongo instance, must provide a single upstream" unless @upstreams.length == 1
+      upstream = @upstreams[0]
+      parse_host_spec(upstream)
+    end
+
+    def parse_host_spec(host_spec)
+      host, port = host_spec.split(':')
+      host = '127.0.0.1' if host.to_s.length == 0
+      port = '27017' if port.to_s.length == 0
+      [host, port.to_i]
+    end
+
+    def oplog_collection
+      @upstream_conn.db('local').collection('oplog.rs')
+    end
+
+    def tail_from(ts, opts = {})
+      raise "Already tailing the oplog!" if @cursor
+
+      # Maybe if ts is old enough, just start from the beginning?
+      query = (opts[:filter] || {}).merge({ 'ts' => { '$gte' => ts } })
+
+      oplog_collection.find(query, :timeout => false) do |oplog|
+        oplog.add_option(Mongo::Constants::OP_QUERY_TAILABLE)
+        oplog.add_option(Mongo::Constants::OP_QUERY_OPLOG_REPLAY)
+
+        oplog.add_option(Mongo::Constants::OP_QUERY_AWAIT_DATA) unless opts[:dont_wait]
+
+        log.info("Starting oplog stream from #{ts}")
+        @cursor = oplog
+      end
+    end
+
+    def stop
+      @cursor.close if @cursor
+      @cursor = nil
+    end
+
+    def stream(limit=nil)
+      count = 0
+      while @cursor.has_next?
+        # Stop once limit entries have been yielded
+        break if limit && count >= limit
+        count += 1
+        yield @cursor.next
+      end
+
+      return @cursor.has_next?
+    end
+  end
+end
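For reference, parse_host_spec's defaulting behavior, worked through from the code above:

    tailer.parse_host_spec('db1:27018')   # => ['db1', 27018]
    tailer.parse_host_spec('db1')         # => ['db1', 27017]
    tailer.parse_host_spec(':27018')      # => ['127.0.0.1', 27018]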
data/lib/mongoriver.rb
ADDED
@@ -0,0 +1,12 @@
+require 'mongo'
+require 'log4r'
+
+module Mongoriver; end
+
+require 'mongoriver/log'
+
+require 'mongoriver/streambed'
+require 'mongoriver/tailer'
+require 'mongoriver/abstract_persistent_tailer'
+require 'mongoriver/persistent_tailer'
+require 'mongoriver/version'
data/mongoriver.gemspec
ADDED
@@ -0,0 +1,22 @@
+# -*- coding: utf-8 -*-
+$:.unshift(File.expand_path("lib", File.dirname(__FILE__)))
+require 'mongoriver/version'
+
+Gem::Specification.new do |gem|
+  gem.authors       = ["Greg Brockman"]
+  gem.email         = ["gdb@gregbrockman.com"]
+  gem.description   = %q{Some tools and libraries to simplify tailing the mongod oplog}
+  gem.summary       = %q{mongodb oplog-tailing utilities.}
+  gem.homepage      = ""
+
+  gem.files         = `git ls-files`.split($\)
+  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
+  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
+  gem.name          = "mongoriver"
+  gem.require_paths = ["lib"]
+  gem.version       = Mongoriver::VERSION
+
+  gem.add_runtime_dependency('mongo', '>= 1.7')
+  gem.add_runtime_dependency('bson_ext')
+  gem.add_runtime_dependency('log4r')
+end
metadata
ADDED
@@ -0,0 +1,110 @@
+--- !ruby/object:Gem::Specification
+name: mongoriver
+version: !ruby/object:Gem::Version
+  version: 0.1.0
+  prerelease:
+platform: ruby
+authors:
+- Greg Brockman
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2013-02-05 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: mongo
+  requirement: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '1.7'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '1.7'
+- !ruby/object:Gem::Dependency
+  name: bson_ext
+  requirement: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: log4r
+  requirement: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+description: Some tools and libraries to simplify tailing the mongod oplog
+email:
+- gdb@gregbrockman.com
+executables:
+- mongocp
+- optail
+extensions: []
+extra_rdoc_files: []
+files:
+- .gitignore
+- Gemfile
+- LICENSE
+- README.md
+- Rakefile
+- bin/mongocp
+- bin/optail
+- lib/mongoriver.rb
+- lib/mongoriver/abstract_persistent_tailer.rb
+- lib/mongoriver/log.rb
+- lib/mongoriver/persistent_tailer.rb
+- lib/mongoriver/streambed.rb
+- lib/mongoriver/tailer.rb
+- lib/mongoriver/version.rb
+- mongoriver.gemspec
+homepage: ''
+licenses: []
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 1.8.23
+signing_key:
+specification_version: 3
+summary: mongodb oplog-tailing utilities.
+test_files: []