taps2 0.5.1

@@ -0,0 +1,20 @@
+Sequel::Model.db = Sequel.connect(Taps::Config.taps_database_url)
+
+class DbSession < Sequel::Model
+  plugin :schema
+  set_schema do
+    primary_key :id
+    text :key
+    text :database_url
+    timestamp :started_at
+    timestamp :last_access
+  end
+
+  def conn
+    Sequel.connect(database_url) do |db|
+      yield db if block_given?
+    end
+  end
+end
+
+DbSession.create_table! unless DbSession.table_exists?
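
A minimal usage sketch for the session model above, assuming a session row has already been stored (the key and database URL below are placeholder values):

    # Illustrative sketch; the key and database_url values are placeholders.
    session = DbSession.create(
      :key          => 'abc123',
      :database_url => 'sqlite://local.db',
      :started_at   => Time.now,
      :last_access  => Time.now
    )

    # conn opens a Sequel connection to the stored database_url and yields it
    session.conn { |db| puts db.tables.inspect }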
@@ -0,0 +1,15 @@
+module Taps
+  class BaseError < StandardError
+    attr_reader :original_backtrace
+
+    def initialize(message, opts={})
+      @original_backtrace = opts.delete(:backtrace)
+      super(message)
+    end
+  end
+
+  class NotImplemented < BaseError; end
+  class DuplicatePrimaryKeyError < BaseError; end
+  class CorruptedData < BaseError; end
+  class InvalidData < BaseError; end
+end
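
The `:backtrace` option lets a caller carry a remote stack trace along with the error and surface it locally; a hedged sketch of that pattern (the `remote_trace` variable is hypothetical):

    # Illustrative sketch; remote_trace is a hypothetical backtrace string.
    begin
      raise Taps::CorruptedData.new("bad chunk", :backtrace => remote_trace)
    rescue Taps::BaseError => e
      puts e.message
      puts e.original_backtrace if e.original_backtrace
    end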
@@ -0,0 +1,15 @@
+module Taps
+  def self.log=(log)
+    @@log = log
+  end
+
+  def self.log
+    @@log ||= begin
+      require 'logger'
+      log = Logger.new($stderr)
+      log.level = Logger::ERROR
+      log.datetime_format = "%Y-%m-%d %H:%M:%S"
+      log
+    end
+  end
+end
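
The default logger only reports errors to $stderr; a caller can swap in a more verbose one, for example:

    # Illustrative sketch: enable debug output while troubleshooting.
    require 'logger'
    verbose = Logger.new($stdout)
    verbose.level = Logger::DEBUG
    Taps.log = verbose
    Taps.log.debug "taps logging at debug level"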
@@ -0,0 +1,21 @@
+class Hash
+  def symbolize_keys
+    inject({}) do |options, (key, value)|
+      options[(key.to_sym rescue key) || key] = value
+      options
+    end
+  end
+
+  def symbolize_keys!
+    self.replace(symbolize_keys)
+  end
+
+  def symbolize_recursively!
+    self.replace(symbolize_keys)
+    self.each do |k, v|
+      if v.kind_of?(Hash)
+        v.symbolize_keys!
+      end
+    end
+  end
+end
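
Roughly, the patch above turns string keys into symbols at the top level, and `symbolize_recursively!` also converts the keys of directly nested hashes:

    # Illustrative sketch of the expected behavior.
    h = { 'table_name' => 'users', 'state' => { 'offset' => 0 } }
    h.symbolize_recursively!
    # => { :table_name => 'users', :state => { :offset => 0 } }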
@@ -0,0 +1,73 @@
+require 'restclient'
+require 'rack/utils'
+require 'stringio'
+require 'vendor/okjson'
+
+module Taps
+  class Multipart
+    class Container
+      attr_accessor :attachments
+
+      def initialize
+        @attachments = []
+      end
+
+      def attach(opts)
+        mp = Taps::Multipart.new(opts)
+        attachments << mp
+      end
+
+      def generate
+        hash = {}
+        attachments.each do |mp|
+          hash[mp.name] = mp
+        end
+        m = RestClient::Payload::Multipart.new(hash)
+        [m.to_s, m.headers['Content-Type']]
+      end
+    end
+
+    attr_reader :opts
+
+    def initialize(opts={})
+      @opts = opts
+    end
+
+    def name
+      opts[:name]
+    end
+
+    def to_s
+      opts[:payload]
+    end
+
+    def content_type
+      opts[:content_type] || 'text/plain'
+    end
+
+    def original_filename
+      opts[:original_filename]
+    end
+
+    def self.create
+      c = Taps::Multipart::Container.new
+      yield c
+      c.generate
+    end
+
+    # response is a rest-client response
+    def self.parse(response)
+      content = response.to_s
+      env = {
+        'CONTENT_TYPE' => response.headers[:content_type],
+        'CONTENT_LENGTH' => content.size,
+        'rack.input' => StringIO.new(content)
+      }
+
+      params = Rack::Utils::Multipart.parse_multipart(env)
+      params.symbolize_keys!
+      params
+    end
+
+  end
+end
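
A rough usage sketch of the builder above, mirroring how the Operation class later assembles its push payloads (the payload strings here are placeholders):

    # Illustrative sketch; payload strings are placeholders.
    content, content_type = Taps::Multipart.create do |r|
      r.attach :name => :encoded_data, :payload => "<compressed rows>",
               :content_type => 'application/octet-stream'
      r.attach :name => :json, :payload => '{"state":{}}',
               :content_type => 'application/json'
    end
    # content is the multipart body; content_type carries the boundary header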
@@ -0,0 +1,578 @@
+require 'rest_client'
+require 'sequel'
+require 'zlib'
+
+require 'taps/progress_bar'
+require 'taps/config'
+require 'taps/utils'
+require 'taps/data_stream'
+require 'taps/errors'
+
+# disable warnings, rest client makes a lot of noise right now
+$VERBOSE = nil
+
+module Taps
+  class Operation
+    attr_reader :database_url, :remote_url, :opts
+    attr_reader :session_uri
+
+    def initialize(database_url, remote_url, opts={})
+      @database_url = database_url
+      @remote_url = remote_url
+      @opts = opts
+      @exiting = false
+      @session_uri = opts[:session_uri]
+    end
+
+    def file_prefix
+      "op"
+    end
+
+    def skip_schema?
+      !!opts[:skip_schema]
+    end
+
+    def indexes_first?
+      !!opts[:indexes_first]
+    end
+
+    def table_filter
+      opts[:table_filter]
+    end
+
+    def exclude_tables
+      opts[:exclude_tables] || []
+    end
+
+    def apply_table_filter(tables)
+      return tables unless table_filter || exclude_tables
+
+      re = table_filter ? Regexp.new(table_filter) : nil
+      if tables.kind_of?(Hash)
+        ntables = {}
+        tables.each do |t, d|
+          if !exclude_tables.include?(t.to_s) && (!re || !re.match(t.to_s).nil?)
+            ntables[t] = d
+          end
+        end
+        ntables
+      else
+        tables.reject { |t| exclude_tables.include?(t.to_s) || (re && re.match(t.to_s).nil?) }
+      end
+    end
+
+    def log
+      Taps.log
+    end
+
+    def store_session
+      file = "#{file_prefix}_#{Time.now.strftime("%Y%m%d%H%M")}.dat"
+      puts "\nSaving session to #{file}.."
+      File.open(file, 'w') do |f|
+        f.write(::OkJson.encode(to_hash))
+      end
+    end
+
+    def to_hash
+      {
+        :klass => self.class.to_s,
+        :database_url => database_url,
+        :remote_url => remote_url,
+        :session_uri => session_uri,
+        :stream_state => stream_state,
+        :completed_tables => completed_tables,
+        :table_filter => table_filter,
+      }
+    end
+
+    def exiting?
+      !!@exiting
+    end
+
+    def setup_signal_trap
+      trap("INT") {
+        puts "\nCompleting current action..."
+        @exiting = true
+      }
+
+      trap("TERM") {
+        puts "\nCompleting current action..."
+        @exiting = true
+      }
+    end
+
+    def resuming?
+      opts[:resume] == true
+    end
+
+    def default_chunksize
+      opts[:default_chunksize]
+    end
+
+    def completed_tables
+      opts[:completed_tables] ||= []
+    end
+
+    def stream_state
+      opts[:stream_state] ||= {}
+    end
+
+    def stream_state=(val)
+      opts[:stream_state] = val
+    end
+
+    def compression_disabled?
+      !!opts[:disable_compression]
+    end
+
+    def db
+      @db ||= Sequel.connect(database_url)
+    end
+
+    def server
+      @server ||= RestClient::Resource.new(remote_url)
+    end
+
+    def session_resource
+      @session_resource ||= begin
+        @session_uri ||= server['sessions'].post('', http_headers).to_s
+        server[@session_uri]
+      end
+    end
+
+    def set_session(uri)
+      session_uri = uri
+      @session_resource = server[session_uri]
+    end
+
+    def close_session
+      @session_resource.delete(http_headers) if @session_resource
+    end
+
+    def safe_url(url)
+      url.sub(/\/\/(.+?)?:(.*?)@/, '//\1:[hidden]@')
+    end
+
+    def safe_remote_url
+      safe_url(remote_url)
+    end
+
+    def safe_database_url
+      safe_url(database_url)
+    end
+
+    def http_headers(extra = {})
+      base = { :taps_version => Taps.version }
+      if compression_disabled?
+        base[:accept_encoding] = ""
+      else
+        base[:accept_encoding] = "gzip, deflate"
+      end
+      base.merge(extra)
+    end
+
+    def format_number(num)
+      num.to_s.gsub(/(\d)(?=(\d\d\d)+(?!\d))/, "\\1,")
+    end
+
+    def verify_server
+      begin
+        server['/'].get(http_headers)
+      rescue RestClient::RequestFailed => e
+        if e.http_code == 417
+          puts "#{safe_remote_url} is running a different minor version of taps."
+          puts "#{e.response.to_s}"
+          exit(1)
+        else
+          raise
+        end
+      rescue RestClient::Unauthorized
+        puts "Bad credentials given for #{safe_remote_url}"
+        exit(1)
+      rescue Errno::ECONNREFUSED
+        puts "Can't connect to #{safe_remote_url}. Please check that it's running"
+        exit(1)
+      end
+    end
+
+    def catch_errors(&blk)
+      verify_server
+
+      begin
+        blk.call
+        close_session
+      rescue RestClient::Exception, Taps::BaseError => e
+        store_session
+        if e.kind_of?(Taps::BaseError)
+          puts "!!! Caught Server Exception"
+          puts "#{e.class}: #{e.message}"
+          puts "\n#{e.original_backtrace}" if e.original_backtrace
+          exit(1)
+        elsif e.respond_to?(:response)
+          puts "!!! Caught Server Exception"
+          puts "HTTP CODE: #{e.http_code}"
+          puts "#{e.response.to_s}"
+          exit(1)
+        else
+          raise
+        end
+      end
+    end
+
+    def self.factory(type, database_url, remote_url, opts)
+      type = :resume if opts[:resume]
+      klass = case type
+        when :pull then Taps::Pull
+        when :push then Taps::Push
+        when :resume then eval(opts[:klass])
+        else raise "Unknown Operation Type -> #{type}"
+      end
+
+      klass.new(database_url, remote_url, opts)
+    end
+  end
+
+  class Pull < Operation
+    def file_prefix
+      "pull"
+    end
+
+    def to_hash
+      super.merge(:remote_tables_info => remote_tables_info)
+    end
+
+    def run
+      catch_errors do
+        unless resuming?
+          pull_schema if !skip_schema?
+          pull_indexes if indexes_first? && !skip_schema?
+        end
+        setup_signal_trap
+        pull_partial_data if resuming?
+        pull_data
+        pull_indexes if !indexes_first? && !skip_schema?
+        pull_reset_sequences
+      end
+    end
+
+    def pull_schema
+      puts "Receiving schema"
+
+      progress = ProgressBar.new('Schema', tables.size)
+      tables.each do |table_name, count|
+        schema_data = session_resource['pull/schema'].post({:table_name => table_name}, http_headers).to_s
+        log.debug "Table: #{table_name}\n#{schema_data}\n"
+        output = Taps::Utils.load_schema(database_url, schema_data)
+        output = output.to_s.strip
+        puts output unless output.empty?
+        progress.inc(1)
+      end
+      progress.finish
+    end
+
+    def pull_data
+      puts "Receiving data"
+
+      puts "#{tables.size} tables, #{format_number(record_count)} records"
+
+      tables.each do |table_name, count|
+        progress = ProgressBar.new(table_name.to_s, count)
+        stream = Taps::DataStream.factory(db, {
+          :chunksize => default_chunksize,
+          :table_name => table_name
+        })
+        pull_data_from_table(stream, progress)
+      end
+    end
+
+    def pull_partial_data
+      return if stream_state == {}
+
+      table_name = stream_state[:table_name]
+      record_count = tables[table_name.to_s]
+      puts "Resuming #{table_name}, #{format_number(record_count)} records"
+
+      progress = ProgressBar.new(table_name.to_s, record_count)
+      stream = Taps::DataStream.factory(db, stream_state)
+      pull_data_from_table(stream, progress)
+    end
+
+    def pull_data_from_table(stream, progress)
+      loop do
+        begin
+          if exiting?
+            store_session
+            exit 0
+          end
+
+          size = stream.fetch_remote(session_resource['pull/table'], http_headers)
+          break if stream.complete?
+          progress.inc(size) unless exiting?
+          stream.error = false
+          self.stream_state = stream.to_hash
+        rescue Taps::CorruptedData => e
+          puts "Corrupted Data Received #{e.message}, retrying..."
+          stream.error = true
+          next
+        end
+      end
+
+      progress.finish
+      completed_tables << stream.table_name.to_s
+      self.stream_state = {}
+    end
+
+    def tables
+      h = {}
+      remote_tables_info.each do |table_name, count|
+        next if completed_tables.include?(table_name.to_s)
+        h[table_name.to_s] = count
+      end
+      h
+    end
+
+    def record_count
+      @record_count ||= remote_tables_info.values.inject(0) { |a,c| a += c }
+    end
+
+    def remote_tables_info
+      opts[:remote_tables_info] ||= fetch_remote_tables_info
+    end
+
+    def fetch_remote_tables_info
+      retries = 0
+      max_retries = 10
+      begin
+        tables = ::OkJson.decode(session_resource['pull/table_names'].get(http_headers).to_s)
+      rescue RestClient::Exception
+        retries += 1
+        retry if retries <= max_retries
+        puts "Unable to fetch tables information from #{remote_url}. Please check the server log."
+        exit(1)
+      end
+
+      data = {}
+      apply_table_filter(tables).each do |table_name|
+        retries = 0
+        begin
+          count = Integer(session_resource['pull/table_count'].post({:table => table_name}, http_headers).to_s)
+          data[table_name] = count
+        rescue RestClient::Exception
+          retries += 1
+          retry if retries <= max_retries
+          puts "Unable to fetch tables information from #{remote_url}. Please check the server log."
+          exit(1)
+        end
+      end
+      data
+    end
+
+    def pull_indexes
+      puts "Receiving indexes"
+
+      idxs = ::OkJson.decode(session_resource['pull/indexes'].get(http_headers).to_s)
+
+      apply_table_filter(idxs).each do |table, indexes|
+        next unless indexes.size > 0
+        progress = ProgressBar.new(table, indexes.size)
+        indexes.each do |idx|
+          output = Taps::Utils.load_indexes(database_url, idx)
+          output = output.to_s.strip
+          puts output unless output.empty?
+          progress.inc(1)
+        end
+        progress.finish
+      end
+    end
+
+    def pull_reset_sequences
+      puts "Resetting sequences"
+
+      output = Taps::Utils.schema_bin(:reset_db_sequences, database_url)
+      output = output.to_s.strip
+      puts output unless output.empty?
+    end
+  end
+
+  class Push < Operation
+    def file_prefix
+      "push"
+    end
+
+    def to_hash
+      super.merge(:local_tables_info => local_tables_info)
+    end
+
+    def run
+      catch_errors do
+        unless resuming?
+          push_schema if !skip_schema?
+          push_indexes if indexes_first? && !skip_schema?
+        end
+        setup_signal_trap
+        push_partial_data if resuming?
+        push_data
+        push_indexes if !indexes_first? && !skip_schema?
+        push_reset_sequences
+      end
+    end
+
+    def push_indexes
+      idxs = ::OkJson.decode(Taps::Utils.schema_bin(:indexes_individual, database_url))
+
+      return unless idxs.size > 0
+
+      puts "Sending indexes"
+
+      apply_table_filter(idxs).each do |table, indexes|
+        next unless indexes.size > 0
+        progress = ProgressBar.new(table, indexes.size)
+        indexes.each do |idx|
+          session_resource['push/indexes'].post(idx, http_headers)
+          progress.inc(1)
+        end
+        progress.finish
+      end
+    end
+
+    def push_schema
+      puts "Sending schema"
+
+      progress = ProgressBar.new('Schema', tables.size)
+      tables.each do |table, count|
+        schema_data = Taps::Utils.schema_bin(:dump_table, database_url, table)
+        log.debug "Table: #{table}\n#{schema_data}\n"
+        session_resource['push/schema'].post(schema_data, http_headers)
+        progress.inc(1)
+      end
+      progress.finish
+    end
+
+    def push_reset_sequences
+      puts "Resetting sequences"
+
+      session_resource['push/reset_sequences'].post('', http_headers)
+    end
+
+    def push_partial_data
+      return if stream_state == {}
+
+      table_name = stream_state[:table_name]
+      record_count = tables[table_name.to_s]
+      puts "Resuming #{table_name}, #{format_number(record_count)} records"
+      progress = ProgressBar.new(table_name.to_s, record_count)
+      stream = Taps::DataStream.factory(db, stream_state)
+      push_data_from_table(stream, progress)
+    end
+
+    def push_data
+      puts "Sending data"
+
+      puts "#{tables.size} tables, #{format_number(record_count)} records"
+
+      tables.each do |table_name, count|
+        stream = Taps::DataStream.factory(db,
+          :table_name => table_name,
+          :chunksize => default_chunksize)
+        progress = ProgressBar.new(table_name.to_s, count)
+        push_data_from_table(stream, progress)
+      end
+    end
+
+    def push_data_from_table(stream, progress)
+      loop do
+        if exiting?
+          store_session
+          exit 0
+        end
+
+        row_size = 0
+        chunksize = stream.state[:chunksize]
+
+        begin
+          chunksize = Taps::Utils.calculate_chunksize(chunksize) do |c|
+            stream.state[:chunksize] = c.to_i
+            encoded_data, row_size, elapsed_time = nil
+            d1 = c.time_delta do
+              encoded_data, row_size, elapsed_time = stream.fetch
+            end
+            break if stream.complete?
+
+            data = nil
+            d2 = c.time_delta do
+              data = {
+                :state => stream.to_hash,
+                :checksum => Taps::Utils.checksum(encoded_data).to_s
+              }
+            end
+
+            begin
+              content, content_type = nil
+              d3 = c.time_delta do
+                content, content_type = Taps::Multipart.create do |r|
+                  r.attach :name => :encoded_data,
+                           :payload => encoded_data,
+                           :content_type => 'application/octet-stream'
+                  r.attach :name => :json,
+                           :payload => ::OkJson.encode(data),
+                           :content_type => 'application/json'
+                end
+              end
+              session_resource['push/table'].post(content, http_headers(:content_type => content_type))
+              self.stream_state = stream.to_hash
+            rescue => e
+              Taps::Utils.reraise_server_exception(e)
+            end
+
+            c.idle_secs = (d1 + d2 + d3)
+
+            elapsed_time
+          end
+        rescue Taps::CorruptedData => e
+          # retry the same data, it got corrupted somehow.
+          next
+        rescue Taps::DuplicatePrimaryKeyError => e
+          # verify the stream and retry it
+          stream = stream.verify_remote_stream(session_resource['push/verify_stream'], http_headers)
+          next
+        end
+        stream.state[:chunksize] = chunksize
+
+        progress.inc(row_size)
+
+        stream.increment(row_size)
+        break if stream.complete?
+      end
+
+      progress.finish
+      completed_tables << stream.table_name.to_s
+      self.stream_state = {}
+    end
+
+    def local_tables_info
+      opts[:local_tables_info] ||= fetch_local_tables_info
+    end
+
+    def tables
+      h = {}
+      local_tables_info.each do |table_name, count|
+        next if completed_tables.include?(table_name.to_s)
+        h[table_name.to_s] = count
+      end
+      h
+    end
+
+    def record_count
+      @record_count ||= local_tables_info.values.inject(0) { |a,c| a += c }
+    end
+
+    def fetch_local_tables_info
+      tables_with_counts = {}
+      db.tables.each do |table|
+        tables_with_counts[table] = db[table.to_sym.identifier].count
+      end
+      apply_table_filter(tables_with_counts)
+    end
+
+  end
+end
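
For orientation, `Operation.factory` is the entry point that picks the Pull or Push class and runs it; a hedged sketch of driving a pull (the URLs and chunk size below are placeholders):

    # Illustrative sketch; URLs and the chunk size are placeholders.
    op = Taps::Operation.factory(:pull,
      'postgres://localhost/myapp_development',
      'http://user:password@taps.example.com:5000',
      :default_chunksize => 1000)
    op.run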