taps-taps 0.3.24

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,20 @@
1
# Bind all Sequel models to the database named by the taps configuration.
Sequel::Model.db = Sequel.connect(Taps::Config.taps_database_url)

# Sequel model holding one row per session: a key, the database URL the
# session works against, and two timestamps (start and last access).
class DbSession < Sequel::Model
  plugin :schema

  set_schema do
    primary_key :id
    text :key
    text :database_url
    timestamp :started_at
    timestamp :last_access
  end

  # Opens a connection to this session's database_url using the block form
  # of Sequel.connect and yields the connection to the caller's block, if
  # one was given.
  def conn
    Sequel.connect(database_url) { |db| yield db if block_given? }
  end
end

# Create the backing table the first time this file is loaded.
DbSession.create_table! unless DbSession.table_exists?
@@ -0,0 +1,15 @@
1
module Taps
  # Common ancestor of the taps error classes. In addition to the message it
  # can carry a backtrace supplied by the caller (read back through
  # #original_backtrace).
  class BaseError < StandardError
    attr_reader :original_backtrace

    # message - the exception message.
    # opts    - optional hash; only :backtrace is read.
    #
    # The hash is read, not modified, so callers may pass a shared or frozen
    # hash safely.
    def initialize(message, opts={})
      @original_backtrace = opts[:backtrace]
      super(message)
    end
  end

  class NotImplemented < BaseError; end
  class DuplicatePrimaryKeyError < BaseError; end
  class CorruptedData < BaseError; end
  class InvalidData < BaseError; end
end
@@ -0,0 +1,15 @@
1
module Taps
  # Replaces the logger returned by Taps.log.
  def self.log=(log)
    @@log = log
  end

  # Returns the current logger. When none has been assigned (or it was set
  # to nil/false) a Logger writing to $stderr at ERROR level is built,
  # remembered and returned.
  def self.log
    return @@log if defined?(@@log) && @@log

    # Loaded on first use only, as in the original implementation.
    require 'logger'
    fallback = Logger.new($stderr)
    fallback.level = Logger::ERROR
    fallback.datetime_format = "%Y-%m-%d %H:%M:%S"
    @@log = fallback
  end
end
@@ -0,0 +1,21 @@
1
# Key-symbolizing helpers added to Hash.
class Hash
  # Returns a new hash whose keys are converted with #to_sym where that
  # works. A key that cannot be converted (to_sym raises or returns nil) is
  # kept unchanged.
  def symbolize_keys
    inject({}) do |options, (key, value)|
      symbol = begin
        key.to_sym
      rescue StandardError
        nil
      end
      options[symbol || key] = value
      options
    end
  end

  # In-place variant of #symbolize_keys. Returns self.
  def symbolize_keys!
    self.replace(symbolize_keys)
  end

  # Symbolizes the keys of this hash and of every hash nested inside it as
  # a value, at any depth. Hashes stored inside arrays are not visited.
  # Returns self.
  def symbolize_recursively!
    symbolize_keys!
    each do |_key, value|
      # Recurse instead of calling symbolize_keys!, which would stop after
      # one nested level.
      value.symbolize_recursively! if value.kind_of?(Hash)
    end
  end
end
@@ -0,0 +1,73 @@
1
+ require 'restclient'
2
+ require 'rack/utils'
3
+ require 'stringio'
4
+ require 'vendor/okjson'
5
+
6
module Taps
  # One named part of a multipart body. The option hash supplies :name,
  # :payload and, optionally, :content_type and :original_filename.
  class Multipart
    # Collects parts and renders them through
    # RestClient::Payload::Multipart.
    class Container
      attr_accessor :attachments

      def initialize
        @attachments = []
      end

      # Wraps opts in a Taps::Multipart and appends it to the attachments.
      # Returns the attachments array.
      def attach(opts)
        attachments << Taps::Multipart.new(opts)
      end

      # Returns [body, content_type] for the collected parts. Parts are
      # keyed by name, so a later part with the same name replaces an
      # earlier one.
      def generate
        parts = attachments.inject({}) do |by_name, part|
          by_name[part.name] = part
          by_name
        end
        payload = RestClient::Payload::Multipart.new(parts)
        [payload.to_s, payload.headers['Content-Type']]
      end
    end

    attr_reader :opts

    def initialize(opts={})
      @opts = opts
    end

    def name
      opts[:name]
    end

    # The raw payload of this part.
    def to_s
      opts[:payload]
    end

    # Falls back to 'text/plain' when no :content_type was given.
    def content_type
      opts[:content_type] || 'text/plain'
    end

    def original_filename
      opts[:original_filename]
    end

    # Yields a fresh Container to the block and returns the
    # [body, content_type] pair produced by Container#generate.
    def self.create
      container = Taps::Multipart::Container.new
      yield container
      container.generate
    end

    # Parses a multipart HTTP response into a hash with symbol keys.
    #
    # response - a rest-client response; only #to_s and
    #            #headers[:content_type] are used.
    def self.parse(response)
      content = response.to_s
      env = {
        'CONTENT_TYPE' => response.headers[:content_type],
        # Byte length, not character count: the two differ as soon as the
        # body contains multibyte characters.
        'CONTENT_LENGTH' => content.bytesize,
        'rack.input' => StringIO.new(content)
      }

      params = Rack::Utils::Multipart.parse_multipart(env)
      params.symbolize_keys!
      params
    end
  end
end
@@ -0,0 +1,577 @@
1
+ require 'rest_client'
2
+ require 'sequel'
3
+ require 'zlib'
4
+
5
+ require 'taps/progress_bar'
6
+ require 'taps/config'
7
+ require 'taps/utils'
8
+ require 'taps/data_stream'
9
+ require 'taps/errors'
10
+
11
+ # disable warnings, rest client makes a lot of noise right now
12
+ $VERBOSE = nil
13
+
14
+ module Taps
15
+
16
# Behaviour shared by the Pull and Push operations: option accessors, table
# filtering, the remote session resource, HTTP headers, session persistence
# and error reporting.
class Operation
  attr_reader :database_url, :remote_url, :opts
  attr_reader :session_uri

  # database_url - URL passed to Sequel.connect by #db.
  # remote_url   - base URL wrapped in a RestClient::Resource by #server.
  # opts         - option hash. Keys read by this class: :session_uri,
  #                :skip_schema, :indexes_first, :table_filter,
  #                :exclude_tables, :resume, :default_chunksize,
  #                :completed_tables, :stream_state, :disable_compression
  #                and (in .factory) :klass.
  def initialize(database_url, remote_url, opts={})
    @database_url = database_url
    @remote_url = remote_url
    @opts = opts
    @exiting = false
    @session_uri = opts[:session_uri]
  end

  # Prefix of the session file written by #store_session; subclasses
  # override it.
  def file_prefix
    "op"
  end

  def skip_schema?
    !!opts[:skip_schema]
  end

  def indexes_first?
    !!opts[:indexes_first]
  end

  # Regexp source (a string) used to select tables, or nil.
  def table_filter
    opts[:table_filter]
  end

  # Names of tables to leave out; an empty array when none were given.
  def exclude_tables
    opts[:exclude_tables] || []
  end

  # Filters tables (a Hash keyed by table name, or a list of table names)
  # down to those that are not excluded and, when a table filter is set,
  # match it. When neither a filter nor exclusions are configured the
  # argument is returned as is.
  def apply_table_filter(tables)
    return tables if !table_filter && exclude_tables.empty?

    pattern = table_filter ? Regexp.new(table_filter) : nil
    wanted = lambda do |name|
      !exclude_tables.include?(name.to_s) && (pattern.nil? || !pattern.match(name.to_s).nil?)
    end

    if tables.kind_of?(Hash)
      tables.inject({}) do |kept, (name, info)|
        kept[name] = info if wanted.call(name)
        kept
      end
    else
      tables.select { |name| wanted.call(name) }
    end
  end

  def log
    Taps.log
  end

  # Writes #to_hash, JSON-encoded, to "<file_prefix>_<YYYYmmddHHMM>.dat" in
  # the current directory.
  def store_session
    file = "#{file_prefix}_#{Time.now.strftime("%Y%m%d%H%M")}.dat"
    puts "\nSaving session to #{file}.."
    File.open(file, 'w') { |f| f.write(OkJson.encode(to_hash)) }
  end

  # State saved by #store_session.
  def to_hash
    {
      :klass => self.class.to_s,
      :database_url => database_url,
      :remote_url => remote_url,
      :session_uri => session_uri,
      :stream_state => stream_state,
      :completed_tables => completed_tables,
      :table_filter => table_filter,
    }
  end

  # True once INT or TERM has been received (see #setup_signal_trap).
  def exiting?
    !!@exiting
  end

  # Installs INT and TERM handlers that only flag the operation as exiting;
  # the transfer loops check #exiting? between chunks.
  def setup_signal_trap
    %w[INT TERM].each do |signal|
      trap(signal) do
        puts "\nCompleting current action..."
        @exiting = true
      end
    end
  end

  def resuming?
    opts[:resume] == true
  end

  def default_chunksize
    opts[:default_chunksize]
  end

  # Names of the tables already transferred; kept in opts so that it ends
  # up in the stored session.
  def completed_tables
    opts[:completed_tables] ||= []
  end

  # State hash of the stream currently being transferred ({} when idle).
  def stream_state
    opts[:stream_state] ||= {}
  end

  def stream_state=(val)
    opts[:stream_state] = val
  end

  def compression_disabled?
    !!opts[:disable_compression]
  end

  # Memoized Sequel connection to database_url.
  def db
    @db ||= Sequel.connect(database_url)
  end

  # Memoized RestClient resource for remote_url.
  def server
    @server ||= RestClient::Resource.new(remote_url)
  end

  # Resource for the current session. When no session URI is known yet, one
  # is obtained by POSTing to 'sessions' on the server.
  def session_resource
    @session_resource ||= begin
      @session_uri ||= server['sessions'].post('', http_headers).to_s
      server[@session_uri]
    end
  end

  # Switches to the session at uri. The URI is stored in @session_uri so
  # that #session_uri and #to_hash report it (the original assigned a local
  # variable and lost it).
  def set_session(uri)
    @session_uri = uri
    @session_resource = server[@session_uri]
  end

  # Sends DELETE to the session resource, if one has been set up.
  def close_session
    @session_resource.delete(http_headers) if @session_resource
  end

  # Returns url with the password part of "user:password@" replaced by
  # "[hidden]".
  def safe_url(url)
    url.sub(/\/\/(.+?)?:(.*?)@/, '//\1:[hidden]@')
  end

  def safe_remote_url
    safe_url(remote_url)
  end

  def safe_database_url
    safe_url(database_url)
  end

  # Headers sent with every request: the taps version and the accepted
  # encodings (empty when compression is disabled), merged with extra.
  def http_headers(extra = {})
    base = {
      :taps_version => Taps.version,
      :accept_encoding => compression_disabled? ? "" : "gzip, deflate"
    }
    base.merge(extra)
  end

  # Formats num with a comma between each group of three digits.
  def format_number(num)
    num.to_s.gsub(/(\d)(?=(\d\d\d)+(?!\d))/, "\\1,")
  end

  # Issues a GET on the server root and turns the known failure modes into
  # a message plus exit(1). Any other request failure is re-raised.
  def verify_server
    server['/'].get(http_headers)
  rescue RestClient::Unauthorized
    # Must be rescued before RequestFailed: where Unauthorized is a
    # subclass of RequestFailed, the broader clause would otherwise catch it
    # and re-raise, so this message would never be shown.
    puts "Bad credentials given for #{safe_remote_url}"
    exit(1)
  rescue RestClient::RequestFailed => e
    raise unless e.http_code == 417

    puts "#{safe_remote_url} is running a different minor version of taps."
    puts "#{e.response.to_s}"
    exit(1)
  rescue Errno::ECONNREFUSED
    puts "Can't connect to #{safe_remote_url}. Please check that it's running"
    exit(1)
  end

  # Verifies the server, runs the block and closes the session. On a
  # RestClient or taps error the session is stored first; the error is then
  # printed and the process exits with status 1, or re-raised when it
  # carries no response.
  def catch_errors(&blk)
    verify_server

    begin
      blk.call
      close_session
    rescue RestClient::Exception, Taps::BaseError => e
      store_session
      if e.kind_of?(Taps::BaseError)
        puts "!!! Caught Server Exception"
        puts "#{e.class}: #{e.message}"
        puts "\n#{e.original_backtrace}" if e.original_backtrace
        exit(1)
      elsif e.respond_to?(:response)
        puts "!!! Caught Server Exception"
        puts "HTTP CODE: #{e.http_code}"
        puts "#{e.response.to_s}"
        exit(1)
      else
        raise
      end
    end
  end

  # Builds the operation for type (:pull or :push). When opts[:resume] is
  # set, the class named in opts[:klass] is used instead. Raises a
  # RuntimeError for an unknown type.
  def self.factory(type, database_url, remote_url, opts)
    type = :resume if opts[:resume]
    klass = case type
      when :pull then Taps::Pull
      when :push then Taps::Push
      # NOTE(review): opts[:klass] is evaluated as Ruby code. If it is read
      # from a stored session file, that file must be trusted; consider a
      # whitelist of operation classes instead of eval.
      when :resume then eval(opts[:klass])
      else raise "Unknown Operation Type -> #{type}"
    end

    klass.new(database_url, remote_url, opts)
  end
end
235
+
236
# Operation that copies schema, data and indexes from the remote taps
# server into the local database.
class Pull < Operation
  def file_prefix
    "pull"
  end

  # Adds the remote table counts to the stored session state.
  def to_hash
    super.merge(:remote_tables_info => remote_tables_info)
  end

  # Runs the whole pull inside catch_errors. Schema (and, when requested,
  # indexes first) are skipped when resuming; a resumed run first finishes
  # the table recorded in stream_state.
  def run
    catch_errors do
      unless resuming?
        pull_schema if !skip_schema?
        pull_indexes if indexes_first? && !skip_schema?
      end
      setup_signal_trap
      pull_partial_data if resuming?
      pull_data
      pull_indexes if !indexes_first? && !skip_schema?
      pull_reset_sequences
    end
  end

  # Requests the schema of each pending table from the server and loads it
  # into the local database through Taps::Utils.load_schema.
  def pull_schema
    puts "Receiving schema"

    progress = ProgressBar.new('Schema', tables.size)
    tables.each do |table_name, count|
      schema_data = session_resource['pull/schema'].post({:table_name => table_name}, http_headers).to_s
      log.debug "Table: #{table_name}\n#{schema_data}\n"
      output = Taps::Utils.load_schema(database_url, schema_data)
      puts output if output
      progress.inc(1)
    end
    progress.finish
  end

  # Pulls every pending table, one data stream per table.
  def pull_data
    puts "Receiving data"

    puts "#{tables.size} tables, #{format_number(record_count)} records"

    tables.each do |table_name, count|
      progress = ProgressBar.new(table_name.to_s, count)
      stream = Taps::DataStream.factory(db, {
        :chunksize => default_chunksize,
        :table_name => table_name
      })
      pull_data_from_table(stream, progress)
    end
  end

  # Finishes the table described by stream_state, if there is one.
  def pull_partial_data
    return if stream_state == {}

    table_name = stream_state[:table_name]
    total = tables[table_name.to_s]
    puts "Resuming #{table_name}, #{format_number(total)} records"

    progress = ProgressBar.new(table_name.to_s, total)
    stream = Taps::DataStream.factory(db, stream_state)
    pull_data_from_table(stream, progress)
  end

  # Fetches chunks for stream until it reports completion. Between chunks
  # an interrupt request stores the session and exits with status 0. A
  # chunk that raises Taps::CorruptedData is requested again.
  def pull_data_from_table(stream, progress)
    loop do
      begin
        if exiting?
          store_session
          exit 0
        end

        size = stream.fetch_remote(session_resource['pull/table'], http_headers)
        break if stream.complete?
        progress.inc(size) unless exiting?
        stream.error = false
        self.stream_state = stream.to_hash
      rescue Taps::CorruptedData => e
        puts "Corrupted Data Received #{e.message}, retrying..."
        stream.error = true
        next
      end
    end

    progress.finish
    completed_tables << stream.table_name.to_s
    self.stream_state = {}
  end

  # Remote tables that have not been completed yet, as name => row count
  # with string names.
  def tables
    remote_tables_info.inject({}) do |pending, (table_name, count)|
      pending[table_name.to_s] = count unless completed_tables.include?(table_name.to_s)
      pending
    end
  end

  # Total row count over all remote tables (memoized).
  def record_count
    @record_count ||= remote_tables_info.values.inject(0) { |sum, count| sum + count }
  end

  # Table name => row count for the remote database; cached in opts so it
  # is part of the stored session.
  def remote_tables_info
    opts[:remote_tables_info] ||= fetch_remote_tables_info
  end

  # Asks the server for its table names, applies the table filter and then
  # asks for the row count of each remaining table.
  def fetch_remote_tables_info
    names = with_table_info_retries do
      OkJson.decode(session_resource['pull/table_names'].get(http_headers).to_s)
    end

    data = {}
    apply_table_filter(names).each do |table_name|
      data[table_name] = with_table_info_retries do
        session_resource['pull/table_count'].post({:table => table_name}, http_headers).to_s.to_i
      end
    end
    data
  end

  # Fetches the index definitions from the server and loads them for every
  # table that passes the table filter.
  def pull_indexes
    puts "Receiving indexes"

    idxs = OkJson.decode(session_resource['pull/indexes'].get(http_headers).to_s)

    apply_table_filter(idxs).each do |table, indexes|
      next unless indexes.size > 0
      progress = ProgressBar.new(table, indexes.size)
      indexes.each do |idx|
        output = Taps::Utils.load_indexes(database_url, idx)
        puts output if output
        progress.inc(1)
      end
      progress.finish
    end
  end

  def pull_reset_sequences
    puts "Resetting sequences"

    output = Taps::Utils.schema_bin(:reset_db_sequences, database_url)
    puts output if output
  end

  private

  # Runs the block, retrying it on RestClient::Exception up to max_retries
  # times. When the retries are used up a message is printed and the
  # process exits with status 1. The message uses safe_remote_url so that
  # credentials embedded in the URL are not printed.
  def with_table_info_retries(max_retries = 10)
    retries = 0
    begin
      yield
    rescue RestClient::Exception
      retries += 1
      retry if retries <= max_retries
      puts "Unable to fetch tables information from #{safe_remote_url}. Please check the server log."
      exit(1)
    end
  end
end
394
+
395
# Operation that sends schema, data and indexes from the local database to
# the remote taps server.
class Push < Operation
  def file_prefix
    "push"
  end

  # Adds the local table counts to the stored session state.
  def to_hash
    super.merge(:local_tables_info => local_tables_info)
  end

  # Runs the whole push inside catch_errors. Schema (and, when requested,
  # indexes first) are skipped when resuming; a resumed run first finishes
  # the table recorded in stream_state.
  def run
    catch_errors do
      unless resuming?
        push_schema unless skip_schema?
        push_indexes if indexes_first? && !skip_schema?
      end
      setup_signal_trap
      push_partial_data if resuming?
      push_data
      push_indexes if !indexes_first? && !skip_schema?
      push_reset_sequences
    end
  end

  # Dumps the local index definitions and posts them one by one for every
  # table that passes the table filter.
  def push_indexes
    idxs = OkJson.decode(Taps::Utils.schema_bin(:indexes_individual, database_url))

    return unless idxs.size > 0

    puts "Sending indexes"

    apply_table_filter(idxs).each do |table, indexes|
      next unless indexes.size > 0

      bar = ProgressBar.new(table, indexes.size)
      indexes.each do |idx|
        session_resource['push/indexes'].post(idx, http_headers)
        bar.inc(1)
      end
      bar.finish
    end
  end

  # Dumps the schema of each pending table and posts it to the server.
  def push_schema
    puts "Sending schema"

    bar = ProgressBar.new('Schema', tables.size)
    tables.each do |table, count|
      schema_data = Taps::Utils.schema_bin(:dump_table, database_url, table)
      log.debug "Table: #{table}\n#{schema_data}\n"
      session_resource['push/schema'].post(schema_data, http_headers)
      bar.inc(1)
    end
    bar.finish
  end

  def push_reset_sequences
    puts "Resetting sequences"

    session_resource['push/reset_sequences'].post('', http_headers)
  end

  # Finishes the table described by stream_state, if there is one.
  def push_partial_data
    return if stream_state == {}

    table_name = stream_state[:table_name]
    total = tables[table_name.to_s]
    puts "Resuming #{table_name}, #{format_number(total)} records"
    bar = ProgressBar.new(table_name.to_s, total)
    stream = Taps::DataStream.factory(db, stream_state)
    push_data_from_table(stream, bar)
  end

  # Pushes every pending table, one data stream per table.
  def push_data
    puts "Sending data"

    puts "#{tables.size} tables, #{format_number(record_count)} records"

    tables.each do |table_name, count|
      stream_opts = { :table_name => table_name, :chunksize => default_chunksize }
      stream = Taps::DataStream.factory(db, stream_opts)
      bar = ProgressBar.new(table_name.to_s, count)
      push_data_from_table(stream, bar)
    end
  end

  # Sends stream to the server chunk by chunk until it reports completion.
  #
  # Each pass fetches one chunk, posts it as a two-part multipart body (the
  # encoded rows plus a JSON part holding the stream state and a checksum)
  # and lets Taps::Utils.calculate_chunksize pick the next chunk size. The
  # statement order below is significant and mirrors the original exactly.
  def push_data_from_table(stream, progress)
    loop do
      # An interrupt was requested: persist progress and stop cleanly.
      if exiting?
        store_session
        exit 0
      end

      row_size = 0
      chunksize = stream.state[:chunksize]

      begin
        chunksize = Taps::Utils.calculate_chunksize(chunksize) do |c|
          stream.state[:chunksize] = c.to_i
          # row_size here is the variable of the enclosing loop body; it is
          # read again after this block has run.
          encoded_data, row_size, elapsed_time = nil
          fetch_secs = c.time_delta do
            encoded_data, row_size, elapsed_time = stream.fetch
          end
          # Leaves calculate_chunksize early once the stream is exhausted.
          break if stream.complete?

          meta = nil
          checksum_secs = c.time_delta do
            meta = {
              :state => stream.to_hash,
              :checksum => Taps::Utils.checksum(encoded_data).to_s
            }
          end

          begin
            content, content_type = nil
            build_secs = c.time_delta do
              content, content_type = Taps::Multipart.create do |r|
                r.attach :name => :encoded_data,
                  :payload => encoded_data,
                  :content_type => 'application/octet-stream'
                r.attach :name => :json,
                  :payload => OkJson.encode(meta),
                  :content_type => 'application/json'
              end
            end
            session_resource['push/table'].post(content, http_headers(:content_type => content_type))
            self.stream_state = stream.to_hash
          rescue StandardError => e
            Taps::Utils.reraise_server_exception(e)
          end

          # Time spent locally (fetching, checksumming, building the body)
          # is reported to the chunk size calculation as idle time.
          c.idle_secs = (fetch_secs + checksum_secs + build_secs)

          elapsed_time
        end
      rescue Taps::CorruptedData
        # retry the same data, it got corrupted somehow.
        next
      rescue Taps::DuplicatePrimaryKeyError
        # verify the stream and retry it
        stream = stream.verify_remote_stream(session_resource['push/verify_stream'], http_headers)
        next
      end
      stream.state[:chunksize] = chunksize

      progress.inc(row_size)

      stream.increment(row_size)
      break if stream.complete?
    end

    progress.finish
    completed_tables << stream.table_name.to_s
    self.stream_state = {}
  end

  # Table name => row count for the local database; cached in opts so it is
  # part of the stored session.
  def local_tables_info
    opts[:local_tables_info] ||= fetch_local_tables_info
  end

  # Local tables that have not been completed yet, as name => row count
  # with string names.
  def tables
    local_tables_info.inject({}) do |pending, (table_name, count)|
      pending[table_name.to_s] = count unless completed_tables.include?(table_name.to_s)
      pending
    end
  end

  # Total row count over all local tables (memoized).
  def record_count
    @record_count ||= local_tables_info.values.inject(0) { |sum, count| sum + count }
  end

  # Counts the rows of every local table, then applies the table filter.
  def fetch_local_tables_info
    counts = db.tables.inject({}) do |acc, table|
      acc[table] = db[table.to_sym.identifier].count
      acc
    end
    apply_table_filter(counts)
  end
end
576
+
577
+ end