taps2 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,20 @@
1
# Bind Sequel models to the database named by Taps::Config.taps_database_url.
# This has to run before the model class below is defined.
Sequel::Model.db = Sequel.connect(Taps::Config.taps_database_url)

# Sequel model holding one row per session: a key, a database URL and two
# timestamps (started_at, last_access).
class DbSession < Sequel::Model
  plugin :schema

  set_schema do
    primary_key :id
    text :key
    text :database_url
    timestamp :started_at
    timestamp :last_access
  end

  # Opens a connection to this row's database_url using the block form of
  # Sequel.connect and passes it to the caller's block, if one was given.
  def conn
    Sequel.connect(database_url) { |connection| yield connection if block_given? }
  end
end

# Only create the table when it does not already exist.
DbSession.create_table! unless DbSession.table_exists?
@@ -0,0 +1,15 @@
1
module Taps
  # Common ancestor of the taps error classes. It can carry a backtrace
  # supplied by the caller in addition to the exception's own one.
  class BaseError < StandardError
    # The value passed as opts[:backtrace], or nil when none was given.
    attr_reader :original_backtrace

    # message - the exception message.
    # opts    - options hash; only :backtrace is read.
    #
    # The hash is read, not modified, so callers may pass a frozen or shared
    # hash without it being altered or an error being raised.
    def initialize(message, opts={})
      @original_backtrace = opts[:backtrace]
      super(message)
    end
  end

  class NotImplemented < BaseError; end
  class DuplicatePrimaryKeyError < BaseError; end
  class CorruptedData < BaseError; end
  class InvalidData < BaseError; end
end
@@ -0,0 +1,15 @@
1
module Taps
  # Replaces the logger returned by Taps.log.
  def self.log=(log)
    @@log = log
  end

  # Returns the current logger. When none has been assigned yet, a Logger
  # writing to $stderr at ERROR level is created, stored and returned.
  def self.log
    @@log ||= begin
      require 'logger'
      Logger.new($stderr).tap do |default_logger|
        default_logger.level = Logger::ERROR
        default_logger.datetime_format = "%Y-%m-%d %H:%M:%S"
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
# Core extensions used to turn string-keyed hashes into symbol-keyed ones.
class Hash
  # Returns a new hash whose keys have been converted with to_sym. A key is
  # kept as-is when to_sym raises (for example the key has no to_sym method)
  # or returns a falsy value. The receiver is not modified.
  def symbolize_keys
    result = {}
    each do |key, value|
      sym = begin
        key.to_sym
      rescue StandardError
        nil
      end
      result[sym || key] = value
    end
    result
  end

  # In-place variant of symbolize_keys. Returns self.
  def symbolize_keys!
    replace(symbolize_keys)
  end

  # Symbolizes the keys of this hash and of every hash nested inside it as a
  # value, at any depth. Hashes held inside arrays or other containers are
  # not visited. Returns self.
  def symbolize_recursively!
    symbolize_keys!
    each_value do |value|
      value.symbolize_recursively! if value.kind_of?(Hash)
    end
    self
  end
end
@@ -0,0 +1,73 @@
1
+ require 'restclient'
2
+ require 'rack/utils'
3
+ require 'stringio'
4
+ require 'vendor/okjson'
5
+
6
module Taps
  # One part of a multipart message, described by an options hash
  # (:name, :payload, :content_type, :original_filename). The class methods
  # build and parse whole multipart bodies.
  class Multipart
    # Collects parts and renders them through RestClient's multipart payload.
    class Container
      attr_accessor :attachments

      def initialize
        @attachments = []
      end

      # Wraps opts in a Taps::Multipart and appends it to the attachments.
      def attach(opts)
        attachments << Taps::Multipart.new(opts)
      end

      # Returns [body, content_type]: the rendered payload as a string and
      # the Content-Type header RestClient produced for it. Parts are keyed
      # by name, so a later part replaces an earlier one with the same name.
      def generate
        parts = {}
        attachments.each { |part| parts[part.name] = part }
        payload = RestClient::Payload::Multipart.new(parts)
        [payload.to_s, payload.headers['Content-Type']]
      end
    end

    attr_reader :opts

    def initialize(opts={})
      @opts = opts
    end

    def name
      opts[:name]
    end

    # The part's payload, exactly as given in opts[:payload].
    def to_s
      opts[:payload]
    end

    # Falls back to 'text/plain' when no :content_type was supplied.
    def content_type
      opts[:content_type] || 'text/plain'
    end

    def original_filename
      opts[:original_filename]
    end

    # Yields a fresh Container to the block and returns the result of its
    # generate call, i.e. [body, content_type].
    def self.create
      container = Taps::Multipart::Container.new
      yield container
      container.generate
    end

    # response is a rest-client response
    #
    # Parses the multipart body of the response with Rack's multipart parser
    # and returns the resulting params with symbolized top-level keys.
    def self.parse(response)
      content = response.to_s
      env = {
        'CONTENT_TYPE' => response.headers[:content_type],
        # The length must be counted in bytes: String#size counts characters
        # and would under-report a body holding multibyte text.
        'CONTENT_LENGTH' => content.bytesize,
        'rack.input' => StringIO.new(content)
      }

      params = Rack::Utils::Multipart.parse_multipart(env)
      params.symbolize_keys!
      params
    end

  end
end
@@ -0,0 +1,578 @@
1
+ require 'rest_client'
2
+ require 'sequel'
3
+ require 'zlib'
4
+
5
+ require 'taps/progress_bar'
6
+ require 'taps/config'
7
+ require 'taps/utils'
8
+ require 'taps/data_stream'
9
+ require 'taps/errors'
10
+
11
+ # disable warnings, rest client makes a lot of noise right now
12
+ $VERBOSE = nil
13
+
14
+ module Taps
15
+ class Operation
16
+ attr_reader :database_url, :remote_url, :opts
17
+ attr_reader :session_uri
18
+
19
+ def initialize(database_url, remote_url, opts={})
20
+ @database_url = database_url
21
+ @remote_url = remote_url
22
+ @opts = opts
23
+ @exiting = false
24
+ @session_uri = opts[:session_uri]
25
+ end
26
+
27
+ def file_prefix
28
+ "op"
29
+ end
30
+
31
+ def skip_schema?
32
+ !!opts[:skip_schema]
33
+ end
34
+
35
+ def indexes_first?
36
+ !!opts[:indexes_first]
37
+ end
38
+
39
+ def table_filter
40
+ opts[:table_filter]
41
+ end
42
+
43
+ def exclude_tables
44
+ opts[:exclude_tables] || []
45
+ end
46
+
47
+ def apply_table_filter(tables)
48
+ return tables unless table_filter || exclude_tables
49
+
50
+ re = table_filter ? Regexp.new(table_filter) : nil
51
+ if tables.kind_of?(Hash)
52
+ ntables = {}
53
+ tables.each do |t, d|
54
+ if !exclude_tables.include?(t.to_s) && (!re || !re.match(t.to_s).nil?)
55
+ ntables[t] = d
56
+ end
57
+ end
58
+ ntables
59
+ else
60
+ tables.reject { |t| exclude_tables.include?(t.to_s) || (re && re.match(t.to_s).nil?) }
61
+ end
62
+ end
63
+
64
+ def log
65
+ Taps.log
66
+ end
67
+
68
+ def store_session
69
+ file = "#{file_prefix}_#{Time.now.strftime("%Y%m%d%H%M")}.dat"
70
+ puts "\nSaving session to #{file}.."
71
+ File.open(file, 'w') do |f|
72
+ f.write(::OkJson.encode(to_hash))
73
+ end
74
+ end
75
+
76
+ def to_hash
77
+ {
78
+ :klass => self.class.to_s,
79
+ :database_url => database_url,
80
+ :remote_url => remote_url,
81
+ :session_uri => session_uri,
82
+ :stream_state => stream_state,
83
+ :completed_tables => completed_tables,
84
+ :table_filter => table_filter,
85
+ }
86
+ end
87
+
88
+ def exiting?
89
+ !!@exiting
90
+ end
91
+
92
+ def setup_signal_trap
93
+ trap("INT") {
94
+ puts "\nCompleting current action..."
95
+ @exiting = true
96
+ }
97
+
98
+ trap("TERM") {
99
+ puts "\nCompleting current action..."
100
+ @exiting = true
101
+ }
102
+ end
103
+
104
+ def resuming?
105
+ opts[:resume] == true
106
+ end
107
+
108
+ def default_chunksize
109
+ opts[:default_chunksize]
110
+ end
111
+
112
+ def completed_tables
113
+ opts[:completed_tables] ||= []
114
+ end
115
+
116
+ def stream_state
117
+ opts[:stream_state] ||= {}
118
+ end
119
+
120
+ def stream_state=(val)
121
+ opts[:stream_state] = val
122
+ end
123
+
124
+ def compression_disabled?
125
+ !!opts[:disable_compression]
126
+ end
127
+
128
+ def db
129
+ @db ||= Sequel.connect(database_url)
130
+ end
131
+
132
+ def server
133
+ @server ||= RestClient::Resource.new(remote_url)
134
+ end
135
+
136
+ def session_resource
137
+ @session_resource ||= begin
138
+ @session_uri ||= server['sessions'].post('', http_headers).to_s
139
+ server[@session_uri]
140
+ end
141
+ end
142
+
143
+ def set_session(uri)
144
+ session_uri = uri
145
+ @session_resource = server[session_uri]
146
+ end
147
+
148
+ def close_session
149
+ @session_resource.delete(http_headers) if @session_resource
150
+ end
151
+
152
+ def safe_url(url)
153
+ url.sub(/\/\/(.+?)?:(.*?)@/, '//\1:[hidden]@')
154
+ end
155
+
156
+ def safe_remote_url
157
+ safe_url(remote_url)
158
+ end
159
+
160
+ def safe_database_url
161
+ safe_url(database_url)
162
+ end
163
+
164
+ def http_headers(extra = {})
165
+ base = { :taps_version => Taps.version }
166
+ if compression_disabled?
167
+ base[:accept_encoding] = ""
168
+ else
169
+ base[:accept_encoding] = "gzip, deflate"
170
+ end
171
+ base.merge(extra)
172
+ end
173
+
174
+ def format_number(num)
175
+ num.to_s.gsub(/(\d)(?=(\d\d\d)+(?!\d))/, "\\1,")
176
+ end
177
+
178
+ def verify_server
179
+ begin
180
+ server['/'].get(http_headers)
181
+ rescue RestClient::RequestFailed => e
182
+ if e.http_code == 417
183
+ puts "#{safe_remote_url} is running a different minor version of taps."
184
+ puts "#{e.response.to_s}"
185
+ exit(1)
186
+ else
187
+ raise
188
+ end
189
+ rescue RestClient::Unauthorized
190
+ puts "Bad credentials given for #{safe_remote_url}"
191
+ exit(1)
192
+ rescue Errno::ECONNREFUSED
193
+ puts "Can't connect to #{safe_remote_url}. Please check that it's running"
194
+ exit(1)
195
+ end
196
+ end
197
+
198
+ def catch_errors(&blk)
199
+ verify_server
200
+
201
+ begin
202
+ blk.call
203
+ close_session
204
+ rescue RestClient::Exception, Taps::BaseError => e
205
+ store_session
206
+ if e.kind_of?(Taps::BaseError)
207
+ puts "!!! Caught Server Exception"
208
+ puts "#{e.class}: #{e.message}"
209
+ puts "\n#{e.original_backtrace}" if e.original_backtrace
210
+ exit(1)
211
+ elsif e.respond_to?(:response)
212
+ puts "!!! Caught Server Exception"
213
+ puts "HTTP CODE: #{e.http_code}"
214
+ puts "#{e.response.to_s}"
215
+ exit(1)
216
+ else
217
+ raise
218
+ end
219
+ end
220
+ end
221
+
222
+ def self.factory(type, database_url, remote_url, opts)
223
+ type = :resume if opts[:resume]
224
+ klass = case type
225
+ when :pull then Taps::Pull
226
+ when :push then Taps::Push
227
+ when :resume then eval(opts[:klass])
228
+ else raise "Unknown Operation Type -> #{type}"
229
+ end
230
+
231
+ klass.new(database_url, remote_url, opts)
232
+ end
233
+ end
234
+
235
+ class Pull < Operation
236
+ def file_prefix
237
+ "pull"
238
+ end
239
+
240
+ def to_hash
241
+ super.merge(:remote_tables_info => remote_tables_info)
242
+ end
243
+
244
+ def run
245
+ catch_errors do
246
+ unless resuming?
247
+ pull_schema if !skip_schema?
248
+ pull_indexes if indexes_first? && !skip_schema?
249
+ end
250
+ setup_signal_trap
251
+ pull_partial_data if resuming?
252
+ pull_data
253
+ pull_indexes if !indexes_first? && !skip_schema?
254
+ pull_reset_sequences
255
+ end
256
+ end
257
+
258
+ def pull_schema
259
+ puts "Receiving schema"
260
+
261
+ progress = ProgressBar.new('Schema', tables.size)
262
+ tables.each do |table_name, count|
263
+ schema_data = session_resource['pull/schema'].post({:table_name => table_name}, http_headers).to_s
264
+ log.debug "Table: #{table_name}\n#{schema_data}\n"
265
+ output = Taps::Utils.load_schema(database_url, schema_data)
266
+ output = output.to_s.strip
267
+ puts output unless output.empty?
268
+ progress.inc(1)
269
+ end
270
+ progress.finish
271
+ end
272
+
273
+ def pull_data
274
+ puts "Receiving data"
275
+
276
+ puts "#{tables.size} tables, #{format_number(record_count)} records"
277
+
278
+ tables.each do |table_name, count|
279
+ progress = ProgressBar.new(table_name.to_s, count)
280
+ stream = Taps::DataStream.factory(db, {
281
+ :chunksize => default_chunksize,
282
+ :table_name => table_name
283
+ })
284
+ pull_data_from_table(stream, progress)
285
+ end
286
+ end
287
+
288
+ def pull_partial_data
289
+ return if stream_state == {}
290
+
291
+ table_name = stream_state[:table_name]
292
+ record_count = tables[table_name.to_s]
293
+ puts "Resuming #{table_name}, #{format_number(record_count)} records"
294
+
295
+ progress = ProgressBar.new(table_name.to_s, record_count)
296
+ stream = Taps::DataStream.factory(db, stream_state)
297
+ pull_data_from_table(stream, progress)
298
+ end
299
+
300
+ def pull_data_from_table(stream, progress)
301
+ loop do
302
+ begin
303
+ if exiting?
304
+ store_session
305
+ exit 0
306
+ end
307
+
308
+ size = stream.fetch_remote(session_resource['pull/table'], http_headers)
309
+ break if stream.complete?
310
+ progress.inc(size) unless exiting?
311
+ stream.error = false
312
+ self.stream_state = stream.to_hash
313
+ rescue Taps::CorruptedData => e
314
+ puts "Corrupted Data Received #{e.message}, retrying..."
315
+ stream.error = true
316
+ next
317
+ end
318
+ end
319
+
320
+ progress.finish
321
+ completed_tables << stream.table_name.to_s
322
+ self.stream_state = {}
323
+ end
324
+
325
+ def tables
326
+ h = {}
327
+ remote_tables_info.each do |table_name, count|
328
+ next if completed_tables.include?(table_name.to_s)
329
+ h[table_name.to_s] = count
330
+ end
331
+ h
332
+ end
333
+
334
+ def record_count
335
+ @record_count ||= remote_tables_info.values.inject(0) { |a,c| a += c }
336
+ end
337
+
338
+ def remote_tables_info
339
+ opts[:remote_tables_info] ||= fetch_remote_tables_info
340
+ end
341
+
342
+ def fetch_remote_tables_info
343
+ retries = 0
344
+ max_retries = 10
345
+ begin
346
+ tables = ::OkJson.decode(session_resource['pull/table_names'].get(http_headers).to_s)
347
+ rescue RestClient::Exception
348
+ retries += 1
349
+ retry if retries <= max_retries
350
+ puts "Unable to fetch tables information from #{remote_url}. Please check the server log."
351
+ exit(1)
352
+ end
353
+
354
+ data = {}
355
+ apply_table_filter(tables).each do |table_name|
356
+ retries = 0
357
+ begin
358
+ count = Integer(session_resource['pull/table_count'].post({:table => table_name}, http_headers).to_s)
359
+ data[table_name] = count
360
+ rescue RestClient::Exception
361
+ retries += 1
362
+ retry if retries <= max_retries
363
+ puts "Unable to fetch tables information from #{remote_url}. Please check the server log."
364
+ exit(1)
365
+ end
366
+ end
367
+ data
368
+ end
369
+
370
+ def pull_indexes
371
+ puts "Receiving indexes"
372
+
373
+ idxs = ::OkJson.decode(session_resource['pull/indexes'].get(http_headers).to_s)
374
+
375
+ apply_table_filter(idxs).each do |table, indexes|
376
+ next unless indexes.size > 0
377
+ progress = ProgressBar.new(table, indexes.size)
378
+ indexes.each do |idx|
379
+ output = Taps::Utils.load_indexes(database_url, idx)
380
+ output = output.to_s.strip
381
+ puts output unless output.empty?
382
+ progress.inc(1)
383
+ end
384
+ progress.finish
385
+ end
386
+ end
387
+
388
+ def pull_reset_sequences
389
+ puts "Resetting sequences"
390
+
391
+ output = Taps::Utils.schema_bin(:reset_db_sequences, database_url)
392
+ output = output.to_s.strip
393
+ puts output unless output.empty?
394
+ end
395
+ end
396
+
397
+ class Push < Operation
398
+ def file_prefix
399
+ "push"
400
+ end
401
+
402
+ def to_hash
403
+ super.merge(:local_tables_info => local_tables_info)
404
+ end
405
+
406
+ def run
407
+ catch_errors do
408
+ unless resuming?
409
+ push_schema if !skip_schema?
410
+ push_indexes if indexes_first? && !skip_schema?
411
+ end
412
+ setup_signal_trap
413
+ push_partial_data if resuming?
414
+ push_data
415
+ push_indexes if !indexes_first? && !skip_schema?
416
+ push_reset_sequences
417
+ end
418
+ end
419
+
420
+ def push_indexes
421
+ idxs = ::OkJson.decode(Taps::Utils.schema_bin(:indexes_individual, database_url))
422
+
423
+ return unless idxs.size > 0
424
+
425
+ puts "Sending indexes"
426
+
427
+ apply_table_filter(idxs).each do |table, indexes|
428
+ next unless indexes.size > 0
429
+ progress = ProgressBar.new(table, indexes.size)
430
+ indexes.each do |idx|
431
+ session_resource['push/indexes'].post(idx, http_headers)
432
+ progress.inc(1)
433
+ end
434
+ progress.finish
435
+ end
436
+ end
437
+
438
+ def push_schema
439
+ puts "Sending schema"
440
+
441
+ progress = ProgressBar.new('Schema', tables.size)
442
+ tables.each do |table, count|
443
+ schema_data = Taps::Utils.schema_bin(:dump_table, database_url, table)
444
+ log.debug "Table: #{table}\n#{schema_data}\n"
445
+ session_resource['push/schema'].post(schema_data, http_headers)
446
+ progress.inc(1)
447
+ end
448
+ progress.finish
449
+ end
450
+
451
+ def push_reset_sequences
452
+ puts "Resetting sequences"
453
+
454
+ session_resource['push/reset_sequences'].post('', http_headers)
455
+ end
456
+
457
+ def push_partial_data
458
+ return if stream_state == {}
459
+
460
+ table_name = stream_state[:table_name]
461
+ record_count = tables[table_name.to_s]
462
+ puts "Resuming #{table_name}, #{format_number(record_count)} records"
463
+ progress = ProgressBar.new(table_name.to_s, record_count)
464
+ stream = Taps::DataStream.factory(db, stream_state)
465
+ push_data_from_table(stream, progress)
466
+ end
467
+
468
+ def push_data
469
+ puts "Sending data"
470
+
471
+ puts "#{tables.size} tables, #{format_number(record_count)} records"
472
+
473
+ tables.each do |table_name, count|
474
+ stream = Taps::DataStream.factory(db,
475
+ :table_name => table_name,
476
+ :chunksize => default_chunksize)
477
+ progress = ProgressBar.new(table_name.to_s, count)
478
+ push_data_from_table(stream, progress)
479
+ end
480
+ end
481
+
482
+ def push_data_from_table(stream, progress)
483
+ loop do
484
+ if exiting?
485
+ store_session
486
+ exit 0
487
+ end
488
+
489
+ row_size = 0
490
+ chunksize = stream.state[:chunksize]
491
+
492
+ begin
493
+ chunksize = Taps::Utils.calculate_chunksize(chunksize) do |c|
494
+ stream.state[:chunksize] = c.to_i
495
+ encoded_data, row_size, elapsed_time = nil
496
+ d1 = c.time_delta do
497
+ encoded_data, row_size, elapsed_time = stream.fetch
498
+ end
499
+ break if stream.complete?
500
+
501
+ data = nil
502
+ d2 = c.time_delta do
503
+ data = {
504
+ :state => stream.to_hash,
505
+ :checksum => Taps::Utils.checksum(encoded_data).to_s
506
+ }
507
+ end
508
+
509
+ begin
510
+ content, content_type = nil
511
+ d3 = c.time_delta do
512
+ content, content_type = Taps::Multipart.create do |r|
513
+ r.attach :name => :encoded_data,
514
+ :payload => encoded_data,
515
+ :content_type => 'application/octet-stream'
516
+ r.attach :name => :json,
517
+ :payload => ::OkJson.encode(data),
518
+ :content_type => 'application/json'
519
+ end
520
+ end
521
+ session_resource['push/table'].post(content, http_headers(:content_type => content_type))
522
+ self.stream_state = stream.to_hash
523
+ rescue => e
524
+ Taps::Utils.reraise_server_exception(e)
525
+ end
526
+
527
+ c.idle_secs = (d1 + d2 + d3)
528
+
529
+ elapsed_time
530
+ end
531
+ rescue Taps::CorruptedData => e
532
+ # retry the same data, it got corrupted somehow.
533
+ next
534
+ rescue Taps::DuplicatePrimaryKeyError => e
535
+ # verify the stream and retry it
536
+ stream = stream.verify_remote_stream(session_resource['push/verify_stream'], http_headers)
537
+ next
538
+ end
539
+ stream.state[:chunksize] = chunksize
540
+
541
+ progress.inc(row_size)
542
+
543
+ stream.increment(row_size)
544
+ break if stream.complete?
545
+ end
546
+
547
+ progress.finish
548
+ completed_tables << stream.table_name.to_s
549
+ self.stream_state = {}
550
+ end
551
+
552
+ def local_tables_info
553
+ opts[:local_tables_info] ||= fetch_local_tables_info
554
+ end
555
+
556
+ def tables
557
+ h = {}
558
+ local_tables_info.each do |table_name, count|
559
+ next if completed_tables.include?(table_name.to_s)
560
+ h[table_name.to_s] = count
561
+ end
562
+ h
563
+ end
564
+
565
+ def record_count
566
+ @record_count ||= local_tables_info.values.inject(0) { |a,c| a += c }
567
+ end
568
+
569
+ def fetch_local_tables_info
570
+ tables_with_counts = {}
571
+ db.tables.each do |table|
572
+ tables_with_counts[table] = db[table.to_sym.identifier].count
573
+ end
574
+ apply_table_filter(tables_with_counts)
575
+ end
576
+
577
+ end
578
+ end