taps-taps 0.3.24

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,20 @@
+ Sequel::Model.db = Sequel.connect(Taps::Config.taps_database_url)
+
+ class DbSession < Sequel::Model
+   plugin :schema
+   set_schema do
+     primary_key :id
+     text :key
+     text :database_url
+     timestamp :started_at
+     timestamp :last_access
+   end
+
+   def conn
+     Sequel.connect(database_url) do |db|
+       yield db if block_given?
+     end
+   end
+ end
+
+ DbSession.create_table! unless DbSession.table_exists?
@@ -0,0 +1,15 @@
+ module Taps
+   class BaseError < StandardError
+     attr_reader :original_backtrace
+
+     def initialize(message, opts={})
+       @original_backtrace = opts.delete(:backtrace)
+       super(message)
+     end
+   end
+
+   class NotImplemented < BaseError; end
+   class DuplicatePrimaryKeyError < BaseError; end
+   class CorruptedData < BaseError; end
+   class InvalidData < BaseError; end
+ end
@@ -0,0 +1,15 @@
+ module Taps
+   def self.log=(log)
+     @@log = log
+   end
+
+   def self.log
+     @@log ||= begin
+       require 'logger'
+       log = Logger.new($stderr)
+       log.level = Logger::ERROR
+       log.datetime_format = "%Y-%m-%d %H:%M:%S"
+       log
+     end
+   end
+ end
@@ -0,0 +1,21 @@
+ class Hash
+   def symbolize_keys
+     inject({}) do |options, (key, value)|
+       options[(key.to_sym rescue key) || key] = value
+       options
+     end
+   end
+
+   def symbolize_keys!
+     self.replace(symbolize_keys)
+   end
+
+   def symbolize_recursively!
+     self.replace(symbolize_keys)
+     self.each do |k, v|
+       if v.kind_of?(Hash)
+         v.symbolize_keys!
+       end
+     end
+   end
+ end
@@ -0,0 +1,73 @@
+ require 'restclient'
+ require 'rack/utils'
+ require 'stringio'
+ require 'vendor/okjson'
+
+ module Taps
+   class Multipart
+     class Container
+       attr_accessor :attachments
+
+       def initialize
+         @attachments = []
+       end
+
+       def attach(opts)
+         mp = Taps::Multipart.new(opts)
+         attachments << mp
+       end
+
+       def generate
+         hash = {}
+         attachments.each do |mp|
+           hash[mp.name] = mp
+         end
+         m = RestClient::Payload::Multipart.new(hash)
+         [m.to_s, m.headers['Content-Type']]
+       end
+     end
+
+     attr_reader :opts
+
+     def initialize(opts={})
+       @opts = opts
+     end
+
+     def name
+       opts[:name]
+     end
+
+     def to_s
+       opts[:payload]
+     end
+
+     def content_type
+       opts[:content_type] || 'text/plain'
+     end
+
+     def original_filename
+       opts[:original_filename]
+     end
+
+     def self.create
+       c = Taps::Multipart::Container.new
+       yield c
+       c.generate
+     end
+
+     # response is a rest-client response
+     def self.parse(response)
+       content = response.to_s
+       env = {
+         'CONTENT_TYPE' => response.headers[:content_type],
+         'CONTENT_LENGTH' => content.size,
+         'rack.input' => StringIO.new(content)
+       }
+
+       params = Rack::Utils::Multipart.parse_multipart(env)
+       params.symbolize_keys!
+       params
+     end
+
+   end
+ end
@@ -0,0 +1,577 @@
+ require 'rest_client'
+ require 'sequel'
+ require 'zlib'
+
+ require 'taps/progress_bar'
+ require 'taps/config'
+ require 'taps/utils'
+ require 'taps/data_stream'
+ require 'taps/errors'
+
+ # disable warnings, rest client makes a lot of noise right now
+ $VERBOSE = nil
+
+ module Taps
+
+ class Operation
+   attr_reader :database_url, :remote_url, :opts
+   attr_reader :session_uri
+
+   def initialize(database_url, remote_url, opts={})
+     @database_url = database_url
+     @remote_url = remote_url
+     @opts = opts
+     @exiting = false
+     @session_uri = opts[:session_uri]
+   end
+
+   def file_prefix
+     "op"
+   end
+
+   def skip_schema?
+     !!opts[:skip_schema]
+   end
+
+   def indexes_first?
+     !!opts[:indexes_first]
+   end
+
+   def table_filter
+     opts[:table_filter]
+   end
+
+   def exclude_tables
+     opts[:exclude_tables] || []
+   end
+
+   def apply_table_filter(tables)
+     return tables unless table_filter || exclude_tables
+
+     re = table_filter ? Regexp.new(table_filter) : nil
+     if tables.kind_of?(Hash)
+       ntables = {}
+       tables.each do |t, d|
+         if !exclude_tables.include?(t.to_s) && (!re || !re.match(t.to_s).nil?)
+           ntables[t] = d
+         end
+       end
+       ntables
+     else
+       tables.reject { |t| exclude_tables.include?(t.to_s) || (re && re.match(t.to_s).nil?) }
+     end
+   end
+
+   def log
+     Taps.log
+   end
+
+   def store_session
+     file = "#{file_prefix}_#{Time.now.strftime("%Y%m%d%H%M")}.dat"
+     puts "\nSaving session to #{file}.."
+     File.open(file, 'w') do |f|
+       f.write(OkJson.encode(to_hash))
+     end
+   end
+
+   def to_hash
+     {
+       :klass => self.class.to_s,
+       :database_url => database_url,
+       :remote_url => remote_url,
+       :session_uri => session_uri,
+       :stream_state => stream_state,
+       :completed_tables => completed_tables,
+       :table_filter => table_filter,
+     }
+   end
+
+   def exiting?
+     !!@exiting
+   end
+
+   def setup_signal_trap
+     trap("INT") {
+       puts "\nCompleting current action..."
+       @exiting = true
+     }
+
+     trap("TERM") {
+       puts "\nCompleting current action..."
+       @exiting = true
+     }
+   end
+
+   def resuming?
+     opts[:resume] == true
+   end
+
+   def default_chunksize
+     opts[:default_chunksize]
+   end
+
+   def completed_tables
+     opts[:completed_tables] ||= []
+   end
+
+   def stream_state
+     opts[:stream_state] ||= {}
+   end
+
+   def stream_state=(val)
+     opts[:stream_state] = val
+   end
+
+   def compression_disabled?
+     !!opts[:disable_compression]
+   end
+
+   def db
+     @db ||= Sequel.connect(database_url)
+   end
+
+   def server
+     @server ||= RestClient::Resource.new(remote_url)
+   end
+
+   def session_resource
+     @session_resource ||= begin
+       @session_uri ||= server['sessions'].post('', http_headers).to_s
+       server[@session_uri]
+     end
+   end
+
+   def set_session(uri)
+     session_uri = uri
+     @session_resource = server[session_uri]
+   end
+
+   def close_session
+     @session_resource.delete(http_headers) if @session_resource
+   end
+
+   def safe_url(url)
+     url.sub(/\/\/(.+?)?:(.*?)@/, '//\1:[hidden]@')
+   end
+
+   def safe_remote_url
+     safe_url(remote_url)
+   end
+
+   def safe_database_url
+     safe_url(database_url)
+   end
+
+   def http_headers(extra = {})
+     base = { :taps_version => Taps.version }
+     if compression_disabled?
+       base[:accept_encoding] = ""
+     else
+       base[:accept_encoding] = "gzip, deflate"
+     end
+     base.merge(extra)
+   end
+
+   def format_number(num)
+     num.to_s.gsub(/(\d)(?=(\d\d\d)+(?!\d))/, "\\1,")
+   end
+
+   def verify_server
+     begin
+       server['/'].get(http_headers)
+     rescue RestClient::RequestFailed => e
+       if e.http_code == 417
+         puts "#{safe_remote_url} is running a different minor version of taps."
+         puts "#{e.response.to_s}"
+         exit(1)
+       else
+         raise
+       end
+     rescue RestClient::Unauthorized
+       puts "Bad credentials given for #{safe_remote_url}"
+       exit(1)
+     rescue Errno::ECONNREFUSED
+       puts "Can't connect to #{safe_remote_url}. Please check that it's running"
+       exit(1)
+     end
+   end
+
+   def catch_errors(&blk)
+     verify_server
+
+     begin
+       blk.call
+       close_session
+     rescue RestClient::Exception, Taps::BaseError => e
+       store_session
+       if e.kind_of?(Taps::BaseError)
+         puts "!!! Caught Server Exception"
+         puts "#{e.class}: #{e.message}"
+         puts "\n#{e.original_backtrace}" if e.original_backtrace
+         exit(1)
+       elsif e.respond_to?(:response)
+         puts "!!! Caught Server Exception"
+         puts "HTTP CODE: #{e.http_code}"
+         puts "#{e.response.to_s}"
+         exit(1)
+       else
+         raise
+       end
+     end
+   end
+
+   def self.factory(type, database_url, remote_url, opts)
+     type = :resume if opts[:resume]
+     klass = case type
+       when :pull then Taps::Pull
+       when :push then Taps::Push
+       when :resume then eval(opts[:klass])
+       else raise "Unknown Operation Type -> #{type}"
+     end
+
+     klass.new(database_url, remote_url, opts)
+   end
+ end
+
+ class Pull < Operation
+   def file_prefix
+     "pull"
+   end
+
+   def to_hash
+     super.merge(:remote_tables_info => remote_tables_info)
+   end
+
+   def run
+     catch_errors do
+       unless resuming?
+         pull_schema if !skip_schema?
+         pull_indexes if indexes_first? && !skip_schema?
+       end
+       setup_signal_trap
+       pull_partial_data if resuming?
+       pull_data
+       pull_indexes if !indexes_first? && !skip_schema?
+       pull_reset_sequences
+     end
+   end
+
+   def pull_schema
+     puts "Receiving schema"
+
+     progress = ProgressBar.new('Schema', tables.size)
+     tables.each do |table_name, count|
+       schema_data = session_resource['pull/schema'].post({:table_name => table_name}, http_headers).to_s
+       log.debug "Table: #{table_name}\n#{schema_data}\n"
+       output = Taps::Utils.load_schema(database_url, schema_data)
+       puts output if output
+       progress.inc(1)
+     end
+     progress.finish
+   end
+
+   def pull_data
+     puts "Receiving data"
+
+     puts "#{tables.size} tables, #{format_number(record_count)} records"
+
+     tables.each do |table_name, count|
+       progress = ProgressBar.new(table_name.to_s, count)
+       stream = Taps::DataStream.factory(db, {
+         :chunksize => default_chunksize,
+         :table_name => table_name
+       })
+       pull_data_from_table(stream, progress)
+     end
+   end
+
+   def pull_partial_data
+     return if stream_state == {}
+
+     table_name = stream_state[:table_name]
+     record_count = tables[table_name.to_s]
+     puts "Resuming #{table_name}, #{format_number(record_count)} records"
+
+     progress = ProgressBar.new(table_name.to_s, record_count)
+     stream = Taps::DataStream.factory(db, stream_state)
+     pull_data_from_table(stream, progress)
+   end
+
+   def pull_data_from_table(stream, progress)
+     loop do
+       begin
+         if exiting?
+           store_session
+           exit 0
+         end
+
+         size = stream.fetch_remote(session_resource['pull/table'], http_headers)
+         break if stream.complete?
+         progress.inc(size) unless exiting?
+         stream.error = false
+         self.stream_state = stream.to_hash
+       rescue Taps::CorruptedData => e
+         puts "Corrupted Data Received #{e.message}, retrying..."
+         stream.error = true
+         next
+       end
+     end
+
+     progress.finish
+     completed_tables << stream.table_name.to_s
+     self.stream_state = {}
+   end
+
+   def tables
+     h = {}
+     remote_tables_info.each do |table_name, count|
+       next if completed_tables.include?(table_name.to_s)
+       h[table_name.to_s] = count
+     end
+     h
+   end
+
+   def record_count
+     @record_count ||= remote_tables_info.values.inject(0) { |a,c| a += c }
+   end
+
+   def remote_tables_info
+     opts[:remote_tables_info] ||= fetch_remote_tables_info
+   end
+
+   def fetch_remote_tables_info
+     retries = 0
+     max_retries = 10
+     begin
+       tables = OkJson.decode(session_resource['pull/table_names'].get(http_headers).to_s)
+     rescue RestClient::Exception
+       retries += 1
+       retry if retries <= max_retries
+       puts "Unable to fetch tables information from #{remote_url}. Please check the server log."
+       exit(1)
+     end
+
+     data = {}
+     apply_table_filter(tables).each do |table_name|
+       retries = 0
+       begin
+         count = session_resource['pull/table_count'].post({:table => table_name}, http_headers).to_s.to_i
+         data[table_name] = count
+       rescue RestClient::Exception
+         retries += 1
+         retry if retries <= max_retries
+         puts "Unable to fetch tables information from #{remote_url}. Please check the server log."
+         exit(1)
+       end
+     end
+     data
+   end
+
+   def pull_indexes
+     puts "Receiving indexes"
+
+     idxs = OkJson.decode(session_resource['pull/indexes'].get(http_headers).to_s)
+
+     apply_table_filter(idxs).each do |table, indexes|
+       next unless indexes.size > 0
+       progress = ProgressBar.new(table, indexes.size)
+       indexes.each do |idx|
+         output = Taps::Utils.load_indexes(database_url, idx)
+         puts output if output
+         progress.inc(1)
+       end
+       progress.finish
+     end
+   end
+
+   def pull_reset_sequences
+     puts "Resetting sequences"
+
+     output = Taps::Utils.schema_bin(:reset_db_sequences, database_url)
+     puts output if output
+   end
+ end
+
+ class Push < Operation
+   def file_prefix
+     "push"
+   end
+
+   def to_hash
+     super.merge(:local_tables_info => local_tables_info)
+   end
+
+   def run
+     catch_errors do
+       unless resuming?
+         push_schema if !skip_schema?
+         push_indexes if indexes_first? && !skip_schema?
+       end
+       setup_signal_trap
+       push_partial_data if resuming?
+       push_data
+       push_indexes if !indexes_first? && !skip_schema?
+       push_reset_sequences
+     end
+   end
+
+   def push_indexes
+     idxs = OkJson.decode(Taps::Utils.schema_bin(:indexes_individual, database_url))
+
+     return unless idxs.size > 0
+
+     puts "Sending indexes"
+
+     apply_table_filter(idxs).each do |table, indexes|
+       next unless indexes.size > 0
+       progress = ProgressBar.new(table, indexes.size)
+       indexes.each do |idx|
+         session_resource['push/indexes'].post(idx, http_headers)
+         progress.inc(1)
+       end
+       progress.finish
+     end
+   end
+
+   def push_schema
+     puts "Sending schema"
+
+     progress = ProgressBar.new('Schema', tables.size)
+     tables.each do |table, count|
+       schema_data = Taps::Utils.schema_bin(:dump_table, database_url, table)
+       log.debug "Table: #{table}\n#{schema_data}\n"
+       session_resource['push/schema'].post(schema_data, http_headers)
+       progress.inc(1)
+     end
+     progress.finish
+   end
+
+   def push_reset_sequences
+     puts "Resetting sequences"
+
+     session_resource['push/reset_sequences'].post('', http_headers)
+   end
+
+   def push_partial_data
+     return if stream_state == {}
+
+     table_name = stream_state[:table_name]
+     record_count = tables[table_name.to_s]
+     puts "Resuming #{table_name}, #{format_number(record_count)} records"
+     progress = ProgressBar.new(table_name.to_s, record_count)
+     stream = Taps::DataStream.factory(db, stream_state)
+     push_data_from_table(stream, progress)
+   end
+
+   def push_data
+     puts "Sending data"
+
+     puts "#{tables.size} tables, #{format_number(record_count)} records"
+
+     tables.each do |table_name, count|
+       stream = Taps::DataStream.factory(db,
+         :table_name => table_name,
+         :chunksize => default_chunksize)
+       progress = ProgressBar.new(table_name.to_s, count)
+       push_data_from_table(stream, progress)
+     end
+   end
+
+   def push_data_from_table(stream, progress)
+     loop do
+       if exiting?
+         store_session
+         exit 0
+       end
+
+       row_size = 0
+       chunksize = stream.state[:chunksize]
+
+       begin
+         chunksize = Taps::Utils.calculate_chunksize(chunksize) do |c|
+           stream.state[:chunksize] = c.to_i
+           encoded_data, row_size, elapsed_time = nil
+           d1 = c.time_delta do
+             encoded_data, row_size, elapsed_time = stream.fetch
+           end
+           break if stream.complete?
+
+           data = nil
+           d2 = c.time_delta do
+             data = {
+               :state => stream.to_hash,
+               :checksum => Taps::Utils.checksum(encoded_data).to_s
+             }
+           end
+
+           begin
+             content, content_type = nil
+             d3 = c.time_delta do
+               content, content_type = Taps::Multipart.create do |r|
+                 r.attach :name => :encoded_data,
+                   :payload => encoded_data,
+                   :content_type => 'application/octet-stream'
+                 r.attach :name => :json,
+                   :payload => OkJson.encode(data),
+                   :content_type => 'application/json'
+               end
+             end
+             session_resource['push/table'].post(content, http_headers(:content_type => content_type))
+             self.stream_state = stream.to_hash
+           rescue => e
+             Taps::Utils.reraise_server_exception(e)
+           end
+
+           c.idle_secs = (d1 + d2 + d3)
+
+           elapsed_time
+         end
+       rescue Taps::CorruptedData => e
+         # retry the same data, it got corrupted somehow.
+         next
+       rescue Taps::DuplicatePrimaryKeyError => e
+         # verify the stream and retry it
+         stream = stream.verify_remote_stream(session_resource['push/verify_stream'], http_headers)
+         next
+       end
+       stream.state[:chunksize] = chunksize
+
+       progress.inc(row_size)
+
+       stream.increment(row_size)
+       break if stream.complete?
+     end
+
+     progress.finish
+     completed_tables << stream.table_name.to_s
+     self.stream_state = {}
+   end
+
+   def local_tables_info
+     opts[:local_tables_info] ||= fetch_local_tables_info
+   end
+
+   def tables
+     h = {}
+     local_tables_info.each do |table_name, count|
+       next if completed_tables.include?(table_name.to_s)
+       h[table_name.to_s] = count
+     end
+     h
+   end
+
+   def record_count
+     @record_count ||= local_tables_info.values.inject(0) { |a,c| a += c }
+   end
+
+   def fetch_local_tables_info
+     tables_with_counts = {}
+     db.tables.each do |table|
+       tables_with_counts[table] = db[table.to_sym.identifier].count
+     end
+     apply_table_filter(tables_with_counts)
+   end
+
+ end
+
+ end