# taps-jruby 0.3.14 -- lib/taps/operation.rb
# NOTE: this file was recovered from a diff-viewer paste; the "+" prefixes
# and interleaved line numbers were extraction artifacts, not source content.
require 'rest_client'
require 'sequel'
require 'zlib'

require 'taps/progress_bar'
require 'taps/config'
require 'taps/utils'
require 'taps/data_stream'
require 'taps/errors'

# disable warnings, rest client makes a lot of noise right now
$VERBOSE = nil
14
module Taps

  # Base class shared by Pull and Push. Holds the local database URL, the
  # remote taps server URL, and an opts hash that doubles as resumable
  # session state (stream position, completed tables, table filter, ...).
  class Operation
    attr_reader :database_url, :remote_url, :opts
    attr_reader :session_uri

    # database_url - Sequel connection string for the local database.
    # remote_url   - URL of the remote taps server.
    # opts         - options / session state (:session_uri, :resume,
    #                :default_chunksize, :table_filter, :indexes_first, ...).
    def initialize(database_url, remote_url, opts={})
      @database_url = database_url
      @remote_url = remote_url
      @opts = opts
      @exiting = false
      @session_uri = opts[:session_uri]
    end

    # Filename prefix for saved session dumps; overridden by subclasses.
    def file_prefix
      "op"
    end

    # True when indexes should be transferred before table data.
    def indexes_first?
      !!opts[:indexes_first]
    end

    # Regexp source string restricting which tables are transferred (or nil).
    def table_filter
      opts[:table_filter]
    end

    # Filter a table collection (Array of names, or Hash of name => info)
    # down to entries whose name matches table_filter. Returns the input
    # unchanged when no filter is configured.
    def apply_table_filter(tables)
      return tables unless table_filter
      re = Regexp.new(table_filter)
      if tables.kind_of?(Hash)
        ntables = {}
        tables.each do |t, d|
          unless re.match(t.to_s).nil?
            ntables[t] = d
          end
        end
        ntables
      else
        tables.reject { |t| re.match(t.to_s).nil? }
      end
    end

    def log
      Taps.log
    end

    # Serialize the current session to a timestamped .dat file so the
    # transfer can be resumed later with --resume.
    def store_session
      file = "#{file_prefix}_#{Time.now.strftime("%Y%m%d%H%M")}.dat"
      puts "\nSaving session to #{file}.."
      File.open(file, 'w') do |f|
        f.write(to_hash.to_json)
      end
    end

    # Session-state snapshot; subclasses merge in their table info.
    def to_hash
      {
        :klass => self.class.to_s,
        :database_url => database_url,
        :remote_url => remote_url,
        :session_uri => session_uri,
        :stream_state => stream_state,
        :completed_tables => completed_tables,
        :table_filter => table_filter,
      }
    end

    def exiting?
      !!@exiting
    end

    # On INT/TERM, finish the current chunk then exit: the transfer loops
    # poll exiting? and store the session before quitting.
    def setup_signal_trap
      trap("INT") {
        puts "\nCompleting current action..."
        @exiting = true
      }

      trap("TERM") {
        puts "\nCompleting current action..."
        @exiting = true
      }
    end

    def resuming?
      opts[:resume] == true
    end

    def default_chunksize
      opts[:default_chunksize]
    end

    def completed_tables
      opts[:completed_tables] ||= []
    end

    def stream_state
      opts[:stream_state] ||= {}
    end

    def stream_state=(val)
      opts[:stream_state] = val
    end

    def compression_disabled?
      !!opts[:disable_compression]
    end

    # Lazily-connected Sequel database handle.
    def db
      @db ||= Sequel.connect(database_url)
    end

    # Lazily-built RestClient resource for the remote server root.
    def server
      @server ||= RestClient::Resource.new(remote_url)
    end

    # Resource for this operation's server-side session, creating the
    # session on first use (POST /sessions returns the session uri).
    def session_resource
      @session_resource ||= begin
        @session_uri ||= server['sessions'].post('', http_headers).to_s
        server[@session_uri]
      end
    end

    # Point this operation at an existing server-side session.
    # Bug fix: the original assigned a *local* variable `session_uri`, so
    # the @session_uri ivar (exposed via attr_reader and saved by
    # to_hash/store_session) was never updated.
    def set_session(uri)
      @session_uri = uri
      @session_resource = server[@session_uri]
    end

    def close_session
      @session_resource.delete(http_headers) if @session_resource
    end

    # Mask the password portion of a user:pass@host URL for display.
    def safe_url(url)
      url.sub(/\/\/(.+?)?:(.*?)@/, '//\1:[hidden]@')
    end

    def safe_remote_url
      safe_url(remote_url)
    end

    def safe_database_url
      safe_url(database_url)
    end

    # Standard headers for every request: taps protocol version plus
    # gzip/deflate negotiation (blanked when compression is disabled).
    def http_headers(extra = {})
      base = { :taps_version => Taps.version }
      if compression_disabled?
        base[:accept_encoding] = ""
      else
        base[:accept_encoding] = "gzip, deflate"
      end
      base.merge(extra)
    end

    # Insert thousands separators: 1234567 => "1,234,567".
    def format_number(num)
      num.to_s.gsub(/(\d)(?=(\d\d\d)+(?!\d))/, "\\1,")
    end

    # Ping the server and exit(1) with a friendly message on protocol
    # version mismatch (HTTP 417), bad credentials, or refused connection.
    def verify_server
      begin
        server['/'].get(http_headers)
      rescue RestClient::RequestFailed => e
        if e.http_code == 417
          puts "#{safe_remote_url} is running a different minor version of taps."
          puts "#{e.response.to_s}"
          exit(1)
        else
          raise
        end
      rescue RestClient::Unauthorized
        puts "Bad credentials given for #{safe_remote_url}"
        exit(1)
      rescue Errno::ECONNREFUSED
        puts "Can't connect to #{safe_remote_url}. Please check that it's running"
        exit(1)
      end
    end

    # Run blk with standard error handling: on any HTTP/server error the
    # session is saved to disk (so the transfer is resumable) before
    # printing the failure and exiting; on success the session is closed.
    def catch_errors(&blk)
      verify_server

      begin
        blk.call
        close_session
      rescue RestClient::Exception, Taps::BaseError => e
        store_session
        if e.kind_of?(Taps::BaseError)
          puts "!!! Caught Server Exception"
          puts "#{e.class}: #{e.message}"
          puts "\n#{e.original_backtrace}" if e.original_backtrace
          exit(1)
        elsif e.respond_to?(:response)
          puts "!!! Caught Server Exception"
          puts "HTTP CODE: #{e.http_code}"
          puts "#{e.response.to_s}"
          exit(1)
        else
          raise
        end
      end
    end

    # Build the right Operation subclass for type (:pull / :push), or the
    # class recorded in a saved session file when resuming.
    def self.factory(type, database_url, remote_url, opts)
      type = :resume if opts[:resume]
      klass = case type
        when :pull then Taps::Pull
        when :push then Taps::Push
        when :resume then resolve_klass(opts[:klass])
        else raise "Unknown Operation Type -> #{type}"
      end

      klass.new(database_url, remote_url, opts)
    end

    # SECURITY: opts[:klass] is read from a session file on disk (external
    # input). The original eval'd it unchecked; restrict it to a plain
    # constant path so a tampered session file cannot execute arbitrary code.
    def self.resolve_klass(name)
      unless name.to_s =~ /\A[A-Z][A-Za-z0-9_]*(::[A-Z][A-Za-z0-9_]*)*\z/
        raise "Invalid operation class in session: #{name.inspect}"
      end
      eval(name.to_s)
    end
  end

  # Pulls schema, table data, indexes and sequence resets from the remote
  # taps server into the local database.
  class Pull < Operation
    def file_prefix
      "pull"
    end

    def to_hash
      super.merge(:remote_tables_info => remote_tables_info)
    end

    # Full pull pipeline; schema/index ordering honors indexes_first?.
    def run
      catch_errors do
        unless resuming?
          pull_schema
          pull_indexes if indexes_first?
        end
        setup_signal_trap
        pull_partial_data if resuming?
        pull_data
        pull_indexes unless indexes_first?
        pull_reset_sequences
      end
    end

    # Fetch each table's schema dump from the server and load it locally.
    def pull_schema
      puts "Receiving schema"

      progress = ProgressBar.new('Schema', tables.size)
      tables.each do |table_name, count|
        schema_data = session_resource['pull/schema'].post({:table_name => table_name}, http_headers).to_s
        log.debug "Table: #{table_name}\n#{schema_data}\n"
        output = Taps::Utils.load_schema(database_url, schema_data)
        puts output if output
        progress.inc(1)
      end
      progress.finish
    end

    def pull_data
      puts "Receiving data"

      puts "#{tables.size} tables, #{format_number(record_count)} records"

      tables.each do |table_name, count|
        progress = ProgressBar.new(table_name.to_s, count)
        stream = Taps::DataStream.factory(db, {
          :chunksize => default_chunksize,
          :table_name => table_name
        })
        pull_data_from_table(stream, progress)
      end
    end

    # Resume the table that was mid-transfer when the session was saved.
    def pull_partial_data
      return if stream_state == {}

      table_name = stream_state[:table_name]
      record_count = tables[table_name.to_s]
      puts "Resuming #{table_name}, #{format_number(record_count)} records"

      progress = ProgressBar.new(table_name.to_s, record_count)
      stream = Taps::DataStream.factory(db, stream_state)
      pull_data_from_table(stream, progress)
    end

    # Stream one table's rows down in chunks until complete, persisting
    # stream state after every chunk so the pull is resumable.
    # NOTE(review): corrupted chunks are retried with no upper bound; a
    # persistently corrupt response would loop forever — confirm whether a
    # retry cap is desirable before changing behavior.
    def pull_data_from_table(stream, progress)
      loop do
        begin
          if exiting?
            store_session
            exit 0
          end

          size = stream.fetch_remote(session_resource['pull/table'], http_headers)
          break if stream.complete?
          progress.inc(size) unless exiting?
          stream.error = false
          self.stream_state = stream.to_hash
        rescue Taps::CorruptedData => e
          puts "Corrupted Data Received #{e.message}, retrying..."
          stream.error = true
          next
        end
      end

      progress.finish
      completed_tables << stream.table_name.to_s
      self.stream_state = {}
    end

    # Remaining tables to pull (remote info minus completed ones), keyed
    # by string table name.
    def tables
      h = {}
      remote_tables_info.each do |table_name, count|
        next if completed_tables.include?(table_name.to_s)
        h[table_name.to_s] = count
      end
      h
    end

    def record_count
      @record_count ||= remote_tables_info.values.inject(0) { |a,c| a += c }
    end

    def remote_tables_info
      opts[:remote_tables_info] ||= fetch_remote_tables_info
    end

    # Ask the server for table names and per-table row counts, retrying
    # transient HTTP failures up to max_retries before giving up.
    def fetch_remote_tables_info
      retries = 0
      max_retries = 10
      begin
        tables = JSON.load(session_resource['pull/table_names'].get(http_headers).to_s)
      rescue RestClient::Exception
        retries += 1
        retry if retries <= max_retries
        puts "Unable to fetch tables information from #{remote_url}. Please check the server log."
        exit(1)
      end

      data = {}
      apply_table_filter(tables).each do |table_name|
        retries = 0
        begin
          count = session_resource['pull/table_count'].post({:table => table_name}, http_headers).to_s.to_i
          data[table_name] = count
        rescue RestClient::Exception
          retries += 1
          retry if retries <= max_retries
          puts "Unable to fetch tables information from #{remote_url}. Please check the server log."
          exit(1)
        end
      end
      data
    end

    def pull_indexes
      puts "Receiving indexes"

      idxs = JSON.parse(session_resource['pull/indexes'].get(http_headers).to_s)

      apply_table_filter(idxs).each do |table, indexes|
        next unless indexes.size > 0
        progress = ProgressBar.new(table, indexes.size)
        indexes.each do |idx|
          output = Taps::Utils.load_indexes(database_url, idx)
          puts output if output
          progress.inc(1)
        end
        progress.finish
      end
    end

    def pull_reset_sequences
      puts "Resetting sequences"

      output = Taps::Utils.schema_bin(:reset_db_sequences, database_url)
      puts output if output
    end
  end

  # Pushes schema, table data, indexes and sequence resets from the local
  # database up to the remote taps server.
  class Push < Operation
    def file_prefix
      "push"
    end

    def to_hash
      super.merge(:local_tables_info => local_tables_info)
    end

    # Full push pipeline; schema/index ordering honors indexes_first?.
    def run
      catch_errors do
        unless resuming?
          push_schema
          push_indexes if indexes_first?
        end
        setup_signal_trap
        push_partial_data if resuming?
        push_data
        push_indexes unless indexes_first?
        push_reset_sequences
      end
    end

    def push_indexes
      idxs = JSON.parse(Taps::Utils.schema_bin(:indexes_individual, database_url))

      return unless idxs.size > 0

      puts "Sending indexes"

      apply_table_filter(idxs).each do |table, indexes|
        next unless indexes.size > 0
        progress = ProgressBar.new(table, indexes.size)
        indexes.each do |idx|
          session_resource['push/indexes'].post(idx, http_headers)
          progress.inc(1)
        end
        progress.finish
      end
    end

    # Dump each local table's schema and send it to the server.
    def push_schema
      puts "Sending schema"

      progress = ProgressBar.new('Schema', tables.size)
      tables.each do |table, count|
        schema_data = Taps::Utils.schema_bin(:dump_table, database_url, table)
        log.debug "Table: #{table}\n#{schema_data}\n"
        session_resource['push/schema'].post(schema_data, http_headers)
        progress.inc(1)
      end
      progress.finish
    end

    def push_reset_sequences
      puts "Resetting sequences"

      session_resource['push/reset_sequences'].post('', http_headers)
    end

    # Resume the table that was mid-transfer when the session was saved.
    def push_partial_data
      return if stream_state == {}

      table_name = stream_state[:table_name]
      record_count = tables[table_name.to_s]
      puts "Resuming #{table_name}, #{format_number(record_count)} records"
      progress = ProgressBar.new(table_name.to_s, record_count)
      stream = Taps::DataStream.factory(db, stream_state)
      push_data_from_table(stream, progress)
    end

    def push_data
      puts "Sending data"

      puts "#{tables.size} tables, #{format_number(record_count)} records"

      tables.each do |table_name, count|
        stream = Taps::DataStream.factory(db,
          :table_name => table_name,
          :chunksize => default_chunksize)
        progress = ProgressBar.new(table_name.to_s, count)
        push_data_from_table(stream, progress)
      end
    end

    # Stream one table's rows up in chunks, letting calculate_chunksize
    # adapt the chunk size to observed round-trip time. Stream state is
    # saved after each accepted chunk so the push is resumable.
    # NOTE(review): CorruptedData retries are unbounded, like Pull's.
    def push_data_from_table(stream, progress)
      loop do
        if exiting?
          store_session
          exit 0
        end

        row_size = 0
        chunksize = stream.state[:chunksize]

        begin
          chunksize = Taps::Utils.calculate_chunksize(chunksize) do |c|
            stream.state[:chunksize] = c
            encoded_data, row_size, elapsed_time = stream.fetch
            break if stream.complete?

            data = {
              :state => stream.to_hash,
              :checksum => Taps::Utils.checksum(encoded_data).to_s
            }

            begin
              content, content_type = Taps::Multipart.create do |r|
                r.attach :name => :encoded_data,
                  :payload => encoded_data,
                  :content_type => 'application/octet-stream'
                r.attach :name => :json,
                  :payload => data.to_json,
                  :content_type => 'application/json'
              end
              session_resource['push/table'].post(content, http_headers(:content_type => content_type))
              self.stream_state = stream.to_hash
            rescue => e
              Taps::Utils.reraise_server_exception(e)
            end

            elapsed_time
          end
        rescue Taps::CorruptedData => e
          # retry the same data, it got corrupted somehow.
          next
        rescue Taps::DuplicatePrimaryKeyError => e
          # verify the stream and retry it
          stream = stream.verify_remote_stream(session_resource['push/verify_stream'], http_headers)
          next
        end
        # Bug fix: when the block above `break`s (stream complete),
        # calculate_chunksize returns nil — don't clobber a valid chunksize.
        stream.state[:chunksize] = chunksize if chunksize

        progress.inc(row_size)

        stream.increment(row_size)
        break if stream.complete?
      end

      progress.finish
      completed_tables << stream.table_name.to_s
      self.stream_state = {}
    end

    def local_tables_info
      opts[:local_tables_info] ||= fetch_local_tables_info
    end

    # Remaining tables to push (local info minus completed ones), keyed
    # by string table name.
    def tables
      h = {}
      local_tables_info.each do |table_name, count|
        next if completed_tables.include?(table_name.to_s)
        h[table_name.to_s] = count
      end
      h
    end

    def record_count
      @record_count ||= local_tables_info.values.inject(0) { |a,c| a += c }
    end

    # Row counts for every local table, filtered by table_filter.
    def fetch_local_tables_info
      tables_with_counts = {}
      db.tables.each do |table|
        tables_with_counts[table] = db[table.to_sym.identifier].count
      end
      apply_table_filter(tables_with_counts)
    end

  end

end