taps-jruby 0.3.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,557 @@
1
+ require 'rest_client'
2
+ require 'sequel'
3
+ require 'zlib'
4
+
5
+ require 'taps/progress_bar'
6
+ require 'taps/config'
7
+ require 'taps/utils'
8
+ require 'taps/data_stream'
9
+ require 'taps/errors'
10
+
11
+ # disable warnings, rest client makes a lot of noise right now
12
+ $VERBOSE = nil
13
+
14
+ module Taps
15
+
16
+ class Operation
17
+ attr_reader :database_url, :remote_url, :opts
18
+ attr_reader :session_uri
19
+
20
+ def initialize(database_url, remote_url, opts={})
21
+ @database_url = database_url
22
+ @remote_url = remote_url
23
+ @opts = opts
24
+ @exiting = false
25
+ @session_uri = opts[:session_uri]
26
+ end
27
+
28
+ def file_prefix
29
+ "op"
30
+ end
31
+
32
+ def indexes_first?
33
+ !!opts[:indexes_first]
34
+ end
35
+
36
+ def table_filter
37
+ opts[:table_filter]
38
+ end
39
+
40
+ def apply_table_filter(tables)
41
+ return tables unless table_filter
42
+ re = Regexp.new(table_filter)
43
+ if tables.kind_of?(Hash)
44
+ ntables = {}
45
+ tables.each do |t, d|
46
+ unless re.match(t.to_s).nil?
47
+ ntables[t] = d
48
+ end
49
+ end
50
+ ntables
51
+ else
52
+ tables.reject { |t| re.match(t.to_s).nil? }
53
+ end
54
+ end
55
+
56
+ def log
57
+ Taps.log
58
+ end
59
+
60
+ def store_session
61
+ file = "#{file_prefix}_#{Time.now.strftime("%Y%m%d%H%M")}.dat"
62
+ puts "\nSaving session to #{file}.."
63
+ File.open(file, 'w') do |f|
64
+ f.write(to_hash.to_json)
65
+ end
66
+ end
67
+
68
+ def to_hash
69
+ {
70
+ :klass => self.class.to_s,
71
+ :database_url => database_url,
72
+ :remote_url => remote_url,
73
+ :session_uri => session_uri,
74
+ :stream_state => stream_state,
75
+ :completed_tables => completed_tables,
76
+ :table_filter => table_filter,
77
+ }
78
+ end
79
+
80
+ def exiting?
81
+ !!@exiting
82
+ end
83
+
84
+ def setup_signal_trap
85
+ trap("INT") {
86
+ puts "\nCompleting current action..."
87
+ @exiting = true
88
+ }
89
+
90
+ trap("TERM") {
91
+ puts "\nCompleting current action..."
92
+ @exiting = true
93
+ }
94
+ end
95
+
96
+ def resuming?
97
+ opts[:resume] == true
98
+ end
99
+
100
+ def default_chunksize
101
+ opts[:default_chunksize]
102
+ end
103
+
104
+ def completed_tables
105
+ opts[:completed_tables] ||= []
106
+ end
107
+
108
+ def stream_state
109
+ opts[:stream_state] ||= {}
110
+ end
111
+
112
+ def stream_state=(val)
113
+ opts[:stream_state] = val
114
+ end
115
+
116
+ def compression_disabled?
117
+ !!opts[:disable_compression]
118
+ end
119
+
120
+ def db
121
+ @db ||= Sequel.connect(database_url)
122
+ end
123
+
124
+ def server
125
+ @server ||= RestClient::Resource.new(remote_url)
126
+ end
127
+
128
+ def session_resource
129
+ @session_resource ||= begin
130
+ @session_uri ||= server['sessions'].post('', http_headers).to_s
131
+ server[@session_uri]
132
+ end
133
+ end
134
+
135
+ def set_session(uri)
136
+ session_uri = uri
137
+ @session_resource = server[session_uri]
138
+ end
139
+
140
+ def close_session
141
+ @session_resource.delete(http_headers) if @session_resource
142
+ end
143
+
144
+ def safe_url(url)
145
+ url.sub(/\/\/(.+?)?:(.*?)@/, '//\1:[hidden]@')
146
+ end
147
+
148
+ def safe_remote_url
149
+ safe_url(remote_url)
150
+ end
151
+
152
+ def safe_database_url
153
+ safe_url(database_url)
154
+ end
155
+
156
+ def http_headers(extra = {})
157
+ base = { :taps_version => Taps.version }
158
+ if compression_disabled?
159
+ base[:accept_encoding] = ""
160
+ else
161
+ base[:accept_encoding] = "gzip, deflate"
162
+ end
163
+ base.merge(extra)
164
+ end
165
+
166
+ def format_number(num)
167
+ num.to_s.gsub(/(\d)(?=(\d\d\d)+(?!\d))/, "\\1,")
168
+ end
169
+
170
+ def verify_server
171
+ begin
172
+ server['/'].get(http_headers)
173
+ rescue RestClient::RequestFailed => e
174
+ if e.http_code == 417
175
+ puts "#{safe_remote_url} is running a different minor version of taps."
176
+ puts "#{e.response.to_s}"
177
+ exit(1)
178
+ else
179
+ raise
180
+ end
181
+ rescue RestClient::Unauthorized
182
+ puts "Bad credentials given for #{safe_remote_url}"
183
+ exit(1)
184
+ rescue Errno::ECONNREFUSED
185
+ puts "Can't connect to #{safe_remote_url}. Please check that it's running"
186
+ exit(1)
187
+ end
188
+ end
189
+
190
+ def catch_errors(&blk)
191
+ verify_server
192
+
193
+ begin
194
+ blk.call
195
+ close_session
196
+ rescue RestClient::Exception, Taps::BaseError => e
197
+ store_session
198
+ if e.kind_of?(Taps::BaseError)
199
+ puts "!!! Caught Server Exception"
200
+ puts "#{e.class}: #{e.message}"
201
+ puts "\n#{e.original_backtrace}" if e.original_backtrace
202
+ exit(1)
203
+ elsif e.respond_to?(:response)
204
+ puts "!!! Caught Server Exception"
205
+ puts "HTTP CODE: #{e.http_code}"
206
+ puts "#{e.response.to_s}"
207
+ exit(1)
208
+ else
209
+ raise
210
+ end
211
+ end
212
+ end
213
+
214
+ def self.factory(type, database_url, remote_url, opts)
215
+ type = :resume if opts[:resume]
216
+ klass = case type
217
+ when :pull then Taps::Pull
218
+ when :push then Taps::Push
219
+ when :resume then eval(opts[:klass])
220
+ else raise "Unknown Operation Type -> #{type}"
221
+ end
222
+
223
+ klass.new(database_url, remote_url, opts)
224
+ end
225
+ end
226
+
227
  # Pulls a remote database down into the local one: schema, then table
  # data chunk-by-chunk, then indexes and sequence resets. Progress is
  # resumable via the stream_state / completed_tables bookkeeping in
  # Operation.
  class Pull < Operation
    # Prefix for the session resume file written by store_session.
    def file_prefix
      "pull"
    end

    # Cache the remote table/row-count map in the session so a resumed
    # pull does not re-fetch it from the server.
    def to_hash
      super.merge(:remote_tables_info => remote_tables_info)
    end

    # Full pull workflow. Schema (and optionally indexes) are skipped on
    # resume; the signal trap is installed only after schema load so an
    # early Ctrl-C still aborts immediately.
    def run
      catch_errors do
        unless resuming?
          pull_schema
          pull_indexes if indexes_first?
        end
        setup_signal_trap
        pull_partial_data if resuming?
        pull_data
        pull_indexes unless indexes_first?
        pull_reset_sequences
      end
    end

    # Fetches each table's schema dump from the server and loads it into
    # the local database via the external schema tool.
    def pull_schema
      puts "Receiving schema"

      progress = ProgressBar.new('Schema', tables.size)
      tables.each do |table_name, count|
        schema_data = session_resource['pull/schema'].post({:table_name => table_name}, http_headers).to_s
        log.debug "Table: #{table_name}\n#{schema_data}\n"
        output = Taps::Utils.load_schema(database_url, schema_data)
        puts output if output
        progress.inc(1)
      end
      progress.finish
    end

    # Streams every remaining table's rows from the server.
    def pull_data
      puts "Receiving data"

      puts "#{tables.size} tables, #{format_number(record_count)} records"

      tables.each do |table_name, count|
        progress = ProgressBar.new(table_name.to_s, count)
        stream = Taps::DataStream.factory(db, {
          :chunksize => default_chunksize,
          :table_name => table_name
        })
        pull_data_from_table(stream, progress)
      end
    end

    # On resume, finish the table that was mid-stream when the previous
    # run stopped (no-op when nothing was in flight).
    def pull_partial_data
      return if stream_state == {}

      table_name = stream_state[:table_name]
      record_count = tables[table_name.to_s]
      puts "Resuming #{table_name}, #{format_number(record_count)} records"

      progress = ProgressBar.new(table_name.to_s, record_count)
      # Rebuild the stream from the saved state so it continues at the
      # exact offset where the previous run left off.
      stream = Taps::DataStream.factory(db, stream_state)
      pull_data_from_table(stream, progress)
    end

    # Chunk loop for one table. After every successful chunk the stream
    # state is persisted (so an interrupt loses at most one chunk);
    # corrupted chunks flag the stream as errored and are retried.
    def pull_data_from_table(stream, progress)
      loop do
        begin
          if exiting?
            store_session
            exit 0
          end

          size = stream.fetch_remote(session_resource['pull/table'], http_headers)
          break if stream.complete?
          progress.inc(size) unless exiting?
          stream.error = false
          self.stream_state = stream.to_hash
        rescue Taps::CorruptedData => e
          puts "Corrupted Data Received #{e.message}, retrying..."
          stream.error = true
          next
        end
      end

      progress.finish
      completed_tables << stream.table_name.to_s
      self.stream_state = {}
    end

    # Remaining tables (name => row count), excluding completed ones.
    def tables
      h = {}
      remote_tables_info.each do |table_name, count|
        next if completed_tables.include?(table_name.to_s)
        h[table_name.to_s] = count
      end
      h
    end

    # Total rows across all remote tables (memoized).
    def record_count
      @record_count ||= remote_tables_info.values.inject(0) { |a,c| a += c }
    end

    # Table/count map, fetched once and then cached in the session opts.
    def remote_tables_info
      opts[:remote_tables_info] ||= fetch_remote_tables_info
    end

    # Queries the server for table names, then a row count per table,
    # retrying each request up to max_retries before giving up.
    def fetch_remote_tables_info
      retries = 0
      max_retries = 10
      begin
        tables = JSON.load(session_resource['pull/table_names'].get(http_headers).to_s)
      rescue RestClient::Exception
        retries += 1
        retry if retries <= max_retries
        puts "Unable to fetch tables information from #{remote_url}. Please check the server log."
        exit(1)
      end

      data = {}
      apply_table_filter(tables).each do |table_name|
        retries = 0
        begin
          count = session_resource['pull/table_count'].post({:table => table_name}, http_headers).to_s.to_i
          data[table_name] = count
        rescue RestClient::Exception
          retries += 1
          retry if retries <= max_retries
          puts "Unable to fetch tables information from #{remote_url}. Please check the server log."
          exit(1)
        end
      end
      data
    end

    # Fetches per-table index definitions from the server and applies
    # them locally.
    def pull_indexes
      puts "Receiving indexes"

      idxs = JSON.parse(session_resource['pull/indexes'].get(http_headers).to_s)

      apply_table_filter(idxs).each do |table, indexes|
        next unless indexes.size > 0
        progress = ProgressBar.new(table, indexes.size)
        indexes.each do |idx|
          output = Taps::Utils.load_indexes(database_url, idx)
          puts output if output
          progress.inc(1)
        end
        progress.finish
      end
    end

    # Resets local auto-increment sequences to match the imported data.
    def pull_reset_sequences
      puts "Resetting sequences"

      output = Taps::Utils.schema_bin(:reset_db_sequences, database_url)
      puts output if output
    end
  end
385
+
386
  # Pushes the local database up to a remote taps server: schema, then
  # table data chunk-by-chunk as multipart posts, then indexes and
  # sequence resets. Resumable via the stream_state / completed_tables
  # bookkeeping in Operation.
  class Push < Operation
    # Prefix for the session resume file written by store_session.
    def file_prefix
      "push"
    end

    # Cache the local table/row-count map in the session so a resumed
    # push does not re-count every table.
    def to_hash
      super.merge(:local_tables_info => local_tables_info)
    end

    # Full push workflow; mirrors Pull#run. Schema (and optionally
    # indexes) are skipped on resume.
    def run
      catch_errors do
        unless resuming?
          push_schema
          push_indexes if indexes_first?
        end
        setup_signal_trap
        push_partial_data if resuming?
        push_data
        push_indexes unless indexes_first?
        push_reset_sequences
      end
    end

    # Dumps local index definitions with the external schema tool and
    # posts them one at a time to the server.
    def push_indexes
      idxs = JSON.parse(Taps::Utils.schema_bin(:indexes_individual, database_url))

      return unless idxs.size > 0

      puts "Sending indexes"

      apply_table_filter(idxs).each do |table, indexes|
        next unless indexes.size > 0
        progress = ProgressBar.new(table, indexes.size)
        indexes.each do |idx|
          session_resource['push/indexes'].post(idx, http_headers)
          progress.inc(1)
        end
        progress.finish
      end
    end

    # Dumps each table's schema locally and posts it to the server.
    def push_schema
      puts "Sending schema"

      progress = ProgressBar.new('Schema', tables.size)
      tables.each do |table, count|
        schema_data = Taps::Utils.schema_bin(:dump_table, database_url, table)
        log.debug "Table: #{table}\n#{schema_data}\n"
        session_resource['push/schema'].post(schema_data, http_headers)
        progress.inc(1)
      end
      progress.finish
    end

    # Asks the server to reset its auto-increment sequences.
    def push_reset_sequences
      puts "Resetting sequences"

      session_resource['push/reset_sequences'].post('', http_headers)
    end

    # On resume, finish the table that was mid-stream when the previous
    # run stopped (no-op when nothing was in flight).
    def push_partial_data
      return if stream_state == {}

      table_name = stream_state[:table_name]
      record_count = tables[table_name.to_s]
      puts "Resuming #{table_name}, #{format_number(record_count)} records"
      progress = ProgressBar.new(table_name.to_s, record_count)
      # Rebuild the stream from the saved state so it continues at the
      # exact offset where the previous run left off.
      stream = Taps::DataStream.factory(db, stream_state)
      push_data_from_table(stream, progress)
    end

    # Streams every remaining table's rows up to the server.
    def push_data
      puts "Sending data"

      puts "#{tables.size} tables, #{format_number(record_count)} records"

      tables.each do |table_name, count|
        stream = Taps::DataStream.factory(db,
          :table_name => table_name,
          :chunksize => default_chunksize)
        progress = ProgressBar.new(table_name.to_s, count)
        push_data_from_table(stream, progress)
      end
    end

    # Chunk loop for one table. Each iteration fetches a chunk locally,
    # posts it as a multipart request (raw rows + JSON state/checksum),
    # and persists the stream state. calculate_chunksize adapts the
    # chunk size from the elapsed time its block returns; note the
    # `break` inside that block exits the block early when the stream
    # finishes mid-fetch.
    def push_data_from_table(stream, progress)
      loop do
        if exiting?
          store_session
          exit 0
        end

        row_size = 0
        chunksize = stream.state[:chunksize]

        begin
          chunksize = Taps::Utils.calculate_chunksize(chunksize) do |c|
            stream.state[:chunksize] = c
            encoded_data, row_size, elapsed_time = stream.fetch
            break if stream.complete?

            data = {
              :state => stream.to_hash,
              :checksum => Taps::Utils.checksum(encoded_data).to_s
            }

            begin
              content, content_type = Taps::Multipart.create do |r|
                r.attach :name => :encoded_data,
                  :payload => encoded_data,
                  :content_type => 'application/octet-stream'
                r.attach :name => :json,
                  :payload => data.to_json,
                  :content_type => 'application/json'
              end
              session_resource['push/table'].post(content, http_headers(:content_type => content_type))
              # Only persist state after the server accepted the chunk.
              self.stream_state = stream.to_hash
            rescue => e
              # Unwrap/re-raise server-side errors as taps exceptions.
              Taps::Utils.reraise_server_exception(e)
            end

            elapsed_time
          end
        rescue Taps::CorruptedData => e
          # retry the same data, it got corrupted somehow.
          next
        rescue Taps::DuplicatePrimaryKeyError => e
          # verify the stream and retry it
          stream = stream.verify_remote_stream(session_resource['push/verify_stream'], http_headers)
          next
        end
        stream.state[:chunksize] = chunksize

        progress.inc(row_size)

        stream.increment(row_size)
        break if stream.complete?
      end

      progress.finish
      completed_tables << stream.table_name.to_s
      self.stream_state = {}
    end

    # Table/count map, computed once and then cached in the session opts.
    def local_tables_info
      opts[:local_tables_info] ||= fetch_local_tables_info
    end

    # Remaining tables (name => row count), excluding completed ones.
    def tables
      h = {}
      local_tables_info.each do |table_name, count|
        next if completed_tables.include?(table_name.to_s)
        h[table_name.to_s] = count
      end
      h
    end

    # Total rows across all local tables (memoized).
    def record_count
      @record_count ||= local_tables_info.values.inject(0) { |a,c| a += c }
    end

    # Counts rows in every local table, then applies the table filter.
    def fetch_local_tables_info
      tables_with_counts = {}
      db.tables.each do |table|
        tables_with_counts[table] = db[table.to_sym.identifier].count
      end
      apply_table_filter(tables_with_counts)
    end

  end
556
+
557
+ end