tapsoob 0.2.7-java

@@ -0,0 +1,482 @@
+ # -*- encoding : utf-8 -*-
+ require 'sequel'
+
+ require 'tapsoob/progress_bar'
+ require 'tapsoob/schema'
+ require 'tapsoob/data_stream'
+
+ module Tapsoob
+   class Operation
+     attr_reader :database_url, :dump_path, :opts
+
+     def initialize(database_url, dump_path, opts={})
+       @database_url = database_url
+       @dump_path = dump_path
+       @opts = opts
+       @exiting = false
+     end
+
+     def file_prefix
+       "op"
+     end
+
+     def skip_schema?
+       !!opts[:skip_schema]
+     end
+
+     def indexes_first?
+       !!opts[:indexes_first]
+     end
+
+     def table_filter
+       opts[:table_filter]
+     end
+
+     def exclude_tables
+       opts[:exclude_tables] || []
+     end
+
+     def apply_table_filter(tables)
+       return tables unless table_filter || exclude_tables
+
+       re = table_filter ? Regexp.new(table_filter) : nil
+       if tables.kind_of?(Hash)
+         ntables = {}
+         tables.each do |t, d|
+           if !exclude_tables.include?(t.to_s) && (!re || !re.match(t.to_s).nil?)
+             ntables[t] = d
+           end
+         end
+         ntables
+       else
+         tables.reject { |t| exclude_tables.include?(t.to_s) || (re && re.match(t.to_s).nil?) }
+       end
+     end
+
+     def log
+       Tapsoob.log
+     end
+
+     def store_session
+       file = "#{file_prefix}_#{Time.now.strftime("%Y%m%d%H%M")}.dat"
+       puts "\nSaving session to #{file}..."
+       File.open(file, 'w') do |f|
+         f.write(JSON.generate(to_hash))
+       end
+     end
+
+     def to_hash
+       {
+         :klass => self.class.to_s,
+         :database_url => database_url,
+         :stream_state => stream_state,
+         :completed_tables => completed_tables,
+         :table_filter => table_filter,
+       }
+     end
+
+     def exiting?
+       !!@exiting
+     end
+
+     def setup_signal_trap
+       trap("INT") {
+         puts "\nCompleting current action..."
+         @exiting = true
+       }
+
+       trap("TERM") {
+         puts "\nCompleting current action..."
+         @exiting = true
+       }
+     end
+
+     def resuming?
+       opts[:resume] == true
+     end
+
+     def default_chunksize
+       opts[:default_chunksize]
+     end
+
+     def completed_tables
+       opts[:completed_tables] ||= []
+     end
+
+     def stream_state
+       opts[:stream_state] ||= {}
+     end
+
+     def stream_state=(val)
+       opts[:stream_state] = val
+     end
+
+     def db
+       @db ||= Sequel.connect(database_url)
+
+       # Set parameters
+       if @db.uri =~ /oracle/i
+         @db << "ALTER SESSION SET NLS_DATE_FORMAT='YYYY-MM-DD HH24:MI:SS'"
+         @db << "ALTER SESSION SET NLS_TIMESTAMP_FORMAT='YYYY-MM-DD HH24:MI:SS:FF6'"
+       end
+
+       @db
+     end
+
+     def format_number(num)
+       num.to_s.gsub(/(\d)(?=(\d\d\d)+(?!\d))/, "\\1,")
+     end
+
+     def catch_errors(&blk)
+       begin
+         blk.call
+       rescue Exception => e
+         raise e
+       end
+     end
+
+     def self.factory(type, database_url, dump_path, opts)
+       type = :resume if opts[:resume]
+       klass = case type
+         when :pull then Tapsoob::Pull
+         when :push then Tapsoob::Push
+         when :resume then eval(opts[:klass])
+         else raise "Unknown Operation Type -> #{type}"
+       end
+
+       klass.new(database_url, dump_path, opts)
+     end
+   end
+
+   class Pull < Operation
+     def file_prefix
+       "pull"
+     end
+
+     def to_hash
+       super.merge(:remote_tables_info => remote_tables_info)
+     end
+
+     def run
+       catch_errors do
+         unless resuming?
+           pull_schema if !skip_schema?
+           pull_indexes if indexes_first? && !skip_schema?
+         end
+         setup_signal_trap
+         pull_partial_data if resuming?
+         pull_data
+         pull_indexes if !indexes_first? && !skip_schema?
+         pull_reset_sequences
+       end
+     end
+
+     def pull_schema
+       puts "Receiving schema"
+
+       progress = ProgressBar.new('Schema', tables.size)
+       tables.each do |table_name, count|
+         schema_data = Tapsoob::Schema.dump_table(database_url, table_name)
+         log.debug "Table: #{table_name}\n#{schema_data}\n"
+         output = Tapsoob::Utils.export_schema(dump_path, table_name, schema_data)
+         puts output if output
+         progress.inc(1)
+       end
+       progress.finish
+     end
+
+     def pull_data
+       puts "Receiving data"
+
+       puts "#{tables.size} tables, #{format_number(record_count)} records"
+
+       tables.each do |table_name, count|
+         progress = ProgressBar.new(table_name.to_s, count)
+         stream = Tapsoob::DataStream.factory(db, {
+           :chunksize => default_chunksize,
+           :table_name => table_name
+         })
+         pull_data_from_table(stream, progress)
+       end
+     end
+
+     def pull_partial_data
+       return if stream_state == {}
+
+       table_name = stream_state[:table_name]
+       record_count = tables[table_name.to_s]
+       puts "Resuming #{table_name}, #{format_number(record_count)} records"
+
+       progress = ProgressBar.new(table_name.to_s, record_count)
+       stream = Tapsoob::DataStream.factory(db, stream_state)
+       pull_data_from_table(stream, progress)
+     end
+
+     def pull_data_from_table(stream, progress)
+       loop do
+         begin
+           exit 0 if exiting?
+
+           size = stream.fetch_database(dump_path)
+           break if stream.complete?
+           progress.inc(size) unless exiting?
+           stream.error = false
+           self.stream_state = stream.to_hash
+         rescue Tapsoob::CorruptedData => e
+           puts "Corrupted Data Received #{e.message}, retrying..."
+           stream.error = true
+           next
+         end
+       end
+
+       progress.finish
+       completed_tables << stream.table_name.to_s
+       self.stream_state = {}
+     end
+
+     def tables
+       h = {}
+       tables_info.each do |table_name, count|
+         next if completed_tables.include?(table_name.to_s)
+         h[table_name.to_s] = count
+       end
+       h
+     end
+
+     def record_count
+       tables_info.values.inject(:+)
+     end
+
+     def tables_info
+       opts[:tables_info] ||= fetch_tables_info
+     end
+
+     def fetch_tables_info
+       tables = db.tables
+
+       data = {}
+       apply_table_filter(tables).each do |table_name|
+         data[table_name] = db[table_name].count
+       end
+       data
+     end
+
+     def self.factory(db, state)
+       if defined?(Sequel::MySQL) && Sequel::MySQL.respond_to?(:convert_invalid_date_time=)
+         Sequel::MySQL.convert_invalid_date_time = :nil
+       end
+
+       if state.has_key?(:klass)
+         return eval(state[:klass]).new(db, state)
+       end
+
+       if Tapsoob::Utils.single_integer_primary_key(db, state[:table_name].to_sym)
+         DataStreamKeyed.new(db, state)
+       else
+         DataStream.new(db, state)
+       end
+     end
+
+     def pull_indexes
+       puts "Receiving indexes"
+
+       raw_idxs = Tapsoob::Utils.schema_bin(:indexes_individual, database_url)
+       idxs = (raw_idxs && raw_idxs.length >= 2 ? JSON.parse(raw_idxs) : {})
+
+       apply_table_filter(idxs).each do |table, indexes|
+         next unless indexes.size > 0
+         progress = ProgressBar.new(table, indexes.size)
+         indexes.each do |idx|
+           output = Tapsoob::Utils.export_indexes(dump_path, table, idx)
+           puts output if output
+           progress.inc(1)
+         end
+         progress.finish
+       end
+     end
+
+     def pull_reset_sequences
+       puts "Resetting sequences"
+
+       output = Tapsoob::Utils.schema_bin(:reset_db_sequences, database_url)
+       puts output if output
+     end
+   end
+
+   class Push < Operation
+     def file_prefix
+       "push"
+     end
+
+     def to_hash
+       super.merge(:local_tables_info => local_tables_info)
+     end
+
+     def run
+       catch_errors do
+         unless resuming?
+           push_schema if !skip_schema?
+           push_indexes if indexes_first? && !skip_schema?
+         end
+         setup_signal_trap
+         push_partial_data if resuming?
+         push_data
+         push_indexes if !indexes_first? && !skip_schema?
+         push_reset_sequences
+       end
+     end
+
+     def push_indexes
+       idxs = {}
+       table_idxs = Dir.glob(File.join(dump_path, "indexes", "*.json")).map { |path| File.basename(path, '.json') }
+       table_idxs.each do |table_idx|
+         idxs[table_idx] = JSON.parse(File.read(File.join(dump_path, "indexes", "#{table_idx}.json")))
+       end
+
+       return unless idxs.size > 0
+
+       puts "Sending indexes"
+
+       apply_table_filter(idxs).each do |table, indexes|
+         next unless indexes.size > 0
+         progress = ProgressBar.new(table, indexes.size)
+         indexes.each do |idx|
+           Tapsoob::Utils.load_indexes(database_url, idx)
+           progress.inc(1)
+         end
+         progress.finish
+       end
+     end
+
+     def push_schema
+       puts "Sending schema"
+
+       progress = ProgressBar.new('Schema', tables.size)
+       tables.each do |table, count|
+         log.debug "Loading '#{table}' schema\n"
+         Tapsoob::Utils.load_schema(dump_path, database_url, table)
+         progress.inc(1)
+       end
+       progress.finish
+     end
+
+     def push_reset_sequences
+       puts "Resetting sequences"
+
+       Tapsoob::Utils.schema_bin(:reset_db_sequences, database_url)
+     end
+
+     def push_partial_data
+       return if stream_state == {}
+
+       table_name = stream_state[:table_name]
+       record_count = tables[table_name.to_s]
+       puts "Resuming #{table_name}, #{format_number(record_count)} records"
+       progress = ProgressBar.new(table_name.to_s, record_count)
+       stream = Tapsoob::DataStream.factory(db, stream_state)
+       push_data_from_file(stream, progress)
+     end
+
+     def push_data
+       puts "Sending data"
+
+       puts "#{tables.size} tables, #{format_number(record_count)} records"
+
+       tables.each do |table_name, count|
+         next unless File.exists?(File.join(dump_path, "data", "#{table_name}.json"))
+         stream = Tapsoob::DataStream.factory(db,
+           :table_name => table_name,
+           :chunksize => default_chunksize)
+         progress = ProgressBar.new(table_name.to_s, count)
+         push_data_from_file(stream, progress)
+       end
+     end
+
+     def push_data_from_file(stream, progress)
+       loop do
+         if exiting?
+           store_session
+           exit 0
+         end
+
+         row_size = 0
+         chunksize = stream.state[:chunksize]
+
+         begin
+           chunksize = Tapsoob::Utils.calculate_chunksize(chunksize) do |c|
+             stream.state[:chunksize] = c.to_i
+             encoded_data, row_size, elapsed_time = nil
+             d1 = c.time_delta do
+               encoded_data, row_size, elapsed_time = stream.fetch({ :type => "file", :source => dump_path })
+             end
+             break if stream.complete?
+
+             data = nil
+             d2 = c.time_delta do
+               data = {
+                 :state => stream.to_hash,
+                 :checksum => Tapsoob::Utils.checksum(encoded_data).to_s
+               }
+             end
+
+             size = stream.fetch_data_in_database({ :encoded_data => encoded_data, :checksum => data[:checksum] })
+             self.stream_state = stream.to_hash
+
+             c.idle_secs = (d1 + d2)
+
+             elapsed_time
+           end
+         rescue Tapsoob::CorruptedData => e
+           # retry the same data, it got corrupted somehow.
+           next
+         rescue Tapsoob::DuplicatePrimaryKeyError => e
+           # verify the stream and retry it
+           stream.verify_stream
+           stream = JSON.generate({ :state => stream.to_hash })
+           next
+         end
+         stream.state[:chunksize] = chunksize
+
+         progress.inc(row_size)
+
+         stream.increment(row_size)
+         break if stream.complete?
+       end
+
+       progress.finish
+       completed_tables << stream.table_name.to_s
+       self.stream_state = {}
+     end
+
+     def local_tables_info
+       opts[:local_tables_info] ||= fetch_local_tables_info
+     end
+
+     def tables
+       h = {}
+       local_tables_info.each do |table_name, count|
+         next if completed_tables.include?(table_name.to_s)
+         h[table_name.to_s] = count
+       end
+       h
+     end
+
+     def record_count
+       @record_count ||= local_tables_info.values.inject(0) { |a,c| a += c }
+     end
+
+     def fetch_local_tables_info
+       tables_with_counts = {}
+       tbls = Dir.glob(File.join(dump_path, "schemas", "*")).map { |path| File.basename(path, ".rb") }
+       tbls.each do |table|
+         if File.exists?(File.join(dump_path, "data", "#{table}.json"))
+           data = JSON.parse(File.read(File.join(dump_path, "data", "#{table}.json")))
+           tables_with_counts[table] = data.size
+         else
+           tables_with_counts[table] = 0
+         end
+       end
+       apply_table_filter(tables_with_counts)
+     end
+   end
+ end
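
For orientation, here is a minimal usage sketch of the Operation API defined above. It is not part of the gem's files: the require path, database URL, dump directory and chunk size are illustrative assumptions, and the rest of the gem (Tapsoob::Utils, Tapsoob::Schema, Tapsoob::DataStream) is assumed to be loaded.

# Hypothetical example: dump ("pull") a database into a local directory.
require 'tapsoob/operation'   # assumed require path for the file above

op = Tapsoob::Operation.factory(
  :pull,                                   # :push loads a dump back into a database;
                                           # :resume is selected automatically when opts[:resume] is set
  "postgres://user:pass@localhost/mydb",   # illustrative database URL (any Sequel-supported URL)
  "/tmp/mydb_dump",                        # illustrative dump directory
  { :default_chunksize => 1000 }
)
op.run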
@@ -0,0 +1,236 @@
+ # -*- encoding : utf-8 -*-
+ #
+ # Ruby/ProgressBar - a text progress bar library
+ #
+ # Copyright (C) 2001-2005 Satoru Takabayashi <satoru@namazu.org>
+ # All rights reserved.
+ # This is free software with ABSOLUTELY NO WARRANTY.
+ #
+ # You can redistribute it and/or modify it under the terms
+ # of Ruby's license.
+ #
+
+ class ProgressBar
+   VERSION = "0.9"
+
+   def initialize (title, total, out = STDOUT)
+     @title = title
+     @total = total
+     @out = out
+     @terminal_width = 80
+     @bar_mark = "="
+     @current = 0
+     @previous = 0
+     @finished_p = false
+     @start_time = Time.now
+     @previous_time = @start_time
+     @title_width = 14
+     @format = "%-#{@title_width}s %3d%% %s %s"
+     @format_arguments = [:title, :percentage, :bar, :stat]
+     clear
+     show
+   end
+   attr_reader :title
+   attr_reader :current
+   attr_reader :total
+   attr_accessor :start_time
+
+   private
+   def fmt_bar
+     bar_width = do_percentage * @terminal_width / 100
+     sprintf("|%s%s|",
+             @bar_mark * bar_width,
+             " " * (@terminal_width - bar_width))
+   end
+
+   def fmt_percentage
+     do_percentage
+   end
+
+   def fmt_stat
+     if @finished_p then elapsed else eta end
+   end
+
+   def fmt_stat_for_file_transfer
+     if @finished_p then
+       sprintf("%s %s %s", bytes, transfer_rate, elapsed)
+     else
+       sprintf("%s %s %s", bytes, transfer_rate, eta)
+     end
+   end
+
+   def fmt_title
+     @title[0,(@title_width - 1)] + ":"
+   end
+
+   def convert_bytes (bytes)
+     if bytes < 1024
+       sprintf("%6dB", bytes)
+     elsif bytes < 1024 * 1000 # 1000kb
+       sprintf("%5.1fKB", bytes.to_f / 1024)
+     elsif bytes < 1024 * 1024 * 1000 # 1000mb
+       sprintf("%5.1fMB", bytes.to_f / 1024 / 1024)
+     else
+       sprintf("%5.1fGB", bytes.to_f / 1024 / 1024 / 1024)
+     end
+   end
+
+   def transfer_rate
+     bytes_per_second = @current.to_f / (Time.now - @start_time)
+     sprintf("%s/s", convert_bytes(bytes_per_second))
+   end
+
+   def bytes
+     convert_bytes(@current)
+   end
+
+   def format_time (t)
+     t = t.to_i
+     sec = t % 60
+     min = (t / 60) % 60
+     hour = t / 3600
+     sprintf("%02d:%02d:%02d", hour, min, sec);
+   end
+
+   # ETA stands for Estimated Time of Arrival.
+   def eta
+     if @current == 0
+       "ETA: --:--:--"
+     else
+       elapsed = Time.now - @start_time
+       eta = elapsed * @total / @current - elapsed;
+       sprintf("ETA: %s", format_time(eta))
+     end
+   end
+
+   def elapsed
+     elapsed = Time.now - @start_time
+     sprintf("Time: %s", format_time(elapsed))
+   end
+
+   def eol
+     if @finished_p then "\n" else "\r" end
+   end
+
+   def do_percentage
+     if @total.zero?
+       100
+     else
+       @current * 100 / @total
+     end
+   end
+
+   def get_width
+     # FIXME: I don't know how portable it is.
+     default_width = 80
+     begin
+       tiocgwinsz = 0x5413
+       data = [0, 0, 0, 0].pack("SSSS")
+       if @out.ioctl(tiocgwinsz, data) >= 0 then
+         rows, cols, xpixels, ypixels = data.unpack("SSSS")
+         if cols > 0 then cols else default_width end
+       else
+         default_width
+       end
+     rescue Exception
+       default_width
+     end
+   end
+
+   def show
+     arguments = @format_arguments.map {|method|
+       method = sprintf("fmt_%s", method)
+       send(method)
+     }
+     line = sprintf(@format, *arguments)
+
+     width = get_width
+     if line.length == width - 1
+       @out.print(line + eol)
+       @out.flush
+     elsif line.length >= width
+       @terminal_width = [@terminal_width - (line.length - width + 1), 0].max
+       if @terminal_width == 0 then @out.print(line + eol) else show end
+     else # line.length < width - 1
+       @terminal_width += width - line.length + 1
+       show
+     end
+     @previous_time = Time.now
+   end
+
+   def show_if_needed
+     if @total.zero?
+       cur_percentage = 100
+       prev_percentage = 0
+     else
+       cur_percentage = (@current * 100 / @total).to_i
+       prev_percentage = (@previous * 100 / @total).to_i
+     end
+
+     # Use "!=" instead of ">" to support negative changes
+     if cur_percentage != prev_percentage ||
+         Time.now - @previous_time >= 1 || @finished_p
+       show
+     end
+   end
+
+   public
+   def clear
+     @out.print "\r"
+     @out.print(" " * (get_width - 1))
+     @out.print "\r"
+   end
+
+   def finish
+     @current = @total
+     @finished_p = true
+     show
+   end
+
+   def finished?
+     @finished_p
+   end
+
+   def file_transfer_mode
+     @format_arguments = [:title, :percentage, :bar, :stat_for_file_transfer]
+   end
+
+   def format= (format)
+     @format = format
+   end
+
+   def format_arguments= (arguments)
+     @format_arguments = arguments
+   end
+
+   def halt
+     @finished_p = true
+     show
+   end
+
+   def inc (step = 1)
+     @current += step
+     @current = @total if @current > @total
+     show_if_needed
+     @previous = @current
+   end
+
+   def set (count)
+     if count < 0 || count > @total
+       raise "invalid count: #{count} (total: #{@total})"
+     end
+     @current = count
+     show_if_needed
+     @previous = @current
+   end
+
+   def inspect
+     "#<ProgressBar:#{@current}/#{@total}>"
+   end
+ end
+
+ class ReversedProgressBar < ProgressBar
+   def do_percentage
+     100 - super
+   end
+ end
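
The bundled ProgressBar is self-contained, so it can be exercised on its own. A minimal sketch follows (the title and total are illustrative; output goes to STDOUT by default):

bar = ProgressBar.new("users", 500)   # title and total number of steps
500.times do
  # ... process one record here ...
  bar.inc(1)                          # redraws only when the percentage changes or once per second
end
bar.finish                            # pins the bar at 100% and ends the line with "\n" instead of "\r"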
@@ -0,0 +1,11 @@
+ # -*- encoding : utf-8 -*-
+ require 'tapsoob'
+ require 'rails'
+
+ module Tapsoob
+   class Railtie < Rails::Railtie
+     rake_tasks do
+       load "tasks/tapsoob.rake"
+     end
+   end
+ end
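
In a Rails application nothing has to be wired up by hand: once the gem is in the Gemfile, the railtie above loads tasks/tapsoob.rake and the gem's rake tasks appear alongside the application's own. A minimal, illustrative Gemfile entry:

# Gemfile
gem 'tapsoob'

After bundle install, running "bundle exec rake -T" lists the tasks contributed by tasks/tapsoob.rake.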