tapsoob 0.2.7-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,482 @@
1
+ # -*- encoding : utf-8 -*-
2
+ require 'sequel'
3
+
4
+ require 'tapsoob/progress_bar'
5
+ require 'tapsoob/schema'
6
+ require 'tapsoob/data_stream'
7
+
8
+ module Tapsoob
9
+ class Operation
10
+ attr_reader :database_url, :dump_path, :opts
11
+
12
+ def initialize(database_url, dump_path, opts={})
13
+ @database_url = database_url
14
+ @dump_path = dump_path
15
+ @opts = opts
16
+ @exiting = false
17
+ end
18
+
19
+ def file_prefix
20
+ "op"
21
+ end
22
+
23
+ def skip_schema?
24
+ !!opts[:skip_schema]
25
+ end
26
+
27
+ def indexes_first?
28
+ !!opts[:indexes_first]
29
+ end
30
+
31
+ def table_filter
32
+ opts[:table_filter]
33
+ end
34
+
35
+ def exclude_tables
36
+ opts[:exclude_tables] || []
37
+ end
38
+
39
+ def apply_table_filter(tables)
40
+ return tables unless table_filter || exclude_tables
41
+
42
+ re = table_filter ? Regexp.new(table_filter) : nil
43
+ if tables.kind_of?(Hash)
44
+ ntables = {}
45
+ tables.each do |t, d|
46
+ if !exclude_tables.include?(t.to_s) && (!re || !re.match(t.to_s).nil?)
47
+ ntables[t] = d
48
+ end
49
+ end
50
+ ntables
51
+ else
52
+ tables.reject { |t| exclude_tables.include?(t.to_s) || (re && re.match(t.to_s).nil?) }
53
+ end
54
+ end
55
+
56
+ def log
57
+ Tapsoob.log
58
+ end
59
+
60
+ def store_session
61
+ file = "#{file_prefix}_#{Time.now.strftime("%Y%m%d%H%M")}.dat"
62
+ puts "\nSaving session to #{file}..."
63
+ File.open(file, 'w') do |f|
64
+ f.write(JSON.generate(to_hash))
65
+ end
66
+ end
67
+
68
+ def to_hash
69
+ {
70
+ :klass => self.class.to_s,
71
+ :database_url => database_url,
72
+ :stream_state => stream_state,
73
+ :completed_tables => completed_tables,
74
+ :table_filter => table_filter,
75
+ }
76
+ end
77
+
78
+ def exiting?
79
+ !!@exiting
80
+ end
81
+
82
+ def setup_signal_trap
83
+ trap("INT") {
84
+ puts "\nCompleting current action..."
85
+ @exiting = true
86
+ }
87
+
88
+ trap("TERM") {
89
+ puts "\nCompleting current action..."
90
+ @exiting = true
91
+ }
92
+ end
93
+
94
+ def resuming?
95
+ opts[:resume] == true
96
+ end
97
+
98
+ def default_chunksize
99
+ opts[:default_chunksize]
100
+ end
101
+
102
+ def completed_tables
103
+ opts[:completed_tables] ||= []
104
+ end
105
+
106
+ def stream_state
107
+ opts[:stream_state] ||= {}
108
+ end
109
+
110
+ def stream_state=(val)
111
+ opts[:stream_state] = val
112
+ end
113
+
114
+ def db
115
+ @db ||= Sequel.connect(database_url)
116
+
117
+ # Set parameters
118
+ if @db.uri =~ /oracle/i
119
+ @db << "ALTER SESSION SET NLS_DATE_FORMAT='YYYY-MM-DD HH24:MI:SS'"
120
+ @db << "ALTER SESSION SET NLS_TIMESTAMP_FORMAT='YYYY-MM-DD HH24:MI:SS:FF6'"
121
+ end
122
+
123
+ @db
124
+ end
125
+
126
+ def format_number(num)
127
+ num.to_s.gsub(/(\d)(?=(\d\d\d)+(?!\d))/, "\\1,")
128
+ end
129
+
130
+ def catch_errors(&blk)
131
+ begin
132
+ blk.call
133
+ rescue Exception => e
134
+ raise e
135
+ end
136
+ end
137
+
138
+ def self.factory(type, database_url, dump_path, opts)
139
+ type = :resume if opts[:resume]
140
+ klass = case type
141
+ when :pull then Tapsoob::Pull
142
+ when :push then Tapsoob::Push
143
+ when :resume then eval(opts[:klass])
144
+ else raise "Unknown Operation Type -> #{type}"
145
+ end
146
+
147
+ klass.new(database_url, dump_path, opts)
148
+ end
149
+ end
150
+
151
+ class Pull < Operation
152
+ def file_prefix
153
+ "pull"
154
+ end
155
+
156
+ def to_hash
157
+ super.merge(:remote_tables_info => remote_tables_info)
158
+ end
159
+
160
+ def run
161
+ catch_errors do
162
+ unless resuming?
163
+ pull_schema if !skip_schema?
164
+ pull_indexes if indexes_first? && !skip_schema?
165
+ end
166
+ setup_signal_trap
167
+ pull_partial_data if resuming?
168
+ pull_data
169
+ pull_indexes if !indexes_first? && !skip_schema?
170
+ pull_reset_sequences
171
+ end
172
+ end
173
+
174
+ def pull_schema
175
+ puts "Receiving schema"
176
+
177
+ progress = ProgressBar.new('Schema', tables.size)
178
+ tables.each do |table_name, count|
179
+ schema_data = Tapsoob::Schema.dump_table(database_url, table_name)
180
+ log.debug "Table: #{table_name}\n#{schema_data}\n"
181
+ output = Tapsoob::Utils.export_schema(dump_path, table_name, schema_data)
182
+ puts output if output
183
+ progress.inc(1)
184
+ end
185
+ progress.finish
186
+ end
187
+
188
+ def pull_data
189
+ puts "Receiving data"
190
+
191
+ puts "#{tables.size} tables, #{format_number(record_count)} records"
192
+
193
+ tables.each do |table_name, count|
194
+ progress = ProgressBar.new(table_name.to_s, count)
195
+ stream = Tapsoob::DataStream.factory(db, {
196
+ :chunksize => default_chunksize,
197
+ :table_name => table_name
198
+ })
199
+ pull_data_from_table(stream, progress)
200
+ end
201
+ end
202
+
203
+ def pull_partial_data
204
+ return if stream_state == {}
205
+
206
+ table_name = stream_state[:table_name]
207
+ record_count = tables[table_name.to_s]
208
+ puts "Resuming #{table_name}, #{format_number(record_count)} records"
209
+
210
+ progress = ProgressBar.new(table_name.to_s, record_count)
211
+ stream = Tapsoob::DataStream.factory(db, stream_state)
212
+ pull_data_from_table(stream, progress)
213
+ end
214
+
215
+ def pull_data_from_table(stream, progress)
216
+ loop do
217
+ begin
218
+ exit 0 if exiting?
219
+
220
+ size = stream.fetch_database(dump_path)
221
+ break if stream.complete?
222
+ progress.inc(size) unless exiting?
223
+ stream.error = false
224
+ self.stream_state = stream.to_hash
225
+ rescue Tapsoob::CorruptedData => e
226
+ puts "Corrupted Data Received #{e.message}, retrying..."
227
+ stream.error = true
228
+ next
229
+ end
230
+ end
231
+
232
+ progress.finish
233
+ completed_tables << stream.table_name.to_s
234
+ self.stream_state = {}
235
+ end
236
+
237
+ def tables
238
+ h = {}
239
+ tables_info.each do |table_name, count|
240
+ next if completed_tables.include?(table_name.to_s)
241
+ h[table_name.to_s] = count
242
+ end
243
+ h
244
+ end
245
+
246
+ def record_count
247
+ tables_info.values.inject(:+)
248
+ end
249
+
250
+ def tables_info
251
+ opts[:tables_info] ||= fetch_tables_info
252
+ end
253
+
254
+ def fetch_tables_info
255
+ tables = db.tables
256
+
257
+ data = {}
258
+ apply_table_filter(tables).each do |table_name|
259
+ data[table_name] = db[table_name].count
260
+ end
261
+ data
262
+ end
263
+
264
+ def self.factory(db, state)
265
+ if defined?(Sequel::MySQL) && Sequel::MySQL.respond_to?(:convert_invalid_date_time=)
266
+ Sequel::MySQL.convert_invalid_date_time = :nil
267
+ end
268
+
269
+ if state.has_key?(:klass)
270
+ return eval(state[:klass]).new(db, state)
271
+ end
272
+
273
+ if Tapsoob::Utils.single_integer_primary_key(db, state[:table_name].to_sym)
274
+ DataStreamKeyed.new(db, state)
275
+ else
276
+ DataStream.new(db, state)
277
+ end
278
+ end
279
+
280
+ def pull_indexes
281
+ puts "Receiving indexes"
282
+
283
+ raw_idxs = Tapsoob::Utils.schema_bin(:indexes_individual, database_url)
284
+ idxs = (raw_idxs && raw_idxs.length >= 2 ? JSON.parse(raw_idxs) : {})
285
+
286
+ apply_table_filter(idxs).each do |table, indexes|
287
+ next unless indexes.size > 0
288
+ progress = ProgressBar.new(table, indexes.size)
289
+ indexes.each do |idx|
290
+ output = Tapsoob::Utils.export_indexes(dump_path, table, idx)
291
+ puts output if output
292
+ progress.inc(1)
293
+ end
294
+ progress.finish
295
+ end
296
+ end
297
+
298
+ def pull_reset_sequences
299
+ puts "Resetting sequences"
300
+
301
+ output = Tapsoob::Utils.schema_bin(:reset_db_sequences, database_url)
302
+ puts output if output
303
+ end
304
+ end
305
+
306
+ class Push < Operation
307
+ def file_prefix
308
+ "push"
309
+ end
310
+
311
+ def to_hash
312
+ super.merge(:local_tables_info => local_tables_info)
313
+ end
314
+
315
+ def run
316
+ catch_errors do
317
+ unless resuming?
318
+ push_schema if !skip_schema?
319
+ push_indexes if indexes_first? && !skip_schema?
320
+ end
321
+ setup_signal_trap
322
+ push_partial_data if resuming?
323
+ push_data
324
+ push_indexes if !indexes_first? && !skip_schema?
325
+ push_reset_sequences
326
+ end
327
+ end
328
+
329
+ def push_indexes
330
+ idxs = {}
331
+ table_idxs = Dir.glob(File.join(dump_path, "indexes", "*.json")).map { |path| File.basename(path, '.json') }
332
+ table_idxs.each do |table_idx|
333
+ idxs[table_idx] = JSON.parse(File.read(File.join(dump_path, "indexes", "#{table_idx}.json")))
334
+ end
335
+
336
+ return unless idxs.size > 0
337
+
338
+ puts "Sending indexes"
339
+
340
+ apply_table_filter(idxs).each do |table, indexes|
341
+ next unless indexes.size > 0
342
+ progress = ProgressBar.new(table, indexes.size)
343
+ indexes.each do |idx|
344
+ Tapsoob::Utils.load_indexes(database_url, idx)
345
+ progress.inc(1)
346
+ end
347
+ progress.finish
348
+ end
349
+ end
350
+
351
+ def push_schema
352
+ puts "Sending schema"
353
+
354
+ progress = ProgressBar.new('Schema', tables.size)
355
+ tables.each do |table, count|
356
+ log.debug "Loading '#{table}' schema\n"
357
+ Tapsoob::Utils.load_schema(dump_path, database_url, table)
358
+ progress.inc(1)
359
+ end
360
+ progress.finish
361
+ end
362
+
363
+ def push_reset_sequences
364
+ puts "Resetting sequences"
365
+
366
+ Tapsoob::Utils.schema_bin(:reset_db_sequences, database_url)
367
+ end
368
+
369
+ def push_partial_data
370
+ return if stream_state == {}
371
+
372
+ table_name = stream_state[:table_name]
373
+ record_count = tables[table_name.to_s]
374
+ puts "Resuming #{table_name}, #{format_number(record_count)} records"
375
+ progress = ProgressBar.new(table_name.to_s, record_count)
376
+ stream = Tapsoob::DataStream.factory(db, stream_state)
377
+ push_data_from_file(stream, progress)
378
+ end
379
+
380
+ def push_data
381
+ puts "Sending data"
382
+
383
+ puts "#{tables.size} tables, #{format_number(record_count)} records"
384
+
385
+ tables.each do |table_name, count|
386
+ next unless File.exists?(File.join(dump_path, "data", "#{table_name}.json"))
387
+ stream = Tapsoob::DataStream.factory(db,
388
+ :table_name => table_name,
389
+ :chunksize => default_chunksize)
390
+ progress = ProgressBar.new(table_name.to_s, count)
391
+ push_data_from_file(stream, progress)
392
+ end
393
+ end
394
+
395
+ def push_data_from_file(stream, progress)
396
+ loop do
397
+ if exiting?
398
+ store_session
399
+ exit 0
400
+ end
401
+
402
+ row_size = 0
403
+ chunksize = stream.state[:chunksize]
404
+
405
+ begin
406
+ chunksize = Tapsoob::Utils.calculate_chunksize(chunksize) do |c|
407
+ stream.state[:chunksize] = c.to_i
408
+ encoded_data, row_size, elapsed_time = nil
409
+ d1 = c.time_delta do
410
+ encoded_data, row_size, elapsed_time = stream.fetch({ :type => "file", :source => dump_path })
411
+ end
412
+ break if stream.complete?
413
+
414
+ data = nil
415
+ d2 = c.time_delta do
416
+ data = {
417
+ :state => stream.to_hash,
418
+ :checksum => Tapsoob::Utils.checksum(encoded_data).to_s
419
+ }
420
+ end
421
+
422
+ size = stream.fetch_data_in_database({ :encoded_data => encoded_data, :checksum => data[:checksum] })
423
+ self.stream_state = stream.to_hash
424
+
425
+ c.idle_secs = (d1 + d2)
426
+
427
+ elapsed_time
428
+ end
429
+ rescue Tapsoob::CorruptedData => e
430
+ # retry the same data, it got corrupted somehow.
431
+ next
432
+ rescue Tapsoob::DuplicatePrimaryKeyError => e
433
+ # verify the stream and retry it
434
+ stream.verify_stream
435
+ stream = JSON.generate({ :state => stream.to_hash })
436
+ next
437
+ end
438
+ stream.state[:chunksize] = chunksize
439
+
440
+ progress.inc(row_size)
441
+
442
+ stream.increment(row_size)
443
+ break if stream.complete?
444
+ end
445
+
446
+ progress.finish
447
+ completed_tables << stream.table_name.to_s
448
+ self.stream_state = {}
449
+ end
450
+
451
+ def local_tables_info
452
+ opts[:local_tables_info] ||= fetch_local_tables_info
453
+ end
454
+
455
+ def tables
456
+ h = {}
457
+ local_tables_info.each do |table_name, count|
458
+ next if completed_tables.include?(table_name.to_s)
459
+ h[table_name.to_s] = count
460
+ end
461
+ h
462
+ end
463
+
464
+ def record_count
465
+ @record_count ||= local_tables_info.values.inject(0) { |a,c| a += c }
466
+ end
467
+
468
+ def fetch_local_tables_info
469
+ tables_with_counts = {}
470
+ tbls = Dir.glob(File.join(dump_path, "schemas", "*")).map { |path| File.basename(path, ".rb") }
471
+ tbls.each do |table|
472
+ if File.exists?(File.join(dump_path, "data", "#{table}.json"))
473
+ data = JSON.parse(File.read(File.join(dump_path, "data", "#{table}.json")))
474
+ tables_with_counts[table] = data.size
475
+ else
476
+ tables_with_counts[table] = 0
477
+ end
478
+ end
479
+ apply_table_filter(tables_with_counts)
480
+ end
481
+ end
482
+ end
@@ -0,0 +1,236 @@
1
+ # -*- encoding : utf-8 -*-
2
+ #
3
+ # Ruby/ProgressBar - a text progress bar library
4
+ #
5
+ # Copyright (C) 2001-2005 Satoru Takabayashi <satoru@namazu.org>
6
+ # All rights reserved.
7
+ # This is free software with ABSOLUTELY NO WARRANTY.
8
+ #
9
+ # You can redistribute it and/or modify it under the terms
10
+ # of Ruby's license.
11
+ #
12
+
13
# Text progress bar written to an IO. Each refresh prints one line made of a
# title, a percentage, a bar and a statistic (ETA while running, elapsed time
# once finished), sized so the line is one column narrower than the terminal.
class ProgressBar
  VERSION = "0.9"

  attr_reader   :title
  attr_reader   :current
  attr_reader   :total
  attr_accessor :start_time

  # title - label shown at the start of the line (truncated to 13 chars).
  # total - value of the counter that means 100%.
  # out   - IO the bar is printed to.
  #
  # The bar is drawn immediately.
  def initialize(title, total, out = STDOUT)
    @title = title
    @total = total
    @out = out
    @terminal_width = 80
    @bar_mark = "="
    @current = 0
    @previous = 0
    @finished_p = false
    @start_time = Time.now
    @previous_time = @start_time
    @title_width = 14
    @format = "%-#{@title_width}s %3d%% %s %s"
    @format_arguments = [:title, :percentage, :bar, :stat]
    clear
    show
  end

  # Blanks the current terminal line.
  def clear
    @out.print "\r"
    @out.print(" " * (get_width - 1))
    @out.print "\r"
  end

  # Jumps to 100%, marks the bar finished and prints the final line.
  def finish
    @current = @total
    @finished_p = true
    show
  end

  def finished?
    @finished_p
  end

  # Switches the statistic column to bytes / transfer rate / time.
  def file_transfer_mode
    @format_arguments = [:title, :percentage, :bar, :stat_for_file_transfer]
  end

  # Sets the sprintf format of the line.
  def format=(format)
    @format = format
  end

  # Sets the list of fields (each rendered by the matching fmt_<name> method)
  # passed to the format.
  def format_arguments=(arguments)
    @format_arguments = arguments
  end

  # Marks the bar finished at its current position and prints the final line.
  def halt
    @finished_p = true
    show
  end

  # Advances the counter by +step+ (capped at the total) and redraws if the
  # percentage changed or at least a second has passed.
  def inc(step = 1)
    @current += step
    @current = @total if @current > @total
    show_if_needed
    @previous = @current
  end

  # Moves the counter to +count+.
  #
  # Raises RuntimeError when +count+ is negative or greater than the total.
  def set(count)
    if count < 0 || count > @total
      raise "invalid count: #{count} (total: #{@total})"
    end
    @current = count
    show_if_needed
    @previous = @current
  end

  def inspect
    "#<ProgressBar:#{@current}/#{@total}>"
  end

  private

  def fmt_bar
    bar_width = do_percentage * @terminal_width / 100
    sprintf("|%s%s|",
            @bar_mark * bar_width,
            " " * (@terminal_width - bar_width))
  end

  def fmt_percentage
    do_percentage
  end

  def fmt_stat
    @finished_p ? elapsed : eta
  end

  def fmt_stat_for_file_transfer
    sprintf("%s %s %s", bytes, transfer_rate, @finished_p ? elapsed : eta)
  end

  def fmt_title
    @title[0, (@title_width - 1)] + ":"
  end

  # Formats a byte count with a B/KB/MB/GB unit.
  def convert_bytes(bytes)
    if bytes < 1024
      sprintf("%6dB", bytes)
    elsif bytes < 1024 * 1000 # 1000kb
      sprintf("%5.1fKB", bytes.to_f / 1024)
    elsif bytes < 1024 * 1024 * 1000 # 1000mb
      sprintf("%5.1fMB", bytes.to_f / 1024 / 1024)
    else
      sprintf("%5.1fGB", bytes.to_f / 1024 / 1024 / 1024)
    end
  end

  def transfer_rate
    bytes_per_second = @current.to_f / (Time.now - @start_time)
    sprintf("%s/s", convert_bytes(bytes_per_second))
  end

  def bytes
    convert_bytes(@current)
  end

  # Formats a number of seconds as HH:MM:SS.
  def format_time(t)
    t = t.to_i
    sec = t % 60
    min = (t / 60) % 60
    hour = t / 3600
    sprintf("%02d:%02d:%02d", hour, min, sec)
  end

  # ETA stands for Estimated Time of Arrival.
  def eta
    if @current == 0
      "ETA: --:--:--"
    else
      elapsed = Time.now - @start_time
      remaining = elapsed * @total / @current - elapsed
      sprintf("ETA: %s", format_time(remaining))
    end
  end

  def elapsed
    sprintf("Time: %s", format_time(Time.now - @start_time))
  end

  # A finished bar ends its line; a running one returns to column 0 so the
  # next refresh overwrites it.
  def eol
    @finished_p ? "\n" : "\r"
  end

  # Completion as a percentage; a zero total counts as 100%.
  def do_percentage
    if @total.zero?
      100
    else
      @current * 100 / @total
    end
  end

  # Width of the terminal behind @out in columns, or 80 when it cannot be
  # determined (not a terminal, ioctl missing or unsupported, ...).
  def get_width
    # FIXME: I don't know how portable it is.
    default_width = 80
    begin
      tiocgwinsz = 0x5413
      data = [0, 0, 0, 0].pack("SSSS")
      if @out.ioctl(tiocgwinsz, data) >= 0
        _rows, cols = data.unpack("SSSS")
        cols > 0 ? cols : default_width
      else
        default_width
      end
    rescue StandardError, NotImplementedError
      # Deliberately not `rescue Exception`: signals and exit requests must
      # not be swallowed here.
      default_width
    end
  end

  # Renders the line for the current state using @format/@format_arguments.
  def render_line
    arguments = @format_arguments.map { |name| send("fmt_#{name}") }
    sprintf(@format, *arguments)
  end

  # Prints the line, first resizing the bar so the line is exactly one column
  # narrower than the terminal. The resize is a single correction followed by
  # one re-render, so a format without a bar field (whose length cannot
  # change) is printed as-is instead of recursing forever.
  def show
    line = render_line
    width = get_width

    if line.length != width - 1
      @terminal_width = [@terminal_width + (width - 1 - line.length), 0].max
      line = render_line
    end

    @out.print(line + eol)
    @out.flush
    @previous_time = Time.now
  end

  # Redraws only when the integer percentage changed, a second has passed
  # since the last draw, or the bar is finished.
  def show_if_needed
    if @total.zero?
      cur_percentage = 100
      prev_percentage = 0
    else
      cur_percentage  = (@current  * 100 / @total).to_i
      prev_percentage = (@previous * 100 / @total).to_i
    end

    # Use "!=" instead of ">" to support negative changes
    if cur_percentage != prev_percentage ||
        Time.now - @previous_time >= 1 || @finished_p
      show
    end
  end
end
231
+
232
# Progress bar that reports the complement of the parent's percentage
# (100 minus what ProgressBar#do_percentage returns).
class ReversedProgressBar < ProgressBar
  def do_percentage
    forward = super
    100 - forward
  end
end
@@ -0,0 +1,11 @@
1
+ # -*- encoding : utf-8 -*-
2
+ require 'tapsoob'
3
+ require 'rails'
4
+
5
+ module Tapsoob
6
+ class Railtie < Rails::Railtie
7
+ rake_tasks do
8
+ load "tasks/tapsoob.rake"
9
+ end
10
+ end
11
+ end