tapsoob 0.1.10
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +24 -0
- data/README.md +66 -0
- data/Rakefile +0 -0
- data/bin/schema +54 -0
- data/bin/tapsoob +6 -0
- data/lib/tapsoob.rb +9 -0
- data/lib/tapsoob/chunksize.rb +53 -0
- data/lib/tapsoob/cli.rb +145 -0
- data/lib/tapsoob/config.rb +33 -0
- data/lib/tapsoob/data_stream.rb +350 -0
- data/lib/tapsoob/errors.rb +16 -0
- data/lib/tapsoob/log.rb +16 -0
- data/lib/tapsoob/operation.rb +468 -0
- data/lib/tapsoob/progress_bar.rb +236 -0
- data/lib/tapsoob/railtie.rb +11 -0
- data/lib/tapsoob/schema.rb +83 -0
- data/lib/tapsoob/utils.rb +179 -0
- data/lib/tapsoob/version.rb +4 -0
- data/lib/tasks/tapsoob.rake +59 -0
- data/tapsoob.gemspec +30 -0
- metadata +138 -0
data/lib/tapsoob/errors.rb
ADDED
@@ -0,0 +1,16 @@
+# -*- encoding : utf-8 -*-
+module Tapsoob
+  class BaseError < StandardError
+    attr_reader :original_backtrace
+
+    def initialize(message, opts = {})
+      @original_backtrace = opts.delete(:backtrace)
+      super(message)
+    end
+  end
+
+  class NotImplemented < BaseError; end
+  class DuplicatePrimaryKeyError < BaseError; end
+  class CorruptedData < BaseError; end
+  class InvalidData < BaseError; end
+end
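As a minimal usage sketch (not part of the diff itself): these error classes let callers attach the backtrace of a lower-level exception via the `:backtrace` option, so the original failure site survives a re-raise.

```ruby
require 'tapsoob/errors'

begin
  begin
    raise IOError, "unexpected end of chunk"  # stand-in for a lower-level failure
  rescue IOError => e
    # wrap the low-level error, keeping its backtrace for diagnostics
    raise Tapsoob::CorruptedData.new("chunk failed checksum", :backtrace => e.backtrace)
  end
rescue Tapsoob::CorruptedData => err
  puts err.message
  puts err.original_backtrace.first  # points at the IOError's raise site
end
```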
data/lib/tapsoob/log.rb
ADDED
@@ -0,0 +1,16 @@
+# -*- encoding : utf-8 -*-
+module Tapsoob
+  def self.log=(log)
+    @@log = log
+  end
+
+  def self.log
+    @@log ||= begin
+      require 'logger'
+      log = Logger.new($stderr)
+      log.level = Logger::ERROR
+      log.datetime_format = "%Y-%m-%d %H:%M:%S"
+      log
+    end
+  end
+end
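Again as a sketch outside the diff: `Tapsoob.log` lazily builds a `Logger` on `$stderr` at ERROR level, and the writer lets callers substitute their own logger, for example to surface the `log.debug` calls made by the operations below.

```ruby
require 'logger'
require 'tapsoob/log'

# Default logger: $stderr at ERROR level, so debug output stays hidden.
Tapsoob.log.error "something went wrong"

# Swap in a verbose logger before running an operation.
verbose = Logger.new($stdout)
verbose.level = Logger::DEBUG
Tapsoob.log = verbose
Tapsoob.log.debug "now visible"
```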
data/lib/tapsoob/operation.rb
ADDED
@@ -0,0 +1,468 @@
+# -*- encoding : utf-8 -*-
+require 'sequel'
+
+require 'tapsoob/progress_bar'
+require 'tapsoob/schema'
+require 'tapsoob/data_stream'
+
+module Tapsoob
+  class Operation
+    attr_reader :database_url, :dump_path, :opts
+
+    def initialize(database_url, dump_path, opts={})
+      @database_url = database_url
+      @dump_path = dump_path
+      @opts = opts
+      @exiting = false
+    end
+
+    def file_prefix
+      "op"
+    end
+
+    def skip_schema?
+      !!opts[:skip_schema]
+    end
+
+    def indexes_first?
+      !!opts[:indexes_first]
+    end
+
+    def table_filter
+      opts[:table_filter]
+    end
+
+    def exclude_tables
+      opts[:exclude_tables] || []
+    end
+
+    def apply_table_filter(tables)
+      return tables unless table_filter || exclude_tables
+
+      re = table_filter ? Regexp.new(table_filter) : nil
+      if tables.kind_of?(Hash)
+        ntables = {}
+        tables.each do |t, d|
+          if !exclude_tables.include?(t.to_s) && (!re || !re.match(t.to_s).nil?)
+            ntables[t] = d
+          end
+        end
+        ntables
+      else
+        tables.reject { |t| exclude_tables.include?(t.to_s) || (re && re.match(t.to_s).nil?) }
+      end
+    end
+
+    def log
+      Tapsoob.log
+    end
+
+    def store_session
+      file = "#{file_prefix}_#{Time.now.strftime("%Y%m%d%H%M")}.dat"
+      puts "\nSaving session to #{file}..."
+      File.open(file, 'w') do |f|
+        f.write(JSON.generate(to_hash))
+      end
+    end
+
+    def to_hash
+      {
+        :klass => self.class.to_s,
+        :database_url => database_url,
+        :stream_state => stream_state,
+        :completed_tables => completed_tables,
+        :table_filter => table_filter,
+      }
+    end
+
+    def exiting?
+      !!@exiting
+    end
+
+    def setup_signal_trap
+      trap("INT") {
+        puts "\nCompleting current action..."
+        @exiting = true
+      }
+
+      trap("TERM") {
+        puts "\nCompleting current action..."
+        @exiting = true
+      }
+    end
+
+    def resuming?
+      opts[:resume] == true
+    end
+
+    def default_chunksize
+      opts[:default_chunksize]
+    end
+
+    def completed_tables
+      opts[:completed_tables] ||= []
+    end
+
+    def stream_state
+      opts[:stream_state] ||= {}
+    end
+
+    def stream_state=(val)
+      opts[:stream_state] = val
+    end
+
+    def db
+      @db ||= Sequel.connect(database_url)
+    end
+
+    def format_number(num)
+      num.to_s.gsub(/(\d)(?=(\d\d\d)+(?!\d))/, "\\1,")
+    end
+
+    def catch_errors(&blk)
+      begin
+        blk.call
+      rescue Exception => e
+        raise e
+      end
+    end
+
+    def self.factory(type, database_url, dump_path, opts)
+      type = :resume if opts[:resume]
+      klass = case type
+        when :pull then Tapsoob::Pull
+        when :push then Tapsoob::Push
+        when :resume then eval(opts[:klass])
+        else raise "Unknown Operation Type -> #{type}"
+      end
+
+      klass.new(database_url, dump_path, opts)
+    end
+  end
+
+  class Pull < Operation
+    def file_prefix
+      "pull"
+    end
+
+    def to_hash
+      super.merge(:remote_tables_info => remote_tables_info)
+    end
+
+    def run
+      catch_errors do
+        unless resuming?
+          pull_schema if !skip_schema?
+          pull_indexes if indexes_first? && !skip_schema?
+        end
+        setup_signal_trap
+        pull_partial_data if resuming?
+        pull_data
+        pull_indexes if !indexes_first? && !skip_schema?
+        pull_reset_sequences
+      end
+    end
+
+    def pull_schema
+      puts "Receiving schema"
+
+      progress = ProgressBar.new('Schema', tables.size)
+      tables.each do |table_name, count|
+        schema_data = Tapsoob::Schema.dump_table(database_url, table_name)
+        log.debug "Table: #{table_name}\n#{schema_data}\n"
+        output = Tapsoob::Utils.export_schema(dump_path, table_name, schema_data)
+        puts output if output
+        progress.inc(1)
+      end
+      progress.finish
+    end
+
+    def pull_data
+      puts "Receiving data"
+
+      puts "#{tables.size} tables, #{format_number(record_count)} records"
+
+      tables.each do |table_name, count|
+        progress = ProgressBar.new(table_name.to_s, count)
+        stream = Tapsoob::DataStream.factory(db, {
+          :chunksize => default_chunksize,
+          :table_name => table_name
+        })
+        pull_data_from_table(stream, progress)
+      end
+    end
+
+    def pull_partial_data
+      return if stream_state == {}
+
+      table_name = stream_state[:table_name]
+      record_count = tables[table_name.to_s]
+      puts "Resuming #{table_name}, #{format_number(record_count)} records"
+
+      progress = ProgressBar.new(table_name.to_s, record_count)
+      stream = Tapsoob::DataStream.factory(db, stream_state)
+      pull_data_from_table(stream, progress)
+    end
+
+    def pull_data_from_table(stream, progress)
+      loop do
+        begin
+          exit 0 if exiting?
+
+          size = stream.fetch_database(dump_path)
+          break if stream.complete?
+          progress.inc(size) unless exiting?
+          stream.error = false
+          self.stream_state = stream.to_hash
+        rescue Tapsoob::CorruptedData => e
+          puts "Corrupted Data Received #{e.message}, retrying..."
+          stream.error = true
+          next
+        end
+      end
+
+      progress.finish
+      completed_tables << stream.table_name.to_s
+      self.stream_state = {}
+    end
+
+    def tables
+      h = {}
+      tables_info.each do |table_name, count|
+        next if completed_tables.include?(table_name.to_s)
+        h[table_name.to_s] = count
+      end
+      h
+    end
+
+    def record_count
+      tables_info.values.inject(:+)
+    end
+
+    def tables_info
+      opts[:tables_info] ||= fetch_tables_info
+    end
+
+    def fetch_tables_info
+      tables = db.tables
+
+      data = {}
+      apply_table_filter(tables).each do |table_name|
+        data[table_name] = db[table_name].count
+      end
+      data
+    end
+
+    def self.factory(db, state)
+      if defined?(Sequel::MySQL) && Sequel::MySQL.respond_to?(:convert_invalid_date_time=)
+        Sequel::MySQL.convert_invalid_date_time = :nil
+      end
+
+      if state.has_key?(:klass)
+        return eval(state[:klass]).new(db, state)
+      end
+
+      if Tapsoob::Utils.single_integer_primary_key(db, state[:table_name].to_sym)
+        DataStreamKeyed.new(db, state)
+      else
+        DataStream.new(db, state)
+      end
+    end
+
+    def pull_indexes
+      puts "Receiving indexes"
+
+      idxs = JSON.parse(Tapsoob::Utils.schema_bin(:indexes_individual, database_url))
+
+      apply_table_filter(idxs).each do |table, indexes|
+        next unless indexes.size > 0
+        progress = ProgressBar.new(table, indexes.size)
+        indexes.each do |idx|
+          output = Tapsoob::Utils.export_indexes(dump_path, table, idx)
+          puts output if output
+          progress.inc(1)
+        end
+        progress.finish
+      end
+    end
+
+    def pull_reset_sequences
+      puts "Resetting sequences"
+
+      output = Tapsoob::Utils.schema_bin(:reset_db_sequences, database_url)
+      puts output if output
+    end
+  end
+
+  class Push < Operation
+    def file_prefix
+      "push"
+    end
+
+    def to_hash
+      super.merge(:local_tables_info => local_tables_info)
+    end
+
+    def run
+      catch_errors do
+        unless resuming?
+          push_schema if !skip_schema?
+          push_indexes if indexes_first? && !skip_schema?
+        end
+        setup_signal_trap
+        push_partial_data if resuming?
+        push_data
+        push_indexes if !indexes_first? && !skip_schema?
+        push_reset_sequences
+      end
+    end
+
+    def push_indexes
+      idxs = {}
+      table_idxs = Dir.glob(File.join(dump_path, "indexes", "*.json")).map { |path| File.basename(path, '.json') }
+      table_idxs.each do |table_idx|
+        idxs[table_idx] = JSON.parse(File.read(File.join(dump_path, "indexes", "#{table_idx}.json")))
+      end
+
+      return unless idxs.size > 0
+
+      puts "Sending indexes"
+
+      apply_table_filter(idxs).each do |table, indexes|
+        next unless indexes.size > 0
+        progress = ProgressBar.new(table, indexes.size)
+        indexes.each do |idx|
+          Tapsoob::Utils.load_indexes(database_url, idx)
+          progress.inc(1)
+        end
+        progress.finish
+      end
+    end
+
+    def push_schema
+      puts "Sending schema"
+
+      progress = ProgressBar.new('Schema', tables.size)
+      tables.each do |table, count|
+        log.debug "Loading '#{table}' schema\n"
+        Tapsoob::Utils.load_schema(dump_path, database_url, table)
+        progress.inc(1)
+      end
+      progress.finish
+    end
+
+    def push_reset_sequences
+      puts "Resetting sequences"
+
+      Tapsoob::Utils.schema_bin(:reset_db_sequences, database_url)
+    end
+
+    def push_partial_data
+      return if stream_state == {}
+
+      table_name = stream_state[:table_name]
+      record_count = tables[table_name.to_s]
+      puts "Resuming #{table_name}, #{format_number(record_count)} records"
+      progress = ProgressBar.new(table_name.to_s, record_count)
+      stream = Tapsoob::DataStream.factory(db, stream_state)
+      push_data_from_file(stream, progress)
+    end
+
+    def push_data
+      puts "Sending data"
+
+      puts "#{tables.size} tables, #{format_number(record_count)} records"
+
+      tables.each do |table_name, count|
+        stream = Tapsoob::DataStream.factory(db,
+          :table_name => table_name,
+          :chunksize => default_chunksize)
+        progress = ProgressBar.new(table_name.to_s, count)
+        push_data_from_file(stream, progress)
+      end
+    end
+
+    def push_data_from_file(stream, progress)
+      loop do
+        if exiting?
+          store_session
+          exit 0
+        end
+
+        row_size = 0
+        chunksize = stream.state[:chunksize]
+
+        begin
+          chunksize = Tapsoob::Utils.calculate_chunksize(chunksize) do |c|
+            stream.state[:chunksize] = c.to_i
+            encoded_data, row_size, elapsed_time = nil
+            d1 = c.time_delta do
+              encoded_data, row_size, elapsed_time = stream.fetch({ :type => "file", :source => dump_path })
+            end
+            break if stream.complete?
+
+            data = nil
+            d2 = c.time_delta do
+              data = {
+                :state => stream.to_hash,
+                :checksum => Tapsoob::Utils.checksum(encoded_data).to_s
+              }
+            end
+
+            size = stream.fetch_data_in_database({ :encoded_data => encoded_data, :checksum => data[:checksum] })
+            self.stream_state = stream.to_hash
+
+            c.idle_secs = (d1 + d2)
+
+            elapsed_time
+          end
+        rescue Tapsoob::CorruptedData => e
+          # retry the same data, it got corrupted somehow.
+          next
+        rescue Tapsoob::DuplicatePrimaryKeyError => e
+          # verify the stream and retry it
+          stream.verify_stream
+          stream = JSON.generate({ :state => stream.to_hash })
+          next
+        end
+        stream.state[:chunksize] = chunksize
+
+        progress.inc(row_size)
+
+        stream.increment(row_size)
+        break if stream.complete?
+      end
+
+      progress.finish
+      completed_tables << stream.table_name.to_s
+      self.stream_state = {}
+    end
+
+    def local_tables_info
+      opts[:local_tables_info] ||= fetch_local_tables_info
+    end
+
+    def tables
+      h = {}
+      local_tables_info.each do |table_name, count|
+        next if completed_tables.include?(table_name.to_s)
+        h[table_name.to_s] = count
+      end
+      h
+    end
+
+    def record_count
+      @record_count ||= local_tables_info.values.inject(0) { |a,c| a += c }
+    end
+
+    def fetch_local_tables_info
+      tables_with_counts = {}
+      tbls = Dir.glob(File.join(dump_path, "data", "*")).map { |path| File.basename(path, ".json") }
+      tbls.each do |table|
+        data = JSON.parse(File.read(File.join(dump_path, "data", "#{table}.json")))
+        tables_with_counts[table] = data.size
+      end
+      apply_table_filter(tables_with_counts)
+    end
+  end
+end
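To tie the pieces together, a hypothetical driver script (the database URL, dump path, and session file name are made up for illustration) showing how `Operation.factory` dispatches to `Push`, and how a session written by `store_session` on interrupt can be fed back in to resume:

```ruby
require 'json'
require 'tapsoob/operation'

database_url = "sqlite://example.db"   # hypothetical
dump_path    = "./dump"                # hypothetical dump produced by an earlier pull

# Fresh push: loads schema, data and indexes from dump_path into the database.
op = Tapsoob::Operation.factory(:push, database_url, dump_path, :default_chunksize => 1000)
op.run

# Resume: on SIGINT/SIGTERM a push saves its state via store_session
# (e.g. push_201601011200.dat). to_hash captures :klass, :stream_state and
# :completed_tables, so with :resume => true the factory rebuilds the same
# operation class from opts[:klass] and skips the tables already finished.
session = JSON.parse(File.read("push_201601011200.dat"), :symbolize_names => true)
resumed = Tapsoob::Operation.factory(:push, database_url, dump_path, session.merge(:resume => true))
resumed.run
```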