tapsoob 0.1.10

@@ -0,0 +1,16 @@
+ # -*- encoding : utf-8 -*-
+ module Tapsoob
+   class BaseError < StandardError
+     attr_reader :original_backtrace
+
+     def initialize(message, opts = {})
+       @original_backtrace = opts.delete(:backtrace)
+       super(message)
+     end
+   end
+
+   class NotImplemented < BaseError; end
+   class DuplicatePrimaryKeyError < BaseError; end
+   class CorruptedData < BaseError; end
+   class InvalidData < BaseError; end
+ end
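
A minimal usage sketch (message text illustrative): the `:backtrace` option lets a re-raised error carry the original failure site, which stays readable through `original_backtrace`:

    begin
      raise Tapsoob::CorruptedData.new("checksum mismatch")
    rescue Tapsoob::BaseError => e
      # Preserve where the first failure happened.
      raise Tapsoob::InvalidData.new(e.message, :backtrace => e.backtrace)
    end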
@@ -0,0 +1,16 @@
+ # -*- encoding : utf-8 -*-
+ module Tapsoob
+   def self.log=(log)
+     @@log = log
+   end
+
+   def self.log
+     @@log ||= begin
+       require 'logger'
+       log = Logger.new($stderr)
+       log.level = Logger::ERROR
+       log.datetime_format = "%Y-%m-%d %H:%M:%S"
+       log
+     end
+   end
+ end
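
The default logger only reports errors to `$stderr`. A minimal sketch of raising verbosity while debugging, using only the standard `logger` library (with the gem already loaded):

    require 'logger'

    debug_log = Logger.new($stdout)
    debug_log.level = Logger::DEBUG
    Tapsoob.log = debug_log   # log.debug calls inside the operations now print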
@@ -0,0 +1,468 @@
+ # -*- encoding : utf-8 -*-
+ require 'sequel'
+
+ require 'tapsoob/progress_bar'
+ require 'tapsoob/schema'
+ require 'tapsoob/data_stream'
+
+ module Tapsoob
+   class Operation
+     attr_reader :database_url, :dump_path, :opts
+
+     def initialize(database_url, dump_path, opts = {})
+       @database_url = database_url
+       @dump_path = dump_path
+       @opts = opts
+       @exiting = false
+     end
+
+     def file_prefix
+       "op"
+     end
+
+     def skip_schema?
+       !!opts[:skip_schema]
+     end
+
+     def indexes_first?
+       !!opts[:indexes_first]
+     end
+
+     def table_filter
+       opts[:table_filter]
+     end
+
+     def exclude_tables
+       opts[:exclude_tables] || []
+     end
+
+     def apply_table_filter(tables)
+       # exclude_tables is always an array, so test emptiness; a bare
+       # truthiness check would never take this early return.
+       return tables unless table_filter || exclude_tables.any?
+
+       re = table_filter ? Regexp.new(table_filter) : nil
+       if tables.kind_of?(Hash)
+         ntables = {}
+         tables.each do |t, d|
+           if !exclude_tables.include?(t.to_s) && (!re || !re.match(t.to_s).nil?)
+             ntables[t] = d
+           end
+         end
+         ntables
+       else
+         tables.reject { |t| exclude_tables.include?(t.to_s) || (re && re.match(t.to_s).nil?) }
+       end
+     end
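+     # apply_table_filter example (names illustrative): with
+     # :table_filter => "^users" and :exclude_tables => ["users_archive"],
+     # %w[users users_archive posts] filters down to ["users"]: the regexp
+     # keeps matching names, then the exclusion list drops explicit entries.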
+
+     def log
+       Tapsoob.log
+     end
+
+     def store_session
+       file = "#{file_prefix}_#{Time.now.strftime("%Y%m%d%H%M")}.dat"
+       puts "\nSaving session to #{file}..."
+       File.open(file, 'w') do |f|
+         f.write(JSON.generate(to_hash))
+       end
+     end
+
+     def to_hash
+       {
+         :klass => self.class.to_s,
+         :database_url => database_url,
+         :stream_state => stream_state,
+         :completed_tables => completed_tables,
+         :table_filter => table_filter,
+       }
+     end
+
+     def exiting?
+       !!@exiting
+     end
+
+     def setup_signal_trap
+       trap("INT") {
+         puts "\nCompleting current action..."
+         @exiting = true
+       }
+
+       trap("TERM") {
+         puts "\nCompleting current action..."
+         @exiting = true
+       }
+     end
+
+     def resuming?
+       opts[:resume] == true
+     end
+
+     def default_chunksize
+       opts[:default_chunksize]
+     end
+
+     def completed_tables
+       opts[:completed_tables] ||= []
+     end
+
+     def stream_state
+       opts[:stream_state] ||= {}
+     end
+
+     def stream_state=(val)
+       opts[:stream_state] = val
+     end
+
+     def db
+       @db ||= Sequel.connect(database_url)
+     end
+
+     def format_number(num)
+       num.to_s.gsub(/(\d)(?=(\d\d\d)+(?!\d))/, "\\1,")
+     end
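+     # e.g. format_number(1234567) #=> "1,234,567" -- the lookahead matches
+     # each digit that is followed by a multiple of three digits.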
+
+     def catch_errors(&blk)
+       begin
+         blk.call
+       rescue Exception => e
+         raise e
+       end
+     end
+
+     def self.factory(type, database_url, dump_path, opts)
+       type = :resume if opts[:resume]
+       klass = case type
+       when :pull   then Tapsoob::Pull
+       when :push   then Tapsoob::Push
+       when :resume then eval(opts[:klass])
+       else raise "Unknown Operation Type -> #{type}"
+       end
+
+       klass.new(database_url, dump_path, opts)
+     end
+   end
+
+   class Pull < Operation
+     def file_prefix
+       "pull"
+     end
+
+     def to_hash
+       # Serialize the table counts under the same key the accessor reads back.
+       super.merge(:tables_info => tables_info)
+     end
+
+     def run
+       catch_errors do
+         unless resuming?
+           pull_schema if !skip_schema?
+           pull_indexes if indexes_first? && !skip_schema?
+         end
+         setup_signal_trap
+         pull_partial_data if resuming?
+         pull_data
+         pull_indexes if !indexes_first? && !skip_schema?
+         pull_reset_sequences
+       end
+     end
+
+     def pull_schema
+       puts "Receiving schema"
+
+       progress = ProgressBar.new('Schema', tables.size)
+       tables.each do |table_name, count|
+         schema_data = Tapsoob::Schema.dump_table(database_url, table_name)
+         log.debug "Table: #{table_name}\n#{schema_data}\n"
+         output = Tapsoob::Utils.export_schema(dump_path, table_name, schema_data)
+         puts output if output
+         progress.inc(1)
+       end
+       progress.finish
+     end
+
+     def pull_data
+       puts "Receiving data"
+
+       puts "#{tables.size} tables, #{format_number(record_count)} records"
+
+       tables.each do |table_name, count|
+         progress = ProgressBar.new(table_name.to_s, count)
+         stream = Tapsoob::DataStream.factory(db, {
+           :chunksize => default_chunksize,
+           :table_name => table_name
+         })
+         pull_data_from_table(stream, progress)
+       end
+     end
+
+     def pull_partial_data
+       return if stream_state == {}
+
+       table_name = stream_state[:table_name]
+       record_count = tables[table_name.to_s]
+       puts "Resuming #{table_name}, #{format_number(record_count)} records"
+
+       progress = ProgressBar.new(table_name.to_s, record_count)
+       stream = Tapsoob::DataStream.factory(db, stream_state)
+       pull_data_from_table(stream, progress)
+     end
+
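+     # Fetches chunks until the stream reports completion, saving the stream
+     # state into opts after every chunk so a run started with :resume can
+     # pick up from the last completed chunk.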
+     def pull_data_from_table(stream, progress)
+       loop do
+         begin
+           exit 0 if exiting?
+
+           size = stream.fetch_database(dump_path)
+           break if stream.complete?
+           progress.inc(size) unless exiting?
+           stream.error = false
+           self.stream_state = stream.to_hash
+         rescue Tapsoob::CorruptedData => e
+           puts "Corrupted Data Received #{e.message}, retrying..."
+           stream.error = true
+           next
+         end
+       end
+
+       progress.finish
+       completed_tables << stream.table_name.to_s
+       self.stream_state = {}
+     end
+
+     def tables
+       h = {}
+       tables_info.each do |table_name, count|
+         next if completed_tables.include?(table_name.to_s)
+         h[table_name.to_s] = count
+       end
+       h
+     end
+
+     def record_count
+       tables_info.values.inject(:+)
+     end
+
+     def tables_info
+       opts[:tables_info] ||= fetch_tables_info
+     end
+
+     def fetch_tables_info
+       tables = db.tables
+
+       data = {}
+       apply_table_filter(tables).each do |table_name|
+         data[table_name] = db[table_name].count
+       end
+       data
+     end
+
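+     # Stream selection: a table with a single integer primary key can be
+     # chunked by key (DataStreamKeyed); anything else falls back to the
+     # generic DataStream.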
+     def self.factory(db, state)
+       if defined?(Sequel::MySQL) && Sequel::MySQL.respond_to?(:convert_invalid_date_time=)
+         Sequel::MySQL.convert_invalid_date_time = :nil
+       end
+
+       if state.has_key?(:klass)
+         return eval(state[:klass]).new(db, state)
+       end
+
+       if Tapsoob::Utils.single_integer_primary_key(db, state[:table_name].to_sym)
+         DataStreamKeyed.new(db, state)
+       else
+         DataStream.new(db, state)
+       end
+     end
+
+     def pull_indexes
+       puts "Receiving indexes"
+
+       idxs = JSON.parse(Tapsoob::Utils.schema_bin(:indexes_individual, database_url))
+
+       apply_table_filter(idxs).each do |table, indexes|
+         next unless indexes.size > 0
+         progress = ProgressBar.new(table, indexes.size)
+         indexes.each do |idx|
+           output = Tapsoob::Utils.export_indexes(dump_path, table, idx)
+           puts output if output
+           progress.inc(1)
+         end
+         progress.finish
+       end
+     end
+
+     def pull_reset_sequences
+       puts "Resetting sequences"
+
+       output = Tapsoob::Utils.schema_bin(:reset_db_sequences, database_url)
+       puts output if output
+     end
+   end
+
+   class Push < Operation
+     def file_prefix
+       "push"
+     end
+
+     def to_hash
+       super.merge(:local_tables_info => local_tables_info)
+     end
+
+     def run
+       catch_errors do
+         unless resuming?
+           push_schema if !skip_schema?
+           push_indexes if indexes_first? && !skip_schema?
+         end
+         setup_signal_trap
+         push_partial_data if resuming?
+         push_data
+         push_indexes if !indexes_first? && !skip_schema?
+         push_reset_sequences
+       end
+     end
+
+     def push_indexes
+       idxs = {}
+       table_idxs = Dir.glob(File.join(dump_path, "indexes", "*.json")).map { |path| File.basename(path, '.json') }
+       table_idxs.each do |table_idx|
+         idxs[table_idx] = JSON.parse(File.read(File.join(dump_path, "indexes", "#{table_idx}.json")))
+       end
+
+       return unless idxs.size > 0
+
+       puts "Sending indexes"
+
+       apply_table_filter(idxs).each do |table, indexes|
+         next unless indexes.size > 0
+         progress = ProgressBar.new(table, indexes.size)
+         indexes.each do |idx|
+           Tapsoob::Utils.load_indexes(database_url, idx)
+           progress.inc(1)
+         end
+         progress.finish
+       end
+     end
+
+     def push_schema
+       puts "Sending schema"
+
+       progress = ProgressBar.new('Schema', tables.size)
+       tables.each do |table, count|
+         log.debug "Loading '#{table}' schema\n"
+         Tapsoob::Utils.load_schema(dump_path, database_url, table)
+         progress.inc(1)
+       end
+       progress.finish
+     end
+
+     def push_reset_sequences
+       puts "Resetting sequences"
+
+       Tapsoob::Utils.schema_bin(:reset_db_sequences, database_url)
+     end
+
+     def push_partial_data
+       return if stream_state == {}
+
+       table_name = stream_state[:table_name]
+       record_count = tables[table_name.to_s]
+       puts "Resuming #{table_name}, #{format_number(record_count)} records"
+       progress = ProgressBar.new(table_name.to_s, record_count)
+       stream = Tapsoob::DataStream.factory(db, stream_state)
+       push_data_from_file(stream, progress)
+     end
+
+     def push_data
+       puts "Sending data"
+
+       puts "#{tables.size} tables, #{format_number(record_count)} records"
+
+       tables.each do |table_name, count|
+         stream = Tapsoob::DataStream.factory(db,
+           :table_name => table_name,
+           :chunksize => default_chunksize)
+         progress = ProgressBar.new(table_name.to_s, count)
+         push_data_from_file(stream, progress)
+       end
+     end
+
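+     # Pushes a table from its dump file in adaptive chunks:
+     # calculate_chunksize times each round trip (c.time_delta / c.idle_secs)
+     # and adjusts the chunk size accordingly, while corrupted chunks are
+     # retried and duplicate-key chunks re-verified before continuing.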
+     def push_data_from_file(stream, progress)
+       loop do
+         if exiting?
+           store_session
+           exit 0
+         end
+
+         row_size = 0
+         chunksize = stream.state[:chunksize]
+
+         begin
+           chunksize = Tapsoob::Utils.calculate_chunksize(chunksize) do |c|
+             stream.state[:chunksize] = c.to_i
+             encoded_data, row_size, elapsed_time = nil
+             d1 = c.time_delta do
+               encoded_data, row_size, elapsed_time = stream.fetch({ :type => "file", :source => dump_path })
+             end
+             break if stream.complete?
+
+             data = nil
+             d2 = c.time_delta do
+               data = {
+                 :state => stream.to_hash,
+                 :checksum => Tapsoob::Utils.checksum(encoded_data).to_s
+               }
+             end
+
+             size = stream.fetch_data_in_database({ :encoded_data => encoded_data, :checksum => data[:checksum] })
+             self.stream_state = stream.to_hash
+
+             c.idle_secs = (d1 + d2)
+
+             elapsed_time
+           end
+         rescue Tapsoob::CorruptedData => e
+           # Retry the same chunk; it got corrupted somehow.
+           next
+         rescue Tapsoob::DuplicatePrimaryKeyError => e
+           # Verify the stream, then retry the chunk.
+           stream.verify_stream
+           next
+         end
+         stream.state[:chunksize] = chunksize
+
+         progress.inc(row_size)
+
+         stream.increment(row_size)
+         break if stream.complete?
+       end
+
+       progress.finish
+       completed_tables << stream.table_name.to_s
+       self.stream_state = {}
+     end
+
+     def local_tables_info
+       opts[:local_tables_info] ||= fetch_local_tables_info
+     end
+
+     def tables
+       h = {}
+       local_tables_info.each do |table_name, count|
+         next if completed_tables.include?(table_name.to_s)
+         h[table_name.to_s] = count
+       end
+       h
+     end
+
+     def record_count
+       @record_count ||= local_tables_info.values.inject(0) { |a, c| a + c }
+     end
+
+     def fetch_local_tables_info
+       tables_with_counts = {}
+       tbls = Dir.glob(File.join(dump_path, "data", "*")).map { |path| File.basename(path, ".json") }
+       tbls.each do |table|
+         data = JSON.parse(File.read(File.join(dump_path, "data", "#{table}.json")))
+         tables_with_counts[table] = data.size
+       end
+       apply_table_filter(tables_with_counts)
+     end
+   end
+ end
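
Putting it together, a minimal sketch of driving a pull through the factory. The connection URL and dump directory are illustrative, and `require 'tapsoob'` assumes the gem's top-level entry point:

    require 'tapsoob'

    op = Tapsoob::Operation.factory(:pull, "postgres://localhost/blog", "./dump",
                                    :default_chunksize => 1000)
    op.run

`Operation.factory` returns a `Tapsoob::Pull` here; passing `:resume => true` with a previously saved session hash instead reconstructs the operation class recorded under `:klass`.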