tapsoob 0.1.10

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.
@@ -0,0 +1,16 @@
+# -*- encoding : utf-8 -*-
+module Tapsoob
+  class BaseError < StandardError
+    attr_reader :original_backtrace
+
+    def initialize(message, opts = {})
+      @original_backtrace = opts.delete(:backtrace)
+      super(message)
+    end
+  end
+
+  class NotImplemented < BaseError; end
+  class DuplicatePrimaryKeyError < BaseError; end
+  class CorruptedData < BaseError; end
+  class InvalidData < BaseError; end
+end
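The hunk above adds tapsoob's exception hierarchy (by its contents, presumably lib/tapsoob/errors.rb). A minimal usage sketch, assuming the file is reachable as `tapsoob/errors`: the `:backtrace` option lets a caller preserve the backtrace of an error that was originally raised somewhere else, such as the other end of a dump/restore.

```ruby
require 'tapsoob/errors' # assumed require path for the file above

begin
  # Attach the current backtrace so it survives being re-raised later.
  raise Tapsoob::CorruptedData.new("chunk failed checksum", :backtrace => caller)
rescue Tapsoob::BaseError => e
  puts e.message            # => "chunk failed checksum"
  puts e.original_backtrace # backtrace captured at the original raise site
end
```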
@@ -0,0 +1,16 @@
+# -*- encoding : utf-8 -*-
+module Tapsoob
+  def self.log=(log)
+    @@log = log
+  end
+
+  def self.log
+    @@log ||= begin
+      require 'logger'
+      log = Logger.new($stderr)
+      log.level = Logger::ERROR
+      log.datetime_format = "%Y-%m-%d %H:%M:%S"
+      log
+    end
+  end
+end
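This second hunk (presumably lib/tapsoob/log.rb) gives the Tapsoob module a lazily built logger: the first call to `Tapsoob.log` creates a `$stderr` Logger at ERROR level, and `Tapsoob.log=` swaps in a custom one. A short sketch of both paths:

```ruby
require 'logger'

Tapsoob.log.error("dump failed")   # default: lazily built $stderr logger, ERROR level

# Swap in a custom logger, e.g. for verbose debugging to a file.
Tapsoob.log = Logger.new("tapsoob-debug.log")
Tapsoob.log.level = Logger::DEBUG
```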
@@ -0,0 +1,468 @@
+# -*- encoding : utf-8 -*-
+require 'json'
+require 'sequel'
+
+require 'tapsoob/progress_bar'
+require 'tapsoob/schema'
+require 'tapsoob/data_stream'
+
+module Tapsoob
+  class Operation
+    attr_reader :database_url, :dump_path, :opts
+
+    def initialize(database_url, dump_path, opts={})
+      @database_url = database_url
+      @dump_path = dump_path
+      @opts = opts
+      @exiting = false
+    end
+
+    def file_prefix
+      "op"
+    end
+
+    def skip_schema?
+      !!opts[:skip_schema]
+    end
+
+    def indexes_first?
+      !!opts[:indexes_first]
+    end
+
+    def table_filter
+      opts[:table_filter]
+    end
+
+    def exclude_tables
+      opts[:exclude_tables] || []
+    end
+
+    def apply_table_filter(tables)
+      return tables unless table_filter || exclude_tables.any?
+
+      re = table_filter ? Regexp.new(table_filter) : nil
+      if tables.kind_of?(Hash)
+        ntables = {}
+        tables.each do |t, d|
+          if !exclude_tables.include?(t.to_s) && (!re || !re.match(t.to_s).nil?)
+            ntables[t] = d
+          end
+        end
+        ntables
+      else
+        tables.reject { |t| exclude_tables.include?(t.to_s) || (re && re.match(t.to_s).nil?) }
+      end
+    end
+
+    def log
+      Tapsoob.log
+    end
+
+    def store_session
+      file = "#{file_prefix}_#{Time.now.strftime("%Y%m%d%H%M")}.dat"
+      puts "\nSaving session to #{file}..."
+      File.open(file, 'w') do |f|
+        f.write(JSON.generate(to_hash))
+      end
+    end
+
+    def to_hash
+      {
+        :klass => self.class.to_s,
+        :database_url => database_url,
+        :stream_state => stream_state,
+        :completed_tables => completed_tables,
+        :table_filter => table_filter,
+      }
+    end
+
+    def exiting?
+      !!@exiting
+    end
+
+    def setup_signal_trap
+      trap("INT") {
+        puts "\nCompleting current action..."
+        @exiting = true
+      }
+
+      trap("TERM") {
+        puts "\nCompleting current action..."
+        @exiting = true
+      }
+    end
+
+    def resuming?
+      opts[:resume] == true
+    end
+
+    def default_chunksize
+      opts[:default_chunksize]
+    end
+
+    def completed_tables
+      opts[:completed_tables] ||= []
+    end
+
+    def stream_state
+      opts[:stream_state] ||= {}
+    end
+
+    def stream_state=(val)
+      opts[:stream_state] = val
+    end
+
+    def db
+      @db ||= Sequel.connect(database_url)
+    end
+
+    def format_number(num)
+      num.to_s.gsub(/(\d)(?=(\d\d\d)+(?!\d))/, "\\1,")
+    end
+
+    def catch_errors(&blk)
+      begin
+        blk.call
+      rescue Exception => e
+        raise e
+      end
+    end
+
+    def self.factory(type, database_url, dump_path, opts)
+      type = :resume if opts[:resume]
+      klass = case type
+        when :pull then Tapsoob::Pull
+        when :push then Tapsoob::Push
+        when :resume then eval(opts[:klass])
+        else raise "Unknown Operation Type -> #{type}"
+      end
+
+      klass.new(database_url, dump_path, opts)
+    end
+  end
+
+  class Pull < Operation
+    def file_prefix
+      "pull"
+    end
+
+    def to_hash
+      super.merge(:remote_tables_info => tables_info)
+    end
+
+    def run
+      catch_errors do
+        unless resuming?
+          pull_schema if !skip_schema?
+          pull_indexes if indexes_first? && !skip_schema?
+        end
+        setup_signal_trap
+        pull_partial_data if resuming?
+        pull_data
+        pull_indexes if !indexes_first? && !skip_schema?
+        pull_reset_sequences
+      end
+    end
+
+    def pull_schema
+      puts "Receiving schema"
+
+      progress = ProgressBar.new('Schema', tables.size)
+      tables.each do |table_name, count|
+        schema_data = Tapsoob::Schema.dump_table(database_url, table_name)
+        log.debug "Table: #{table_name}\n#{schema_data}\n"
+        output = Tapsoob::Utils.export_schema(dump_path, table_name, schema_data)
+        puts output if output
+        progress.inc(1)
+      end
+      progress.finish
+    end
+
+    def pull_data
+      puts "Receiving data"
+
+      puts "#{tables.size} tables, #{format_number(record_count)} records"
+
+      tables.each do |table_name, count|
+        progress = ProgressBar.new(table_name.to_s, count)
+        stream = Tapsoob::DataStream.factory(db, {
+          :chunksize => default_chunksize,
+          :table_name => table_name
+        })
+        pull_data_from_table(stream, progress)
+      end
+    end
+
+    def pull_partial_data
+      return if stream_state == {}
+
+      table_name = stream_state[:table_name]
+      record_count = tables[table_name.to_s]
+      puts "Resuming #{table_name}, #{format_number(record_count)} records"
+
+      progress = ProgressBar.new(table_name.to_s, record_count)
+      stream = Tapsoob::DataStream.factory(db, stream_state)
+      pull_data_from_table(stream, progress)
+    end
+
+    def pull_data_from_table(stream, progress)
+      loop do
+        begin
+          exit 0 if exiting?
+
+          size = stream.fetch_database(dump_path)
+          break if stream.complete?
+          progress.inc(size) unless exiting?
+          stream.error = false
+          self.stream_state = stream.to_hash
+        rescue Tapsoob::CorruptedData => e
+          puts "Corrupted Data Received #{e.message}, retrying..."
+          stream.error = true
+          next
+        end
+      end
+
+      progress.finish
+      completed_tables << stream.table_name.to_s
+      self.stream_state = {}
+    end
+
+    def tables
+      h = {}
+      tables_info.each do |table_name, count|
+        next if completed_tables.include?(table_name.to_s)
+        h[table_name.to_s] = count
+      end
+      h
+    end
+
+    def record_count
+      tables_info.values.inject(:+)
+    end
+
+    def tables_info
+      opts[:tables_info] ||= fetch_tables_info
+    end
+
+    def fetch_tables_info
+      tables = db.tables
+
+      data = {}
+      apply_table_filter(tables).each do |table_name|
+        data[table_name] = db[table_name].count
+      end
+      data
+    end
+
+    def self.factory(db, state)
+      if defined?(Sequel::MySQL) && Sequel::MySQL.respond_to?(:convert_invalid_date_time=)
+        Sequel::MySQL.convert_invalid_date_time = :nil
+      end
+
+      if state.has_key?(:klass)
+        return eval(state[:klass]).new(db, state)
+      end
+
+      if Tapsoob::Utils.single_integer_primary_key(db, state[:table_name].to_sym)
+        DataStreamKeyed.new(db, state)
+      else
+        DataStream.new(db, state)
+      end
+    end
+
+    def pull_indexes
+      puts "Receiving indexes"
+
+      idxs = JSON.parse(Tapsoob::Utils.schema_bin(:indexes_individual, database_url))
+
+      apply_table_filter(idxs).each do |table, indexes|
+        next unless indexes.size > 0
+        progress = ProgressBar.new(table, indexes.size)
+        indexes.each do |idx|
+          output = Tapsoob::Utils.export_indexes(dump_path, table, idx)
+          puts output if output
+          progress.inc(1)
+        end
+        progress.finish
+      end
+    end
+
+    def pull_reset_sequences
+      puts "Resetting sequences"
+
+      output = Tapsoob::Utils.schema_bin(:reset_db_sequences, database_url)
+      puts output if output
+    end
+  end
+
+  class Push < Operation
+    def file_prefix
+      "push"
+    end
+
+    def to_hash
+      super.merge(:local_tables_info => local_tables_info)
+    end
+
+    def run
+      catch_errors do
+        unless resuming?
+          push_schema if !skip_schema?
+          push_indexes if indexes_first? && !skip_schema?
+        end
+        setup_signal_trap
+        push_partial_data if resuming?
+        push_data
+        push_indexes if !indexes_first? && !skip_schema?
+        push_reset_sequences
+      end
+    end
+
+    def push_indexes
+      idxs = {}
+      table_idxs = Dir.glob(File.join(dump_path, "indexes", "*.json")).map { |path| File.basename(path, '.json') }
+      table_idxs.each do |table_idx|
+        idxs[table_idx] = JSON.parse(File.read(File.join(dump_path, "indexes", "#{table_idx}.json")))
+      end
+
+      return unless idxs.size > 0
+
+      puts "Sending indexes"
+
+      apply_table_filter(idxs).each do |table, indexes|
+        next unless indexes.size > 0
+        progress = ProgressBar.new(table, indexes.size)
+        indexes.each do |idx|
+          Tapsoob::Utils.load_indexes(database_url, idx)
+          progress.inc(1)
+        end
+        progress.finish
+      end
+    end
+
+    def push_schema
+      puts "Sending schema"
+
+      progress = ProgressBar.new('Schema', tables.size)
+      tables.each do |table, count|
+        log.debug "Loading '#{table}' schema\n"
+        Tapsoob::Utils.load_schema(dump_path, database_url, table)
+        progress.inc(1)
+      end
+      progress.finish
+    end
+
+    def push_reset_sequences
+      puts "Resetting sequences"
+
+      Tapsoob::Utils.schema_bin(:reset_db_sequences, database_url)
+    end
+
+    def push_partial_data
+      return if stream_state == {}
+
+      table_name = stream_state[:table_name]
+      record_count = tables[table_name.to_s]
+      puts "Resuming #{table_name}, #{format_number(record_count)} records"
+      progress = ProgressBar.new(table_name.to_s, record_count)
+      stream = Tapsoob::DataStream.factory(db, stream_state)
+      push_data_from_file(stream, progress)
+    end
+
+    def push_data
+      puts "Sending data"
+
+      puts "#{tables.size} tables, #{format_number(record_count)} records"
+
+      tables.each do |table_name, count|
+        stream = Tapsoob::DataStream.factory(db,
+          :table_name => table_name,
+          :chunksize => default_chunksize)
+        progress = ProgressBar.new(table_name.to_s, count)
+        push_data_from_file(stream, progress)
+      end
+    end
+
+    def push_data_from_file(stream, progress)
+      loop do
+        if exiting?
+          store_session
+          exit 0
+        end
+
+        row_size = 0
+        chunksize = stream.state[:chunksize]
+
+        begin
+          chunksize = Tapsoob::Utils.calculate_chunksize(chunksize) do |c|
+            stream.state[:chunksize] = c.to_i
+            encoded_data, row_size, elapsed_time = nil
+            d1 = c.time_delta do
+              encoded_data, row_size, elapsed_time = stream.fetch({ :type => "file", :source => dump_path })
+            end
+            break if stream.complete?
+
+            data = nil
+            d2 = c.time_delta do
+              data = {
+                :state => stream.to_hash,
+                :checksum => Tapsoob::Utils.checksum(encoded_data).to_s
+              }
+            end
+
+            size = stream.fetch_data_in_database({ :encoded_data => encoded_data, :checksum => data[:checksum] })
+            self.stream_state = stream.to_hash
+
+            c.idle_secs = (d1 + d2)
+
+            elapsed_time
+          end
+        rescue Tapsoob::CorruptedData => e
+          # retry the same data, it got corrupted somehow.
+          next
+        rescue Tapsoob::DuplicatePrimaryKeyError => e
+          # verify the stream and retry it
+          stream.verify_stream
+          next
+        end
+        stream.state[:chunksize] = chunksize
+
+        progress.inc(row_size)
+
+        stream.increment(row_size)
+        break if stream.complete?
+      end
+
+      progress.finish
+      completed_tables << stream.table_name.to_s
+      self.stream_state = {}
+    end
+
+    def local_tables_info
+      opts[:local_tables_info] ||= fetch_local_tables_info
+    end
+
+    def tables
+      h = {}
+      local_tables_info.each do |table_name, count|
+        next if completed_tables.include?(table_name.to_s)
+        h[table_name.to_s] = count
+      end
+      h
+    end
+
+    def record_count
+      @record_count ||= local_tables_info.values.inject(0) { |a,c| a += c }
+    end
+
+    def fetch_local_tables_info
+      tables_with_counts = {}
+      tbls = Dir.glob(File.join(dump_path, "data", "*")).map { |path| File.basename(path, ".json") }
+      tbls.each do |table|
+        data = JSON.parse(File.read(File.join(dump_path, "data", "#{table}.json")))
+        tables_with_counts[table] = data.size
+      end
+      apply_table_filter(tables_with_counts)
+    end
+  end
+end
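The final hunk (presumably lib/tapsoob/operation.rb) adds the Operation base class plus the Pull and Push subclasses that drive a dump or restore: `Operation.factory` picks the subclass, `run` walks schema, data, indexes, and sequences, and a SIGINT/SIGTERM trap lets `store_session` write a resumable session file. A hypothetical invocation sketch, with a made-up connection URL and dump path; the option keys are the ones the accessors above read:

```ruby
require 'tapsoob' # assumed top-level require for the gem

# Pull everything except schema_migrations into a local dump directory.
op = Tapsoob::Operation.factory(:pull, "postgres://user:pass@localhost/mydb", "/tmp/dump", {
  :default_chunksize => 1000,
  :exclude_tables    => ["schema_migrations"]
})
op.run
```

On interrupt, `store_session` writes a `pull_<timestamp>.dat` JSON file whose `:klass` field lets `factory` rebuild the same operation when it is later invoked with `:resume => true`.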