taps 0.2.26 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,5 +1,3 @@
1
- require 'thread'
2
-
3
1
  Sequel::Model.db = Sequel.connect(Taps::Config.taps_database_url)
4
2
 
5
3
  class DbSession < Sequel::Model
@@ -12,51 +10,11 @@ class DbSession < Sequel::Model
12
10
  timestamp :last_access
13
11
  end
14
12
 
15
- @@connections = {}
16
- @@mutex = Mutex.new
17
-
18
- def connection
19
- @@mutex.synchronize {
20
- conn =
21
- if @@connections.key?(key)
22
- @@connections[key].first
23
- else
24
- Sequel.connect(database_url)
25
- end
26
- @@connections[key] = [conn, Time.now]
27
- return conn
28
- }
29
- end
30
-
31
- def disconnect
32
- @@mutex.synchronize {
33
- if @@connections.key?(key)
34
- conn, time = @@connections.delete(key)
35
- conn.disconnect
36
- end
37
- }
38
- end
39
-
40
- # Removes connections that have not been accessed within the
41
- # past thirty seconds.
42
- def self.cleanup
43
- @@mutex.synchronize {
44
- now = Time.now
45
- @@connections.each do |key, (conn, time)|
46
- if now - time > 30
47
- @@connections.delete(key)
48
- conn.disconnect
49
- end
50
- end
51
- }
52
- end
53
-
54
- Thread.new {
55
- while true
56
- sleep 30
57
- cleanup
13
+ def conn
14
+ Sequel.connect(database_url) do |db|
15
+ yield db if block_given?
58
16
  end
59
- }.run
17
+ end
60
18
  end
61
19
 
62
20
  DbSession.create_table! unless DbSession.table_exists?
data/lib/taps/log.rb ADDED
@@ -0,0 +1,15 @@
1
module Taps
  # Installs +log+ as the object returned by subsequent calls to Taps.log.
  def self.log=(log)
    @@log = log
  end

  # Returns the shared logger. When none has been installed yet, a Logger
  # writing to $stderr at ERROR level is created, stored and returned.
  # 'logger' is required lazily, only when that default has to be built.
  def self.log
    return @@log if defined?(@@log) && @@log

    require 'logger'
    default_logger = Logger.new($stderr)
    default_logger.level = Logger::ERROR
    default_logger.datetime_format = "%Y-%m-%d %H:%M:%S"
    @@log = default_logger
  end
end
@@ -0,0 +1,21 @@
1
# Core extension: helpers that convert Hash keys to symbols.
class Hash
  # Returns a new Hash with every key converted via +to_sym+.
  # A key is kept unchanged when +to_sym+ raises or returns nil.
  # Values are copied as-is (nested hashes are not touched).
  def symbolize_keys
    inject({}) do |options, (key, value)|
      # Inline rescue is deliberate: any key that cannot become a symbol
      # falls back to the original key.
      options[(key.to_sym rescue key) || key] = value
      options
    end
  end

  # In-place variant of #symbolize_keys. Returns self.
  def symbolize_keys!
    replace(symbolize_keys)
  end

  # Symbolizes the keys of this hash and of every Hash nested (at any depth)
  # directly as a value. Returns self.
  #
  # Previously only the first nesting level was converted, because nested
  # hashes received #symbolize_keys! instead of recursing.
  def symbolize_recursively!
    replace(symbolize_keys)
    each_value do |value|
      value.symbolize_recursively! if value.kind_of?(Hash)
    end
    self
  end
end
@@ -0,0 +1,73 @@
1
+ require 'restclient'
2
+ require 'rack/utils'
3
+ require 'json'
4
+ require 'stringio'
5
+
6
module Taps
  # A single named part of a multipart body. The part is described by an
  # options hash (:name, :payload, :content_type, :original_filename).
  # NOTE(review): the reader methods below look like the interface
  # RestClient::Payload::Multipart expects of each value -- confirm.
  class Multipart
    # Collects parts and renders them into one multipart body.
    class Container
      attr_accessor :attachments

      def initialize
        @attachments = []
      end

      # Builds a Taps::Multipart from +opts+ and appends it to the list.
      # Returns the attachments array.
      def attach(opts)
        mp = Taps::Multipart.new(opts)
        attachments << mp
      end

      # Returns [body, content_type] for all attached parts, keyed by name.
      # Parts sharing a name overwrite each other (last one wins).
      def generate
        hash = {}
        attachments.each { |mp| hash[mp.name] = mp }
        m = RestClient::Payload::Multipart.new(hash)
        [m.to_s, m.headers['Content-Type']]
      end
    end

    attr_reader :opts

    def initialize(opts={})
      @opts = opts
    end

    def name
      opts[:name]
    end

    # The raw payload of this part.
    def to_s
      opts[:payload]
    end

    # Declared content type, defaulting to 'text/plain'.
    def content_type
      opts[:content_type] || 'text/plain'
    end

    def original_filename
      opts[:original_filename]
    end

    # Yields a fresh Container so the caller can attach parts, then returns
    # the Container#generate result ([body, content_type]).
    def self.create
      c = Taps::Multipart::Container.new
      yield c
      c.generate
    end

    # Parses a multipart rest-client response into a params hash with
    # symbolized top-level keys.
    #
    # response - object responding to #to_s (the body) and #headers
    #            (a hash containing :content_type).
    def self.parse(response)
      content = response.to_s
      # CONTENT_LENGTH is a byte count; String#size counts characters on
      # encoding-aware Rubies and under-reports multibyte bodies.
      length = content.respond_to?(:bytesize) ? content.bytesize : content.size
      env = {
        'CONTENT_TYPE' => response.headers[:content_type],
        'CONTENT_LENGTH' => length,
        'rack.input' => StringIO.new(content)
      }

      params = Rack::Utils::Multipart.parse_multipart(env)
      params.symbolize_keys!
      params
    end

  end
end
@@ -0,0 +1,537 @@
1
+ require 'rest_client'
2
+ require 'sequel'
3
+ require 'zlib'
4
+
5
+ require 'taps/progress_bar'
6
+ require 'taps/config'
7
+ require 'taps/utils'
8
+ require 'taps/data_stream'
9
+
10
+ # disable warnings, rest client makes a lot of noise right now
11
+ $VERBOSE = nil
12
+
13
+ module Taps
14
+
15
# Base class for a transfer between a local database and a remote taps
# server. It keeps the two URLs, an options hash that doubles as resumable
# state, and helpers for the HTTP session. Subclasses supply the actual
# transfer steps.
#
# Option keys read here: :session_uri, :table_filter, :resume,
# :default_chunksize, :completed_tables, :stream_state,
# :disable_compression and (when resuming) :klass.
class Operation
  attr_reader :database_url, :remote_url, :opts
  attr_reader :session_uri

  def initialize(database_url, remote_url, opts={})
    @database_url = database_url
    @remote_url = remote_url
    @opts = opts
    @exiting = false
    @session_uri = opts[:session_uri]
  end

  # Prefix of the session file written by #store_session.
  def file_prefix
    "op"
  end

  # Regexp source used to restrict the tables handled, or nil for no filter.
  def table_filter
    opts[:table_filter]
  end

  # Keeps only the entries whose table name matches #table_filter.
  # Accepts a Hash keyed by table name (returns a new Hash) or any other
  # collection of names (returns the result of #reject). Without a filter
  # +tables+ is returned untouched.
  def apply_table_filter(tables)
    return tables unless table_filter
    re = Regexp.new(table_filter)
    if tables.kind_of?(Hash)
      ntables = {}
      tables.each do |t, d|
        ntables[t] = d if re.match(t.to_s)
      end
      ntables
    else
      tables.reject { |t| re.match(t.to_s).nil? }
    end
  end

  def log
    Taps.log
  end

  # Writes #to_hash as JSON to "<file_prefix>_<YYYYmmddHHMM>.dat" in the
  # current directory so the operation can be resumed later.
  def store_session
    file = "#{file_prefix}_#{Time.now.strftime("%Y%m%d%H%M")}.dat"
    puts "Saving session to #{file}.."
    File.open(file, 'w') do |f|
      f.write(to_hash.to_json)
    end
  end

  # Serializable snapshot of the operation state.
  def to_hash
    {
      :klass => self.class.to_s,
      :database_url => database_url,
      :remote_url => remote_url,
      :session_uri => session_uri,
      :stream_state => stream_state,
      :completed_tables => completed_tables,
      :table_filter => table_filter,
    }
  end

  # True once an INT/TERM signal has been received (see #setup_signal_trap).
  def exiting?
    !!@exiting
  end

  # On INT or TERM, only set the exiting flag; the transfer loops check
  # #exiting? themselves.
  def setup_signal_trap
    %w[INT TERM].each do |signal|
      trap(signal) {
        puts "\nCompleting current action..."
        @exiting = true
      }
    end
  end

  def resuming?
    opts[:resume] == true
  end

  def default_chunksize
    opts[:default_chunksize]
  end

  # Names of tables already transferred; stored in opts so it is persisted.
  def completed_tables
    opts[:completed_tables] ||= []
  end

  # State hash of the stream currently in progress ({} when none).
  def stream_state
    opts[:stream_state] ||= {}
  end

  def stream_state=(val)
    opts[:stream_state] = val
  end

  def compression_disabled?
    !!opts[:disable_compression]
  end

  # Memoized Sequel connection to the local database.
  def db
    @db ||= Sequel.connect(database_url)
  end

  # Memoized RestClient resource for the remote server.
  def server
    @server ||= RestClient::Resource.new(remote_url)
  end

  # Resource for the current session. When no session URI is known yet, one
  # is obtained by POSTing to 'sessions' on the server.
  def session_resource
    @session_resource ||= begin
      @session_uri ||= server['sessions'].post('', http_headers).to_s
      server[@session_uri]
    end
  end

  # Points this operation at an existing session.
  # The URI is stored in @session_uri (it used to be assigned to a local
  # variable and was therefore lost to #session_uri and #to_hash).
  def set_session(uri)
    @session_uri = uri
    @session_resource = server[uri]
  end

  # Deletes the remote session if one has been opened.
  def close_session
    @session_resource.delete(http_headers) if @session_resource
  end

  # Returns +url+ with the password portion replaced by "[hidden]".
  def safe_url(url)
    url.sub(/\/\/(.+?)?:(.*?)@/, '//\1:[hidden]@')
  end

  def safe_remote_url
    safe_url(remote_url)
  end

  def safe_database_url
    safe_url(database_url)
  end

  # Headers sent with every request: the compatible taps version and an
  # Accept-Encoding value (empty when compression is disabled), merged
  # with +extra+ (which wins on conflicts).
  def http_headers(extra = {})
    base = { :taps_version => Taps.compatible_version }
    base[:accept_encoding] = compression_disabled? ? "" : "gzip, deflate"
    base.merge(extra)
  end

  # Formats an integer with thousands separators, e.g. 1234567 -> "1,234,567".
  def format_number(num)
    num.to_s.gsub(/(\d)(?=(\d\d\d)+(?!\d))/, "\\1,")
  end

  # Issues a GET to the server root and exits the process with status 1 on
  # bad credentials, a 417 response (reported as a version mismatch) or a
  # refused connection. Other request failures are re-raised.
  def verify_server
    server['/'].get(http_headers)
  rescue RestClient::Unauthorized
    # Handled before RequestFailed: where Unauthorized is a subclass of
    # RequestFailed the broader clause would otherwise shadow this one.
    puts "Bad credentials given for #{safe_remote_url}"
    exit(1)
  rescue RestClient::RequestFailed => e
    raise unless e.http_code == 417
    puts "#{safe_remote_url} is running a different minor version of taps."
    puts "#{e.response.to_s}"
    exit(1)
  rescue Errno::ECONNREFUSED
    puts "Can't connect to #{safe_remote_url}. Please check that it's running"
    exit(1)
  end

  # Builds the operation for +type+ (:pull or :push). When opts[:resume] is
  # set, the class named in opts[:klass] is used instead.
  # Raises RuntimeError for an unknown type.
  def self.factory(type, database_url, remote_url, opts)
    type = :resume if opts[:resume]
    klass = case type
      when :pull then Taps::Pull
      when :push then Taps::Push
      # SECURITY(review): opts[:klass] is evaluated as Ruby code. If it is
      # read from a session file, anyone able to edit that file can execute
      # arbitrary code; consider an explicit whitelist of class names.
      when :resume then eval(opts[:klass])
      else raise "Unknown Operation Type -> #{type}"
    end

    klass.new(database_url, remote_url, opts)
  end
end
197
+
198
# Operation that copies schema, data and indexes from the remote taps
# server into the local database.
class Pull < Operation
  def file_prefix
    "pull"
  end

  # Adds the remote table/count map to the persisted session state.
  def to_hash
    super.merge(:remote_tables_info => remote_tables_info)
  end

  # Runs the whole pull. The schema is only pulled on a fresh run; on resume
  # the interrupted table is finished first. On a RestClient error the
  # session is stored; errors carrying a response are printed and the
  # process exits with status 1, others are re-raised.
  def run
    verify_server

    begin
      pull_schema unless resuming?

      setup_signal_trap

      pull_partial_data if resuming?

      pull_data
      pull_indexes
      pull_reset_sequences
      close_session
    rescue RestClient::Exception => e
      store_session
      if e.respond_to?(:response)
        puts "!!! Caught Server Exception"
        puts "HTTP CODE: #{e.http_code}"
        puts "#{e.response.to_s}"
        exit(1)
      else
        raise
      end
    end
  end

  # Requests the schema of each pending table and loads it locally.
  def pull_schema
    puts "Receiving schema"

    tables.each do |table_name, count|
      schema_data = session_resource['pull/schema'].post({:table_name => table_name}, http_headers).to_s
      output = Taps::Utils.load_schema(database_url, schema_data)
      puts output if output
    end
  end

  # Streams every pending table from the server.
  def pull_data
    puts "Receiving data"

    puts "#{tables.size} tables, #{format_number(record_count)} records"

    tables.each do |table_name, count|
      progress = ProgressBar.new(table_name.to_s, count)
      stream = Taps::DataStream.factory(db, {
        :chunksize => default_chunksize,
        :table_name => table_name
      })
      pull_data_from_table(stream, progress)
    end
  end

  # Continues the table described by the saved stream state, if any.
  def pull_partial_data
    return if stream_state == {}

    table_name = stream_state[:table_name]
    table_records = tables[table_name.to_s]
    puts "Resuming #{table_name}, #{format_number(table_records)} records"

    progress = ProgressBar.new(table_name.to_s, table_records)
    stream = Taps::DataStream.factory(db, stream_state)
    pull_data_from_table(stream, progress)
  end

  # Fetches chunks for one stream until it reports completion, saving the
  # stream state after each chunk. Corrupted chunks are fetched again. When
  # an exit was requested, the session is stored and the process exits 0.
  def pull_data_from_table(stream, progress)
    loop do
      begin
        if exiting?
          store_session
          exit 0
        end

        size = stream.fetch_remote(session_resource['pull/table'], http_headers)
        break if stream.complete?
        progress.inc(size) unless exiting?
        stream.error = false
        self.stream_state = stream.to_hash
      rescue Taps::DataStream::CorruptedData => e
        puts "Corrupted Data Received #{e.message}, retrying..."
        stream.error = true
        next
      end
    end

    progress.finish
    completed_tables << stream.table_name.to_s
    self.stream_state = {}
  end

  # Hash of table name (String) => record count for tables not yet completed.
  def tables
    h = {}
    remote_tables_info.each do |table_name, count|
      next if completed_tables.include?(table_name.to_s)
      h[table_name.to_s] = count
    end
    h
  end

  # Total record count over all remote tables (memoized).
  def record_count
    @record_count ||= remote_tables_info.values.inject(0) { |sum, count| sum + count }
  end

  # Table/count map; fetched once and stored in opts so it is persisted.
  def remote_tables_info
    opts[:remote_tables_info] ||= fetch_remote_tables_info
  end

  # Asks the server for its table names, applies the table filter and then
  # asks for the record count of each remaining table.
  # Returns a Hash of table name => count.
  def fetch_remote_tables_info
    table_names = retry_remote_fetch do
      JSON.load(session_resource['pull/table_names'].get(http_headers).to_s)
    end

    data = {}
    apply_table_filter(table_names).each do |table_name|
      data[table_name] = retry_remote_fetch do
        session_resource['pull/table_count'].post({:table => table_name}, http_headers).to_s.to_i
      end
    end
    data
  end

  # Loads the server's indexes (filtered by table) into the local database.
  def pull_indexes
    puts "Receiving indexes"

    idxs = JSON.parse(session_resource['pull/indexes'].get(http_headers).to_s)

    apply_table_filter(idxs).each do |table, indexes|
      next unless indexes.size > 0
      progress = ProgressBar.new(table, indexes.size)
      indexes.each do |idx|
        output = Taps::Utils.load_indexes(database_url, idx)
        puts output if output
        progress.inc(1)
      end
      progress.finish
    end
  end

  def pull_reset_sequences
    puts "Resetting sequences"

    output = Taps::Utils.schema_bin(:reset_db_sequences, database_url)
    puts output if output
  end

  private

  # Runs the block, retrying up to +max_retries+ times on
  # RestClient::Exception. After the last failure a message is printed and
  # the process exits with status 1. The message uses the sanitized URL so
  # credentials embedded in remote_url are not echoed to the terminal.
  def retry_remote_fetch(max_retries = 10)
    retries = 0
    begin
      yield
    rescue RestClient::Exception
      retries += 1
      retry if retries <= max_retries
      puts "Unable to fetch tables information from #{safe_remote_url}. Please check the server log."
      exit(1)
    end
  end
end
365
+
366
# Operation that sends schema, data and indexes from the local database to
# the remote taps server.
class Push < Operation
  def file_prefix
    "push"
  end

  # Adds the local table/count map to the persisted session state.
  def to_hash
    super.merge(:local_tables_info => local_tables_info)
  end

  # Runs the whole push. The schema is only sent on a fresh run; on resume
  # the interrupted table is finished first. On a RestClient error the
  # session is stored; errors carrying a response are printed and the
  # process exits with status 1, others are re-raised.
  def run
    verify_server
    begin
      push_schema unless resuming?

      setup_signal_trap

      push_partial_data if resuming?

      push_data
      push_indexes
      push_reset_sequences
      close_session
    rescue RestClient::Exception => e
      store_session
      raise unless e.respond_to?(:response)

      puts "!!! Caught Server Exception"
      puts "HTTP CODE: #{e.http_code}"
      puts "#{e.response.to_s}"
      exit(1)
    end
  end

  # Sends every local index (filtered by table) to the server, one POST each.
  def push_indexes
    index_json = Taps::Utils.schema_bin(:indexes_individual, database_url)
    idxs = JSON.parse(index_json)

    return unless idxs.size > 0

    puts "Sending indexes"

    apply_table_filter(idxs).each do |table, indexes|
      bar = ProgressBar.new(table, indexes.size)
      indexes.each do |idx|
        session_resource['push/indexes'].post(idx, http_headers)
        bar.inc(1)
      end
      bar.finish
    end
  end

  # Dumps the schema of each pending table and posts it to the server.
  def push_schema
    puts "Sending schema"

    tables.each do |table, _count|
      dumped = Taps::Utils.schema_bin(:dump_table, database_url, table)
      session_resource['push/schema'].post(dumped, http_headers)
    end
  end

  def push_reset_sequences
    puts "Resetting sequences"

    session_resource['push/reset_sequences'].post('', http_headers)
  end

  # Continues the table described by the saved stream state, if any.
  def push_partial_data
    return if stream_state == {}

    table_name = stream_state[:table_name]
    table_records = tables[table_name.to_s]
    puts "Resuming #{table_name}, #{format_number(table_records)} records"
    bar = ProgressBar.new(table_name.to_s, table_records)
    resumed_stream = Taps::DataStream.factory(db, stream_state)
    push_data_from_table(resumed_stream, bar)
  end

  # Streams every pending table to the server.
  def push_data
    puts "Sending data"

    puts "#{tables.size} tables, #{format_number(record_count)} records"

    tables.each do |table_name, count|
      stream_opts = { :table_name => table_name, :chunksize => default_chunksize }
      stream = Taps::DataStream.factory(db, stream_opts)
      bar = ProgressBar.new(table_name.to_s, count)
      push_data_from_table(stream, bar)
    end
  end

  # Sends one stream chunk by chunk until it reports completion. Each chunk
  # is posted as a multipart body (encoded data plus a JSON part holding the
  # stream state and a checksum). The chunk size is managed through
  # Taps::Utils.calculate_chunksize, whose block returns the elapsed time
  # reported by stream.fetch. When an exit was requested, the session is
  # stored and the process exits 0.
  def push_data_from_table(stream, progress)
    loop do
      if exiting?
        store_session
        exit 0
      end

      # Declared outside the block so the value fetched inside is visible
      # after calculate_chunksize returns.
      row_size = 0
      next_chunksize = Taps::Utils.calculate_chunksize(stream.state[:chunksize]) do |c|
        stream.state[:chunksize] = c
        encoded_data, row_size, elapsed_time = stream.fetch
        # Leaves calculate_chunksize itself (which then yields nil).
        break if stream.complete?

        data = {
          :state => stream.to_hash,
          :checksum => Taps::Utils.checksum(encoded_data).to_s
        }

        begin
          content, content_type = Taps::Multipart.create do |r|
            r.attach :name => :encoded_data,
              :payload => encoded_data,
              :content_type => 'application/octet-stream'
            r.attach :name => :json,
              :payload => data.to_json,
              :content_type => 'application/json'
          end
          session_resource['push/table'].post(content, http_headers(:content_type => content_type))
          self.stream_state = stream.to_hash
        rescue RestClient::RequestFailed => e
          # Original note: on a 412 the data "got corrupted somehow" and
          # should be retried.
          # NOTE(review): `next` only ends this block early with nil;
          # whether the same chunk is actually re-sent depends on
          # calculate_chunksize, which is not visible here -- confirm.
          next if e.http_code == 412
          raise
        end
        elapsed_time
      end
      stream.state[:chunksize] = next_chunksize

      progress.inc(row_size)

      stream.increment(row_size)
      break if stream.complete?
    end

    progress.finish
    completed_tables << stream.table_name.to_s
    self.stream_state = {}
  end

  # Table/count map; computed once and stored in opts so it is persisted.
  def local_tables_info
    opts[:local_tables_info] ||= fetch_local_tables_info
  end

  # Hash of table name (String) => record count for tables not yet completed.
  def tables
    pending = {}
    local_tables_info.each do |table_name, count|
      name = table_name.to_s
      pending[name] = count unless completed_tables.include?(name)
    end
    pending
  end

  # Total record count over all local tables (memoized).
  def record_count
    @record_count ||= local_tables_info.values.inject(0) { |total, count| total + count }
  end

  # Counts the rows of every table in the local database and applies the
  # table filter to the result.
  def fetch_local_tables_info
    counts = {}
    db.tables.each { |table| counts[table] = db[table].count }
    apply_table_filter(counts)
  end

end
536
+
537
+ end