taps 0.2.26 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,3 @@
1
- require 'thread'
2
-
3
1
  Sequel::Model.db = Sequel.connect(Taps::Config.taps_database_url)
4
2
 
5
3
  class DbSession < Sequel::Model
@@ -12,51 +10,11 @@ class DbSession < Sequel::Model
12
10
  timestamp :last_access
13
11
  end
14
12
 
15
- @@connections = {}
16
- @@mutex = Mutex.new
17
-
18
- def connection
19
- @@mutex.synchronize {
20
- conn =
21
- if @@connections.key?(key)
22
- @@connections[key].first
23
- else
24
- Sequel.connect(database_url)
25
- end
26
- @@connections[key] = [conn, Time.now]
27
- return conn
28
- }
29
- end
30
-
31
- def disconnect
32
- @@mutex.synchronize {
33
- if @@connections.key?(key)
34
- conn, time = @@connections.delete(key)
35
- conn.disconnect
36
- end
37
- }
38
- end
39
-
40
- # Removes connections that have not been accessed within the
41
- # past thirty seconds.
42
- def self.cleanup
43
- @@mutex.synchronize {
44
- now = Time.now
45
- @@connections.each do |key, (conn, time)|
46
- if now - time > 30
47
- @@connections.delete(key)
48
- conn.disconnect
49
- end
50
- end
51
- }
52
- end
53
-
54
- Thread.new {
55
- while true
56
- sleep 30
57
- cleanup
13
# Opens a Sequel connection to this session's database_url and, when the
# caller supplied a block, yields the connection object to it. The block
# form of Sequel.connect is used, so the connection's lifetime is handled
# by Sequel around the block. Without a caller block nothing is yielded.
def conn
  Sequel.connect(database_url) { |db| yield db if block_given? }
end
60
18
  end
61
19
 
62
20
  DbSession.create_table! unless DbSession.table_exists?
data/lib/taps/log.rb ADDED
@@ -0,0 +1,15 @@
1
module Taps
  # Replaces the logger returned by Taps.log.
  #
  # log - any logger-like object; it is stored as given.
  def self.log=(log)
    # A module-level instance variable is used instead of a class variable
    # so the logger is not shared through the class-variable lookup rules.
    @log = log
  end

  # Returns the current logger. When none has been assigned (or the
  # assigned value is nil/false) a Logger writing to $stderr at ERROR
  # level is created, memoized and returned.
  def self.log
    @log ||= begin
      # Loaded lazily so 'logger' is only required when the default is needed.
      require 'logger'
      default_logger = Logger.new($stderr)
      default_logger.level = Logger::ERROR
      default_logger.datetime_format = "%Y-%m-%d %H:%M:%S"
      default_logger
    end
  end
end
@@ -0,0 +1,21 @@
1
# Core extension adding key-symbolizing helpers to Hash.
class Hash
  # Returns a new hash with every key that responds to #to_sym converted
  # to a symbol. Keys that cannot be converted are kept unchanged. Values
  # are copied as-is (nested hashes are not touched).
  def symbolize_keys
    each_with_object({}) do |(key, value), result|
      symbol = key.respond_to?(:to_sym) ? key.to_sym : key
      # Fall back to the original key if the conversion yields nil.
      result[symbol || key] = value
    end
  end

  # In-place variant of #symbolize_keys. Returns self.
  def symbolize_keys!
    replace(symbolize_keys)
  end

  # Symbolizes the keys of this hash and of every Hash value nested at any
  # depth below it, in place. Only values that are themselves hashes are
  # descended into. Returns self.
  def symbolize_recursively!
    symbolize_keys!
    each_value do |value|
      # Recurse (rather than symbolizing a single level) so hashes nested
      # more than one level deep are converted as well.
      value.symbolize_recursively! if value.kind_of?(Hash)
    end
    self
  end
end
@@ -0,0 +1,73 @@
1
+ require 'restclient'
2
+ require 'rack/utils'
3
+ require 'json'
4
+ require 'stringio'
5
+
6
module Taps
  # One part of a multipart payload, described by an options hash. The
  # nested Container collects parts and renders them through
  # RestClient::Payload::Multipart.
  class Multipart
    # Collects Multipart parts and renders them as a single request body.
    class Container
      attr_accessor :attachments

      def initialize
        @attachments = []
      end

      # Wraps opts in a Taps::Multipart and appends it to the attachments.
      # Returns the attachments array.
      def attach(opts)
        attachments << Taps::Multipart.new(opts)
      end

      # Renders all attachments, keyed by their name, into a multipart body.
      #
      # Returns a two-element array: [body string, Content-Type header value].
      def generate
        parts = attachments.each_with_object({}) do |part, by_name|
          by_name[part.name] = part
        end
        payload = RestClient::Payload::Multipart.new(parts)
        [payload.to_s, payload.headers['Content-Type']]
      end
    end

    attr_reader :opts

    # opts - hash; the keys read by this class are :name, :payload,
    #        :content_type and :original_filename.
    def initialize(opts={})
      @opts = opts
    end

    def name
      opts[:name]
    end

    # Returns the raw payload stored under :payload.
    def to_s
      opts[:payload]
    end

    # Returns the :content_type option, defaulting to 'text/plain'.
    def content_type
      opts[:content_type] || 'text/plain'
    end

    def original_filename
      opts[:original_filename]
    end

    # Yields a fresh Container so the caller can attach parts, then renders
    # it. Returns what Container#generate returns.
    def self.create
      container = Taps::Multipart::Container.new
      yield container
      container.generate
    end

    # Parses a multipart response body with Rack's multipart parser.
    #
    # response - a rest-client response (must respond to #to_s and #headers).
    #
    # Returns the parsed params hash with its top-level keys symbolized.
    def self.parse(response)
      content = response.to_s
      env = {
        'CONTENT_TYPE' => response.headers[:content_type],
        # Content length is a byte count; String#size counts characters
        # and under-reports bodies containing multibyte characters.
        'CONTENT_LENGTH' => content.bytesize,
        'rack.input' => StringIO.new(content)
      }

      params = Rack::Utils::Multipart.parse_multipart(env)
      params.symbolize_keys!
      params
    end

  end
end
@@ -0,0 +1,537 @@
1
+ require 'rest_client'
2
+ require 'sequel'
3
+ require 'zlib'
4
+
5
+ require 'taps/progress_bar'
6
+ require 'taps/config'
7
+ require 'taps/utils'
8
+ require 'taps/data_stream'
9
+
10
+ # disable warnings, rest client makes a lot of noise right now
11
+ $VERBOSE = nil
12
+
13
+ module Taps
14
+
15
# Base class for taps client operations. It holds the local database URL,
# the remote server URL and an options hash, and provides the session,
# HTTP header, table-filter and session-file helpers used by subclasses.
class Operation
  attr_reader :database_url, :remote_url, :opts
  attr_reader :session_uri

  # database_url - URL passed to Sequel.connect by #db.
  # remote_url   - URL passed to RestClient::Resource.new by #server.
  # opts         - options hash. Keys read in this class: :session_uri,
  #                :table_filter, :resume, :default_chunksize,
  #                :completed_tables, :stream_state, :disable_compression
  #                and (in .factory) :klass.
  def initialize(database_url, remote_url, opts={})
    @database_url = database_url
    @remote_url = remote_url
    @opts = opts
    @exiting = false
    @session_uri = opts[:session_uri]
  end

  # Prefix of the session file name written by #store_session.
  def file_prefix
    "op"
  end

  def table_filter
    opts[:table_filter]
  end

  # Keeps only the tables whose name matches the :table_filter regexp.
  #
  # tables - a Hash keyed by table name, or any other collection of names.
  #
  # Returns tables untouched when no filter is set; otherwise a new Hash
  # (for Hash input) or the selected elements (for other input).
  def apply_table_filter(tables)
    return tables unless table_filter

    pattern = Regexp.new(table_filter)
    if tables.kind_of?(Hash)
      tables.each_with_object({}) do |(table, data), kept|
        kept[table] = data if pattern.match(table.to_s)
      end
    else
      tables.select { |table| pattern.match(table.to_s) }
    end
  end

  def log
    Taps.log
  end

  # Writes #to_hash as JSON to "<file_prefix>_<YYYYmmddHHMM>.dat" in the
  # current directory and announces the file name on stdout.
  def store_session
    file = "#{file_prefix}_#{Time.now.strftime("%Y%m%d%H%M")}.dat"
    puts "Saving session to #{file}.."
    File.open(file, 'w') do |f|
      f.write(to_hash.to_json)
    end
  end

  # Serializable snapshot of this operation, as written by #store_session.
  def to_hash
    {
      :klass => self.class.to_s,
      :database_url => database_url,
      :remote_url => remote_url,
      :session_uri => session_uri,
      :stream_state => stream_state,
      :completed_tables => completed_tables,
      :table_filter => table_filter,
    }
  end

  # True once an INT or TERM signal has been trapped.
  def exiting?
    !!@exiting
  end

  # Installs INT and TERM handlers that only flag the operation as
  # exiting; callers poll #exiting? to stop at a safe point.
  def setup_signal_trap
    %w[INT TERM].each do |signal|
      trap(signal) {
        puts "\nCompleting current action..."
        @exiting = true
      }
    end
  end

  def resuming?
    opts[:resume] == true
  end

  def default_chunksize
    opts[:default_chunksize]
  end

  # Table names already transferred; stored (and lazily created) in opts.
  def completed_tables
    opts[:completed_tables] ||= []
  end

  # State of the stream currently being transferred; stored in opts.
  def stream_state
    opts[:stream_state] ||= {}
  end

  def stream_state=(val)
    opts[:stream_state] = val
  end

  def compression_disabled?
    !!opts[:disable_compression]
  end

  # Memoized Sequel connection to the local database.
  def db
    @db ||= Sequel.connect(database_url)
  end

  # Memoized RestClient resource for the remote server.
  def server
    @server ||= RestClient::Resource.new(remote_url)
  end

  # Memoized resource for the current session. When no session URI is
  # known yet, one is obtained by POSTing to 'sessions' on the server.
  def session_resource
    @session_resource ||= begin
      @session_uri ||= server['sessions'].post('', http_headers).to_s
      server[@session_uri]
    end
  end

  # Points this operation at an existing session.
  #
  # uri - session URI relative to the server resource.
  def set_session(uri)
    # Assign the instance variable: a bare `session_uri = uri` would only
    # create a local and leave #session_uri (and #to_hash) stale.
    @session_uri = uri
    @session_resource = server[uri]
  end

  # Deletes the remote session if a session resource was ever created.
  def close_session
    @session_resource.delete(http_headers) if @session_resource
  end

  # Returns url with the password portion of "//user:password@" replaced
  # by "[hidden]".
  def safe_url(url)
    url.sub(/\/\/(.+?)?:(.*?)@/, '//\1:[hidden]@')
  end

  def safe_remote_url
    safe_url(remote_url)
  end

  def safe_database_url
    safe_url(database_url)
  end

  # Builds the request headers: the compatible taps version, an
  # Accept-Encoding value depending on #compression_disabled?, and any
  # extra headers, which win on key collisions.
  def http_headers(extra = {})
    base = { :taps_version => Taps.compatible_version }
    base[:accept_encoding] = compression_disabled? ? "" : "gzip, deflate"
    base.merge(extra)
  end

  # Formats num with commas as thousands separators, e.g. 1234567 -> "1,234,567".
  def format_number(num)
    num.to_s.gsub(/(\d)(?=(\d\d\d)+(?!\d))/, "\\1,")
  end

  # Issues a GET on the server root and exits the process with status 1,
  # after printing an explanation, on bad credentials, an HTTP 417
  # response or a refused connection. Other request failures are re-raised.
  def verify_server
    server['/'].get(http_headers)
  rescue RestClient::Unauthorized
    # Listed before RequestFailed: in rest-client versions where
    # Unauthorized inherits from RequestFailed, the broader clause would
    # otherwise catch it first and re-raise instead of printing this.
    puts "Bad credentials given for #{safe_remote_url}"
    exit(1)
  rescue RestClient::RequestFailed => e
    raise unless e.http_code == 417

    puts "#{safe_remote_url} is running a different minor version of taps."
    puts "#{e.response.to_s}"
    exit(1)
  rescue Errno::ECONNREFUSED
    puts "Can't connect to #{safe_remote_url}. Please check that it's running"
    exit(1)
  end

  # Builds the operation for type (:pull, :push or :resume). When
  # opts[:resume] is set the type is forced to :resume and the class is
  # taken from opts[:klass].
  #
  # Raises RuntimeError for an unknown type or an unusable :klass.
  def self.factory(type, database_url, remote_url, opts)
    type = :resume if opts[:resume]
    klass = case type
      when :pull then Taps::Pull
      when :push then Taps::Push
      when :resume then resume_class(opts[:klass])
      else raise "Unknown Operation Type -> #{type}"
    end

    klass.new(database_url, remote_url, opts)
  end

  # Resolves a stored class name such as "Taps::Pull" to a class.
  # The name comes from a session file, so it is looked up as a constant
  # path and must be an Operation subclass, rather than being eval'd as
  # arbitrary Ruby code.
  def self.resume_class(name)
    klass = name.to_s.split('::').reject { |part| part.empty? }.inject(Object) do |scope, const_name|
      scope.const_get(const_name)
    end
    unless klass.is_a?(Class) && klass <= Operation
      raise "Unknown Operation Type -> #{name}"
    end
    klass
  rescue NameError
    raise "Unknown Operation Type -> #{name}"
  end
  private_class_method :resume_class
end
197
+
198
# Operation that copies schema, data, indexes and sequences from the
# remote taps server into the local database.
class Pull < Operation
  def file_prefix
    "pull"
  end

  # Adds the cached remote table information to the session snapshot.
  def to_hash
    super.merge(:remote_tables_info => remote_tables_info)
  end

  # Runs the whole pull. The schema step is skipped when resuming, and a
  # partially transferred table is finished first in that case. On a
  # RestClient error the session is stored; if the exception exposes a
  # response it is printed and the process exits with status 1, otherwise
  # the exception is re-raised.
  def run
    verify_server

    begin
      pull_schema unless resuming?

      setup_signal_trap

      pull_partial_data if resuming?

      pull_data
      pull_indexes
      pull_reset_sequences
      close_session
    rescue RestClient::Exception => e
      store_session
      raise unless e.respond_to?(:response)

      puts "!!! Caught Server Exception"
      puts "HTTP CODE: #{e.http_code}"
      puts "#{e.response.to_s}"
      exit(1)
    end
  end

  # Requests the schema of every pending table and loads it locally.
  def pull_schema
    puts "Receiving schema"

    tables.each_key do |table_name|
      schema_data = session_resource['pull/schema'].post({:table_name => table_name}, http_headers).to_s
      output = Taps::Utils.load_schema(database_url, schema_data)
      puts output if output
    end
  end

  # Streams every pending table from the server, one progress bar each.
  def pull_data
    puts "Receiving data"

    puts "#{tables.size} tables, #{format_number(record_count)} records"

    tables.each do |table_name, count|
      progress = ProgressBar.new(table_name.to_s, count)
      stream = Taps::DataStream.factory(db, {
        :chunksize => default_chunksize,
        :table_name => table_name
      })
      pull_data_from_table(stream, progress)
    end
  end

  # Finishes the table recorded in stream_state, if any.
  def pull_partial_data
    return if stream_state == {}

    table_name = stream_state[:table_name]
    table_count = tables[table_name.to_s]
    puts "Resuming #{table_name}, #{format_number(table_count)} records"

    progress = ProgressBar.new(table_name.to_s, table_count)
    stream = Taps::DataStream.factory(db, stream_state)
    pull_data_from_table(stream, progress)
  end

  # Fetches chunks for one stream until it reports completion. After each
  # chunk the stream state is saved so an interrupted pull can resume;
  # when an exit was requested the session is stored and the process exits
  # with status 0. A chunk that raises DataStream::CorruptedData is retried
  # with the stream's error flag set.
  def pull_data_from_table(stream, progress)
    loop do
      begin
        if exiting?
          store_session
          exit 0
        end

        size = stream.fetch_remote(session_resource['pull/table'], http_headers)
        break if stream.complete?
        progress.inc(size) unless exiting?
        stream.error = false
        self.stream_state = stream.to_hash
      rescue DataStream::CorruptedData => e
        puts "Corrupted Data Received #{e.message}, retrying..."
        stream.error = true
        next
      end
    end

    progress.finish
    completed_tables << stream.table_name.to_s
    self.stream_state = {}
  end

  # Returns a Hash of table name (String) => record count for every
  # remote table that is not yet listed in completed_tables.
  def tables
    remote_tables_info.each_with_object({}) do |(table_name, count), pending|
      name = table_name.to_s
      pending[name] = count unless completed_tables.include?(name)
    end
  end

  # Memoized sum of the record counts of all remote tables.
  def record_count
    @record_count ||= remote_tables_info.values.inject(0) { |sum, count| sum + count }
  end

  # Remote table info, cached in opts so it is persisted with the session.
  def remote_tables_info
    opts[:remote_tables_info] ||= fetch_remote_tables_info
  end

  # Asks the server for its table names, applies the table filter and then
  # requests the record count of each remaining table.
  #
  # Returns a Hash of table name => record count (Integer).
  def fetch_remote_tables_info
    table_names = with_fetch_retries do
      # JSON.parse (as in #pull_indexes) rather than JSON.load, which is
      # documented as intended for trusted input only.
      JSON.parse(session_resource['pull/table_names'].get(http_headers).to_s)
    end

    apply_table_filter(table_names).each_with_object({}) do |table_name, data|
      data[table_name] = with_fetch_retries do
        session_resource['pull/table_count'].post({:table => table_name}, http_headers).to_s.to_i
      end
    end
  end

  # Fetches all index definitions and loads those of the filtered tables.
  def pull_indexes
    puts "Receiving indexes"

    idxs = JSON.parse(session_resource['pull/indexes'].get(http_headers).to_s)

    apply_table_filter(idxs).each do |table, indexes|
      next unless indexes.size > 0
      progress = ProgressBar.new(table, indexes.size)
      indexes.each do |idx|
        output = Taps::Utils.load_indexes(database_url, idx)
        puts output if output
        progress.inc(1)
      end
      progress.finish
    end
  end

  def pull_reset_sequences
    puts "Resetting sequences"

    output = Taps::Utils.schema_bin(:reset_db_sequences, database_url)
    puts output if output
  end

  private

  # Runs the block, retrying it up to max_retries times when it raises a
  # RestClient::Exception. When the retries are exhausted an error is
  # printed and the process exits with status 1.
  #
  # Returns the block's value.
  def with_fetch_retries(max_retries = 10)
    retries = 0
    begin
      yield
    rescue RestClient::Exception
      retries += 1
      retry if retries <= max_retries
      # safe_remote_url keeps any password in the URL out of the output.
      puts "Unable to fetch tables information from #{safe_remote_url}. Please check the server log."
      exit(1)
    end
  end
end
365
+
366
# Operation that sends schema, data, indexes and a sequence reset from
# the local database to the remote taps server.
class Push < Operation
  def file_prefix
    "push"
  end

  # Adds the cached local table information to the session snapshot.
  def to_hash
    super.merge(:local_tables_info => local_tables_info)
  end

  # Runs the whole push. The schema step is skipped when resuming, and a
  # partially transferred table is finished first in that case. On a
  # RestClient error the session is stored; if the exception exposes a
  # response it is printed and the process exits with status 1, otherwise
  # the exception is re-raised.
  def run
    verify_server
    begin
      push_schema unless resuming?

      setup_signal_trap

      push_partial_data if resuming?

      push_data
      push_indexes
      push_reset_sequences
      close_session
    rescue RestClient::Exception => e
      store_session
      raise unless e.respond_to?(:response)

      puts "!!! Caught Server Exception"
      puts "HTTP CODE: #{e.http_code}"
      puts "#{e.response.to_s}"
      exit(1)
    end
  end

  # Dumps the local index definitions and posts those of the filtered
  # tables to the server, one request per index.
  def push_indexes
    idxs = JSON.parse(Taps::Utils.schema_bin(:indexes_individual, database_url))

    return if idxs.empty?

    puts "Sending indexes"

    apply_table_filter(idxs).each do |table, indexes|
      progress = ProgressBar.new(table, indexes.size)
      indexes.each do |idx|
        session_resource['push/indexes'].post(idx, http_headers)
        progress.inc(1)
      end
      progress.finish
    end
  end

  # Dumps the schema of every pending table and posts it to the server.
  def push_schema
    puts "Sending schema"

    tables.each_key do |table|
      schema_data = Taps::Utils.schema_bin(:dump_table, database_url, table)
      session_resource['push/schema'].post(schema_data, http_headers)
    end
  end

  def push_reset_sequences
    puts "Resetting sequences"

    session_resource['push/reset_sequences'].post('', http_headers)
  end

  # Finishes the table recorded in stream_state, if any.
  def push_partial_data
    return if stream_state == {}

    table_name = stream_state[:table_name]
    table_count = tables[table_name.to_s]
    puts "Resuming #{table_name}, #{format_number(table_count)} records"
    progress = ProgressBar.new(table_name.to_s, table_count)
    stream = Taps::DataStream.factory(db, stream_state)
    push_data_from_table(stream, progress)
  end

  # Streams every pending table to the server, one progress bar each.
  def push_data
    puts "Sending data"

    puts "#{tables.size} tables, #{format_number(record_count)} records"

    tables.each do |table_name, count|
      stream = Taps::DataStream.factory(db,
        :table_name => table_name,
        :chunksize => default_chunksize)
      progress = ProgressBar.new(table_name.to_s, count)
      push_data_from_table(stream, progress)
    end
  end

  # Sends one stream chunk by chunk until it reports completion. Each
  # chunk is fetched and posted inside the block handed to
  # Taps::Utils.calculate_chunksize, whose return value becomes the
  # stream's next chunksize. When an exit was requested the session is
  # stored and the process exits with status 0.
  def push_data_from_table(stream, progress)
    loop do
      if exiting?
        store_session
        exit 0
      end

      # Declared outside the block so the value assigned inside it is
      # still visible after calculate_chunksize returns.
      row_size = 0
      starting_chunksize = stream.state[:chunksize]
      tuned_chunksize = Taps::Utils.calculate_chunksize(starting_chunksize) do |candidate|
        stream.state[:chunksize] = candidate
        encoded_data, row_size, elapsed_time = stream.fetch
        break if stream.complete?

        metadata = {
          :state => stream.to_hash,
          :checksum => Taps::Utils.checksum(encoded_data).to_s
        }

        begin
          content, content_type = Taps::Multipart.create do |r|
            r.attach :name => :encoded_data,
              :payload => encoded_data,
              :content_type => 'application/octet-stream'
            r.attach :name => :json,
              :payload => metadata.to_json,
              :content_type => 'application/json'
          end
          session_resource['push/table'].post(content, http_headers(:content_type => content_type))
          self.stream_state = stream.to_hash
        rescue RestClient::RequestFailed => e
          # retry the same data, it got corrupted somehow.
          next if e.http_code == 412
          raise
        end
        elapsed_time
      end
      stream.state[:chunksize] = tuned_chunksize

      progress.inc(row_size)

      stream.increment(row_size)
      break if stream.complete?
    end

    progress.finish
    completed_tables << stream.table_name.to_s
    self.stream_state = {}
  end

  # Local table info, cached in opts so it is persisted with the session.
  def local_tables_info
    opts[:local_tables_info] ||= fetch_local_tables_info
  end

  # Returns a Hash of table name (String) => record count for every local
  # table that is not yet listed in completed_tables.
  def tables
    pending = {}
    local_tables_info.each do |table_name, count|
      name = table_name.to_s
      pending[name] = count unless completed_tables.include?(name)
    end
    pending
  end

  # Memoized sum of the record counts of all local tables.
  def record_count
    @record_count ||= local_tables_info.values.inject(0) { |sum, count| sum + count }
  end

  # Counts the rows of every table in the local database and applies the
  # table filter to the result.
  def fetch_local_tables_info
    counts = {}
    db.tables.each { |table| counts[table] = db[table].count }
    apply_table_filter(counts)
  end

end
536
+
537
+ end