dkastner-taps 0.3.11

data/lib/taps/data_stream.rb ADDED
@@ -0,0 +1,303 @@
+ require 'taps/monkey'
+ require 'taps/multipart'
+ require 'taps/utils'
+ require 'taps/log'
+ require 'json/pure'
+
+ module Taps
+
+ class DataStream
+   class CorruptedData < Exception; end
+
+   attr_reader :db, :state
+
+   def initialize(db, state)
+     @db = db
+     @state = {
+       :offset => 0,
+       :avg_chunksize => 0,
+       :num_chunksize => 0,
+       :total_chunksize => 0,
+     }.merge(state)
+     @complete = false
+   end
+
+   def log
+     Taps.log
+   end
+
+   def error=(val)
+     state[:error] = val
+   end
+
+   def error
+     state[:error] || false
+   end
+
+   def table_name
+     state[:table_name].to_sym
+   end
+
+   def table_name_sql
+     table_name.identifier
+   end
+
+   def to_hash
+     state.merge(:klass => self.class.to_s)
+   end
+
+   def to_json
+     to_hash.to_json
+   end
+
+   def string_columns
+     @string_columns ||= Taps::Utils.incorrect_blobs(db, table_name)
+   end
+
+   def table
+     @table ||= db[table_name_sql]
+   end
+
+   def order_by(name=nil)
+     @order_by ||= begin
+       name ||= table_name
+       Taps::Utils.order_by(db, name)
+     end
+   end
+
+   def increment(row_count)
+     state[:offset] += row_count
+   end
+
+   # keep a record of the average chunksize within the first few hundred thousand records, after chunksize
+   # goes below 100 or maybe if offset is > 1000
+   def fetch_rows
+     state[:chunksize] = fetch_chunksize
+     ds = table.order(*order_by).limit(state[:chunksize], state[:offset])
+     log.debug "DataStream#fetch_rows SQL -> #{ds.sql}"
+     rows = Taps::Utils.format_data(ds.all,
+       :string_columns => string_columns)
+     update_chunksize_stats
+     rows
+   end
+
+   def max_chunksize_training
+     20
+   end
+
+   def fetch_chunksize
+     chunksize = state[:chunksize]
+     return chunksize if state[:num_chunksize] < max_chunksize_training
+     return chunksize if state[:avg_chunksize] == 0
+     return chunksize if state[:error]
+     state[:avg_chunksize] > chunksize ? state[:avg_chunksize] : chunksize
+   end
+
+   def update_chunksize_stats
+     return if state[:num_chunksize] >= max_chunksize_training
+     state[:total_chunksize] += state[:chunksize]
+     state[:num_chunksize] += 1
+     state[:avg_chunksize] = state[:total_chunksize] / state[:num_chunksize] rescue state[:chunksize]
+   end
+
+   def encode_rows(rows)
+     Taps::Utils.base64encode(Marshal.dump(rows))
+   end
+
+   def fetch
+     log.debug "DataStream#fetch state -> #{state.inspect}"
+
+     t1 = Time.now
+     rows = fetch_rows
+     encoded_data = encode_rows(rows)
+     t2 = Time.now
+     elapsed_time = t2 - t1
+
+     @complete = rows == { }
+
+     [encoded_data, (@complete ? 0 : rows[:data].size), elapsed_time]
+   end
+
+   def complete?
+     @complete
+   end
+
+   def fetch_remote(resource, headers)
+     params = fetch_from_resource(resource, headers)
+     encoded_data = params[:encoded_data]
+     json = params[:json]
+
+     rows = parse_encoded_data(encoded_data, json[:checksum])
+     @complete = rows == { }
+
+     # update local state
+     state.merge!(json[:state].merge(:chunksize => state[:chunksize]))
+
+     unless @complete
+       import_rows(rows)
+       rows[:data].size
+     else
+       0
+     end
+   end
+
+   # this one is used inside the server process
+   def fetch_remote_in_server(params)
+     json = self.class.parse_json(params[:json])
+     encoded_data = params[:encoded_data]
+
+     rows = parse_encoded_data(encoded_data, json[:checksum])
+     @complete = rows == { }
+
+     unless @complete
+       import_rows(rows)
+       rows[:data].size
+     else
+       0
+     end
+   end
+
+   def fetch_from_resource(resource, headers)
+     res = nil
+     log.debug "DataStream#fetch_from_resource state -> #{state.inspect}"
+     state[:chunksize] = Taps::Utils.calculate_chunksize(state[:chunksize]) do |c|
+       state[:chunksize] = c
+       res = resource.post({:state => self.to_json}, headers)
+     end
+
+     begin
+       params = Taps::Multipart.parse(res)
+       params[:json] = self.class.parse_json(params[:json]) if params.has_key?(:json)
+       return params
+     rescue JSON::ParserError
+       raise DataStream::CorruptedData.new("Invalid JSON Received")
+     end
+   end
+
+   def self.parse_json(json)
+     hash = JSON.parse(json).symbolize_keys
+     hash[:state].symbolize_keys! if hash.has_key?(:state)
+     hash
+   end
+
+   def parse_encoded_data(encoded_data, checksum)
+     raise DataStream::CorruptedData.new("Checksum Failed") unless Taps::Utils.valid_data?(encoded_data, checksum)
+
+     begin
+       return Marshal.load(Taps::Utils.base64decode(encoded_data))
+     rescue Object => e
+       unless ENV['NO_DUMP_MARSHAL_ERRORS']
+         puts "Error encountered loading data, wrote the data chunk to dump.#{Process.pid}.dat"
+         File.open("dump.#{Process.pid}.dat", "w") { |f| f.write(encoded_data) }
+       end
+       raise
+     end
+   end
+
+   def import_rows(rows)
+     table.import(rows[:header], rows[:data])
+     state[:offset] += rows[:data].size
+   end
+
+   def self.factory(db, state)
+     if defined?(Sequel::MySQL) && Sequel::MySQL.respond_to?(:convert_invalid_date_time=)
+       Sequel::MySQL.convert_invalid_date_time = :nil
+     end
+
+     if state.has_key?(:klass)
+       return eval(state[:klass]).new(db, state)
+     end
+
+     if Taps::Utils.single_integer_primary_key(db, state[:table_name].to_sym)
+       DataStreamKeyed.new(db, state)
+     else
+       DataStream.new(db, state)
+     end
+   end
+ end
+
+
+ class DataStreamKeyed < DataStream
+   attr_accessor :buffer
+
+   def initialize(db, state)
+     super(db, state)
+     @state = { :primary_key => order_by(state[:table_name]).first, :filter => 0 }.merge(state)
+     @buffer = []
+   end
+
+   def primary_key
+     state[:primary_key].to_sym
+   end
+
+   def buffer_limit
+     if state[:last_fetched] and state[:last_fetched] < state[:filter] and self.buffer.size == 0
+       state[:last_fetched]
+     else
+       state[:filter]
+     end
+   end
+
+   def calc_limit(chunksize)
+     # we want to not fetch more than is needed while we're
+     # inside sinatra but locally we can select more than
+     # is strictly needed
+     if defined?(Sinatra)
+       (chunksize * 1.1).ceil
+     else
+       (chunksize * 3).ceil
+     end
+   end
+
+   def load_buffer(chunksize)
+     # make sure BasicObject is not polluted by subsequent requires
+     Sequel::BasicObject.remove_methods!
+
+     num = 0
+     loop do
+       limit = calc_limit(chunksize)
+       # we have to use local variables in order for the virtual row filter to work correctly
+       key = primary_key
+       buf_limit = buffer_limit
+       ds = table.order(*order_by).filter { key.sql_number > buf_limit }.limit(limit)
+       log.debug "DataStreamKeyed#load_buffer SQL -> #{ds.sql}"
+       data = ds.all
+       self.buffer += data
+       num += data.size
+       if data.size > 0
+         # keep a record of the last primary key value in the buffer
+         state[:filter] = self.buffer.last[ primary_key ]
+       end
+
+       break if num >= chunksize or data.size == 0
+     end
+   end
+
+   def fetch_buffered(chunksize)
+     load_buffer(chunksize) if self.buffer.size < chunksize
+     rows = buffer.slice(0, chunksize)
+     state[:last_fetched] = if rows.size > 0
+       rows.last[ primary_key ]
+     else
+       nil
+     end
+     rows
+   end
+
+   def import_rows(rows)
+     table.import(rows[:header], rows[:data])
+   end
+
+   def fetch_rows
+     chunksize = state[:chunksize]
+     Taps::Utils.format_data(fetch_buffered(chunksize) || [],
+       :string_columns => string_columns)
+   end
+
+   def increment(row_count)
+     # pop the rows we just successfully sent off the buffer
+     @buffer.slice!(0, row_count)
+   end
+ end
+
+ end
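
For reference, a condensed sketch of how the pull side drives this class; it mirrors Taps::Pull#pull_data_from_table in operation.rb further down. The `db` connection, `session_resource`, and `http_headers` values are assumed to come from an Operation instance, and the table name is hypothetical:

    # Build a keyed or offset-based stream for one table, then pull chunks
    # until the server signals completion.
    stream = Taps::DataStream.factory(db, :table_name => 'users', :chunksize => 1000)
    loop do
      begin
        row_count = stream.fetch_remote(session_resource['pull/table'], http_headers)
        break if stream.complete?
        stream.error = false        # last chunk arrived intact
      rescue Taps::DataStream::CorruptedData
        stream.error = true         # flag the failure; the same chunk is retried
        next
      end
    end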
data/lib/taps/db_session.rb ADDED
@@ -0,0 +1,20 @@
+ Sequel::Model.db = Sequel.connect(Taps::Config.taps_database_url)
+
+ class DbSession < Sequel::Model
+   plugin :schema
+   set_schema do
+     primary_key :id
+     text :key
+     text :database_url
+     timestamp :started_at
+     timestamp :last_access
+   end
+
+   def conn
+     Sequel.connect(database_url) do |db|
+       yield db if block_given?
+     end
+   end
+ end
+
+ DbSession.create_table! unless DbSession.table_exists?
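
A minimal sketch (not taken from the gem) of how a server can use this model: one row per push/pull session, with #conn re-opening the client-supplied database on demand. The key and URL below are hypothetical:

    # Record a session, then run Sequel calls against that session's database.
    session = DbSession.create(
      :key          => 'abc123',
      :database_url => 'sqlite://taps_test.db',
      :started_at   => Time.now,
      :last_access  => Time.now
    )

    session.conn do |db|
      db.tables   # any Sequel operation against the session's database
    end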
data/lib/taps/log.rb ADDED
@@ -0,0 +1,15 @@
+ module Taps
+   def self.log=(log)
+     @@log = log
+   end
+
+   def self.log
+     @@log ||= begin
+       require 'logger'
+       log = Logger.new($stderr)
+       log.level = Logger::ERROR
+       log.datetime_format = "%Y-%m-%d %H:%M:%S"
+       log
+     end
+   end
+ end
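
By default a $stderr logger at ERROR level is created lazily on first use; a caller can swap in its own logger, or simply raise the verbosity, before running an operation:

    require 'logger'

    Taps.log = Logger.new($stdout)
    Taps.log.level = Logger::DEBUG   # shows the SQL that DataStream emits via log.debug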
data/lib/taps/monkey.rb ADDED
@@ -0,0 +1,21 @@
+ class Hash
+   def symbolize_keys
+     inject({}) do |options, (key, value)|
+       options[(key.to_sym rescue key) || key] = value
+       options
+     end
+   end
+
+   def symbolize_keys!
+     self.replace(symbolize_keys)
+   end
+
+   def symbolize_recursively!
+     self.replace(symbolize_keys)
+     self.each do |k, v|
+       if v.kind_of?(Hash)
+         v.symbolize_keys!
+       end
+     end
+   end
+ end
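
A small worked example of the difference between the shallow and recursive variants (values are arbitrary):

    h = { 'a' => 1, 'nested' => { 'b' => 2 } }
    h.symbolize_keys            #=> {:a=>1, :nested=>{"b"=>2}}   top level only, returns a copy
    h.symbolize_recursively!    #=> {:a=>1, :nested=>{:b=>2}}    mutates h and any nested hashes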
data/lib/taps/multipart.rb ADDED
@@ -0,0 +1,73 @@
+ require 'restclient'
+ require 'rack/utils'
+ require 'json/pure'
+ require 'stringio'
+
+ module Taps
+   class Multipart
+     class Container
+       attr_accessor :attachments
+
+       def initialize
+         @attachments = []
+       end
+
+       def attach(opts)
+         mp = Taps::Multipart.new(opts)
+         attachments << mp
+       end
+
+       def generate
+         hash = {}
+         attachments.each do |mp|
+           hash[mp.name] = mp
+         end
+         m = RestClient::Payload::Multipart.new(hash)
+         [m.to_s, m.headers['Content-Type']]
+       end
+     end
+
+     attr_reader :opts
+
+     def initialize(opts={})
+       @opts = opts
+     end
+
+     def name
+       opts[:name]
+     end
+
+     def to_s
+       opts[:payload]
+     end
+
+     def content_type
+       opts[:content_type] || 'text/plain'
+     end
+
+     def original_filename
+       opts[:original_filename]
+     end
+
+     def self.create
+       c = Taps::Multipart::Container.new
+       yield c
+       c.generate
+     end
+
+     # response is a rest-client response
+     def self.parse(response)
+       content = response.to_s
+       env = {
+         'CONTENT_TYPE' => response.headers[:content_type],
+         'CONTENT_LENGTH' => content.size,
+         'rack.input' => StringIO.new(content)
+       }
+
+       params = Rack::Utils::Multipart.parse_multipart(env)
+       params.symbolize_keys!
+       params
+     end
+
+   end
+ end
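
For reference, a condensed sketch of how the push side uses this class; it mirrors Taps::Push#push_data_from_table in operation.rb below, and `encoded_data`, `data`, and `session_resource` are assumed to exist on the caller. The pull side parses the same two-part layout back out of a rest-client response with Taps::Multipart.parse (see DataStream#fetch_from_resource above):

    # Build a two-part body (binary row data plus JSON state/checksum) and
    # post it with the generated multipart Content-Type.
    content, content_type = Taps::Multipart.create do |r|
      r.attach :name => :encoded_data,
        :payload => encoded_data,
        :content_type => 'application/octet-stream'
      r.attach :name => :json,
        :payload => data.to_json,
        :content_type => 'application/json'
    end
    session_resource['push/table'].post(content, :content_type => content_type)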
data/lib/taps/operation.rb ADDED
@@ -0,0 +1,550 @@
+ require 'rest_client'
+ require 'sequel'
+ require 'zlib'
+
+ require 'taps/progress_bar'
+ require 'taps/config'
+ require 'taps/utils'
+ require 'taps/data_stream'
+
+ # disable warnings, rest client makes a lot of noise right now
+ $VERBOSE = nil
+
+ module Taps
+
+ class Operation
+   attr_reader :database_url, :remote_url, :opts
+   attr_reader :session_uri
+
+   def initialize(database_url, remote_url, opts={})
+     @database_url = database_url
+     @remote_url = remote_url
+     @opts = opts
+     @exiting = false
+     @session_uri = opts[:session_uri]
+   end
+
+   def file_prefix
+     "op"
+   end
+
+   def indexes_first?
+     !!opts[:indexes_first]
+   end
+
+   def table_filter
+     opts[:table_filter]
+   end
+
+   def apply_table_filter(tables)
+     return tables unless table_filter
+     re = Regexp.new(table_filter)
+     if tables.kind_of?(Hash)
+       ntables = {}
+       tables.each do |t, d|
+         unless re.match(t.to_s).nil?
+           ntables[t] = d
+         end
+       end
+       ntables
+     else
+       tables.reject { |t| re.match(t.to_s).nil? }
+     end
+   end
+
+   def log
+     Taps.log
+   end
+
+   def store_session
+     file = "#{file_prefix}_#{Time.now.strftime("%Y%m%d%H%M")}.dat"
+     puts "\nSaving session to #{file}.."
+     File.open(file, 'w') do |f|
+       f.write(to_hash.to_json)
+     end
+   end
+
+   def to_hash
+     {
+       :klass => self.class.to_s,
+       :database_url => database_url,
+       :remote_url => remote_url,
+       :session_uri => session_uri,
+       :stream_state => stream_state,
+       :completed_tables => completed_tables,
+       :table_filter => table_filter,
+     }
+   end
+
+   def exiting?
+     !!@exiting
+   end
+
+   def setup_signal_trap
+     trap("INT") {
+       puts "\nCompleting current action..."
+       @exiting = true
+     }
+
+     trap("TERM") {
+       puts "\nCompleting current action..."
+       @exiting = true
+     }
+   end
+
+   def resuming?
+     opts[:resume] == true
+   end
+
+   def default_chunksize
+     opts[:default_chunksize]
+   end
+
+   def completed_tables
+     opts[:completed_tables] ||= []
+   end
+
+   def stream_state
+     opts[:stream_state] ||= {}
+   end
+
+   def stream_state=(val)
+     opts[:stream_state] = val
+   end
+
+   def compression_disabled?
+     !!opts[:disable_compression]
+   end
+
+   def db
+     @db ||= Sequel.connect(database_url)
+   end
+
+   def server
+     @server ||= RestClient::Resource.new(remote_url)
+   end
+
+   def session_resource
+     @session_resource ||= begin
+       @session_uri ||= server['sessions'].post('', http_headers).to_s
+       server[@session_uri]
+     end
+   end
+
+   def set_session(uri)
+     session_uri = uri
+     @session_resource = server[session_uri]
+   end
+
+   def close_session
+     @session_resource.delete(http_headers) if @session_resource
+   end
+
+   def safe_url(url)
+     url.sub(/\/\/(.+?)?:(.*?)@/, '//\1:[hidden]@')
+   end
+
+   def safe_remote_url
+     safe_url(remote_url)
+   end
+
+   def safe_database_url
+     safe_url(database_url)
+   end
+
+   def http_headers(extra = {})
+     base = { :taps_version => Taps.version }
+     if compression_disabled?
+       base[:accept_encoding] = ""
+     else
+       base[:accept_encoding] = "gzip, deflate"
+     end
+     base.merge(extra)
+   end
+
+   def format_number(num)
+     num.to_s.gsub(/(\d)(?=(\d\d\d)+(?!\d))/, "\\1,")
+   end
+
+   def verify_server
+     begin
+       server['/'].get(http_headers)
+     rescue RestClient::RequestFailed => e
+       if e.http_code == 417
+         puts "#{safe_remote_url} is running a different minor version of taps."
+         puts "#{e.response.to_s}"
+         exit(1)
+       else
+         raise
+       end
+     rescue RestClient::Unauthorized
+       puts "Bad credentials given for #{safe_remote_url}"
+       exit(1)
+     rescue Errno::ECONNREFUSED
+       puts "Can't connect to #{safe_remote_url}. Please check that it's running"
+       exit(1)
+     end
+   end
+
+   def self.factory(type, database_url, remote_url, opts)
+     type = :resume if opts[:resume]
+     klass = case type
+       when :pull then Taps::Pull
+       when :push then Taps::Push
+       when :resume then eval(opts[:klass])
+       else raise "Unknown Operation Type -> #{type}"
+     end
+
+     klass.new(database_url, remote_url, opts)
+   end
+ end
+
+ class Pull < Operation
+   def file_prefix
+     "pull"
+   end
+
+   def to_hash
+     super.merge(:remote_tables_info => remote_tables_info)
+   end
+
+   def run
+     verify_server
+
+     begin
+       unless resuming?
+         pull_schema
+         pull_indexes if indexes_first?
+       end
+       setup_signal_trap
+       pull_partial_data if resuming?
+       pull_data
+       pull_indexes unless indexes_first?
+       pull_reset_sequences
+       close_session
+     rescue RestClient::Exception => e
+       store_session
+       if e.respond_to?(:response)
+         puts "!!! Caught Server Exception"
+         puts "HTTP CODE: #{e.http_code}"
+         puts "#{e.response.to_s}"
+         exit(1)
+       else
+         raise
+       end
+     end
+   end
+
+   def pull_schema
+     puts "Receiving schema"
+
+     progress = ProgressBar.new('Schema', tables.size)
+     tables.each do |table_name, count|
+       schema_data = session_resource['pull/schema'].post({:table_name => table_name}, http_headers).to_s
+       log.debug "Table: #{table_name}\n#{schema_data}\n"
+       output = Taps::Utils.load_schema(database_url, schema_data)
+       puts output if output
+       progress.inc(1)
+     end
+     progress.finish
+   end
+
+   def pull_data
+     puts "Receiving data"
+
+     puts "#{tables.size} tables, #{format_number(record_count)} records"
+
+     tables.each do |table_name, count|
+       progress = ProgressBar.new(table_name.to_s, count)
+       stream = Taps::DataStream.factory(db, {
+         :chunksize => default_chunksize,
+         :table_name => table_name
+       })
+       pull_data_from_table(stream, progress)
+     end
+   end
+
+   def pull_partial_data
+     return if stream_state == {}
+
+     table_name = stream_state[:table_name]
+     record_count = tables[table_name.to_s]
+     puts "Resuming #{table_name}, #{format_number(record_count)} records"
+
+     progress = ProgressBar.new(table_name.to_s, record_count)
+     stream = Taps::DataStream.factory(db, stream_state)
+     pull_data_from_table(stream, progress)
+   end
+
+   def pull_data_from_table(stream, progress)
+     loop do
+       begin
+         if exiting?
+           store_session
+           exit 0
+         end
+
+         size = stream.fetch_remote(session_resource['pull/table'], http_headers)
+         break if stream.complete?
+         progress.inc(size) unless exiting?
+         stream.error = false
+         self.stream_state = stream.to_hash
+       rescue DataStream::CorruptedData => e
+         puts "Corrupted Data Received #{e.message}, retrying..."
+         stream.error = true
+         next
+       end
+     end
+
+     progress.finish
+     completed_tables << stream.table_name.to_s
+     self.stream_state = {}
+   end
+
+   def tables
+     h = {}
+     remote_tables_info.each do |table_name, count|
+       next if completed_tables.include?(table_name.to_s)
+       h[table_name.to_s] = count
+     end
+     h
+   end
+
+   def record_count
+     @record_count ||= remote_tables_info.values.inject(0) { |a,c| a += c }
+   end
+
+   def remote_tables_info
+     opts[:remote_tables_info] ||= fetch_remote_tables_info
+   end
+
+   def fetch_remote_tables_info
+     retries = 0
+     max_retries = 10
+     begin
+       tables = JSON.load(session_resource['pull/table_names'].get(http_headers).to_s)
+     rescue RestClient::Exception
+       retries += 1
+       retry if retries <= max_retries
+       puts "Unable to fetch tables information from #{remote_url}. Please check the server log."
+       exit(1)
+     end
+
+     data = {}
+     apply_table_filter(tables).each do |table_name|
+       retries = 0
+       begin
+         count = session_resource['pull/table_count'].post({:table => table_name}, http_headers).to_s.to_i
+         data[table_name] = count
+       rescue RestClient::Exception
+         retries += 1
+         retry if retries <= max_retries
+         puts "Unable to fetch tables information from #{remote_url}. Please check the server log."
+         exit(1)
+       end
+     end
+     data
+   end
+
+   def pull_indexes
+     puts "Receiving indexes"
+
+     idxs = JSON.parse(session_resource['pull/indexes'].get(http_headers).to_s)
+
+     apply_table_filter(idxs).each do |table, indexes|
+       next unless indexes.size > 0
+       progress = ProgressBar.new(table, indexes.size)
+       indexes.each do |idx|
+         output = Taps::Utils.load_indexes(database_url, idx)
+         puts output if output
+         progress.inc(1)
+       end
+       progress.finish
+     end
+   end
+
+   def pull_reset_sequences
+     puts "Resetting sequences"
+
+     output = Taps::Utils.schema_bin(:reset_db_sequences, database_url)
+     puts output if output
+   end
+ end
+
+ class Push < Operation
+   def file_prefix
+     "push"
+   end
+
+   def to_hash
+     super.merge(:local_tables_info => local_tables_info)
+   end
+
+   def run
+     verify_server
+     begin
+       unless resuming?
+         push_schema
+         push_indexes if indexes_first?
+       end
+       setup_signal_trap
+       push_partial_data if resuming?
+       push_data
+       push_indexes unless indexes_first?
+       push_reset_sequences
+       close_session
+     rescue RestClient::Exception => e
+       store_session
+       if e.respond_to?(:response)
+         puts "!!! Caught Server Exception"
+         puts "HTTP CODE: #{e.http_code}"
+         puts "#{e.response.to_s}"
+         exit(1)
+       else
+         raise
+       end
+     end
+   end
+
+   def push_indexes
+     idxs = JSON.parse(Taps::Utils.schema_bin(:indexes_individual, database_url))
+
+     return unless idxs.size > 0
+
+     puts "Sending indexes"
+
+     apply_table_filter(idxs).each do |table, indexes|
+       next unless indexes.size > 0
+       progress = ProgressBar.new(table, indexes.size)
+       indexes.each do |idx|
+         session_resource['push/indexes'].post(idx, http_headers)
+         progress.inc(1)
+       end
+       progress.finish
+     end
+   end
+
+   def push_schema
+     puts "Sending schema"
+
+     progress = ProgressBar.new('Schema', tables.size)
+     tables.each do |table, count|
+       schema_data = Taps::Utils.schema_bin(:dump_table, database_url, table)
+       log.debug "Table: #{table}\n#{schema_data}\n"
+       session_resource['push/schema'].post(schema_data, http_headers)
+       progress.inc(1)
+     end
+     progress.finish
+   end
+
+   def push_reset_sequences
+     puts "Resetting sequences"
+
+     session_resource['push/reset_sequences'].post('', http_headers)
+   end
+
+   def push_partial_data
+     return if stream_state == {}
+
+     table_name = stream_state[:table_name]
+     record_count = tables[table_name.to_s]
+     puts "Resuming #{table_name}, #{format_number(record_count)} records"
+     progress = ProgressBar.new(table_name.to_s, record_count)
+     stream = Taps::DataStream.factory(db, stream_state)
+     push_data_from_table(stream, progress)
+   end
+
+   def push_data
+     puts "Sending data"
+
+     puts "#{tables.size} tables, #{format_number(record_count)} records"
+
+     tables.each do |table_name, count|
+       stream = Taps::DataStream.factory(db,
+         :table_name => table_name,
+         :chunksize => default_chunksize)
+       progress = ProgressBar.new(table_name.to_s, count)
+       push_data_from_table(stream, progress)
+     end
+   end
+
+   def push_data_from_table(stream, progress)
+     loop do
+       if exiting?
+         store_session
+         exit 0
+       end
+
+       row_size = 0
+       chunksize = stream.state[:chunksize]
+       chunksize = Taps::Utils.calculate_chunksize(chunksize) do |c|
+         stream.state[:chunksize] = c
+         encoded_data, row_size, elapsed_time = stream.fetch
+         break if stream.complete?
+
+         data = {
+           :state => stream.to_hash,
+           :checksum => Taps::Utils.checksum(encoded_data).to_s
+         }
+
+         begin
+           content, content_type = Taps::Multipart.create do |r|
+             r.attach :name => :encoded_data,
+               :payload => encoded_data,
+               :content_type => 'application/octet-stream'
+             r.attach :name => :json,
+               :payload => data.to_json,
+               :content_type => 'application/json'
+           end
+           session_resource['push/table'].post(content, http_headers(:content_type => content_type))
+           self.stream_state = stream.to_hash
+         rescue RestClient::RequestFailed => e
+           # retry the same data, it got corrupted somehow.
+           if e.http_code == 412
+             next
+           end
+           raise
+         end
+         elapsed_time
+       end
+       stream.state[:chunksize] = chunksize
+
+       progress.inc(row_size)
+
+       stream.increment(row_size)
+       break if stream.complete?
+     end
+
+     progress.finish
+     completed_tables << stream.table_name.to_s
+     self.stream_state = {}
+   end
+
+   def local_tables_info
+     opts[:local_tables_info] ||= fetch_local_tables_info
+   end
+
+   def tables
+     h = {}
+     local_tables_info.each do |table_name, count|
+       next if completed_tables.include?(table_name.to_s)
+       h[table_name.to_s] = count
+     end
+     h
+   end
+
+   def record_count
+     @record_count ||= local_tables_info.values.inject(0) { |a,c| a += c }
+   end
+
+   def fetch_local_tables_info
+     tables_with_counts = {}
+     db.tables.each do |table|
+       tables_with_counts[table] = db[table.to_sym.identifier].count
+     end
+     apply_table_filter(tables_with_counts)
+   end
+
+ end
+
+ end
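
Putting it together, a minimal driver along the lines of what the gem's command-line client does; the URLs and chunk size here are hypothetical:

    require 'taps/operation'

    # Pull the remote database down into the local one; :push works the same
    # way with the roles reversed.
    op = Taps::Operation.factory(:pull,
      'postgres://localhost/myapp_development',        # local database_url
      'http://user:password@taps.example.com:5000',    # remote taps server
      :default_chunksize => 1000)
    op.run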