dkastner-taps 0.3.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,303 @@
1
+ require 'taps/monkey'
2
+ require 'taps/multipart'
3
+ require 'taps/utils'
4
+ require 'taps/log'
5
+ require 'json/pure'
6
+
7
+ module Taps
8
+
9
+ class DataStream
10
+ class CorruptedData < Exception; end
11
+
12
+ attr_reader :db, :state
13
+
14
+ def initialize(db, state)
15
+ @db = db
16
+ @state = {
17
+ :offset => 0,
18
+ :avg_chunksize => 0,
19
+ :num_chunksize => 0,
20
+ :total_chunksize => 0,
21
+ }.merge(state)
22
+ @complete = false
23
+ end
24
+
25
+ def log
26
+ Taps.log
27
+ end
28
+
29
+ def error=(val)
30
+ state[:error] = val
31
+ end
32
+
33
+ def error
34
+ state[:error] || false
35
+ end
36
+
37
+ def table_name
38
+ state[:table_name].to_sym
39
+ end
40
+
41
+ def table_name_sql
42
+ table_name.identifier
43
+ end
44
+
45
+ def to_hash
46
+ state.merge(:klass => self.class.to_s)
47
+ end
48
+
49
+ def to_json
50
+ to_hash.to_json
51
+ end
52
+
53
+ def string_columns
54
+ @string_columns ||= Taps::Utils.incorrect_blobs(db, table_name)
55
+ end
56
+
57
+ def table
58
+ @table ||= db[table_name_sql]
59
+ end
60
+
61
+ def order_by(name=nil)
62
+ @order_by ||= begin
63
+ name ||= table_name
64
+ Taps::Utils.order_by(db, name)
65
+ end
66
+ end
67
+
68
+ def increment(row_count)
69
+ state[:offset] += row_count
70
+ end
71
+
72
+ # keep a record of the average chunksize within the first few hundred thousand records, after chunksize
73
+ # goes below 100 or maybe if offset is > 1000
74
+ def fetch_rows
75
+ state[:chunksize] = fetch_chunksize
76
+ ds = table.order(*order_by).limit(state[:chunksize], state[:offset])
77
+ log.debug "DataStream#fetch_rows SQL -> #{ds.sql}"
78
+ rows = Taps::Utils.format_data(ds.all,
79
+ :string_columns => string_columns)
80
+ update_chunksize_stats
81
+ rows
82
+ end
83
+
84
+ def max_chunksize_training
85
+ 20
86
+ end
87
+
88
+ def fetch_chunksize
89
+ chunksize = state[:chunksize]
90
+ return chunksize if state[:num_chunksize] < max_chunksize_training
91
+ return chunksize if state[:avg_chunksize] == 0
92
+ return chunksize if state[:error]
93
+ state[:avg_chunksize] > chunksize ? state[:avg_chunksize] : chunksize
94
+ end
95
+
96
+ def update_chunksize_stats
97
+ return if state[:num_chunksize] >= max_chunksize_training
98
+ state[:total_chunksize] += state[:chunksize]
99
+ state[:num_chunksize] += 1
100
+ state[:avg_chunksize] = state[:total_chunksize] / state[:num_chunksize] rescue state[:chunksize]
101
+ end
102
+
103
+ def encode_rows(rows)
104
+ Taps::Utils.base64encode(Marshal.dump(rows))
105
+ end
106
+
107
+ def fetch
108
+ log.debug "DataStream#fetch state -> #{state.inspect}"
109
+
110
+ t1 = Time.now
111
+ rows = fetch_rows
112
+ encoded_data = encode_rows(rows)
113
+ t2 = Time.now
114
+ elapsed_time = t2 - t1
115
+
116
+ @complete = rows == { }
117
+
118
+ [encoded_data, (@complete ? 0 : rows[:data].size), elapsed_time]
119
+ end
120
+
121
+ def complete?
122
+ @complete
123
+ end
124
+
125
+ def fetch_remote(resource, headers)
126
+ params = fetch_from_resource(resource, headers)
127
+ encoded_data = params[:encoded_data]
128
+ json = params[:json]
129
+
130
+ rows = parse_encoded_data(encoded_data, json[:checksum])
131
+ @complete = rows == { }
132
+
133
+ # update local state
134
+ state.merge!(json[:state].merge(:chunksize => state[:chunksize]))
135
+
136
+ unless @complete
137
+ import_rows(rows)
138
+ rows[:data].size
139
+ else
140
+ 0
141
+ end
142
+ end
143
+
144
+ # this one is used inside the server process
145
+ def fetch_remote_in_server(params)
146
+ json = self.class.parse_json(params[:json])
147
+ encoded_data = params[:encoded_data]
148
+
149
+ rows = parse_encoded_data(encoded_data, json[:checksum])
150
+ @complete = rows == { }
151
+
152
+ unless @complete
153
+ import_rows(rows)
154
+ rows[:data].size
155
+ else
156
+ 0
157
+ end
158
+ end
159
+
160
+ def fetch_from_resource(resource, headers)
161
+ res = nil
162
+ log.debug "DataStream#fetch_from_resource state -> #{state.inspect}"
163
+ state[:chunksize] = Taps::Utils.calculate_chunksize(state[:chunksize]) do |c|
164
+ state[:chunksize] = c
165
+ res = resource.post({:state => self.to_json}, headers)
166
+ end
167
+
168
+ begin
169
+ params = Taps::Multipart.parse(res)
170
+ params[:json] = self.class.parse_json(params[:json]) if params.has_key?(:json)
171
+ return params
172
+ rescue JSON::Parser
173
+ raise DataStream::CorruptedData.new("Invalid JSON Received")
174
+ end
175
+ end
176
+
177
+ def self.parse_json(json)
178
+ hash = JSON.parse(json).symbolize_keys
179
+ hash[:state].symbolize_keys! if hash.has_key?(:state)
180
+ hash
181
+ end
182
+
183
+ def parse_encoded_data(encoded_data, checksum)
184
+ raise DataStream::CorruptedData.new("Checksum Failed") unless Taps::Utils.valid_data?(encoded_data, checksum)
185
+
186
+ begin
187
+ return Marshal.load(Taps::Utils.base64decode(encoded_data))
188
+ rescue Object => e
189
+ unless ENV['NO_DUMP_MARSHAL_ERRORS']
190
+ puts "Error encountered loading data, wrote the data chunk to dump.#{Process.pid}.dat"
191
+ File.open("dump.#{Process.pid}.dat", "w") { |f| f.write(encoded_data) }
192
+ end
193
+ raise
194
+ end
195
+ end
196
+
197
+ def import_rows(rows)
198
+ table.import(rows[:header], rows[:data])
199
+ state[:offset] += rows[:data].size
200
+ end
201
+
202
+ def self.factory(db, state)
203
+ if defined?(Sequel::MySQL) && Sequel::MySQL.respond_to?(:convert_invalid_date_time=)
204
+ Sequel::MySQL.convert_invalid_date_time = :nil
205
+ end
206
+
207
+ if state.has_key?(:klass)
208
+ return eval(state[:klass]).new(db, state)
209
+ end
210
+
211
+ if Taps::Utils.single_integer_primary_key(db, state[:table_name].to_sym)
212
+ DataStreamKeyed.new(db, state)
213
+ else
214
+ DataStream.new(db, state)
215
+ end
216
+ end
217
+ end
218
+
219
+
220
+ class DataStreamKeyed < DataStream
221
+ attr_accessor :buffer
222
+
223
+ def initialize(db, state)
224
+ super(db, state)
225
+ @state = { :primary_key => order_by(state[:table_name]).first, :filter => 0 }.merge(state)
226
+ @buffer = []
227
+ end
228
+
229
+ def primary_key
230
+ state[:primary_key].to_sym
231
+ end
232
+
233
+ def buffer_limit
234
+ if state[:last_fetched] and state[:last_fetched] < state[:filter] and self.buffer.size == 0
235
+ state[:last_fetched]
236
+ else
237
+ state[:filter]
238
+ end
239
+ end
240
+
241
+ def calc_limit(chunksize)
242
+ # we want to not fetch more than is needed while we're
243
+ # inside sinatra but locally we can select more than
244
+ # is strictly needed
245
+ if defined?(Sinatra)
246
+ (chunksize * 1.1).ceil
247
+ else
248
+ (chunksize * 3).ceil
249
+ end
250
+ end
251
+
252
+ def load_buffer(chunksize)
253
+ # make sure BasicObject is not polluted by subsequent requires
254
+ Sequel::BasicObject.remove_methods!
255
+
256
+ num = 0
257
+ loop do
258
+ limit = calc_limit(chunksize)
259
+ # we have to use local variables in order for the virtual row filter to work correctly
260
+ key = primary_key
261
+ buf_limit = buffer_limit
262
+ ds = table.order(*order_by).filter { key.sql_number > buf_limit }.limit(limit)
263
+ log.debug "DataStreamKeyed#load_buffer SQL -> #{ds.sql}"
264
+ data = ds.all
265
+ self.buffer += data
266
+ num += data.size
267
+ if data.size > 0
268
+ # keep a record of the last primary key value in the buffer
269
+ state[:filter] = self.buffer.last[ primary_key ]
270
+ end
271
+
272
+ break if num >= chunksize or data.size == 0
273
+ end
274
+ end
275
+
276
+ def fetch_buffered(chunksize)
277
+ load_buffer(chunksize) if self.buffer.size < chunksize
278
+ rows = buffer.slice(0, chunksize)
279
+ state[:last_fetched] = if rows.size > 0
280
+ rows.last[ primary_key ]
281
+ else
282
+ nil
283
+ end
284
+ rows
285
+ end
286
+
287
+ def import_rows(rows)
288
+ table.import(rows[:header], rows[:data])
289
+ end
290
+
291
+ def fetch_rows
292
+ chunksize = state[:chunksize]
293
+ Taps::Utils.format_data(fetch_buffered(chunksize) || [],
294
+ :string_columns => string_columns)
295
+ end
296
+
297
+ def increment(row_count)
298
+ # pop the rows we just successfully sent off the buffer
299
+ @buffer.slice!(0, row_count)
300
+ end
301
+ end
302
+
303
+ end
@@ -0,0 +1,20 @@
1
# Bind Sequel's default model database to the taps session store.
Sequel::Model.db = Sequel.connect(Taps::Config.taps_database_url)

# One row per active transfer session: an opaque key handed to the client
# plus the database URL that session is allowed to talk to, with
# timestamps for creation and last use.
class DbSession < Sequel::Model
  plugin :schema
  set_schema do
    primary_key :id
    text :key
    text :database_url
    timestamp :started_at
    timestamp :last_access
  end

  # Connects to this session's database. When a block is given, Sequel
  # yields the connection and disconnects afterwards; otherwise the open
  # connection is returned to the caller.
  def conn
    Sequel.connect(database_url) do |db|
      yield db if block_given?
    end
  end
end

# Create the sessions table on first boot.
DbSession.create_table! unless DbSession.table_exists?
data/lib/taps/log.rb ADDED
@@ -0,0 +1,15 @@
1
module Taps
  # Replaces the process-wide logger. Uses a module-level instance
  # variable rather than a class variable (@@log) — class variables are
  # shared across the inheritance tree and are a well-known Ruby
  # anti-pattern; behavior for callers is unchanged.
  def self.log=(log)
    @log = log
  end

  # Returns the configured logger, lazily creating a $stderr Logger at
  # ERROR level with a fixed datetime format on first use.
  def self.log
    @log ||= begin
      require 'logger'
      log = Logger.new($stderr)
      log.level = Logger::ERROR
      log.datetime_format = "%Y-%m-%d %H:%M:%S"
      log
    end
  end
end
@@ -0,0 +1,21 @@
1
class Hash
  # Returns a copy of the hash with keys converted to symbols where
  # possible; keys that cannot be symbolized (e.g. Integers) are kept
  # as-is.
  def symbolize_keys
    inject({}) do |options, (key, value)|
      options[(key.to_sym rescue key) || key] = value
      options
    end
  end

  # In-place variant of #symbolize_keys; returns self.
  def symbolize_keys!
    self.replace(symbolize_keys)
  end

  # Symbolizes keys at every nesting level.
  # BUGFIX: despite its name this previously only converted the top two
  # levels (it called the non-recursive #symbolize_keys! on child
  # hashes); it now descends through arbitrarily nested hashes.
  def symbolize_recursively!
    self.replace(symbolize_keys)
    self.each do |k, v|
      if v.kind_of?(Hash)
        v.symbolize_recursively!
      end
    end
  end
end
@@ -0,0 +1,73 @@
1
+ require 'restclient'
2
+ require 'rack/utils'
3
+ require 'json/pure'
4
+ require 'stringio'
5
+
6
module Taps
  # Wraps one named part of a multipart HTTP payload, plus helpers to
  # assemble and parse whole multipart bodies.
  class Multipart
    # Collects parts and renders them into a single multipart body.
    class Container
      attr_accessor :attachments

      def initialize
        @attachments = []
      end

      # Builds a Multipart part from +opts+ and records it.
      def attach(opts)
        attachments << Taps::Multipart.new(opts)
      end

      # Renders every attached part; returns [body_string, content_type].
      def generate
        parts = {}
        attachments.each { |part| parts[part.name] = part }
        payload = RestClient::Payload::Multipart.new(parts)
        [payload.to_s, payload.headers['Content-Type']]
      end
    end

    attr_reader :opts

    def initialize(opts={})
      @opts = opts
    end

    def name
      opts[:name]
    end

    def to_s
      opts[:payload]
    end

    def content_type
      opts[:content_type] || 'text/plain'
    end

    def original_filename
      opts[:original_filename]
    end

    # Yields a fresh Container to the block and returns its generated
    # [body, content_type] pair.
    def self.create
      container = Taps::Multipart::Container.new
      yield container
      container.generate
    end

    # response is a rest-client response; parses its multipart body back
    # into a hash of named parts with symbolized keys.
    def self.parse(response)
      body = response.to_s
      env = {
        'CONTENT_TYPE' => response.headers[:content_type],
        'CONTENT_LENGTH' => body.size,
        'rack.input' => StringIO.new(body)
      }

      parts = Rack::Utils::Multipart.parse_multipart(env)
      parts.symbolize_keys!
      parts
    end

  end
end
@@ -0,0 +1,550 @@
1
+ require 'rest_client'
2
+ require 'sequel'
3
+ require 'zlib'
4
+
5
+ require 'taps/progress_bar'
6
+ require 'taps/config'
7
+ require 'taps/utils'
8
+ require 'taps/data_stream'
9
+
10
+ # disable warnings, rest client makes a lot of noise right now
11
+ $VERBOSE = nil
12
+
13
+ module Taps
14
+
15
+ class Operation
16
+ attr_reader :database_url, :remote_url, :opts
17
+ attr_reader :session_uri
18
+
19
+ def initialize(database_url, remote_url, opts={})
20
+ @database_url = database_url
21
+ @remote_url = remote_url
22
+ @opts = opts
23
+ @exiting = false
24
+ @session_uri = opts[:session_uri]
25
+ end
26
+
27
+ def file_prefix
28
+ "op"
29
+ end
30
+
31
+ def indexes_first?
32
+ !!opts[:indexes_first]
33
+ end
34
+
35
+ def table_filter
36
+ opts[:table_filter]
37
+ end
38
+
39
+ def apply_table_filter(tables)
40
+ return tables unless table_filter
41
+ re = Regexp.new(table_filter)
42
+ if tables.kind_of?(Hash)
43
+ ntables = {}
44
+ tables.each do |t, d|
45
+ unless re.match(t.to_s).nil?
46
+ ntables[t] = d
47
+ end
48
+ end
49
+ ntables
50
+ else
51
+ tables.reject { |t| re.match(t.to_s).nil? }
52
+ end
53
+ end
54
+
55
+ def log
56
+ Taps.log
57
+ end
58
+
59
+ def store_session
60
+ file = "#{file_prefix}_#{Time.now.strftime("%Y%m%d%H%M")}.dat"
61
+ puts "\nSaving session to #{file}.."
62
+ File.open(file, 'w') do |f|
63
+ f.write(to_hash.to_json)
64
+ end
65
+ end
66
+
67
+ def to_hash
68
+ {
69
+ :klass => self.class.to_s,
70
+ :database_url => database_url,
71
+ :remote_url => remote_url,
72
+ :session_uri => session_uri,
73
+ :stream_state => stream_state,
74
+ :completed_tables => completed_tables,
75
+ :table_filter => table_filter,
76
+ }
77
+ end
78
+
79
+ def exiting?
80
+ !!@exiting
81
+ end
82
+
83
+ def setup_signal_trap
84
+ trap("INT") {
85
+ puts "\nCompleting current action..."
86
+ @exiting = true
87
+ }
88
+
89
+ trap("TERM") {
90
+ puts "\nCompleting current action..."
91
+ @exiting = true
92
+ }
93
+ end
94
+
95
+ def resuming?
96
+ opts[:resume] == true
97
+ end
98
+
99
+ def default_chunksize
100
+ opts[:default_chunksize]
101
+ end
102
+
103
+ def completed_tables
104
+ opts[:completed_tables] ||= []
105
+ end
106
+
107
+ def stream_state
108
+ opts[:stream_state] ||= {}
109
+ end
110
+
111
+ def stream_state=(val)
112
+ opts[:stream_state] = val
113
+ end
114
+
115
+ def compression_disabled?
116
+ !!opts[:disable_compression]
117
+ end
118
+
119
+ def db
120
+ @db ||= Sequel.connect(database_url)
121
+ end
122
+
123
+ def server
124
+ @server ||= RestClient::Resource.new(remote_url)
125
+ end
126
+
127
+ def session_resource
128
+ @session_resource ||= begin
129
+ @session_uri ||= server['sessions'].post('', http_headers).to_s
130
+ server[@session_uri]
131
+ end
132
+ end
133
+
134
+ def set_session(uri)
135
+ session_uri = uri
136
+ @session_resource = server[session_uri]
137
+ end
138
+
139
+ def close_session
140
+ @session_resource.delete(http_headers) if @session_resource
141
+ end
142
+
143
+ def safe_url(url)
144
+ url.sub(/\/\/(.+?)?:(.*?)@/, '//\1:[hidden]@')
145
+ end
146
+
147
+ def safe_remote_url
148
+ safe_url(remote_url)
149
+ end
150
+
151
+ def safe_database_url
152
+ safe_url(database_url)
153
+ end
154
+
155
+ def http_headers(extra = {})
156
+ base = { :taps_version => Taps.version }
157
+ if compression_disabled?
158
+ base[:accept_encoding] = ""
159
+ else
160
+ base[:accept_encoding] = "gzip, deflate"
161
+ end
162
+ base.merge(extra)
163
+ end
164
+
165
+ def format_number(num)
166
+ num.to_s.gsub(/(\d)(?=(\d\d\d)+(?!\d))/, "\\1,")
167
+ end
168
+
169
+ def verify_server
170
+ begin
171
+ server['/'].get(http_headers)
172
+ rescue RestClient::RequestFailed => e
173
+ if e.http_code == 417
174
+ puts "#{safe_remote_url} is running a different minor version of taps."
175
+ puts "#{e.response.to_s}"
176
+ exit(1)
177
+ else
178
+ raise
179
+ end
180
+ rescue RestClient::Unauthorized
181
+ puts "Bad credentials given for #{safe_remote_url}"
182
+ exit(1)
183
+ rescue Errno::ECONNREFUSED
184
+ puts "Can't connect to #{safe_remote_url}. Please check that it's running"
185
+ exit(1)
186
+ end
187
+ end
188
+
189
+ def self.factory(type, database_url, remote_url, opts)
190
+ type = :resume if opts[:resume]
191
+ klass = case type
192
+ when :pull then Taps::Pull
193
+ when :push then Taps::Push
194
+ when :resume then eval(opts[:klass])
195
+ else raise "Unknown Operation Type -> #{type}"
196
+ end
197
+
198
+ klass.new(database_url, remote_url, opts)
199
+ end
200
+ end
201
+
202
  # Pulls schema, data, indexes and sequences from a remote taps server
  # into the local database. Control flow (session creation, retries,
  # resume) is order-sensitive, so only comments are added here.
  class Pull < Operation
    def file_prefix
      "pull"
    end

    def to_hash
      super.merge(:remote_tables_info => remote_tables_info)
    end

    # Entry point: runs the full pull pipeline. On a server-side error the
    # session is saved to disk so the transfer can be resumed later.
    def run
      verify_server

      begin
        unless resuming?
          pull_schema
          pull_indexes if indexes_first?
        end
        setup_signal_trap
        pull_partial_data if resuming?
        pull_data
        pull_indexes unless indexes_first?
        pull_reset_sequences
        close_session
      rescue RestClient::Exception => e
        store_session
        if e.respond_to?(:response)
          puts "!!! Caught Server Exception"
          puts "HTTP CODE: #{e.http_code}"
          puts "#{e.response.to_s}"
          exit(1)
        else
          raise
        end
      end
    end

    # Fetches each remaining table's schema dump from the server and loads
    # it into the local database.
    def pull_schema
      puts "Receiving schema"

      progress = ProgressBar.new('Schema', tables.size)
      tables.each do |table_name, count|
        schema_data = session_resource['pull/schema'].post({:table_name => table_name}, http_headers).to_s
        log.debug "Table: #{table_name}\n#{schema_data}\n"
        output = Taps::Utils.load_schema(database_url, schema_data)
        puts output if output
        progress.inc(1)
      end
      progress.finish
    end

    # Streams every remaining table's rows down from the server.
    def pull_data
      puts "Receiving data"

      puts "#{tables.size} tables, #{format_number(record_count)} records"

      tables.each do |table_name, count|
        progress = ProgressBar.new(table_name.to_s, count)
        stream = Taps::DataStream.factory(db, {
          :chunksize => default_chunksize,
          :table_name => table_name
        })
        pull_data_from_table(stream, progress)
      end
    end

    # Resumes the table that was mid-transfer when the session was saved.
    def pull_partial_data
      return if stream_state == {}

      table_name = stream_state[:table_name]
      record_count = tables[table_name.to_s]
      puts "Resuming #{table_name}, #{format_number(record_count)} records"

      progress = ProgressBar.new(table_name.to_s, record_count)
      stream = Taps::DataStream.factory(db, stream_state)
      pull_data_from_table(stream, progress)
    end

    # Pulls chunks for one table until the server reports completion.
    # Corrupted chunks flag the stream's error state (which freezes the
    # adaptive chunksize) and retry the same position.
    def pull_data_from_table(stream, progress)
      loop do
        begin
          if exiting?
            store_session
            exit 0
          end

          size = stream.fetch_remote(session_resource['pull/table'], http_headers)
          break if stream.complete?
          progress.inc(size) unless exiting?
          stream.error = false
          self.stream_state = stream.to_hash
        rescue DataStream::CorruptedData => e
          puts "Corrupted Data Received #{e.message}, retrying..."
          stream.error = true
          next
        end
      end

      progress.finish
      completed_tables << stream.table_name.to_s
      self.stream_state = {}
    end

    # Remaining tables (name => row count), excluding completed ones.
    def tables
      h = {}
      remote_tables_info.each do |table_name, count|
        next if completed_tables.include?(table_name.to_s)
        h[table_name.to_s] = count
      end
      h
    end

    def record_count
      @record_count ||= remote_tables_info.values.inject(0) { |a,c| a += c }
    end

    def remote_tables_info
      opts[:remote_tables_info] ||= fetch_remote_tables_info
    end

    # Asks the server for table names and per-table row counts, retrying
    # each request up to 10 times before giving up and exiting.
    def fetch_remote_tables_info
      retries = 0
      max_retries = 10
      begin
        tables = JSON.load(session_resource['pull/table_names'].get(http_headers).to_s)
      rescue RestClient::Exception
        retries += 1
        retry if retries <= max_retries
        puts "Unable to fetch tables information from #{remote_url}. Please check the server log."
        exit(1)
      end

      data = {}
      apply_table_filter(tables).each do |table_name|
        retries = 0
        begin
          count = session_resource['pull/table_count'].post({:table => table_name}, http_headers).to_s.to_i
          data[table_name] = count
        rescue RestClient::Exception
          retries += 1
          retry if retries <= max_retries
          puts "Unable to fetch tables information from #{remote_url}. Please check the server log."
          exit(1)
        end
      end
      data
    end

    # Fetches the remote index definitions and applies them locally.
    def pull_indexes
      puts "Receiving indexes"

      idxs = JSON.parse(session_resource['pull/indexes'].get(http_headers).to_s)

      apply_table_filter(idxs).each do |table, indexes|
        next unless indexes.size > 0
        progress = ProgressBar.new(table, indexes.size)
        indexes.each do |idx|
          output = Taps::Utils.load_indexes(database_url, idx)
          puts output if output
          progress.inc(1)
        end
        progress.finish
      end
    end

    # Re-syncs auto-increment sequences on the local database after import.
    def pull_reset_sequences
      puts "Resetting sequences"

      output = Taps::Utils.schema_bin(:reset_db_sequences, database_url)
      puts output if output
    end
  end
373
+
374
  # Pushes local schema, data, indexes and sequences up to a remote taps
  # server. The chunked-upload loop is order-sensitive (break/next inside
  # the timing block), so only comments are added here.
  class Push < Operation
    def file_prefix
      "push"
    end

    def to_hash
      super.merge(:local_tables_info => local_tables_info)
    end

    # Entry point: runs the full push pipeline. On a server-side error the
    # session is saved to disk so the transfer can be resumed later.
    def run
      verify_server
      begin
        unless resuming?
          push_schema
          push_indexes if indexes_first?
        end
        setup_signal_trap
        push_partial_data if resuming?
        push_data
        push_indexes unless indexes_first?
        push_reset_sequences
        close_session
      rescue RestClient::Exception => e
        store_session
        if e.respond_to?(:response)
          puts "!!! Caught Server Exception"
          puts "HTTP CODE: #{e.http_code}"
          puts "#{e.response.to_s}"
          exit(1)
        else
          raise
        end
      end
    end

    # Dumps local index definitions and uploads them one at a time.
    def push_indexes
      idxs = JSON.parse(Taps::Utils.schema_bin(:indexes_individual, database_url))

      return unless idxs.size > 0

      puts "Sending indexes"

      apply_table_filter(idxs).each do |table, indexes|
        next unless indexes.size > 0
        progress = ProgressBar.new(table, indexes.size)
        indexes.each do |idx|
          session_resource['push/indexes'].post(idx, http_headers)
          progress.inc(1)
        end
        progress.finish
      end
    end

    # Dumps and uploads each remaining table's schema.
    def push_schema
      puts "Sending schema"

      progress = ProgressBar.new('Schema', tables.size)
      tables.each do |table, count|
        schema_data = Taps::Utils.schema_bin(:dump_table, database_url, table)
        log.debug "Table: #{table}\n#{schema_data}\n"
        session_resource['push/schema'].post(schema_data, http_headers)
        progress.inc(1)
      end
      progress.finish
    end

    # Asks the server to re-sync its auto-increment sequences.
    def push_reset_sequences
      puts "Resetting sequences"

      session_resource['push/reset_sequences'].post('', http_headers)
    end

    # Resumes the table that was mid-transfer when the session was saved.
    def push_partial_data
      return if stream_state == {}

      table_name = stream_state[:table_name]
      record_count = tables[table_name.to_s]
      puts "Resuming #{table_name}, #{format_number(record_count)} records"
      progress = ProgressBar.new(table_name.to_s, record_count)
      stream = Taps::DataStream.factory(db, stream_state)
      push_data_from_table(stream, progress)
    end

    # Streams every remaining table's rows up to the server.
    def push_data
      puts "Sending data"

      puts "#{tables.size} tables, #{format_number(record_count)} records"

      tables.each do |table_name, count|
        stream = Taps::DataStream.factory(db,
          :table_name => table_name,
          :chunksize => default_chunksize)
        progress = ProgressBar.new(table_name.to_s, count)
        push_data_from_table(stream, progress)
      end
    end

    # Uploads chunks for one table. calculate_chunksize times each upload
    # (the block returns elapsed_time) so the chunksize adapts; `break`
    # exits the timing block early once the stream is complete, and an
    # HTTP 412 from the server means the chunk arrived corrupted and the
    # same chunk is retried via `next`.
    def push_data_from_table(stream, progress)
      loop do
        if exiting?
          store_session
          exit 0
        end

        row_size = 0
        chunksize = stream.state[:chunksize]
        chunksize = Taps::Utils.calculate_chunksize(chunksize) do |c|
          stream.state[:chunksize] = c
          encoded_data, row_size, elapsed_time = stream.fetch
          break if stream.complete?

          data = {
            :state => stream.to_hash,
            :checksum => Taps::Utils.checksum(encoded_data).to_s
          }

          begin
            content, content_type = Taps::Multipart.create do |r|
              r.attach :name => :encoded_data,
                :payload => encoded_data,
                :content_type => 'application/octet-stream'
              r.attach :name => :json,
                :payload => data.to_json,
                :content_type => 'application/json'
            end
            session_resource['push/table'].post(content, http_headers(:content_type => content_type))
            self.stream_state = stream.to_hash
          rescue RestClient::RequestFailed => e
            # retry the same data, it got corrupted somehow.
            if e.http_code == 412
              next
            end
            raise
          end
          elapsed_time
        end
        stream.state[:chunksize] = chunksize

        progress.inc(row_size)

        stream.increment(row_size)
        break if stream.complete?
      end

      progress.finish
      completed_tables << stream.table_name.to_s
      self.stream_state = {}
    end

    def local_tables_info
      opts[:local_tables_info] ||= fetch_local_tables_info
    end

    # Remaining tables (name => row count), excluding completed ones.
    def tables
      h = {}
      local_tables_info.each do |table_name, count|
        next if completed_tables.include?(table_name.to_s)
        h[table_name.to_s] = count
      end
      h
    end

    def record_count
      @record_count ||= local_tables_info.values.inject(0) { |a,c| a += c }
    end

    # Counts rows in every local table, then applies the table filter.
    def fetch_local_tables_info
      tables_with_counts = {}
      db.tables.each do |table|
        tables_with_counts[table] = db[table.to_sym.identifier].count
      end
      apply_table_filter(tables_with_counts)
    end

  end
549
+
550
+ end