dkastner-taps 0.3.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +20 -0
- data/README.rdoc +51 -0
- data/Rakefile +75 -0
- data/TODO +1 -0
- data/VERSION.yml +5 -0
- data/bin/schema +54 -0
- data/bin/schema.cmd +6 -0
- data/bin/taps +6 -0
- data/lib/taps/cli.rb +188 -0
- data/lib/taps/config.rb +47 -0
- data/lib/taps/data_stream.rb +303 -0
- data/lib/taps/db_session.rb +20 -0
- data/lib/taps/log.rb +15 -0
- data/lib/taps/monkey.rb +21 -0
- data/lib/taps/multipart.rb +73 -0
- data/lib/taps/operation.rb +550 -0
- data/lib/taps/progress_bar.rb +236 -0
- data/lib/taps/schema.rb +83 -0
- data/lib/taps/server.rb +166 -0
- data/lib/taps/utils.rb +154 -0
- data/spec/base.rb +26 -0
- data/spec/cli_spec.rb +10 -0
- data/spec/data_stream_spec.rb +23 -0
- data/spec/operation_spec.rb +32 -0
- data/spec/server_spec.rb +35 -0
- data/spec/utils_spec.rb +55 -0
- metadata +231 -0
data/lib/taps/data_stream.rb ADDED

@@ -0,0 +1,303 @@
+require 'taps/monkey'
+require 'taps/multipart'
+require 'taps/utils'
+require 'taps/log'
+require 'json/pure'
+
+module Taps
+
+class DataStream
+  class CorruptedData < Exception; end
+
+  attr_reader :db, :state
+
+  def initialize(db, state)
+    @db = db
+    @state = {
+      :offset => 0,
+      :avg_chunksize => 0,
+      :num_chunksize => 0,
+      :total_chunksize => 0,
+    }.merge(state)
+    @complete = false
+  end
+
+  def log
+    Taps.log
+  end
+
+  def error=(val)
+    state[:error] = val
+  end
+
+  def error
+    state[:error] || false
+  end
+
+  def table_name
+    state[:table_name].to_sym
+  end
+
+  def table_name_sql
+    table_name.identifier
+  end
+
+  def to_hash
+    state.merge(:klass => self.class.to_s)
+  end
+
+  def to_json
+    to_hash.to_json
+  end
+
+  def string_columns
+    @string_columns ||= Taps::Utils.incorrect_blobs(db, table_name)
+  end
+
+  def table
+    @table ||= db[table_name_sql]
+  end
+
+  def order_by(name=nil)
+    @order_by ||= begin
+      name ||= table_name
+      Taps::Utils.order_by(db, name)
+    end
+  end
+
+  def increment(row_count)
+    state[:offset] += row_count
+  end
+
+  # keep a record of the average chunksize within the first few hundred thousand records, after chunksize
+  # goes below 100 or maybe if offset is > 1000
+  def fetch_rows
+    state[:chunksize] = fetch_chunksize
+    ds = table.order(*order_by).limit(state[:chunksize], state[:offset])
+    log.debug "DataStream#fetch_rows SQL -> #{ds.sql}"
+    rows = Taps::Utils.format_data(ds.all,
+      :string_columns => string_columns)
+    update_chunksize_stats
+    rows
+  end
+
+  def max_chunksize_training
+    20
+  end
+
+  def fetch_chunksize
+    chunksize = state[:chunksize]
+    return chunksize if state[:num_chunksize] < max_chunksize_training
+    return chunksize if state[:avg_chunksize] == 0
+    return chunksize if state[:error]
+    state[:avg_chunksize] > chunksize ? state[:avg_chunksize] : chunksize
+  end
+
+  def update_chunksize_stats
+    return if state[:num_chunksize] >= max_chunksize_training
+    state[:total_chunksize] += state[:chunksize]
+    state[:num_chunksize] += 1
+    state[:avg_chunksize] = state[:total_chunksize] / state[:num_chunksize] rescue state[:chunksize]
+  end
+
+  def encode_rows(rows)
+    Taps::Utils.base64encode(Marshal.dump(rows))
+  end
+
+  def fetch
+    log.debug "DataStream#fetch state -> #{state.inspect}"
+
+    t1 = Time.now
+    rows = fetch_rows
+    encoded_data = encode_rows(rows)
+    t2 = Time.now
+    elapsed_time = t2 - t1
+
+    @complete = rows == { }
+
+    [encoded_data, (@complete ? 0 : rows[:data].size), elapsed_time]
+  end
+
+  def complete?
+    @complete
+  end
+
+  def fetch_remote(resource, headers)
+    params = fetch_from_resource(resource, headers)
+    encoded_data = params[:encoded_data]
+    json = params[:json]
+
+    rows = parse_encoded_data(encoded_data, json[:checksum])
+    @complete = rows == { }
+
+    # update local state
+    state.merge!(json[:state].merge(:chunksize => state[:chunksize]))
+
+    unless @complete
+      import_rows(rows)
+      rows[:data].size
+    else
+      0
+    end
+  end
+
+  # this one is used inside the server process
+  def fetch_remote_in_server(params)
+    json = self.class.parse_json(params[:json])
+    encoded_data = params[:encoded_data]
+
+    rows = parse_encoded_data(encoded_data, json[:checksum])
+    @complete = rows == { }
+
+    unless @complete
+      import_rows(rows)
+      rows[:data].size
+    else
+      0
+    end
+  end
+
+  def fetch_from_resource(resource, headers)
+    res = nil
+    log.debug "DataStream#fetch_from_resource state -> #{state.inspect}"
+    state[:chunksize] = Taps::Utils.calculate_chunksize(state[:chunksize]) do |c|
+      state[:chunksize] = c
+      res = resource.post({:state => self.to_json}, headers)
+    end
+
+    begin
+      params = Taps::Multipart.parse(res)
+      params[:json] = self.class.parse_json(params[:json]) if params.has_key?(:json)
+      return params
+    rescue JSON::Parser
+      raise DataStream::CorruptedData.new("Invalid JSON Received")
+    end
+  end
+
+  def self.parse_json(json)
+    hash = JSON.parse(json).symbolize_keys
+    hash[:state].symbolize_keys! if hash.has_key?(:state)
+    hash
+  end
+
+  def parse_encoded_data(encoded_data, checksum)
+    raise DataStream::CorruptedData.new("Checksum Failed") unless Taps::Utils.valid_data?(encoded_data, checksum)
+
+    begin
+      return Marshal.load(Taps::Utils.base64decode(encoded_data))
+    rescue Object => e
+      unless ENV['NO_DUMP_MARSHAL_ERRORS']
+        puts "Error encountered loading data, wrote the data chunk to dump.#{Process.pid}.dat"
+        File.open("dump.#{Process.pid}.dat", "w") { |f| f.write(encoded_data) }
+      end
+      raise
+    end
+  end
+
+  def import_rows(rows)
+    table.import(rows[:header], rows[:data])
+    state[:offset] += rows[:data].size
+  end
+
+  def self.factory(db, state)
+    if defined?(Sequel::MySQL) && Sequel::MySQL.respond_to?(:convert_invalid_date_time=)
+      Sequel::MySQL.convert_invalid_date_time = :nil
+    end
+
+    if state.has_key?(:klass)
+      return eval(state[:klass]).new(db, state)
+    end
+
+    if Taps::Utils.single_integer_primary_key(db, state[:table_name].to_sym)
+      DataStreamKeyed.new(db, state)
+    else
+      DataStream.new(db, state)
+    end
+  end
+end
+
+
+class DataStreamKeyed < DataStream
+  attr_accessor :buffer
+
+  def initialize(db, state)
+    super(db, state)
+    @state = { :primary_key => order_by(state[:table_name]).first, :filter => 0 }.merge(state)
+    @buffer = []
+  end
+
+  def primary_key
+    state[:primary_key].to_sym
+  end
+
+  def buffer_limit
+    if state[:last_fetched] and state[:last_fetched] < state[:filter] and self.buffer.size == 0
+      state[:last_fetched]
+    else
+      state[:filter]
+    end
+  end
+
+  def calc_limit(chunksize)
+    # we want to not fetch more than is needed while we're
+    # inside sinatra but locally we can select more than
+    # is strictly needed
+    if defined?(Sinatra)
+      (chunksize * 1.1).ceil
+    else
+      (chunksize * 3).ceil
+    end
+  end
+
+  def load_buffer(chunksize)
+    # make sure BasicObject is not polluted by subsequent requires
+    Sequel::BasicObject.remove_methods!
+
+    num = 0
+    loop do
+      limit = calc_limit(chunksize)
+      # we have to use local variables in order for the virtual row filter to work correctly
+      key = primary_key
+      buf_limit = buffer_limit
+      ds = table.order(*order_by).filter { key.sql_number > buf_limit }.limit(limit)
+      log.debug "DataStreamKeyed#load_buffer SQL -> #{ds.sql}"
+      data = ds.all
+      self.buffer += data
+      num += data.size
+      if data.size > 0
+        # keep a record of the last primary key value in the buffer
+        state[:filter] = self.buffer.last[ primary_key ]
+      end
+
+      break if num >= chunksize or data.size == 0
+    end
+  end
+
+  def fetch_buffered(chunksize)
+    load_buffer(chunksize) if self.buffer.size < chunksize
+    rows = buffer.slice(0, chunksize)
+    state[:last_fetched] = if rows.size > 0
+      rows.last[ primary_key ]
+    else
+      nil
+    end
+    rows
+  end
+
+  def import_rows(rows)
+    table.import(rows[:header], rows[:data])
+  end
+
+  def fetch_rows
+    chunksize = state[:chunksize]
+    Taps::Utils.format_data(fetch_buffered(chunksize) || [],
+      :string_columns => string_columns)
+  end
+
+  def increment(row_count)
+    # pop the rows we just successfully sent off the buffer
+    @buffer.slice!(0, row_count)
+  end
+end
+
+end
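Note on the chunk format: `DataStream#fetch` marshals a rows hash, base64-encodes it, and ships it with a checksum that `#parse_encoded_data` verifies before decoding. A minimal sketch of that round trip, assuming `Taps::Utils.base64encode`/`base64decode` wrap Ruby's Base64 and that the checksum is CRC32 (the `Taps::Utils` internals are not part of this excerpt):

    require 'base64'
    require 'zlib'

    # A rows chunk in the shape DataStream produces: column header plus row arrays.
    rows = { :header => [:id, :name], :data => [[1, "alpha"], [2, "beta"]] }

    # Sender side (DataStream#fetch): dump, encode, checksum the encoded form.
    encoded  = Base64.encode64(Marshal.dump(rows))
    checksum = Zlib.crc32(encoded).to_s

    # Receiver side (DataStream#parse_encoded_data): verify before decoding,
    # so a corrupted chunk raises and the client retries it.
    raise "Checksum Failed" unless Zlib.crc32(encoded).to_s == checksum
    Marshal.load(Base64.decode64(encoded))[:data].size  # => 2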
data/lib/taps/db_session.rb ADDED

@@ -0,0 +1,20 @@
+Sequel::Model.db = Sequel.connect(Taps::Config.taps_database_url)
+
+class DbSession < Sequel::Model
+  plugin :schema
+  set_schema do
+    primary_key :id
+    text :key
+    text :database_url
+    timestamp :started_at
+    timestamp :last_access
+  end
+
+  def conn
+    Sequel.connect(database_url) do |db|
+      yield db if block_given?
+    end
+  end
+end
+
+DbSession.create_table! unless DbSession.table_exists?
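Each session row stores the target database URL, and `conn` opens a short-lived Sequel connection to it (the block form of `Sequel.connect` disconnects when the block returns). A hypothetical usage sketch with placeholder values:

    session = DbSession.create(:key => 'abc123',
                               :database_url => 'sqlite://taps_test.db',
                               :started_at => Time.now,
                               :last_access => Time.now)
    session.conn do |db|
      db.tables  # work against the session's database, then disconnect
    end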
data/lib/taps/log.rb ADDED

data/lib/taps/monkey.rb ADDED

@@ -0,0 +1,21 @@
+class Hash
+  def symbolize_keys
+    inject({}) do |options, (key, value)|
+      options[(key.to_sym rescue key) || key] = value
+      options
+    end
+  end
+
+  def symbolize_keys!
+    self.replace(symbolize_keys)
+  end
+
+  def symbolize_recursively!
+    self.replace(symbolize_keys)
+    self.each do |k, v|
+      if v.kind_of?(Hash)
+        v.symbolize_keys!
+      end
+    end
+  end
+end
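These Hash monkey patches are what let `DataStream.parse_json` hand back symbol-keyed hashes. Note that `symbolize_recursively!` descends only one level of nesting, since it calls `symbolize_keys!` (not itself) on nested hashes. For example:

    h = { 'table_name' => 'users', 'state' => { 'offset' => 0 } }
    h.symbolize_keys          # => {:table_name=>"users", :state=>{"offset"=>0}}
    h.symbolize_recursively!
    h                         # => {:table_name=>"users", :state=>{:offset=>0}}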
data/lib/taps/multipart.rb ADDED

@@ -0,0 +1,73 @@
+require 'restclient'
+require 'rack/utils'
+require 'json/pure'
+require 'stringio'
+
+module Taps
+class Multipart
+  class Container
+    attr_accessor :attachments
+
+    def initialize
+      @attachments = []
+    end
+
+    def attach(opts)
+      mp = Taps::Multipart.new(opts)
+      attachments << mp
+    end
+
+    def generate
+      hash = {}
+      attachments.each do |mp|
+        hash[mp.name] = mp
+      end
+      m = RestClient::Payload::Multipart.new(hash)
+      [m.to_s, m.headers['Content-Type']]
+    end
+  end
+
+  attr_reader :opts
+
+  def initialize(opts={})
+    @opts = opts
+  end
+
+  def name
+    opts[:name]
+  end
+
+  def to_s
+    opts[:payload]
+  end
+
+  def content_type
+    opts[:content_type] || 'text/plain'
+  end
+
+  def original_filename
+    opts[:original_filename]
+  end
+
+  def self.create
+    c = Taps::Multipart::Container.new
+    yield c
+    c.generate
+  end
+
+  # response is a rest-client response
+  def self.parse(response)
+    content = response.to_s
+    env = {
+      'CONTENT_TYPE' => response.headers[:content_type],
+      'CONTENT_LENGTH' => content.size,
+      'rack.input' => StringIO.new(content)
+    }
+
+    params = Rack::Utils::Multipart.parse_multipart(env)
+    params.symbolize_keys!
+    params
+  end
+
+end
+end
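`Taps::Multipart.create` builds a two-part request body pairing the opaque data chunk with its JSON envelope, and `Taps::Multipart.parse` reverses that on the receiving side. This mirrors how `Push#push_data_from_table` in operation.rb below drives it (the payload literals here are placeholders):

    content, content_type = Taps::Multipart.create do |r|
      r.attach :name => :encoded_data,
               :payload => 'BAh7AA==',   # a Marshal+base64 chunk
               :content_type => 'application/octet-stream'
      r.attach :name => :json,
               :payload => { :checksum => '12345' }.to_json,
               :content_type => 'application/json'
    end
    # POST `content` with the returned `content_type`, which carries the
    # multipart boundary the server needs to split the parts back apart.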
data/lib/taps/operation.rb ADDED

@@ -0,0 +1,550 @@
+require 'rest_client'
+require 'sequel'
+require 'zlib'
+
+require 'taps/progress_bar'
+require 'taps/config'
+require 'taps/utils'
+require 'taps/data_stream'
+
+# disable warnings, rest client makes a lot of noise right now
+$VERBOSE = nil
+
+module Taps
+
+class Operation
+  attr_reader :database_url, :remote_url, :opts
+  attr_reader :session_uri
+
+  def initialize(database_url, remote_url, opts={})
+    @database_url = database_url
+    @remote_url = remote_url
+    @opts = opts
+    @exiting = false
+    @session_uri = opts[:session_uri]
+  end
+
+  def file_prefix
+    "op"
+  end
+
+  def indexes_first?
+    !!opts[:indexes_first]
+  end
+
+  def table_filter
+    opts[:table_filter]
+  end
+
+  def apply_table_filter(tables)
+    return tables unless table_filter
+    re = Regexp.new(table_filter)
+    if tables.kind_of?(Hash)
+      ntables = {}
+      tables.each do |t, d|
+        unless re.match(t.to_s).nil?
+          ntables[t] = d
+        end
+      end
+      ntables
+    else
+      tables.reject { |t| re.match(t.to_s).nil? }
+    end
+  end
+
+  def log
+    Taps.log
+  end
+
+  def store_session
+    file = "#{file_prefix}_#{Time.now.strftime("%Y%m%d%H%M")}.dat"
+    puts "\nSaving session to #{file}.."
+    File.open(file, 'w') do |f|
+      f.write(to_hash.to_json)
+    end
+  end
+
+  def to_hash
+    {
+      :klass => self.class.to_s,
+      :database_url => database_url,
+      :remote_url => remote_url,
+      :session_uri => session_uri,
+      :stream_state => stream_state,
+      :completed_tables => completed_tables,
+      :table_filter => table_filter,
+    }
+  end
+
+  def exiting?
+    !!@exiting
+  end
+
+  def setup_signal_trap
+    trap("INT") {
+      puts "\nCompleting current action..."
+      @exiting = true
+    }
+
+    trap("TERM") {
+      puts "\nCompleting current action..."
+      @exiting = true
+    }
+  end
+
+  def resuming?
+    opts[:resume] == true
+  end
+
+  def default_chunksize
+    opts[:default_chunksize]
+  end
+
+  def completed_tables
+    opts[:completed_tables] ||= []
+  end
+
+  def stream_state
+    opts[:stream_state] ||= {}
+  end
+
+  def stream_state=(val)
+    opts[:stream_state] = val
+  end
+
+  def compression_disabled?
+    !!opts[:disable_compression]
+  end
+
+  def db
+    @db ||= Sequel.connect(database_url)
+  end
+
+  def server
+    @server ||= RestClient::Resource.new(remote_url)
+  end
+
+  def session_resource
+    @session_resource ||= begin
+      @session_uri ||= server['sessions'].post('', http_headers).to_s
+      server[@session_uri]
+    end
+  end
+
+  def set_session(uri)
+    session_uri = uri
+    @session_resource = server[session_uri]
+  end
+
+  def close_session
+    @session_resource.delete(http_headers) if @session_resource
+  end
+
+  def safe_url(url)
+    url.sub(/\/\/(.+?)?:(.*?)@/, '//\1:[hidden]@')
+  end
+
+  def safe_remote_url
+    safe_url(remote_url)
+  end
+
+  def safe_database_url
+    safe_url(database_url)
+  end
+
+  def http_headers(extra = {})
+    base = { :taps_version => Taps.version }
+    if compression_disabled?
+      base[:accept_encoding] = ""
+    else
+      base[:accept_encoding] = "gzip, deflate"
+    end
+    base.merge(extra)
+  end
+
+  def format_number(num)
+    num.to_s.gsub(/(\d)(?=(\d\d\d)+(?!\d))/, "\\1,")
+  end
+
+  def verify_server
+    begin
+      server['/'].get(http_headers)
+    rescue RestClient::RequestFailed => e
+      if e.http_code == 417
+        puts "#{safe_remote_url} is running a different minor version of taps."
+        puts "#{e.response.to_s}"
+        exit(1)
+      else
+        raise
+      end
+    rescue RestClient::Unauthorized
+      puts "Bad credentials given for #{safe_remote_url}"
+      exit(1)
+    rescue Errno::ECONNREFUSED
+      puts "Can't connect to #{safe_remote_url}. Please check that it's running"
+      exit(1)
+    end
+  end
+
+  def self.factory(type, database_url, remote_url, opts)
+    type = :resume if opts[:resume]
+    klass = case type
+      when :pull then Taps::Pull
+      when :push then Taps::Push
+      when :resume then eval(opts[:klass])
+      else raise "Unknown Operation Type -> #{type}"
+    end
+
+    klass.new(database_url, remote_url, opts)
+  end
+end
+
+class Pull < Operation
+  def file_prefix
+    "pull"
+  end
+
+  def to_hash
+    super.merge(:remote_tables_info => remote_tables_info)
+  end
+
+  def run
+    verify_server
+
+    begin
+      unless resuming?
+        pull_schema
+        pull_indexes if indexes_first?
+      end
+      setup_signal_trap
+      pull_partial_data if resuming?
+      pull_data
+      pull_indexes unless indexes_first?
+      pull_reset_sequences
+      close_session
+    rescue RestClient::Exception => e
+      store_session
+      if e.respond_to?(:response)
+        puts "!!! Caught Server Exception"
+        puts "HTTP CODE: #{e.http_code}"
+        puts "#{e.response.to_s}"
+        exit(1)
+      else
+        raise
+      end
+    end
+  end
+
+  def pull_schema
+    puts "Receiving schema"
+
+    progress = ProgressBar.new('Schema', tables.size)
+    tables.each do |table_name, count|
+      schema_data = session_resource['pull/schema'].post({:table_name => table_name}, http_headers).to_s
+      log.debug "Table: #{table_name}\n#{schema_data}\n"
+      output = Taps::Utils.load_schema(database_url, schema_data)
+      puts output if output
+      progress.inc(1)
+    end
+    progress.finish
+  end
+
+  def pull_data
+    puts "Receiving data"
+
+    puts "#{tables.size} tables, #{format_number(record_count)} records"
+
+    tables.each do |table_name, count|
+      progress = ProgressBar.new(table_name.to_s, count)
+      stream = Taps::DataStream.factory(db, {
+        :chunksize => default_chunksize,
+        :table_name => table_name
+      })
+      pull_data_from_table(stream, progress)
+    end
+  end
+
+  def pull_partial_data
+    return if stream_state == {}
+
+    table_name = stream_state[:table_name]
+    record_count = tables[table_name.to_s]
+    puts "Resuming #{table_name}, #{format_number(record_count)} records"
+
+    progress = ProgressBar.new(table_name.to_s, record_count)
+    stream = Taps::DataStream.factory(db, stream_state)
+    pull_data_from_table(stream, progress)
+  end
+
+  def pull_data_from_table(stream, progress)
+    loop do
+      begin
+        if exiting?
+          store_session
+          exit 0
+        end
+
+        size = stream.fetch_remote(session_resource['pull/table'], http_headers)
+        break if stream.complete?
+        progress.inc(size) unless exiting?
+        stream.error = false
+        self.stream_state = stream.to_hash
+      rescue DataStream::CorruptedData => e
+        puts "Corrupted Data Received #{e.message}, retrying..."
+        stream.error = true
+        next
+      end
+    end
+
+    progress.finish
+    completed_tables << stream.table_name.to_s
+    self.stream_state = {}
+  end
+
+  def tables
+    h = {}
+    remote_tables_info.each do |table_name, count|
+      next if completed_tables.include?(table_name.to_s)
+      h[table_name.to_s] = count
+    end
+    h
+  end
+
+  def record_count
+    @record_count ||= remote_tables_info.values.inject(0) { |a,c| a += c }
+  end
+
+  def remote_tables_info
+    opts[:remote_tables_info] ||= fetch_remote_tables_info
+  end
+
+  def fetch_remote_tables_info
+    retries = 0
+    max_retries = 10
+    begin
+      tables = JSON.load(session_resource['pull/table_names'].get(http_headers).to_s)
+    rescue RestClient::Exception
+      retries += 1
+      retry if retries <= max_retries
+      puts "Unable to fetch tables information from #{remote_url}. Please check the server log."
+      exit(1)
+    end
+
+    data = {}
+    apply_table_filter(tables).each do |table_name|
+      retries = 0
+      begin
+        count = session_resource['pull/table_count'].post({:table => table_name}, http_headers).to_s.to_i
+        data[table_name] = count
+      rescue RestClient::Exception
+        retries += 1
+        retry if retries <= max_retries
+        puts "Unable to fetch tables information from #{remote_url}. Please check the server log."
+        exit(1)
+      end
+    end
+    data
+  end
+
+  def pull_indexes
+    puts "Receiving indexes"
+
+    idxs = JSON.parse(session_resource['pull/indexes'].get(http_headers).to_s)
+
+    apply_table_filter(idxs).each do |table, indexes|
+      next unless indexes.size > 0
+      progress = ProgressBar.new(table, indexes.size)
+      indexes.each do |idx|
+        output = Taps::Utils.load_indexes(database_url, idx)
+        puts output if output
+        progress.inc(1)
+      end
+      progress.finish
+    end
+  end
+
+  def pull_reset_sequences
+    puts "Resetting sequences"
+
+    output = Taps::Utils.schema_bin(:reset_db_sequences, database_url)
+    puts output if output
+  end
+end
+
+class Push < Operation
+  def file_prefix
+    "push"
+  end
+
+  def to_hash
+    super.merge(:local_tables_info => local_tables_info)
+  end
+
+  def run
+    verify_server
+    begin
+      unless resuming?
+        push_schema
+        push_indexes if indexes_first?
+      end
+      setup_signal_trap
+      push_partial_data if resuming?
+      push_data
+      push_indexes unless indexes_first?
+      push_reset_sequences
+      close_session
+    rescue RestClient::Exception => e
+      store_session
+      if e.respond_to?(:response)
+        puts "!!! Caught Server Exception"
+        puts "HTTP CODE: #{e.http_code}"
+        puts "#{e.response.to_s}"
+        exit(1)
+      else
+        raise
+      end
+    end
+  end
+
+  def push_indexes
+    idxs = JSON.parse(Taps::Utils.schema_bin(:indexes_individual, database_url))
+
+    return unless idxs.size > 0
+
+    puts "Sending indexes"
+
+    apply_table_filter(idxs).each do |table, indexes|
+      next unless indexes.size > 0
+      progress = ProgressBar.new(table, indexes.size)
+      indexes.each do |idx|
+        session_resource['push/indexes'].post(idx, http_headers)
+        progress.inc(1)
+      end
+      progress.finish
+    end
+  end
+
+  def push_schema
+    puts "Sending schema"
+
+    progress = ProgressBar.new('Schema', tables.size)
+    tables.each do |table, count|
+      schema_data = Taps::Utils.schema_bin(:dump_table, database_url, table)
+      log.debug "Table: #{table}\n#{schema_data}\n"
+      session_resource['push/schema'].post(schema_data, http_headers)
+      progress.inc(1)
+    end
+    progress.finish
+  end
+
+  def push_reset_sequences
+    puts "Resetting sequences"
+
+    session_resource['push/reset_sequences'].post('', http_headers)
+  end
+
+  def push_partial_data
+    return if stream_state == {}
+
+    table_name = stream_state[:table_name]
+    record_count = tables[table_name.to_s]
+    puts "Resuming #{table_name}, #{format_number(record_count)} records"
+    progress = ProgressBar.new(table_name.to_s, record_count)
+    stream = Taps::DataStream.factory(db, stream_state)
+    push_data_from_table(stream, progress)
+  end
+
+  def push_data
+    puts "Sending data"
+
+    puts "#{tables.size} tables, #{format_number(record_count)} records"
+
+    tables.each do |table_name, count|
+      stream = Taps::DataStream.factory(db,
+        :table_name => table_name,
+        :chunksize => default_chunksize)
+      progress = ProgressBar.new(table_name.to_s, count)
+      push_data_from_table(stream, progress)
+    end
+  end
+
+  def push_data_from_table(stream, progress)
+    loop do
+      if exiting?
+        store_session
+        exit 0
+      end
+
+      row_size = 0
+      chunksize = stream.state[:chunksize]
+      chunksize = Taps::Utils.calculate_chunksize(chunksize) do |c|
+        stream.state[:chunksize] = c
+        encoded_data, row_size, elapsed_time = stream.fetch
+        break if stream.complete?
+
+        data = {
+          :state => stream.to_hash,
+          :checksum => Taps::Utils.checksum(encoded_data).to_s
+        }
+
+        begin
+          content, content_type = Taps::Multipart.create do |r|
+            r.attach :name => :encoded_data,
+              :payload => encoded_data,
+              :content_type => 'application/octet-stream'
+            r.attach :name => :json,
+              :payload => data.to_json,
+              :content_type => 'application/json'
+          end
+          session_resource['push/table'].post(content, http_headers(:content_type => content_type))
+          self.stream_state = stream.to_hash
+        rescue RestClient::RequestFailed => e
+          # retry the same data, it got corrupted somehow.
+          if e.http_code == 412
+            next
+          end
+          raise
+        end
+        elapsed_time
+      end
+      stream.state[:chunksize] = chunksize
+
+      progress.inc(row_size)
+
+      stream.increment(row_size)
+      break if stream.complete?
+    end
+
+    progress.finish
+    completed_tables << stream.table_name.to_s
+    self.stream_state = {}
+  end
+
+  def local_tables_info
+    opts[:local_tables_info] ||= fetch_local_tables_info
+  end
+
+  def tables
+    h = {}
+    local_tables_info.each do |table_name, count|
+      next if completed_tables.include?(table_name.to_s)
+      h[table_name.to_s] = count
+    end
+    h
+  end
+
+  def record_count
+    @record_count ||= local_tables_info.values.inject(0) { |a,c| a += c }
+  end
+
+  def fetch_local_tables_info
+    tables_with_counts = {}
+    db.tables.each do |table|
+      tables_with_counts[table] = db[table.to_sym.identifier].count
+    end
+    apply_table_filter(tables_with_counts)
+  end
+
+end
+
+end
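`Operation.factory` is the entry point that the taps CLI (data/lib/taps/cli.rb, not shown in this excerpt) drives: it picks `Taps::Pull` or `Taps::Push`, or revives a saved session via `:resume`, and hands the chosen class the two URLs. A sketch of a direct pull invocation, with placeholder URLs and the `:default_chunksize` option read by the accessors above:

    op = Taps::Operation.factory(:pull,
      'postgres://localhost/myapp_development',   # local database_url
      'http://user:pass@taps.example.com:5000',   # remote taps server
      :default_chunksize => 1000)
    op.run  # pull_schema, pull_data, pull_indexes, pull_reset_sequences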