taps 0.3.13 → 0.3.14
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +1 -1
- data/VERSION.yml +1 -1
- data/bin/schema +1 -1
- data/lib/taps/data_stream.rb +31 -5
- data/lib/taps/errors.rb +15 -0
- data/lib/taps/operation.rb +61 -54
- data/lib/taps/server.rb +28 -6
- data/lib/taps/utils.rb +51 -6
- data/spec/utils_spec.rb +6 -0
- metadata +6 -5
data/Rakefile
CHANGED
@@ -13,7 +13,7 @@ begin
|
|
13
13
|
s.add_dependency 'json_pure', '>= 1.2.0', '< 1.5.0'
|
14
14
|
s.add_dependency 'sinatra', '~> 1.0.0'
|
15
15
|
s.add_dependency 'rest-client', '>= 1.4.0', '< 1.7.0'
|
16
|
-
s.add_dependency 'sequel', '~> 3.
|
16
|
+
s.add_dependency 'sequel', '~> 3.17.0'
|
17
17
|
s.add_dependency 'sqlite3-ruby', '~> 1.2'
|
18
18
|
s.add_dependency 'rack', '>= 1.0.1'
|
19
19
|
|
data/VERSION.yml
CHANGED
data/bin/schema
CHANGED
data/lib/taps/data_stream.rb
CHANGED
@@ -2,13 +2,12 @@ require 'taps/monkey'
|
|
2
2
|
require 'taps/multipart'
|
3
3
|
require 'taps/utils'
|
4
4
|
require 'taps/log'
|
5
|
+
require 'taps/errors'
|
5
6
|
require 'json/pure'
|
6
7
|
|
7
8
|
module Taps
|
8
9
|
|
9
10
|
class DataStream
|
10
|
-
class CorruptedData < Exception; end
|
11
|
-
|
12
11
|
DEFAULT_CHUNKSIZE = 1000
|
13
12
|
|
14
13
|
attr_reader :db, :state
|
@@ -79,7 +78,10 @@ class DataStream
|
|
79
78
|
ds = table.order(*order_by).limit(state[:chunksize], state[:offset])
|
80
79
|
log.debug "DataStream#fetch_rows SQL -> #{ds.sql}"
|
81
80
|
rows = Taps::Utils.format_data(ds.all,
|
82
|
-
:string_columns => string_columns
|
81
|
+
:string_columns => string_columns,
|
82
|
+
:schema => db.schema(table_name),
|
83
|
+
:table => table_name
|
84
|
+
)
|
83
85
|
update_chunksize_stats
|
84
86
|
rows
|
85
87
|
end
|
@@ -173,7 +175,7 @@ class DataStream
|
|
173
175
|
params[:json] = self.class.parse_json(params[:json]) if params.has_key?(:json)
|
174
176
|
return params
|
175
177
|
rescue JSON::Parser
|
176
|
-
raise
|
178
|
+
raise Taps::CorruptedData.new("Invalid JSON Received")
|
177
179
|
end
|
178
180
|
end
|
179
181
|
|
@@ -184,7 +186,7 @@ class DataStream
|
|
184
186
|
end
|
185
187
|
|
186
188
|
def parse_encoded_data(encoded_data, checksum)
|
187
|
-
raise
|
189
|
+
raise Taps::CorruptedData.new("Checksum Failed") unless Taps::Utils.valid_data?(encoded_data, checksum)
|
188
190
|
|
189
191
|
begin
|
190
192
|
return Marshal.load(Taps::Utils.base64decode(encoded_data))
|
@@ -202,6 +204,17 @@ class DataStream
|
|
202
204
|
state[:offset] += rows[:data].size
|
203
205
|
end
|
204
206
|
|
207
|
+
def verify_stream
|
208
|
+
state[:offset] = table.count
|
209
|
+
end
|
210
|
+
|
211
|
+
def verify_remote_stream(resource, headers)
|
212
|
+
json_raw = resource.post({:state => self.to_json}, headers).to_s
|
213
|
+
json = self.class.parse_json(json_raw)
|
214
|
+
|
215
|
+
self.class.new(db, json[:state])
|
216
|
+
end
|
217
|
+
|
205
218
|
def self.factory(db, state)
|
206
219
|
if defined?(Sequel::MySQL) && Sequel::MySQL.respond_to?(:convert_invalid_date_time=)
|
207
220
|
Sequel::MySQL.convert_invalid_date_time = :nil
|
@@ -302,6 +315,19 @@ class DataStreamKeyed < DataStream
|
|
302
315
|
# pop the rows we just successfully sent off the buffer
|
303
316
|
@buffer.slice!(0, row_count)
|
304
317
|
end
|
318
|
+
|
319
|
+
def verify_stream
|
320
|
+
key = primary_key
|
321
|
+
ds = table.order(*order_by)
|
322
|
+
current_filter = ds.max(key.sql_number)
|
323
|
+
|
324
|
+
# set the current filter to the max of the primary key
|
325
|
+
state[:filter] = current_filter
|
326
|
+
# clear out the last_fetched value so it can restart from scratch
|
327
|
+
state[:last_fetched] = nil
|
328
|
+
|
329
|
+
log.debug "DataStreamKeyed#verify_stream -> state: #{state.inspect}"
|
330
|
+
end
|
305
331
|
end
|
306
332
|
|
307
333
|
end
|
data/lib/taps/errors.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
module Taps
|
2
|
+
class BaseError < StandardError
|
3
|
+
attr_reader :original_backtrace
|
4
|
+
|
5
|
+
def initialize(message, opts={})
|
6
|
+
@original_backtrace = opts.delete(:backtrace)
|
7
|
+
super(message)
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
11
|
+
class NotImplemented < BaseError; end
|
12
|
+
class DuplicatePrimaryKeyError < BaseError; end
|
13
|
+
class CorruptedData < BaseError; end
|
14
|
+
class InvalidData < BaseError; end
|
15
|
+
end
|
data/lib/taps/operation.rb
CHANGED
@@ -6,6 +6,7 @@ require 'taps/progress_bar'
|
|
6
6
|
require 'taps/config'
|
7
7
|
require 'taps/utils'
|
8
8
|
require 'taps/data_stream'
|
9
|
+
require 'taps/errors'
|
9
10
|
|
10
11
|
# disable warnings, rest client makes a lot of noise right now
|
11
12
|
$VERBOSE = nil
|
@@ -186,6 +187,30 @@ class Operation
|
|
186
187
|
end
|
187
188
|
end
|
188
189
|
|
190
|
+
def catch_errors(&blk)
|
191
|
+
verify_server
|
192
|
+
|
193
|
+
begin
|
194
|
+
blk.call
|
195
|
+
close_session
|
196
|
+
rescue RestClient::Exception, Taps::BaseError => e
|
197
|
+
store_session
|
198
|
+
if e.kind_of?(Taps::BaseError)
|
199
|
+
puts "!!! Caught Server Exception"
|
200
|
+
puts "#{e.class}: #{e.message}"
|
201
|
+
puts "\n#{e.original_backtrace}" if e.original_backtrace
|
202
|
+
exit(1)
|
203
|
+
elsif e.respond_to?(:response)
|
204
|
+
puts "!!! Caught Server Exception"
|
205
|
+
puts "HTTP CODE: #{e.http_code}"
|
206
|
+
puts "#{e.response.to_s}"
|
207
|
+
exit(1)
|
208
|
+
else
|
209
|
+
raise
|
210
|
+
end
|
211
|
+
end
|
212
|
+
end
|
213
|
+
|
189
214
|
def self.factory(type, database_url, remote_url, opts)
|
190
215
|
type = :resume if opts[:resume]
|
191
216
|
klass = case type
|
@@ -209,9 +234,7 @@ class Pull < Operation
|
|
209
234
|
end
|
210
235
|
|
211
236
|
def run
|
212
|
-
|
213
|
-
|
214
|
-
begin
|
237
|
+
catch_errors do
|
215
238
|
unless resuming?
|
216
239
|
pull_schema
|
217
240
|
pull_indexes if indexes_first?
|
@@ -221,17 +244,6 @@ class Pull < Operation
|
|
221
244
|
pull_data
|
222
245
|
pull_indexes unless indexes_first?
|
223
246
|
pull_reset_sequences
|
224
|
-
close_session
|
225
|
-
rescue RestClient::Exception => e
|
226
|
-
store_session
|
227
|
-
if e.respond_to?(:response)
|
228
|
-
puts "!!! Caught Server Exception"
|
229
|
-
puts "HTTP CODE: #{e.http_code}"
|
230
|
-
puts "#{e.response.to_s}"
|
231
|
-
exit(1)
|
232
|
-
else
|
233
|
-
raise
|
234
|
-
end
|
235
247
|
end
|
236
248
|
end
|
237
249
|
|
@@ -289,7 +301,7 @@ class Pull < Operation
|
|
289
301
|
progress.inc(size) unless exiting?
|
290
302
|
stream.error = false
|
291
303
|
self.stream_state = stream.to_hash
|
292
|
-
rescue
|
304
|
+
rescue Taps::CorruptedData => e
|
293
305
|
puts "Corrupted Data Received #{e.message}, retrying..."
|
294
306
|
stream.error = true
|
295
307
|
next
|
@@ -381,8 +393,7 @@ class Push < Operation
|
|
381
393
|
end
|
382
394
|
|
383
395
|
def run
|
384
|
-
|
385
|
-
begin
|
396
|
+
catch_errors do
|
386
397
|
unless resuming?
|
387
398
|
push_schema
|
388
399
|
push_indexes if indexes_first?
|
@@ -392,17 +403,6 @@ class Push < Operation
|
|
392
403
|
push_data
|
393
404
|
push_indexes unless indexes_first?
|
394
405
|
push_reset_sequences
|
395
|
-
close_session
|
396
|
-
rescue RestClient::Exception => e
|
397
|
-
store_session
|
398
|
-
if e.respond_to?(:response)
|
399
|
-
puts "!!! Caught Server Exception"
|
400
|
-
puts "HTTP CODE: #{e.http_code}"
|
401
|
-
puts "#{e.response.to_s}"
|
402
|
-
exit(1)
|
403
|
-
else
|
404
|
-
raise
|
405
|
-
end
|
406
406
|
end
|
407
407
|
end
|
408
408
|
|
@@ -477,35 +477,42 @@ class Push < Operation
|
|
477
477
|
|
478
478
|
row_size = 0
|
479
479
|
chunksize = stream.state[:chunksize]
|
480
|
-
chunksize = Taps::Utils.calculate_chunksize(chunksize) do |c|
|
481
|
-
stream.state[:chunksize] = c
|
482
|
-
encoded_data, row_size, elapsed_time = stream.fetch
|
483
|
-
break if stream.complete?
|
484
480
|
|
485
|
-
|
486
|
-
|
487
|
-
:
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
481
|
+
begin
|
482
|
+
chunksize = Taps::Utils.calculate_chunksize(chunksize) do |c|
|
483
|
+
stream.state[:chunksize] = c
|
484
|
+
encoded_data, row_size, elapsed_time = stream.fetch
|
485
|
+
break if stream.complete?
|
486
|
+
|
487
|
+
data = {
|
488
|
+
:state => stream.to_hash,
|
489
|
+
:checksum => Taps::Utils.checksum(encoded_data).to_s
|
490
|
+
}
|
491
|
+
|
492
|
+
begin
|
493
|
+
content, content_type = Taps::Multipart.create do |r|
|
494
|
+
r.attach :name => :encoded_data,
|
495
|
+
:payload => encoded_data,
|
496
|
+
:content_type => 'application/octet-stream'
|
497
|
+
r.attach :name => :json,
|
498
|
+
:payload => data.to_json,
|
499
|
+
:content_type => 'application/json'
|
500
|
+
end
|
501
|
+
session_resource['push/table'].post(content, http_headers(:content_type => content_type))
|
502
|
+
self.stream_state = stream.to_hash
|
503
|
+
rescue => e
|
504
|
+
Taps::Utils.reraise_server_exception(e)
|
505
505
|
end
|
506
|
-
|
506
|
+
|
507
|
+
elapsed_time
|
507
508
|
end
|
508
|
-
|
509
|
+
rescue Taps::CorruptedData => e
|
510
|
+
# retry the same data, it got corrupted somehow.
|
511
|
+
next
|
512
|
+
rescue Taps::DuplicatePrimaryKeyError => e
|
513
|
+
# verify the stream and retry it
|
514
|
+
stream = stream.verify_remote_stream(session_resource['push/verify_stream'], http_headers)
|
515
|
+
next
|
509
516
|
end
|
510
517
|
stream.state[:chunksize] = chunksize
|
511
518
|
|
data/lib/taps/server.rb
CHANGED
@@ -14,13 +14,18 @@ class Server < Sinatra::Base
|
|
14
14
|
|
15
15
|
error do
|
16
16
|
e = request.env['sinatra.error']
|
17
|
-
|
17
|
+
if e.kind_of?(Taps::BaseError)
|
18
|
+
content_type "application/json"
|
19
|
+
halt 412, { 'error_class' => e.class.to_s, 'error_message' => e.message, 'error_backtrace' => e.backtrace.join("\n") }.to_json
|
20
|
+
else
|
21
|
+
"Taps Server Error: #{e}\n#{e.backtrace}"
|
22
|
+
end
|
18
23
|
end
|
19
24
|
|
20
25
|
before do
|
21
26
|
major, minor, patch = request.env['HTTP_TAPS_VERSION'].split('.') rescue []
|
22
|
-
unless "#{major}.#{minor}" == Taps.compatible_version
|
23
|
-
halt 417, "Taps v#{Taps.compatible_version}.
|
27
|
+
unless "#{major}.#{minor}" == Taps.compatible_version && patch.to_i >= 14
|
28
|
+
halt 417, "Taps >= v#{Taps.compatible_version}.14 is required for this server"
|
24
29
|
end
|
25
30
|
end
|
26
31
|
|
@@ -42,6 +47,25 @@ class Server < Sinatra::Base
|
|
42
47
|
"/sessions/#{key}"
|
43
48
|
end
|
44
49
|
|
50
|
+
post '/sessions/:key/push/verify_stream' do
|
51
|
+
session = DbSession.filter(:key => params[:key]).first
|
52
|
+
halt 404 unless session
|
53
|
+
|
54
|
+
state = DataStream.parse_json(params[:state])
|
55
|
+
stream = nil
|
56
|
+
|
57
|
+
size = 0
|
58
|
+
session.conn do |db|
|
59
|
+
Taps::Utils.server_error_handling do
|
60
|
+
stream = DataStream.factory(db, state)
|
61
|
+
stream.verify_stream
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
content_type 'application/json'
|
66
|
+
{ :state => stream.to_hash }.to_json
|
67
|
+
end
|
68
|
+
|
45
69
|
post '/sessions/:key/push/table' do
|
46
70
|
session = DbSession.filter(:key => params[:key]).first
|
47
71
|
halt 404 unless session
|
@@ -50,11 +74,9 @@ class Server < Sinatra::Base
|
|
50
74
|
|
51
75
|
size = 0
|
52
76
|
session.conn do |db|
|
53
|
-
|
77
|
+
Taps::Utils.server_error_handling do
|
54
78
|
stream = DataStream.factory(db, json[:state])
|
55
79
|
size = stream.fetch_remote_in_server(params)
|
56
|
-
rescue Taps::DataStream::CorruptedData
|
57
|
-
halt 412
|
58
80
|
end
|
59
81
|
end
|
60
82
|
|
data/lib/taps/utils.rb
CHANGED
@@ -3,6 +3,8 @@ require 'stringio'
|
|
3
3
|
require 'time'
|
4
4
|
require 'tempfile'
|
5
5
|
|
6
|
+
require 'taps/errors'
|
7
|
+
|
6
8
|
module Taps
|
7
9
|
module Utils
|
8
10
|
extend self
|
@@ -37,10 +39,32 @@ module Utils
|
|
37
39
|
def format_data(data, opts={})
|
38
40
|
return {} if data.size == 0
|
39
41
|
string_columns = opts[:string_columns] || []
|
42
|
+
schema = opts[:schema] || []
|
43
|
+
table = opts[:table]
|
44
|
+
|
45
|
+
max_lengths = schema.inject({}) do |hash, (column, meta)|
|
46
|
+
if meta[:db_type] =~ /^\w+\((\d+)\)/
|
47
|
+
hash.update(column => $1.to_i)
|
48
|
+
end
|
49
|
+
hash
|
50
|
+
end
|
40
51
|
|
41
52
|
header = data[0].keys
|
42
53
|
only_data = data.collect do |row|
|
43
54
|
row = blobs_to_string(row, string_columns)
|
55
|
+
row.each do |column, data|
|
56
|
+
if data.to_s.length > (max_lengths[column] || data.to_s.length)
|
57
|
+
raise Taps::InvalidData.new(<<-ERROR)
|
58
|
+
Detected data that exceeds the length limitation of its column. This is
|
59
|
+
generally due to the fact that SQLite does not enforce length restrictions.
|
60
|
+
|
61
|
+
Table : #{table}
|
62
|
+
Column : #{column}
|
63
|
+
Type : #{schema.detect{|s| s.first == column}.last[:db_type]}
|
64
|
+
Data : #{data}
|
65
|
+
ERROR
|
66
|
+
end
|
67
|
+
end
|
44
68
|
header.collect { |h| row[h] }
|
45
69
|
end
|
46
70
|
{ :header => header, :data => only_data }
|
@@ -127,12 +151,7 @@ module Utils
|
|
127
151
|
end
|
128
152
|
|
129
153
|
def primary_key(db, table)
|
130
|
-
table
|
131
|
-
if db.respond_to?(:primary_key)
|
132
|
-
db.primary_key(table)
|
133
|
-
else
|
134
|
-
db.schema(table).select { |c| c[1][:primary_key] }.map { |c| c.first.to_sym }
|
135
|
-
end
|
154
|
+
db.schema(table).select { |c| c[1][:primary_key] }.map { |c| c[0] }
|
136
155
|
end
|
137
156
|
|
138
157
|
def single_integer_primary_key(db, table)
|
@@ -150,5 +169,31 @@ module Utils
|
|
150
169
|
db[table].columns
|
151
170
|
end
|
152
171
|
end
|
172
|
+
|
173
|
+
|
174
|
+
# try to detect server side errors to
|
175
|
+
# give the client a more useful error message
|
176
|
+
def server_error_handling(&blk)
|
177
|
+
begin
|
178
|
+
blk.call
|
179
|
+
rescue Sequel::DatabaseError => e
|
180
|
+
if e.message =~ /duplicate key value/i
|
181
|
+
raise Taps::DuplicatePrimaryKeyError, e.message
|
182
|
+
else
|
183
|
+
raise
|
184
|
+
end
|
185
|
+
end
|
186
|
+
end
|
187
|
+
|
188
|
+
def reraise_server_exception(e)
|
189
|
+
if e.kind_of?(RestClient::Exception)
|
190
|
+
if e.respond_to?(:response) && e.response.headers[:content_type] == 'application/json'
|
191
|
+
json = JSON.parse(e.response.to_s)
|
192
|
+
klass = eval(json['error_class']) rescue nil
|
193
|
+
raise klass.new(json['error_message'], :backtrace => json['error_backtrace']) if klass
|
194
|
+
end
|
195
|
+
end
|
196
|
+
raise e
|
197
|
+
end
|
153
198
|
end
|
154
199
|
end
|
data/spec/utils_spec.rb
CHANGED
@@ -12,6 +12,12 @@ describe Taps::Utils do
|
|
12
12
|
Taps::Utils.format_data([ first_row, { :x => 2, :y => 2 } ]).should == { :header => [ :x, :y ], :data => [ [1, 1], [2, 2] ] }
|
13
13
|
end
|
14
14
|
|
15
|
+
it "enforces length limitations on columns" do
|
16
|
+
data = [ { :a => "aaabbbccc" } ]
|
17
|
+
schema = [ [ :a, { :db_type => "varchar(3)" }]]
|
18
|
+
lambda { Taps::Utils.format_data(data, :schema => schema) }.should.raise(Taps::InvalidData)
|
19
|
+
end
|
20
|
+
|
15
21
|
it "scales chunksize down slowly when the time delta of the block is just over a second" do
|
16
22
|
Time.stubs(:now).returns(10.0).returns(11.5)
|
17
23
|
Taps::Utils.calculate_chunksize(1000) { }.should == 900
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 3
|
8
|
-
- 13
|
9
|
-
version: 0.3.13
|
8
|
+
- 14
|
9
|
+
version: 0.3.14
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Ricardo Chimal, Jr.
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-
|
17
|
+
date: 2010-11-12 00:00:00 -08:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -82,9 +82,9 @@ dependencies:
|
|
82
82
|
- !ruby/object:Gem::Version
|
83
83
|
segments:
|
84
84
|
- 3
|
85
|
-
-
|
85
|
+
- 17
|
86
86
|
- 0
|
87
|
-
version: 3.
|
87
|
+
version: 3.17.0
|
88
88
|
type: :runtime
|
89
89
|
version_requirements: *id004
|
90
90
|
- !ruby/object:Gem::Dependency
|
@@ -137,6 +137,7 @@ files:
|
|
137
137
|
- lib/taps/config.rb
|
138
138
|
- lib/taps/data_stream.rb
|
139
139
|
- lib/taps/db_session.rb
|
140
|
+
- lib/taps/errors.rb
|
140
141
|
- lib/taps/log.rb
|
141
142
|
- lib/taps/monkey.rb
|
142
143
|
- lib/taps/multipart.rb
|