taps 0.3.13 → 0.3.14

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -13,7 +13,7 @@ begin
13
13
  s.add_dependency 'json_pure', '>= 1.2.0', '< 1.5.0'
14
14
  s.add_dependency 'sinatra', '~> 1.0.0'
15
15
  s.add_dependency 'rest-client', '>= 1.4.0', '< 1.7.0'
16
- s.add_dependency 'sequel', '~> 3.15.0'
16
+ s.add_dependency 'sequel', '~> 3.17.0'
17
17
  s.add_dependency 'sqlite3-ruby', '~> 1.2'
18
18
  s.add_dependency 'rack', '>= 1.0.1'
19
19
 
data/VERSION.yml CHANGED
@@ -1,5 +1,5 @@
1
1
  ---
2
2
  :build:
3
3
  :minor: 3
4
- :patch: 13
4
+ :patch: 14
5
5
  :major: 0
data/bin/schema CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  require 'rubygems'
4
- gem 'sequel', '~> 3.15.0'
4
+ gem 'sequel', '~> 3.17.0'
5
5
 
6
6
  $:.unshift File.dirname(__FILE__) + '/../lib'
7
7
 
data/lib/taps/data_stream.rb CHANGED
@@ -2,13 +2,12 @@ require 'taps/monkey'
2
2
  require 'taps/multipart'
3
3
  require 'taps/utils'
4
4
  require 'taps/log'
5
+ require 'taps/errors'
5
6
  require 'json/pure'
6
7
 
7
8
  module Taps
8
9
 
9
10
  class DataStream
10
- class CorruptedData < Exception; end
11
-
12
11
  DEFAULT_CHUNKSIZE = 1000
13
12
 
14
13
  attr_reader :db, :state
@@ -79,7 +78,10 @@ class DataStream
79
78
  ds = table.order(*order_by).limit(state[:chunksize], state[:offset])
80
79
  log.debug "DataStream#fetch_rows SQL -> #{ds.sql}"
81
80
  rows = Taps::Utils.format_data(ds.all,
82
- :string_columns => string_columns)
81
+ :string_columns => string_columns,
82
+ :schema => db.schema(table_name),
83
+ :table => table_name
84
+ )
83
85
  update_chunksize_stats
84
86
  rows
85
87
  end
@@ -173,7 +175,7 @@ class DataStream
173
175
  params[:json] = self.class.parse_json(params[:json]) if params.has_key?(:json)
174
176
  return params
175
177
  rescue JSON::Parser
176
- raise DataStream::CorruptedData.new("Invalid JSON Received")
178
+ raise Taps::CorruptedData.new("Invalid JSON Received")
177
179
  end
178
180
  end
179
181
 
@@ -184,7 +186,7 @@ class DataStream
184
186
  end
185
187
 
186
188
  def parse_encoded_data(encoded_data, checksum)
187
- raise DataStream::CorruptedData.new("Checksum Failed") unless Taps::Utils.valid_data?(encoded_data, checksum)
189
+ raise Taps::CorruptedData.new("Checksum Failed") unless Taps::Utils.valid_data?(encoded_data, checksum)
188
190
 
189
191
  begin
190
192
  return Marshal.load(Taps::Utils.base64decode(encoded_data))
@@ -202,6 +204,17 @@ class DataStream
202
204
  state[:offset] += rows[:data].size
203
205
  end
204
206
 
207
+ def verify_stream
208
+ state[:offset] = table.count
209
+ end
210
+
211
+ def verify_remote_stream(resource, headers)
212
+ json_raw = resource.post({:state => self.to_json}, headers).to_s
213
+ json = self.class.parse_json(json_raw)
214
+
215
+ self.class.new(db, json[:state])
216
+ end
217
+
205
218
  def self.factory(db, state)
206
219
  if defined?(Sequel::MySQL) && Sequel::MySQL.respond_to?(:convert_invalid_date_time=)
207
220
  Sequel::MySQL.convert_invalid_date_time = :nil
@@ -302,6 +315,19 @@ class DataStreamKeyed < DataStream
302
315
  # pop the rows we just successfully sent off the buffer
303
316
  @buffer.slice!(0, row_count)
304
317
  end
318
+
319
+ def verify_stream
320
+ key = primary_key
321
+ ds = table.order(*order_by)
322
+ current_filter = ds.max(key.sql_number)
323
+
324
+ # set the current filter to the max of the primary key
325
+ state[:filter] = current_filter
326
+ # clear out the last_fetched value so it can restart from scratch
327
+ state[:last_fetched] = nil
328
+
329
+ log.debug "DataStreamKeyed#verify_stream -> state: #{state.inspect}"
330
+ end
305
331
  end
306
332
 
307
333
  end
@@ -0,0 +1,15 @@
1
+ module Taps
2
+ class BaseError < StandardError
3
+ attr_reader :original_backtrace
4
+
5
+ def initialize(message, opts={})
6
+ @original_backtrace = opts.delete(:backtrace)
7
+ super(message)
8
+ end
9
+ end
10
+
11
+ class NotImplemented < BaseError; end
12
+ class DuplicatePrimaryKeyError < BaseError; end
13
+ class CorruptedData < BaseError; end
14
+ class InvalidData < BaseError; end
15
+ end
data/lib/taps/operation.rb CHANGED
@@ -6,6 +6,7 @@ require 'taps/progress_bar'
6
6
  require 'taps/config'
7
7
  require 'taps/utils'
8
8
  require 'taps/data_stream'
9
+ require 'taps/errors'
9
10
 
10
11
  # disable warnings, rest client makes a lot of noise right now
11
12
  $VERBOSE = nil
@@ -186,6 +187,30 @@ class Operation
186
187
  end
187
188
  end
188
189
 
190
+ def catch_errors(&blk)
191
+ verify_server
192
+
193
+ begin
194
+ blk.call
195
+ close_session
196
+ rescue RestClient::Exception, Taps::BaseError => e
197
+ store_session
198
+ if e.kind_of?(Taps::BaseError)
199
+ puts "!!! Caught Server Exception"
200
+ puts "#{e.class}: #{e.message}"
201
+ puts "\n#{e.original_backtrace}" if e.original_backtrace
202
+ exit(1)
203
+ elsif e.respond_to?(:response)
204
+ puts "!!! Caught Server Exception"
205
+ puts "HTTP CODE: #{e.http_code}"
206
+ puts "#{e.response.to_s}"
207
+ exit(1)
208
+ else
209
+ raise
210
+ end
211
+ end
212
+ end
213
+
189
214
  def self.factory(type, database_url, remote_url, opts)
190
215
  type = :resume if opts[:resume]
191
216
  klass = case type
@@ -209,9 +234,7 @@ class Pull < Operation
209
234
  end
210
235
 
211
236
  def run
212
- verify_server
213
-
214
- begin
237
+ catch_errors do
215
238
  unless resuming?
216
239
  pull_schema
217
240
  pull_indexes if indexes_first?
@@ -221,17 +244,6 @@ class Pull < Operation
221
244
  pull_data
222
245
  pull_indexes unless indexes_first?
223
246
  pull_reset_sequences
224
- close_session
225
- rescue RestClient::Exception => e
226
- store_session
227
- if e.respond_to?(:response)
228
- puts "!!! Caught Server Exception"
229
- puts "HTTP CODE: #{e.http_code}"
230
- puts "#{e.response.to_s}"
231
- exit(1)
232
- else
233
- raise
234
- end
235
247
  end
236
248
  end
237
249
 
@@ -289,7 +301,7 @@ class Pull < Operation
289
301
  progress.inc(size) unless exiting?
290
302
  stream.error = false
291
303
  self.stream_state = stream.to_hash
292
- rescue DataStream::CorruptedData => e
304
+ rescue Taps::CorruptedData => e
293
305
  puts "Corrupted Data Received #{e.message}, retrying..."
294
306
  stream.error = true
295
307
  next
@@ -381,8 +393,7 @@ class Push < Operation
381
393
  end
382
394
 
383
395
  def run
384
- verify_server
385
- begin
396
+ catch_errors do
386
397
  unless resuming?
387
398
  push_schema
388
399
  push_indexes if indexes_first?
@@ -392,17 +403,6 @@ class Push < Operation
392
403
  push_data
393
404
  push_indexes unless indexes_first?
394
405
  push_reset_sequences
395
- close_session
396
- rescue RestClient::Exception => e
397
- store_session
398
- if e.respond_to?(:response)
399
- puts "!!! Caught Server Exception"
400
- puts "HTTP CODE: #{e.http_code}"
401
- puts "#{e.response.to_s}"
402
- exit(1)
403
- else
404
- raise
405
- end
406
406
  end
407
407
  end
408
408
 
@@ -477,35 +477,42 @@ class Push < Operation
477
477
 
478
478
  row_size = 0
479
479
  chunksize = stream.state[:chunksize]
480
- chunksize = Taps::Utils.calculate_chunksize(chunksize) do |c|
481
- stream.state[:chunksize] = c
482
- encoded_data, row_size, elapsed_time = stream.fetch
483
- break if stream.complete?
484
480
 
485
- data = {
486
- :state => stream.to_hash,
487
- :checksum => Taps::Utils.checksum(encoded_data).to_s
488
- }
489
-
490
- begin
491
- content, content_type = Taps::Multipart.create do |r|
492
- r.attach :name => :encoded_data,
493
- :payload => encoded_data,
494
- :content_type => 'application/octet-stream'
495
- r.attach :name => :json,
496
- :payload => data.to_json,
497
- :content_type => 'application/json'
498
- end
499
- session_resource['push/table'].post(content, http_headers(:content_type => content_type))
500
- self.stream_state = stream.to_hash
501
- rescue RestClient::RequestFailed => e
502
- # retry the same data, it got corrupted somehow.
503
- if e.http_code == 412
504
- next
481
+ begin
482
+ chunksize = Taps::Utils.calculate_chunksize(chunksize) do |c|
483
+ stream.state[:chunksize] = c
484
+ encoded_data, row_size, elapsed_time = stream.fetch
485
+ break if stream.complete?
486
+
487
+ data = {
488
+ :state => stream.to_hash,
489
+ :checksum => Taps::Utils.checksum(encoded_data).to_s
490
+ }
491
+
492
+ begin
493
+ content, content_type = Taps::Multipart.create do |r|
494
+ r.attach :name => :encoded_data,
495
+ :payload => encoded_data,
496
+ :content_type => 'application/octet-stream'
497
+ r.attach :name => :json,
498
+ :payload => data.to_json,
499
+ :content_type => 'application/json'
500
+ end
501
+ session_resource['push/table'].post(content, http_headers(:content_type => content_type))
502
+ self.stream_state = stream.to_hash
503
+ rescue => e
504
+ Taps::Utils.reraise_server_exception(e)
505
505
  end
506
- raise
506
+
507
+ elapsed_time
507
508
  end
508
- elapsed_time
509
+ rescue Taps::CorruptedData => e
510
+ # retry the same data, it got corrupted somehow.
511
+ next
512
+ rescue Taps::DuplicatePrimaryKeyError => e
513
+ # verify the stream and retry it
514
+ stream = stream.verify_remote_stream(session_resource['push/verify_stream'], http_headers)
515
+ next
509
516
  end
510
517
  stream.state[:chunksize] = chunksize
511
518
 
data/lib/taps/server.rb CHANGED
@@ -14,13 +14,18 @@ class Server < Sinatra::Base
14
14
 
15
15
  error do
16
16
  e = request.env['sinatra.error']
17
- "Taps Server Error: #{e}\n#{e.backtrace}"
17
+ if e.kind_of?(Taps::BaseError)
18
+ content_type "application/json"
19
+ halt 412, { 'error_class' => e.class.to_s, 'error_message' => e.message, 'error_backtrace' => e.backtrace.join("\n") }.to_json
20
+ else
21
+ "Taps Server Error: #{e}\n#{e.backtrace}"
22
+ end
18
23
  end
19
24
 
20
25
  before do
21
26
  major, minor, patch = request.env['HTTP_TAPS_VERSION'].split('.') rescue []
22
- unless "#{major}.#{minor}" == Taps.compatible_version
23
- halt 417, "Taps v#{Taps.compatible_version}.x is required for this server"
27
+ unless "#{major}.#{minor}" == Taps.compatible_version && patch.to_i >= 14
28
+ halt 417, "Taps >= v#{Taps.compatible_version}.14 is required for this server"
24
29
  end
25
30
  end
26
31
 
@@ -42,6 +47,25 @@ class Server < Sinatra::Base
42
47
  "/sessions/#{key}"
43
48
  end
44
49
 
50
+ post '/sessions/:key/push/verify_stream' do
51
+ session = DbSession.filter(:key => params[:key]).first
52
+ halt 404 unless session
53
+
54
+ state = DataStream.parse_json(params[:state])
55
+ stream = nil
56
+
57
+ size = 0
58
+ session.conn do |db|
59
+ Taps::Utils.server_error_handling do
60
+ stream = DataStream.factory(db, state)
61
+ stream.verify_stream
62
+ end
63
+ end
64
+
65
+ content_type 'application/json'
66
+ { :state => stream.to_hash }.to_json
67
+ end
68
+
45
69
  post '/sessions/:key/push/table' do
46
70
  session = DbSession.filter(:key => params[:key]).first
47
71
  halt 404 unless session
@@ -50,11 +74,9 @@ class Server < Sinatra::Base
50
74
 
51
75
  size = 0
52
76
  session.conn do |db|
53
- begin
77
+ Taps::Utils.server_error_handling do
54
78
  stream = DataStream.factory(db, json[:state])
55
79
  size = stream.fetch_remote_in_server(params)
56
- rescue Taps::DataStream::CorruptedData
57
- halt 412
58
80
  end
59
81
  end
60
82
 
data/lib/taps/utils.rb CHANGED
@@ -3,6 +3,8 @@ require 'stringio'
3
3
  require 'time'
4
4
  require 'tempfile'
5
5
 
6
+ require 'taps/errors'
7
+
6
8
  module Taps
7
9
  module Utils
8
10
  extend self
@@ -37,10 +39,32 @@ module Utils
37
39
  def format_data(data, opts={})
38
40
  return {} if data.size == 0
39
41
  string_columns = opts[:string_columns] || []
42
+ schema = opts[:schema] || []
43
+ table = opts[:table]
44
+
45
+ max_lengths = schema.inject({}) do |hash, (column, meta)|
46
+ if meta[:db_type] =~ /^\w+\((\d+)\)/
47
+ hash.update(column => $1.to_i)
48
+ end
49
+ hash
50
+ end
40
51
 
41
52
  header = data[0].keys
42
53
  only_data = data.collect do |row|
43
54
  row = blobs_to_string(row, string_columns)
55
+ row.each do |column, data|
56
+ if data.to_s.length > (max_lengths[column] || data.to_s.length)
57
+ raise Taps::InvalidData.new(<<-ERROR)
58
+ Detected data that exceeds the length limitation of its column. This is
59
+ generally due to the fact that SQLite does not enforce length restrictions.
60
+
61
+ Table : #{table}
62
+ Column : #{column}
63
+ Type : #{schema.detect{|s| s.first == column}.last[:db_type]}
64
+ Data : #{data}
65
+ ERROR
66
+ end
67
+ end
44
68
  header.collect { |h| row[h] }
45
69
  end
46
70
  { :header => header, :data => only_data }
@@ -127,12 +151,7 @@ module Utils
127
151
  end
128
152
 
129
153
  def primary_key(db, table)
130
- table = table.to_sym.identifier unless table.kind_of?(Sequel::SQL::Identifier)
131
- if db.respond_to?(:primary_key)
132
- db.primary_key(table)
133
- else
134
- db.schema(table).select { |c| c[1][:primary_key] }.map { |c| c.first.to_sym }
135
- end
154
+ db.schema(table).select { |c| c[1][:primary_key] }.map { |c| c[0] }
136
155
  end
137
156
 
138
157
  def single_integer_primary_key(db, table)
@@ -150,5 +169,31 @@ module Utils
150
169
  db[table].columns
151
170
  end
152
171
  end
172
+
173
+
174
+ # try to detect server side errors to
175
+ # give the client a more useful error message
176
+ def server_error_handling(&blk)
177
+ begin
178
+ blk.call
179
+ rescue Sequel::DatabaseError => e
180
+ if e.message =~ /duplicate key value/i
181
+ raise Taps::DuplicatePrimaryKeyError, e.message
182
+ else
183
+ raise
184
+ end
185
+ end
186
+ end
187
+
188
+ def reraise_server_exception(e)
189
+ if e.kind_of?(RestClient::Exception)
190
+ if e.respond_to?(:response) && e.response.headers[:content_type] == 'application/json'
191
+ json = JSON.parse(e.response.to_s)
192
+ klass = eval(json['error_class']) rescue nil
193
+ raise klass.new(json['error_message'], :backtrace => json['error_backtrace']) if klass
194
+ end
195
+ end
196
+ raise e
197
+ end
153
198
  end
154
199
  end
data/spec/utils_spec.rb CHANGED
@@ -12,6 +12,12 @@ describe Taps::Utils do
12
12
  Taps::Utils.format_data([ first_row, { :x => 2, :y => 2 } ]).should == { :header => [ :x, :y ], :data => [ [1, 1], [2, 2] ] }
13
13
  end
14
14
 
15
+ it "enforces length limitations on columns" do
16
+ data = [ { :a => "aaabbbccc" } ]
17
+ schema = [ [ :a, { :db_type => "varchar(3)" }]]
18
+ lambda { Taps::Utils.format_data(data, :schema => schema) }.should.raise(Taps::InvalidData)
19
+ end
20
+
15
21
  it "scales chunksize down slowly when the time delta of the block is just over a second" do
16
22
  Time.stubs(:now).returns(10.0).returns(11.5)
17
23
  Taps::Utils.calculate_chunksize(1000) { }.should == 900
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 3
8
- - 13
9
- version: 0.3.13
8
+ - 14
9
+ version: 0.3.14
10
10
  platform: ruby
11
11
  authors:
12
12
  - Ricardo Chimal, Jr.
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-09-27 00:00:00 -07:00
17
+ date: 2010-11-12 00:00:00 -08:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
@@ -82,9 +82,9 @@ dependencies:
82
82
  - !ruby/object:Gem::Version
83
83
  segments:
84
84
  - 3
85
- - 15
85
+ - 17
86
86
  - 0
87
- version: 3.15.0
87
+ version: 3.17.0
88
88
  type: :runtime
89
89
  version_requirements: *id004
90
90
  - !ruby/object:Gem::Dependency
@@ -137,6 +137,7 @@ files:
137
137
  - lib/taps/config.rb
138
138
  - lib/taps/data_stream.rb
139
139
  - lib/taps/db_session.rb
140
+ - lib/taps/errors.rb
140
141
  - lib/taps/log.rb
141
142
  - lib/taps/monkey.rb
142
143
  - lib/taps/multipart.rb