taps 0.3.13 → 0.3.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -13,7 +13,7 @@ begin
  s.add_dependency 'json_pure', '>= 1.2.0', '< 1.5.0'
  s.add_dependency 'sinatra', '~> 1.0.0'
  s.add_dependency 'rest-client', '>= 1.4.0', '< 1.7.0'
- s.add_dependency 'sequel', '~> 3.15.0'
+ s.add_dependency 'sequel', '~> 3.17.0'
  s.add_dependency 'sqlite3-ruby', '~> 1.2'
  s.add_dependency 'rack', '>= 1.0.1'
 
data/VERSION.yml CHANGED
@@ -1,5 +1,5 @@
  ---
  :build:
  :minor: 3
- :patch: 13
+ :patch: 14
  :major: 0
data/bin/schema CHANGED
@@ -1,7 +1,7 @@
  #!/usr/bin/env ruby
 
  require 'rubygems'
- gem 'sequel', '~> 3.15.0'
+ gem 'sequel', '~> 3.17.0'
 
  $:.unshift File.dirname(__FILE__) + '/../lib'
 
@@ -2,13 +2,12 @@ require 'taps/monkey'
  require 'taps/multipart'
  require 'taps/utils'
  require 'taps/log'
+ require 'taps/errors'
  require 'json/pure'
 
  module Taps
 
  class DataStream
- class CorruptedData < Exception; end
-
  DEFAULT_CHUNKSIZE = 1000
 
  attr_reader :db, :state
@@ -79,7 +78,10 @@ class DataStream
  ds = table.order(*order_by).limit(state[:chunksize], state[:offset])
  log.debug "DataStream#fetch_rows SQL -> #{ds.sql}"
  rows = Taps::Utils.format_data(ds.all,
- :string_columns => string_columns)
+ :string_columns => string_columns,
+ :schema => db.schema(table_name),
+ :table => table_name
+ )
  update_chunksize_stats
  rows
  end
@@ -173,7 +175,7 @@ class DataStream
  params[:json] = self.class.parse_json(params[:json]) if params.has_key?(:json)
  return params
  rescue JSON::Parser
- raise DataStream::CorruptedData.new("Invalid JSON Received")
+ raise Taps::CorruptedData.new("Invalid JSON Received")
  end
  end
 
@@ -184,7 +186,7 @@ class DataStream
  end
 
  def parse_encoded_data(encoded_data, checksum)
- raise DataStream::CorruptedData.new("Checksum Failed") unless Taps::Utils.valid_data?(encoded_data, checksum)
+ raise Taps::CorruptedData.new("Checksum Failed") unless Taps::Utils.valid_data?(encoded_data, checksum)
 
  begin
  return Marshal.load(Taps::Utils.base64decode(encoded_data))
@@ -202,6 +204,17 @@ class DataStream
  state[:offset] += rows[:data].size
  end
 
+ def verify_stream
+ state[:offset] = table.count
+ end
+
+ def verify_remote_stream(resource, headers)
+ json_raw = resource.post({:state => self.to_json}, headers).to_s
+ json = self.class.parse_json(json_raw)
+
+ self.class.new(db, json[:state])
+ end
+
  def self.factory(db, state)
  if defined?(Sequel::MySQL) && Sequel::MySQL.respond_to?(:convert_invalid_date_time=)
  Sequel::MySQL.convert_invalid_date_time = :nil
@@ -302,6 +315,19 @@ class DataStreamKeyed < DataStream
  # pop the rows we just successfully sent off the buffer
  @buffer.slice!(0, row_count)
  end
+
+ def verify_stream
+ key = primary_key
+ ds = table.order(*order_by)
+ current_filter = ds.max(key.sql_number)
+
+ # set the current filter to the max of the primary key
+ state[:filter] = current_filter
+ # clear out the last_fetched value so it can restart from scratch
+ state[:last_fetched] = nil
+
+ log.debug "DataStreamKeyed#verify_stream -> state: #{state.inspect}"
+ end
  end
 
  end
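
Note on the new resume hooks: verify_remote_stream posts the stream's state to the server and rebuilds a DataStream from the :state the server returns, while verify_stream (and its DataStreamKeyed override) recomputes the offset or primary-key filter on the server side. A minimal client-side sketch, assuming a RestClient resource pointed at the new push/verify_stream route; the db, state, remote_url, key, and headers values below are placeholders, not taken from this diff:

    require 'rest_client'
    require 'taps/data_stream'

    stream   = Taps::DataStream.factory(db, state)
    resource = RestClient::Resource.new("#{remote_url}/sessions/#{key}/push/verify_stream")

    # verify_remote_stream POSTs {:state => stream.to_json}, parses the
    # returned :state, and hands back a fresh stream positioned where the
    # server says it should be.
    stream = stream.verify_remote_stream(resource, headers)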
@@ -0,0 +1,15 @@
+ module Taps
+ class BaseError < StandardError
+ attr_reader :original_backtrace
+
+ def initialize(message, opts={})
+ @original_backtrace = opts.delete(:backtrace)
+ super(message)
+ end
+ end
+
+ class NotImplemented < BaseError; end
+ class DuplicatePrimaryKeyError < BaseError; end
+ class CorruptedData < BaseError; end
+ class InvalidData < BaseError; end
+ end
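
This new error hierarchy (listed in the gem metadata as lib/taps/errors.rb) replaces the old DataStream::CorruptedData and lets exceptions carry a server-side backtrace. A minimal sketch of how the carried backtrace surfaces, assuming the gem is on the load path:

    require 'taps/errors'

    begin
      raise Taps::CorruptedData.new("Invalid JSON Received",
                                    :backtrace => "remote frame 1\nremote frame 2")
    rescue Taps::BaseError => e
      puts "#{e.class}: #{e.message}"
      puts e.original_backtrace if e.original_backtrace  # the carried remote backtrace
    end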
@@ -6,6 +6,7 @@ require 'taps/progress_bar'
  require 'taps/config'
  require 'taps/utils'
  require 'taps/data_stream'
+ require 'taps/errors'
 
  # disable warnings, rest client makes a lot of noise right now
  $VERBOSE = nil
@@ -186,6 +187,30 @@ class Operation
  end
  end
 
+ def catch_errors(&blk)
+ verify_server
+
+ begin
+ blk.call
+ close_session
+ rescue RestClient::Exception, Taps::BaseError => e
+ store_session
+ if e.kind_of?(Taps::BaseError)
+ puts "!!! Caught Server Exception"
+ puts "#{e.class}: #{e.message}"
+ puts "\n#{e.original_backtrace}" if e.original_backtrace
+ exit(1)
+ elsif e.respond_to?(:response)
+ puts "!!! Caught Server Exception"
+ puts "HTTP CODE: #{e.http_code}"
+ puts "#{e.response.to_s}"
+ exit(1)
+ else
+ raise
+ end
+ end
+ end
+
  def self.factory(type, database_url, remote_url, opts)
  type = :resume if opts[:resume]
  klass = case type
@@ -209,9 +234,7 @@ class Pull < Operation
  end
 
  def run
- verify_server
-
- begin
+ catch_errors do
  unless resuming?
  pull_schema
  pull_indexes if indexes_first?
@@ -221,17 +244,6 @@ class Pull < Operation
  pull_data
  pull_indexes unless indexes_first?
  pull_reset_sequences
- close_session
- rescue RestClient::Exception => e
- store_session
- if e.respond_to?(:response)
- puts "!!! Caught Server Exception"
- puts "HTTP CODE: #{e.http_code}"
- puts "#{e.response.to_s}"
- exit(1)
- else
- raise
- end
  end
  end
 
@@ -289,7 +301,7 @@ class Pull < Operation
  progress.inc(size) unless exiting?
  stream.error = false
  self.stream_state = stream.to_hash
- rescue DataStream::CorruptedData => e
+ rescue Taps::CorruptedData => e
  puts "Corrupted Data Received #{e.message}, retrying..."
  stream.error = true
  next
@@ -381,8 +393,7 @@ class Push < Operation
  end
 
  def run
- verify_server
- begin
+ catch_errors do
  unless resuming?
  push_schema
  push_indexes if indexes_first?
@@ -392,17 +403,6 @@ class Push < Operation
  push_data
  push_indexes unless indexes_first?
  push_reset_sequences
- close_session
- rescue RestClient::Exception => e
- store_session
- if e.respond_to?(:response)
- puts "!!! Caught Server Exception"
- puts "HTTP CODE: #{e.http_code}"
- puts "#{e.response.to_s}"
- exit(1)
- else
- raise
- end
  end
  end
 
@@ -477,35 +477,42 @@ class Push < Operation
 
  row_size = 0
  chunksize = stream.state[:chunksize]
- chunksize = Taps::Utils.calculate_chunksize(chunksize) do |c|
- stream.state[:chunksize] = c
- encoded_data, row_size, elapsed_time = stream.fetch
- break if stream.complete?
 
- data = {
- :state => stream.to_hash,
- :checksum => Taps::Utils.checksum(encoded_data).to_s
- }
-
- begin
- content, content_type = Taps::Multipart.create do |r|
- r.attach :name => :encoded_data,
- :payload => encoded_data,
- :content_type => 'application/octet-stream'
- r.attach :name => :json,
- :payload => data.to_json,
- :content_type => 'application/json'
- end
- session_resource['push/table'].post(content, http_headers(:content_type => content_type))
- self.stream_state = stream.to_hash
- rescue RestClient::RequestFailed => e
- # retry the same data, it got corrupted somehow.
- if e.http_code == 412
- next
+ begin
+ chunksize = Taps::Utils.calculate_chunksize(chunksize) do |c|
+ stream.state[:chunksize] = c
+ encoded_data, row_size, elapsed_time = stream.fetch
+ break if stream.complete?
+
+ data = {
+ :state => stream.to_hash,
+ :checksum => Taps::Utils.checksum(encoded_data).to_s
+ }
+
+ begin
+ content, content_type = Taps::Multipart.create do |r|
+ r.attach :name => :encoded_data,
+ :payload => encoded_data,
+ :content_type => 'application/octet-stream'
+ r.attach :name => :json,
+ :payload => data.to_json,
+ :content_type => 'application/json'
+ end
+ session_resource['push/table'].post(content, http_headers(:content_type => content_type))
+ self.stream_state = stream.to_hash
+ rescue => e
+ Taps::Utils.reraise_server_exception(e)
  end
- raise
+
+ elapsed_time
  end
- elapsed_time
+ rescue Taps::CorruptedData => e
+ # retry the same data, it got corrupted somehow.
+ next
+ rescue Taps::DuplicatePrimaryKeyError => e
+ # verify the stream and retry it
+ stream = stream.verify_remote_stream(session_resource['push/verify_stream'], http_headers)
+ next
  end
  stream.state[:chunksize] = chunksize
 
data/lib/taps/server.rb CHANGED
@@ -14,13 +14,18 @@ class Server < Sinatra::Base
 
  error do
  e = request.env['sinatra.error']
- "Taps Server Error: #{e}\n#{e.backtrace}"
+ if e.kind_of?(Taps::BaseError)
+ content_type "application/json"
+ halt 412, { 'error_class' => e.class.to_s, 'error_message' => e.message, 'error_backtrace' => e.backtrace.join("\n") }.to_json
+ else
+ "Taps Server Error: #{e}\n#{e.backtrace}"
+ end
  end
 
  before do
  major, minor, patch = request.env['HTTP_TAPS_VERSION'].split('.') rescue []
- unless "#{major}.#{minor}" == Taps.compatible_version
- halt 417, "Taps v#{Taps.compatible_version}.x is required for this server"
+ unless "#{major}.#{minor}" == Taps.compatible_version && patch.to_i >= 14
+ halt 417, "Taps >= v#{Taps.compatible_version}.14 is required for this server"
  end
  end
 
@@ -42,6 +47,25 @@ class Server < Sinatra::Base
  "/sessions/#{key}"
  end
 
+ post '/sessions/:key/push/verify_stream' do
+ session = DbSession.filter(:key => params[:key]).first
+ halt 404 unless session
+
+ state = DataStream.parse_json(params[:state])
+ stream = nil
+
+ size = 0
+ session.conn do |db|
+ Taps::Utils.server_error_handling do
+ stream = DataStream.factory(db, state)
+ stream.verify_stream
+ end
+ end
+
+ content_type 'application/json'
+ { :state => stream.to_hash }.to_json
+ end
+
  post '/sessions/:key/push/table' do
  session = DbSession.filter(:key => params[:key]).first
  halt 404 unless session
@@ -50,11 +74,9 @@ class Server < Sinatra::Base
 
  size = 0
  session.conn do |db|
- begin
+ Taps::Utils.server_error_handling do
  stream = DataStream.factory(db, json[:state])
  size = stream.fetch_remote_in_server(params)
- rescue Taps::DataStream::CorruptedData
- halt 412
  end
  end
 
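With the error handler above, a server-side Taps::BaseError now reaches the client as HTTP 412 with a JSON body instead of plain text, and Taps::Utils.reraise_server_exception (added in utils.rb below) rebuilds the original exception class from that body. An illustrative payload, not captured from a real run:

    require 'json/pure'

    # Shape of the 412 body built by the Sinatra error block for, e.g., a
    # duplicate-key failure (all values here are examples):
    payload = {
      'error_class'     => 'Taps::DuplicatePrimaryKeyError',
      'error_message'   => 'duplicate key value violates unique constraint ...',
      'error_backtrace' => "remote frame 1\nremote frame 2"
    }.to_json
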
data/lib/taps/utils.rb CHANGED
@@ -3,6 +3,8 @@ require 'stringio'
  require 'time'
  require 'tempfile'
 
+ require 'taps/errors'
+
  module Taps
  module Utils
  extend self
@@ -37,10 +39,32 @@ module Utils
  def format_data(data, opts={})
  return {} if data.size == 0
  string_columns = opts[:string_columns] || []
+ schema = opts[:schema] || []
+ table = opts[:table]
+
+ max_lengths = schema.inject({}) do |hash, (column, meta)|
+ if meta[:db_type] =~ /^\w+\((\d+)\)/
+ hash.update(column => $1.to_i)
+ end
+ hash
+ end
 
  header = data[0].keys
  only_data = data.collect do |row|
  row = blobs_to_string(row, string_columns)
+ row.each do |column, data|
+ if data.to_s.length > (max_lengths[column] || data.to_s.length)
+ raise Taps::InvalidData.new(<<-ERROR)
+ Detected data that exceeds the length limitation of its column. This is
+ generally due to the fact that SQLite does not enforce length restrictions.
+
+ Table : #{table}
+ Column : #{column}
+ Type : #{schema.detect{|s| s.first == column}.last[:db_type]}
+ Data : #{data}
+ ERROR
+ end
+ end
  header.collect { |h| row[h] }
  end
  { :header => header, :data => only_data }
@@ -127,12 +151,7 @@ module Utils
  end
 
  def primary_key(db, table)
- table = table.to_sym.identifier unless table.kind_of?(Sequel::SQL::Identifier)
- if db.respond_to?(:primary_key)
- db.primary_key(table)
- else
- db.schema(table).select { |c| c[1][:primary_key] }.map { |c| c.first.to_sym }
- end
+ db.schema(table).select { |c| c[1][:primary_key] }.map { |c| c[0] }
  end
 
  def single_integer_primary_key(db, table)
@@ -150,5 +169,31 @@ module Utils
  db[table].columns
  end
  end
+
+
+ # try to detect server side errors to
+ # give the client a more useful error message
+ def server_error_handling(&blk)
+ begin
+ blk.call
+ rescue Sequel::DatabaseError => e
+ if e.message =~ /duplicate key value/i
+ raise Taps::DuplicatePrimaryKeyError, e.message
+ else
+ raise
+ end
+ end
+ end
+
+ def reraise_server_exception(e)
+ if e.kind_of?(RestClient::Exception)
+ if e.respond_to?(:response) && e.response.headers[:content_type] == 'application/json'
+ json = JSON.parse(e.response.to_s)
+ klass = eval(json['error_class']) rescue nil
+ raise klass.new(json['error_message'], :backtrace => json['error_backtrace']) if klass
+ end
+ end
+ raise e
+ end
  end
  end
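
The length check in format_data derives per-column limits from Sequel's schema output by pulling the parenthesized size out of :db_type. A small worked example of that inject, with made-up schema rows:

    schema = [ [:name, { :db_type => 'varchar(30)' }],
               [:id,   { :db_type => 'integer' }] ]

    max_lengths = schema.inject({}) do |hash, (column, meta)|
      if meta[:db_type] =~ /^\w+\((\d+)\)/
        hash.update(column => $1.to_i)
      end
      hash
    end
    # => { :name => 30 }  -- :id has no parenthesized length, so no limit applies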
data/spec/utils_spec.rb CHANGED
@@ -12,6 +12,12 @@ describe Taps::Utils do
  Taps::Utils.format_data([ first_row, { :x => 2, :y => 2 } ]).should == { :header => [ :x, :y ], :data => [ [1, 1], [2, 2] ] }
  end
 
+ it "enforces length limitations on columns" do
+ data = [ { :a => "aaabbbccc" } ]
+ schema = [ [ :a, { :db_type => "varchar(3)" }]]
+ lambda { Taps::Utils.format_data(data, :schema => schema) }.should.raise(Taps::InvalidData)
+ end
+
  it "scales chunksize down slowly when the time delta of the block is just over a second" do
  Time.stubs(:now).returns(10.0).returns(11.5)
  Taps::Utils.calculate_chunksize(1000) { }.should == 900
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
  segments:
  - 0
  - 3
- - 13
- version: 0.3.13
+ - 14
+ version: 0.3.14
  platform: ruby
  authors:
  - Ricardo Chimal, Jr.
@@ -14,7 +14,7 @@ autorequire:
  bindir: bin
  cert_chain: []
 
- date: 2010-09-27 00:00:00 -07:00
+ date: 2010-11-12 00:00:00 -08:00
  default_executable:
  dependencies:
  - !ruby/object:Gem::Dependency
@@ -82,9 +82,9 @@ dependencies:
  - !ruby/object:Gem::Version
  segments:
  - 3
- - 15
+ - 17
  - 0
- version: 3.15.0
+ version: 3.17.0
  type: :runtime
  version_requirements: *id004
  - !ruby/object:Gem::Dependency
@@ -137,6 +137,7 @@ files:
  - lib/taps/config.rb
  - lib/taps/data_stream.rb
  - lib/taps/db_session.rb
+ - lib/taps/errors.rb
  - lib/taps/log.rb
  - lib/taps/monkey.rb
  - lib/taps/multipart.rb