taps 0.3.15 → 0.3.17

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,5 +1,5 @@
1
1
  ---
2
2
  :build:
3
3
  :minor: 3
4
- :patch: 15
4
+ :patch: 17
5
5
  :major: 0
data/bin/schema CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  require 'rubygems'
4
- gem 'sequel', '~> 3.17.0'
4
+ gem 'sequel', '~> 3.20.0'
5
5
 
6
6
  $:.unshift File.dirname(__FILE__) + '/../lib'
7
7
 
@@ -0,0 +1,52 @@
1
+ require 'taps/errors'
2
+
3
+ class Taps::Chunksize
4
+ attr_accessor :idle_secs, :time_in_db, :start_time, :end_time, :retries
5
+ attr_reader :chunksize
6
+
7
+ def initialize(chunksize)
8
+ @chunksize = chunksize
9
+ @idle_secs = 0.0
10
+ @retries = 0
11
+ end
12
+
13
+ def to_i
14
+ chunksize
15
+ end
16
+
17
+ def reset_chunksize
18
+ @chunksize = (retries <= 1) ? 10 : 1
19
+ end
20
+
21
+ def diff
22
+ end_time - start_time - time_in_db - idle_secs
23
+ end
24
+
25
+ def time_in_db=(t)
26
+ @time_in_db = t
27
+ @time_in_db = @time_in_db.to_f rescue 0.0
28
+ end
29
+
30
+ def time_delta
31
+ t1 = Time.now
32
+ yield if block_given?
33
+ t2 = Time.now
34
+ t2 - t1
35
+ end
36
+
37
+ def calc_new_chunksize
38
+ new_chunksize = if retries > 0
39
+ chunksize
40
+ elsif diff > 3.0
41
+ (chunksize / 3).ceil
42
+ elsif diff > 1.1
43
+ chunksize - 100
44
+ elsif diff < 0.8
45
+ chunksize * 2
46
+ else
47
+ chunksize + 100
48
+ end
49
+ new_chunksize = 1 if new_chunksize < 1
50
+ new_chunksize
51
+ end
52
+ end
@@ -128,6 +128,7 @@ EOHELP
128
128
  o.define_head "Push a database to a taps server"
129
129
  end
130
130
 
131
+ o.on("-s", "--skip-schema", "Don't transfer the schema, just data") { |v| opts[:skip_schema] = true }
131
132
  o.on("-i", "--indexes-first", "Transfer indexes first before data") { |v| opts[:indexes_first] = true }
132
133
  o.on("-r", "--resume=file", "Resume a Taps Session from a stored file") { |v| opts[:resume_filename] = v }
133
134
  o.on("-c", "--chunksize=N", "Initial Chunksize") { |v| opts[:default_chunksize] = (v.to_i < 10 ? 10 : v.to_i) }
@@ -137,6 +138,7 @@ EOHELP
137
138
  r_tables = v.collect { |t| "^#{t}$" }.join("|")
138
139
  opts[:table_filter] = "(#{r_tables})"
139
140
  end
141
+ o.on("-e", "--exclude_tables=A,B,C", Array, "Shortcut to exclude a list of tables") { |v| opts[:exclude_tables] = v }
140
142
  o.on("-d", "--debug", "Enable Debug Messages") { |v| opts[:debug] = true }
141
143
  o.parse!(argv)
142
144
 
@@ -166,7 +166,7 @@ class DataStream
166
166
  res = nil
167
167
  log.debug "DataStream#fetch_from_resource state -> #{state.inspect}"
168
168
  state[:chunksize] = Taps::Utils.calculate_chunksize(state[:chunksize]) do |c|
169
- state[:chunksize] = c
169
+ state[:chunksize] = c.to_i
170
170
  res = resource.post({:state => self.to_json}, headers)
171
171
  end
172
172
 
@@ -202,6 +202,16 @@ class DataStream
202
202
  def import_rows(rows)
203
203
  table.import(rows[:header], rows[:data])
204
204
  state[:offset] += rows[:data].size
205
+ rescue Exception => ex
206
+ case ex.message
207
+ when /integer out of range/ then
208
+ raise Taps::InvalidData, <<-ERROR, []
209
+ \nDetected integer data that exceeds the maximum allowable size for an integer type.
210
+ This generally occurs when importing from SQLite due to the fact that SQLite does
211
+ not enforce maximum values on integer types.
212
+ ERROR
213
+ else raise ex
214
+ end
205
215
  end
206
216
 
207
217
  def verify_stream
@@ -29,6 +29,10 @@ class Operation
29
29
  "op"
30
30
  end
31
31
 
32
+ def skip_schema?
33
+ !!opts[:skip_schema]
34
+ end
35
+
32
36
  def indexes_first?
33
37
  !!opts[:indexes_first]
34
38
  end
@@ -37,19 +41,24 @@ class Operation
37
41
  opts[:table_filter]
38
42
  end
39
43
 
44
+ def exclude_tables
45
+ opts[:exclude_tables] || []
46
+ end
47
+
40
48
  def apply_table_filter(tables)
41
- return tables unless table_filter
42
- re = Regexp.new(table_filter)
49
+ return tables unless table_filter || exclude_tables
50
+
51
+ re = table_filter ? Regexp.new(table_filter) : nil
43
52
  if tables.kind_of?(Hash)
44
53
  ntables = {}
45
54
  tables.each do |t, d|
46
- unless re.match(t.to_s).nil?
55
+ if !exclude_tables.include?(t.to_s) && (!re || !re.match(t.to_s).nil?)
47
56
  ntables[t] = d
48
57
  end
49
58
  end
50
59
  ntables
51
60
  else
52
- tables.reject { |t| re.match(t.to_s).nil? }
61
+ tables.reject { |t| exclude_tables.include?(t.to_s) || (re && re.match(t.to_s).nil?) }
53
62
  end
54
63
  end
55
64
 
@@ -236,13 +245,13 @@ class Pull < Operation
236
245
  def run
237
246
  catch_errors do
238
247
  unless resuming?
239
- pull_schema
240
- pull_indexes if indexes_first?
248
+ pull_schema if !skip_schema?
249
+ pull_indexes if indexes_first? && !skip_schema?
241
250
  end
242
251
  setup_signal_trap
243
252
  pull_partial_data if resuming?
244
253
  pull_data
245
- pull_indexes unless indexes_first?
254
+ pull_indexes if !indexes_first? && !skip_schema?
246
255
  pull_reset_sequences
247
256
  end
248
257
  end
@@ -395,13 +404,13 @@ class Push < Operation
395
404
  def run
396
405
  catch_errors do
397
406
  unless resuming?
398
- push_schema
399
- push_indexes if indexes_first?
407
+ push_schema if !skip_schema?
408
+ push_indexes if indexes_first? && !skip_schema?
400
409
  end
401
410
  setup_signal_trap
402
411
  push_partial_data if resuming?
403
412
  push_data
404
- push_indexes unless indexes_first?
413
+ push_indexes if !indexes_first? && !skip_schema?
405
414
  push_reset_sequences
406
415
  end
407
416
  end
@@ -480,23 +489,32 @@ class Push < Operation
480
489
 
481
490
  begin
482
491
  chunksize = Taps::Utils.calculate_chunksize(chunksize) do |c|
483
- stream.state[:chunksize] = c
484
- encoded_data, row_size, elapsed_time = stream.fetch
492
+ stream.state[:chunksize] = c.to_i
493
+ encoded_data, row_size, elapsed_time = nil
494
+ d1 = c.time_delta do
495
+ encoded_data, row_size, elapsed_time = stream.fetch
496
+ end
485
497
  break if stream.complete?
486
498
 
487
- data = {
488
- :state => stream.to_hash,
489
- :checksum => Taps::Utils.checksum(encoded_data).to_s
490
- }
499
+ data = nil
500
+ d2 = c.time_delta do
501
+ data = {
502
+ :state => stream.to_hash,
503
+ :checksum => Taps::Utils.checksum(encoded_data).to_s
504
+ }
505
+ end
491
506
 
492
507
  begin
493
- content, content_type = Taps::Multipart.create do |r|
494
- r.attach :name => :encoded_data,
495
- :payload => encoded_data,
496
- :content_type => 'application/octet-stream'
497
- r.attach :name => :json,
498
- :payload => data.to_json,
499
- :content_type => 'application/json'
508
+ content, content_type = nil
509
+ d3 = c.time_delta do
510
+ content, content_type = Taps::Multipart.create do |r|
511
+ r.attach :name => :encoded_data,
512
+ :payload => encoded_data,
513
+ :content_type => 'application/octet-stream'
514
+ r.attach :name => :json,
515
+ :payload => data.to_json,
516
+ :content_type => 'application/json'
517
+ end
500
518
  end
501
519
  session_resource['push/table'].post(content, http_headers(:content_type => content_type))
502
520
  self.stream_state = stream.to_hash
@@ -504,6 +522,8 @@ class Push < Operation
504
522
  Taps::Utils.reraise_server_exception(e)
505
523
  end
506
524
 
525
+ c.idle_secs = (d1 + d2 + d3)
526
+
507
527
  elapsed_time
508
528
  end
509
529
  rescue Taps::CorruptedData => e
@@ -2,8 +2,10 @@ require 'zlib'
2
2
  require 'stringio'
3
3
  require 'time'
4
4
  require 'tempfile'
5
+ require 'rest_client'
5
6
 
6
7
  require 'taps/errors'
8
+ require 'taps/chunksize'
7
9
 
8
10
  module Taps
9
11
  module Utils
@@ -93,42 +95,23 @@ Data : #{data}
93
95
  end
94
96
 
95
97
  def calculate_chunksize(old_chunksize)
96
- chunksize = old_chunksize
98
+ c = Taps::Chunksize.new(old_chunksize)
97
99
 
98
- retries = 0
99
- time_in_db = 0
100
100
  begin
101
- t1 = Time.now
102
- time_in_db = yield chunksize
103
- time_in_db = time_in_db.to_f rescue 0
101
+ c.start_time = Time.now
102
+ c.time_in_db = yield c
104
103
  rescue Errno::EPIPE, RestClient::RequestFailed, RestClient::RequestTimeout
105
- retries += 1
106
- raise if retries > 2
104
+ c.retries += 1
105
+ raise if c.retries > 2
107
106
 
108
107
  # we got disconnected, the chunksize could be too large
109
- # on first retry change to 10, on successive retries go down to 1
110
- chunksize = (retries == 1) ? 10 : 1
111
-
108
+ # reset the chunksize based on the number of retries
109
+ c.reset_chunksize
112
110
  retry
113
111
  end
114
112
 
115
- t2 = Time.now
116
-
117
- diff = t2 - t1 - time_in_db
118
-
119
- new_chunksize = if retries > 0
120
- chunksize
121
- elsif diff > 3.0
122
- (chunksize / 3).ceil
123
- elsif diff > 1.1
124
- chunksize - 100
125
- elsif diff < 0.8
126
- chunksize * 2
127
- else
128
- chunksize + 100
129
- end
130
- new_chunksize = 1 if new_chunksize < 1
131
- new_chunksize
113
+ c.end_time = Time.now
114
+ c.calc_new_chunksize
132
115
  end
133
116
 
134
117
  def load_schema(database_url, schema_data)
@@ -0,0 +1,41 @@
1
+ require File.dirname(__FILE__) + '/base'
2
+ require 'taps/utils'
3
+
4
+ describe Taps::Chunksize do
5
+ it "scales chunksize down slowly when the time delta of the block is just over a second" do
6
+ Time.stubs(:now).returns(10.0).returns(11.5)
7
+ Taps::Utils.calculate_chunksize(1000) { |c| }.should == 900
8
+ end
9
+
10
+ it "scales chunksize down fast when the time delta of the block is over 3 seconds" do
11
+ Time.stubs(:now).returns(10.0).returns(15.0)
12
+ Taps::Utils.calculate_chunksize(3000) { |c| }.should == 1000
13
+ end
14
+
15
+ it "scales up chunksize fast when the time delta of the block is under 0.8 seconds" do
16
+ Time.stubs(:now).returns(10.0).returns(10.7)
17
+ Taps::Utils.calculate_chunksize(1000) { |c| }.should == 2000
18
+ end
19
+
20
+ it "scales up chunksize slow when the time delta of the block is between 0.8 and 1.1 seconds" do
21
+ Time.stubs(:now).returns(10.0).returns(10.8)
22
+ Taps::Utils.calculate_chunksize(1000) { |c| }.should == 1100
23
+
24
+ Time.stubs(:now).returns(10.0).returns(11.1)
25
+ Taps::Utils.calculate_chunksize(1000) { |c| }.should == 1100
26
+ end
27
+
28
+ it "will reset the chunksize to a small value if we got a broken pipe exception" do
29
+ Taps::Utils.calculate_chunksize(1000) do |c|
30
+ raise Errno::EPIPE if c.chunksize == 1000
31
+ c.chunksize.should == 10
32
+ end.should == 10
33
+ end
34
+
35
+ it "will reset the chunksize to a small value if we got a broken pipe exception a second time" do
36
+ Taps::Utils.calculate_chunksize(1000) do |c|
37
+ raise Errno::EPIPE if c.chunksize == 1000 || c.chunksize == 10
38
+ c.chunksize.should == 1
39
+ end.should == 1
40
+ end
41
+ end
@@ -7,4 +7,10 @@ describe Taps::Cli do
7
7
  opts = @cli.clientoptparse(:pull)
8
8
  opts[:table_filter].should == "(^mytable1$|^logs$)"
9
9
  end
10
+
11
+ it "translates a list of tables to exclude into a regex that can be used in table_filter" do
12
+ @cli = Taps::Cli.new(["-e", "mytable1,logs", "sqlite://tmp.db", "http://x:y@localhost:5000"])
13
+ opts = @cli.clientoptparse(:pull)
14
+ opts[:exclude_tables].should == ['mytable1','logs']
15
+ end
10
16
  end
@@ -16,6 +16,16 @@ describe Taps::Operation do
16
16
  @op.apply_table_filter({ 'abc' => 1, 'def' => 2 }).should == { 'abc' => 1 }
17
17
  end
18
18
 
19
+ it "returns an array of tables without the exclude_tables tables" do
20
+ @op = Taps::Operation.new('dummy://localhost', 'http://x:y@localhost:5000', :exclude_tables => ['abc', 'ghi', 'jkl'])
21
+ @op.apply_table_filter(['abc', 'def', 'ghi', 'jkl', 'mno']).should == ['def', 'mno']
22
+ end
23
+
24
+ it "returns a hash of tables without the exclude_tables tables" do
25
+ @op = Taps::Operation.new('dummy://localhost', 'http://x:y@localhost:5000', :exclude_tables => ['abc', 'ghi', 'jkl'])
26
+ @op.apply_table_filter({ 'abc' => 1, 'def' => 2, 'ghi' => 3, 'jkl' => 4, 'mno' => 5 }).should == { 'def' => 2, 'mno' => 5 }
27
+ end
28
+
19
29
  it "masks a url's password" do
20
30
  @op.safe_url("mysql://root:password@localhost/mydb").should == "mysql://root:[hidden]@localhost/mydb"
21
31
  end
@@ -18,37 +18,6 @@ describe Taps::Utils do
18
18
  lambda { Taps::Utils.format_data(data, :schema => schema) }.should.raise(Taps::InvalidData)
19
19
  end
20
20
 
21
- it "scales chunksize down slowly when the time delta of the block is just over a second" do
22
- Time.stubs(:now).returns(10.0).returns(11.5)
23
- Taps::Utils.calculate_chunksize(1000) { }.should == 900
24
- end
25
-
26
- it "scales chunksize down fast when the time delta of the block is over 3 seconds" do
27
- Time.stubs(:now).returns(10.0).returns(15.0)
28
- Taps::Utils.calculate_chunksize(3000) { }.should == 1000
29
- end
30
-
31
- it "scales up chunksize fast when the time delta of the block is under 0.8 seconds" do
32
- Time.stubs(:now).returns(10.0).returns(10.7)
33
- Taps::Utils.calculate_chunksize(1000) { }.should == 2000
34
- end
35
-
36
- it "scales up chunksize slow when the time delta of the block is between 0.8 and 1.1 seconds" do
37
- Time.stubs(:now).returns(10.0).returns(10.8)
38
- Taps::Utils.calculate_chunksize(1000) { }.should == 1100
39
-
40
- Time.stubs(:now).returns(10.0).returns(11.1)
41
- Taps::Utils.calculate_chunksize(1000) { }.should == 1100
42
- end
43
-
44
- it "will reset the chunksize to a small value if we got a broken pipe exception" do
45
- Taps::Utils.calculate_chunksize(1000) { |c| raise Errno::EPIPE if c == 1000; c.should == 10 }.should == 10
46
- end
47
-
48
- it "will reset the chunksize to a small value if we got a broken pipe exception a second time" do
49
- Taps::Utils.calculate_chunksize(1000) { |c| raise Errno::EPIPE if c == 1000 || c == 10; c.should == 1 }.should == 1
50
- end
51
-
52
21
  it "returns a list of columns that are text fields if the database is mysql" do
53
22
  @db = mock("db", :url => "mysql://localhost/mydb")
54
23
  @db.stubs(:schema).with(:mytable).returns([
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: taps
3
3
  version: !ruby/object:Gem::Version
4
- hash: 13
4
+ hash: 49
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 3
9
- - 15
10
- version: 0.3.15
9
+ - 17
10
+ version: 0.3.17
11
11
  platform: ruby
12
12
  authors:
13
13
  - Ricardo Chimal, Jr.
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-01-11 00:00:00 -05:00
18
+ date: 2011-02-25 00:00:00 -08:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -26,12 +26,12 @@ dependencies:
26
26
  requirements:
27
27
  - - ~>
28
28
  - !ruby/object:Gem::Version
29
- hash: 11
29
+ hash: 1
30
30
  segments:
31
31
  - 1
32
- - 4
33
- - 6
34
- version: 1.4.6
32
+ - 5
33
+ - 1
34
+ version: 1.5.1
35
35
  type: :runtime
36
36
  version_requirements: *id001
37
37
  - !ruby/object:Gem::Dependency
@@ -82,12 +82,12 @@ dependencies:
82
82
  requirements:
83
83
  - - ~>
84
84
  - !ruby/object:Gem::Version
85
- hash: 67
85
+ hash: 87
86
86
  segments:
87
87
  - 3
88
- - 17
88
+ - 20
89
89
  - 0
90
- version: 3.17.0
90
+ version: 3.20.0
91
91
  type: :runtime
92
92
  version_requirements: *id004
93
93
  - !ruby/object:Gem::Dependency
@@ -204,6 +204,7 @@ files:
204
204
  - bin/schema
205
205
  - bin/schema.cmd
206
206
  - bin/taps
207
+ - lib/taps/chunksize.rb
207
208
  - lib/taps/cli.rb
208
209
  - lib/taps/config.rb
209
210
  - lib/taps/data_stream.rb
@@ -220,6 +221,7 @@ files:
220
221
  - lib/taps/version.rb
221
222
  - README.rdoc
222
223
  - spec/base.rb
224
+ - spec/chunksize_spec.rb
223
225
  - spec/cli_spec.rb
224
226
  - spec/data_stream_spec.rb
225
227
  - spec/operation_spec.rb