taps 0.3.15 → 0.3.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
  ---
2
2
  :build:
3
3
  :minor: 3
4
- :patch: 15
4
+ :patch: 17
5
5
  :major: 0
data/bin/schema CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  require 'rubygems'
4
- gem 'sequel', '~> 3.17.0'
4
+ gem 'sequel', '~> 3.20.0'
5
5
 
6
6
  $:.unshift File.dirname(__FILE__) + '/../lib'
7
7
 
@@ -0,0 +1,52 @@
1
+ require 'taps/errors'
2
+
3
+ class Taps::Chunksize
4
+ attr_accessor :idle_secs, :time_in_db, :start_time, :end_time, :retries
5
+ attr_reader :chunksize
6
+
7
+ def initialize(chunksize)
8
+ @chunksize = chunksize
9
+ @idle_secs = 0.0
10
+ @retries = 0
11
+ end
12
+
13
+ def to_i
14
+ chunksize
15
+ end
16
+
17
+ def reset_chunksize
18
+ @chunksize = (retries <= 1) ? 10 : 1
19
+ end
20
+
21
+ def diff
22
+ end_time - start_time - time_in_db - idle_secs
23
+ end
24
+
25
+ def time_in_db=(t)
26
+ @time_in_db = t
27
+ @time_in_db = @time_in_db.to_f rescue 0.0
28
+ end
29
+
30
+ def time_delta
31
+ t1 = Time.now
32
+ yield if block_given?
33
+ t2 = Time.now
34
+ t2 - t1
35
+ end
36
+
37
+ def calc_new_chunksize
38
+ new_chunksize = if retries > 0
39
+ chunksize
40
+ elsif diff > 3.0
41
+ (chunksize / 3).ceil
42
+ elsif diff > 1.1
43
+ chunksize - 100
44
+ elsif diff < 0.8
45
+ chunksize * 2
46
+ else
47
+ chunksize + 100
48
+ end
49
+ new_chunksize = 1 if new_chunksize < 1
50
+ new_chunksize
51
+ end
52
+ end
@@ -128,6 +128,7 @@ EOHELP
128
128
  o.define_head "Push a database to a taps server"
129
129
  end
130
130
 
131
+ o.on("-s", "--skip-schema", "Don't transfer the schema, just data") { |v| opts[:skip_schema] = true }
131
132
  o.on("-i", "--indexes-first", "Transfer indexes first before data") { |v| opts[:indexes_first] = true }
132
133
  o.on("-r", "--resume=file", "Resume a Taps Session from a stored file") { |v| opts[:resume_filename] = v }
133
134
  o.on("-c", "--chunksize=N", "Initial Chunksize") { |v| opts[:default_chunksize] = (v.to_i < 10 ? 10 : v.to_i) }
@@ -137,6 +138,7 @@ EOHELP
137
138
  r_tables = v.collect { |t| "^#{t}$" }.join("|")
138
139
  opts[:table_filter] = "(#{r_tables})"
139
140
  end
141
+ o.on("-e", "--exclude_tables=A,B,C", Array, "Shortcut to exclude a list of tables") { |v| opts[:exclude_tables] = v }
140
142
  o.on("-d", "--debug", "Enable Debug Messages") { |v| opts[:debug] = true }
141
143
  o.parse!(argv)
142
144
 
@@ -166,7 +166,7 @@ class DataStream
166
166
  res = nil
167
167
  log.debug "DataStream#fetch_from_resource state -> #{state.inspect}"
168
168
  state[:chunksize] = Taps::Utils.calculate_chunksize(state[:chunksize]) do |c|
169
- state[:chunksize] = c
169
+ state[:chunksize] = c.to_i
170
170
  res = resource.post({:state => self.to_json}, headers)
171
171
  end
172
172
 
@@ -202,6 +202,16 @@ class DataStream
202
202
  def import_rows(rows)
203
203
  table.import(rows[:header], rows[:data])
204
204
  state[:offset] += rows[:data].size
205
+ rescue Exception => ex
206
+ case ex.message
207
+ when /integer out of range/ then
208
+ raise Taps::InvalidData, <<-ERROR, []
209
+ \nDetected integer data that exceeds the maximum allowable size for an integer type.
210
+ This generally occurs when importing from SQLite due to the fact that SQLite does
211
+ not enforce maximum values on integer types.
212
+ ERROR
213
+ else raise ex
214
+ end
205
215
  end
206
216
 
207
217
  def verify_stream
@@ -29,6 +29,10 @@ class Operation
29
29
  "op"
30
30
  end
31
31
 
32
+ def skip_schema?
33
+ !!opts[:skip_schema]
34
+ end
35
+
32
36
  def indexes_first?
33
37
  !!opts[:indexes_first]
34
38
  end
@@ -37,19 +41,24 @@ class Operation
37
41
  opts[:table_filter]
38
42
  end
39
43
 
44
+ def exclude_tables
45
+ opts[:exclude_tables] || []
46
+ end
47
+
40
48
  def apply_table_filter(tables)
41
- return tables unless table_filter
42
- re = Regexp.new(table_filter)
49
+ return tables unless table_filter || exclude_tables
50
+
51
+ re = table_filter ? Regexp.new(table_filter) : nil
43
52
  if tables.kind_of?(Hash)
44
53
  ntables = {}
45
54
  tables.each do |t, d|
46
- unless re.match(t.to_s).nil?
55
+ if !exclude_tables.include?(t.to_s) && (!re || !re.match(t.to_s).nil?)
47
56
  ntables[t] = d
48
57
  end
49
58
  end
50
59
  ntables
51
60
  else
52
- tables.reject { |t| re.match(t.to_s).nil? }
61
+ tables.reject { |t| exclude_tables.include?(t.to_s) || (re && re.match(t.to_s).nil?) }
53
62
  end
54
63
  end
55
64
 
@@ -236,13 +245,13 @@ class Pull < Operation
236
245
  def run
237
246
  catch_errors do
238
247
  unless resuming?
239
- pull_schema
240
- pull_indexes if indexes_first?
248
+ pull_schema if !skip_schema?
249
+ pull_indexes if indexes_first? && !skip_schema?
241
250
  end
242
251
  setup_signal_trap
243
252
  pull_partial_data if resuming?
244
253
  pull_data
245
- pull_indexes unless indexes_first?
254
+ pull_indexes if !indexes_first? && !skip_schema?
246
255
  pull_reset_sequences
247
256
  end
248
257
  end
@@ -395,13 +404,13 @@ class Push < Operation
395
404
  def run
396
405
  catch_errors do
397
406
  unless resuming?
398
- push_schema
399
- push_indexes if indexes_first?
407
+ push_schema if !skip_schema?
408
+ push_indexes if indexes_first? && !skip_schema?
400
409
  end
401
410
  setup_signal_trap
402
411
  push_partial_data if resuming?
403
412
  push_data
404
- push_indexes unless indexes_first?
413
+ push_indexes if !indexes_first? && !skip_schema?
405
414
  push_reset_sequences
406
415
  end
407
416
  end
@@ -480,23 +489,32 @@ class Push < Operation
480
489
 
481
490
  begin
482
491
  chunksize = Taps::Utils.calculate_chunksize(chunksize) do |c|
483
- stream.state[:chunksize] = c
484
- encoded_data, row_size, elapsed_time = stream.fetch
492
+ stream.state[:chunksize] = c.to_i
493
+ encoded_data, row_size, elapsed_time = nil
494
+ d1 = c.time_delta do
495
+ encoded_data, row_size, elapsed_time = stream.fetch
496
+ end
485
497
  break if stream.complete?
486
498
 
487
- data = {
488
- :state => stream.to_hash,
489
- :checksum => Taps::Utils.checksum(encoded_data).to_s
490
- }
499
+ data = nil
500
+ d2 = c.time_delta do
501
+ data = {
502
+ :state => stream.to_hash,
503
+ :checksum => Taps::Utils.checksum(encoded_data).to_s
504
+ }
505
+ end
491
506
 
492
507
  begin
493
- content, content_type = Taps::Multipart.create do |r|
494
- r.attach :name => :encoded_data,
495
- :payload => encoded_data,
496
- :content_type => 'application/octet-stream'
497
- r.attach :name => :json,
498
- :payload => data.to_json,
499
- :content_type => 'application/json'
508
+ content, content_type = nil
509
+ d3 = c.time_delta do
510
+ content, content_type = Taps::Multipart.create do |r|
511
+ r.attach :name => :encoded_data,
512
+ :payload => encoded_data,
513
+ :content_type => 'application/octet-stream'
514
+ r.attach :name => :json,
515
+ :payload => data.to_json,
516
+ :content_type => 'application/json'
517
+ end
500
518
  end
501
519
  session_resource['push/table'].post(content, http_headers(:content_type => content_type))
502
520
  self.stream_state = stream.to_hash
@@ -504,6 +522,8 @@ class Push < Operation
504
522
  Taps::Utils.reraise_server_exception(e)
505
523
  end
506
524
 
525
+ c.idle_secs = (d1 + d2 + d3)
526
+
507
527
  elapsed_time
508
528
  end
509
529
  rescue Taps::CorruptedData => e
@@ -2,8 +2,10 @@ require 'zlib'
2
2
  require 'stringio'
3
3
  require 'time'
4
4
  require 'tempfile'
5
+ require 'rest_client'
5
6
 
6
7
  require 'taps/errors'
8
+ require 'taps/chunksize'
7
9
 
8
10
  module Taps
9
11
  module Utils
@@ -93,42 +95,23 @@ Data : #{data}
93
95
  end
94
96
 
95
97
  def calculate_chunksize(old_chunksize)
96
- chunksize = old_chunksize
98
+ c = Taps::Chunksize.new(old_chunksize)
97
99
 
98
- retries = 0
99
- time_in_db = 0
100
100
  begin
101
- t1 = Time.now
102
- time_in_db = yield chunksize
103
- time_in_db = time_in_db.to_f rescue 0
101
+ c.start_time = Time.now
102
+ c.time_in_db = yield c
104
103
  rescue Errno::EPIPE, RestClient::RequestFailed, RestClient::RequestTimeout
105
- retries += 1
106
- raise if retries > 2
104
+ c.retries += 1
105
+ raise if c.retries > 2
107
106
 
108
107
  # we got disconnected, the chunksize could be too large
109
- # on first retry change to 10, on successive retries go down to 1
110
- chunksize = (retries == 1) ? 10 : 1
111
-
108
+ # reset the chunksize based on the number of retries
109
+ c.reset_chunksize
112
110
  retry
113
111
  end
114
112
 
115
- t2 = Time.now
116
-
117
- diff = t2 - t1 - time_in_db
118
-
119
- new_chunksize = if retries > 0
120
- chunksize
121
- elsif diff > 3.0
122
- (chunksize / 3).ceil
123
- elsif diff > 1.1
124
- chunksize - 100
125
- elsif diff < 0.8
126
- chunksize * 2
127
- else
128
- chunksize + 100
129
- end
130
- new_chunksize = 1 if new_chunksize < 1
131
- new_chunksize
113
+ c.end_time = Time.now
114
+ c.calc_new_chunksize
132
115
  end
133
116
 
134
117
  def load_schema(database_url, schema_data)
@@ -0,0 +1,41 @@
1
+ require File.dirname(__FILE__) + '/base'
2
+ require 'taps/utils'
3
+
4
+ describe Taps::Chunksize do
5
+ it "scales chunksize down slowly when the time delta of the block is just over a second" do
6
+ Time.stubs(:now).returns(10.0).returns(11.5)
7
+ Taps::Utils.calculate_chunksize(1000) { |c| }.should == 900
8
+ end
9
+
10
+ it "scales chunksize down fast when the time delta of the block is over 3 seconds" do
11
+ Time.stubs(:now).returns(10.0).returns(15.0)
12
+ Taps::Utils.calculate_chunksize(3000) { |c| }.should == 1000
13
+ end
14
+
15
+ it "scales up chunksize fast when the time delta of the block is under 0.8 seconds" do
16
+ Time.stubs(:now).returns(10.0).returns(10.7)
17
+ Taps::Utils.calculate_chunksize(1000) { |c| }.should == 2000
18
+ end
19
+
20
+ it "scales up chunksize slow when the time delta of the block is between 0.8 and 1.1 seconds" do
21
+ Time.stubs(:now).returns(10.0).returns(10.8)
22
+ Taps::Utils.calculate_chunksize(1000) { |c| }.should == 1100
23
+
24
+ Time.stubs(:now).returns(10.0).returns(11.1)
25
+ Taps::Utils.calculate_chunksize(1000) { |c| }.should == 1100
26
+ end
27
+
28
+ it "will reset the chunksize to a small value if we got a broken pipe exception" do
29
+ Taps::Utils.calculate_chunksize(1000) do |c|
30
+ raise Errno::EPIPE if c.chunksize == 1000
31
+ c.chunksize.should == 10
32
+ end.should == 10
33
+ end
34
+
35
+ it "will reset the chunksize to a small value if we got a broken pipe exception a second time" do
36
+ Taps::Utils.calculate_chunksize(1000) do |c|
37
+ raise Errno::EPIPE if c.chunksize == 1000 || c.chunksize == 10
38
+ c.chunksize.should == 1
39
+ end.should == 1
40
+ end
41
+ end
@@ -7,4 +7,10 @@ describe Taps::Cli do
7
7
  opts = @cli.clientoptparse(:pull)
8
8
  opts[:table_filter].should == "(^mytable1$|^logs$)"
9
9
  end
10
+
11
+ it "translates a list of tables to exclude into a regex that can be used in table_filter" do
12
+ @cli = Taps::Cli.new(["-e", "mytable1,logs", "sqlite://tmp.db", "http://x:y@localhost:5000"])
13
+ opts = @cli.clientoptparse(:pull)
14
+ opts[:exclude_tables].should == ['mytable1','logs']
15
+ end
10
16
  end
@@ -16,6 +16,16 @@ describe Taps::Operation do
16
16
  @op.apply_table_filter({ 'abc' => 1, 'def' => 2 }).should == { 'abc' => 1 }
17
17
  end
18
18
 
19
+ it "returns an array of tables without the exclude_tables tables" do
20
+ @op = Taps::Operation.new('dummy://localhost', 'http://x:y@localhost:5000', :exclude_tables => ['abc', 'ghi', 'jkl'])
21
+ @op.apply_table_filter(['abc', 'def', 'ghi', 'jkl', 'mno']).should == ['def', 'mno']
22
+ end
23
+
24
+ it "returns a hash of tables without the exclude_tables tables" do
25
+ @op = Taps::Operation.new('dummy://localhost', 'http://x:y@localhost:5000', :exclude_tables => ['abc', 'ghi', 'jkl'])
26
+ @op.apply_table_filter({ 'abc' => 1, 'def' => 2, 'ghi' => 3, 'jkl' => 4, 'mno' => 5 }).should == { 'def' => 2, 'mno' => 5 }
27
+ end
28
+
19
29
  it "masks a url's password" do
20
30
  @op.safe_url("mysql://root:password@localhost/mydb").should == "mysql://root:[hidden]@localhost/mydb"
21
31
  end
@@ -18,37 +18,6 @@ describe Taps::Utils do
18
18
  lambda { Taps::Utils.format_data(data, :schema => schema) }.should.raise(Taps::InvalidData)
19
19
  end
20
20
 
21
- it "scales chunksize down slowly when the time delta of the block is just over a second" do
22
- Time.stubs(:now).returns(10.0).returns(11.5)
23
- Taps::Utils.calculate_chunksize(1000) { }.should == 900
24
- end
25
-
26
- it "scales chunksize down fast when the time delta of the block is over 3 seconds" do
27
- Time.stubs(:now).returns(10.0).returns(15.0)
28
- Taps::Utils.calculate_chunksize(3000) { }.should == 1000
29
- end
30
-
31
- it "scales up chunksize fast when the time delta of the block is under 0.8 seconds" do
32
- Time.stubs(:now).returns(10.0).returns(10.7)
33
- Taps::Utils.calculate_chunksize(1000) { }.should == 2000
34
- end
35
-
36
- it "scales up chunksize slow when the time delta of the block is between 0.8 and 1.1 seconds" do
37
- Time.stubs(:now).returns(10.0).returns(10.8)
38
- Taps::Utils.calculate_chunksize(1000) { }.should == 1100
39
-
40
- Time.stubs(:now).returns(10.0).returns(11.1)
41
- Taps::Utils.calculate_chunksize(1000) { }.should == 1100
42
- end
43
-
44
- it "will reset the chunksize to a small value if we got a broken pipe exception" do
45
- Taps::Utils.calculate_chunksize(1000) { |c| raise Errno::EPIPE if c == 1000; c.should == 10 }.should == 10
46
- end
47
-
48
- it "will reset the chunksize to a small value if we got a broken pipe exception a second time" do
49
- Taps::Utils.calculate_chunksize(1000) { |c| raise Errno::EPIPE if c == 1000 || c == 10; c.should == 1 }.should == 1
50
- end
51
-
52
21
  it "returns a list of columns that are text fields if the database is mysql" do
53
22
  @db = mock("db", :url => "mysql://localhost/mydb")
54
23
  @db.stubs(:schema).with(:mytable).returns([
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: taps
3
3
  version: !ruby/object:Gem::Version
4
- hash: 13
4
+ hash: 49
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 3
9
- - 15
10
- version: 0.3.15
9
+ - 17
10
+ version: 0.3.17
11
11
  platform: ruby
12
12
  authors:
13
13
  - Ricardo Chimal, Jr.
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-01-11 00:00:00 -05:00
18
+ date: 2011-02-25 00:00:00 -08:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -26,12 +26,12 @@ dependencies:
26
26
  requirements:
27
27
  - - ~>
28
28
  - !ruby/object:Gem::Version
29
- hash: 11
29
+ hash: 1
30
30
  segments:
31
31
  - 1
32
- - 4
33
- - 6
34
- version: 1.4.6
32
+ - 5
33
+ - 1
34
+ version: 1.5.1
35
35
  type: :runtime
36
36
  version_requirements: *id001
37
37
  - !ruby/object:Gem::Dependency
@@ -82,12 +82,12 @@ dependencies:
82
82
  requirements:
83
83
  - - ~>
84
84
  - !ruby/object:Gem::Version
85
- hash: 67
85
+ hash: 87
86
86
  segments:
87
87
  - 3
88
- - 17
88
+ - 20
89
89
  - 0
90
- version: 3.17.0
90
+ version: 3.20.0
91
91
  type: :runtime
92
92
  version_requirements: *id004
93
93
  - !ruby/object:Gem::Dependency
@@ -204,6 +204,7 @@ files:
204
204
  - bin/schema
205
205
  - bin/schema.cmd
206
206
  - bin/taps
207
+ - lib/taps/chunksize.rb
207
208
  - lib/taps/cli.rb
208
209
  - lib/taps/config.rb
209
210
  - lib/taps/data_stream.rb
@@ -220,6 +221,7 @@ files:
220
221
  - lib/taps/version.rb
221
222
  - README.rdoc
222
223
  - spec/base.rb
224
+ - spec/chunksize_spec.rb
223
225
  - spec/cli_spec.rb
224
226
  - spec/data_stream_spec.rb
225
227
  - spec/operation_spec.rb