taps 0.3.15 → 0.3.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION.yml +1 -1
- data/bin/schema +1 -1
- data/lib/taps/chunksize.rb +52 -0
- data/lib/taps/cli.rb +2 -0
- data/lib/taps/data_stream.rb +11 -1
- data/lib/taps/operation.rb +43 -23
- data/lib/taps/utils.rb +11 -28
- data/spec/chunksize_spec.rb +41 -0
- data/spec/cli_spec.rb +6 -0
- data/spec/operation_spec.rb +10 -0
- data/spec/utils_spec.rb +0 -31
- metadata +13 -11
data/VERSION.yml
CHANGED
data/bin/schema
CHANGED
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'taps/errors'
|
2
|
+
|
3
|
+
class Taps::Chunksize
|
4
|
+
attr_accessor :idle_secs, :time_in_db, :start_time, :end_time, :retries
|
5
|
+
attr_reader :chunksize
|
6
|
+
|
7
|
+
def initialize(chunksize)
|
8
|
+
@chunksize = chunksize
|
9
|
+
@idle_secs = 0.0
|
10
|
+
@retries = 0
|
11
|
+
end
|
12
|
+
|
13
|
+
def to_i
|
14
|
+
chunksize
|
15
|
+
end
|
16
|
+
|
17
|
+
def reset_chunksize
|
18
|
+
@chunksize = (retries <= 1) ? 10 : 1
|
19
|
+
end
|
20
|
+
|
21
|
+
def diff
|
22
|
+
end_time - start_time - time_in_db - idle_secs
|
23
|
+
end
|
24
|
+
|
25
|
+
def time_in_db=(t)
|
26
|
+
@time_in_db = t
|
27
|
+
@time_in_db = @time_in_db.to_f rescue 0.0
|
28
|
+
end
|
29
|
+
|
30
|
+
def time_delta
|
31
|
+
t1 = Time.now
|
32
|
+
yield if block_given?
|
33
|
+
t2 = Time.now
|
34
|
+
t2 - t1
|
35
|
+
end
|
36
|
+
|
37
|
+
def calc_new_chunksize
|
38
|
+
new_chunksize = if retries > 0
|
39
|
+
chunksize
|
40
|
+
elsif diff > 3.0
|
41
|
+
(chunksize / 3).ceil
|
42
|
+
elsif diff > 1.1
|
43
|
+
chunksize - 100
|
44
|
+
elsif diff < 0.8
|
45
|
+
chunksize * 2
|
46
|
+
else
|
47
|
+
chunksize + 100
|
48
|
+
end
|
49
|
+
new_chunksize = 1 if new_chunksize < 1
|
50
|
+
new_chunksize
|
51
|
+
end
|
52
|
+
end
|
data/lib/taps/cli.rb
CHANGED
@@ -128,6 +128,7 @@ EOHELP
|
|
128
128
|
o.define_head "Push a database to a taps server"
|
129
129
|
end
|
130
130
|
|
131
|
+
o.on("-s", "--skip-schema", "Don't transfer the schema, just data") { |v| opts[:skip_schema] = true }
|
131
132
|
o.on("-i", "--indexes-first", "Transfer indexes first before data") { |v| opts[:indexes_first] = true }
|
132
133
|
o.on("-r", "--resume=file", "Resume a Taps Session from a stored file") { |v| opts[:resume_filename] = v }
|
133
134
|
o.on("-c", "--chunksize=N", "Initial Chunksize") { |v| opts[:default_chunksize] = (v.to_i < 10 ? 10 : v.to_i) }
|
@@ -137,6 +138,7 @@ EOHELP
|
|
137
138
|
r_tables = v.collect { |t| "^#{t}$" }.join("|")
|
138
139
|
opts[:table_filter] = "(#{r_tables})"
|
139
140
|
end
|
141
|
+
o.on("-e", "--exclude_tables=A,B,C", Array, "Shortcut to exclude a list of tables") { |v| opts[:exclude_tables] = v }
|
140
142
|
o.on("-d", "--debug", "Enable Debug Messages") { |v| opts[:debug] = true }
|
141
143
|
o.parse!(argv)
|
142
144
|
|
data/lib/taps/data_stream.rb
CHANGED
@@ -166,7 +166,7 @@ class DataStream
|
|
166
166
|
res = nil
|
167
167
|
log.debug "DataStream#fetch_from_resource state -> #{state.inspect}"
|
168
168
|
state[:chunksize] = Taps::Utils.calculate_chunksize(state[:chunksize]) do |c|
|
169
|
-
state[:chunksize] = c
|
169
|
+
state[:chunksize] = c.to_i
|
170
170
|
res = resource.post({:state => self.to_json}, headers)
|
171
171
|
end
|
172
172
|
|
@@ -202,6 +202,16 @@ class DataStream
|
|
202
202
|
def import_rows(rows)
|
203
203
|
table.import(rows[:header], rows[:data])
|
204
204
|
state[:offset] += rows[:data].size
|
205
|
+
rescue Exception => ex
|
206
|
+
case ex.message
|
207
|
+
when /integer out of range/ then
|
208
|
+
raise Taps::InvalidData, <<-ERROR, []
|
209
|
+
\nDetected integer data that exceeds the maximum allowable size for an integer type.
|
210
|
+
This generally occurs when importing from SQLite due to the fact that SQLite does
|
211
|
+
not enforce maximum values on integer types.
|
212
|
+
ERROR
|
213
|
+
else raise ex
|
214
|
+
end
|
205
215
|
end
|
206
216
|
|
207
217
|
def verify_stream
|
data/lib/taps/operation.rb
CHANGED
@@ -29,6 +29,10 @@ class Operation
|
|
29
29
|
"op"
|
30
30
|
end
|
31
31
|
|
32
|
+
def skip_schema?
|
33
|
+
!!opts[:skip_schema]
|
34
|
+
end
|
35
|
+
|
32
36
|
def indexes_first?
|
33
37
|
!!opts[:indexes_first]
|
34
38
|
end
|
@@ -37,19 +41,24 @@ class Operation
|
|
37
41
|
opts[:table_filter]
|
38
42
|
end
|
39
43
|
|
44
|
+
def exclude_tables
|
45
|
+
opts[:exclude_tables] || []
|
46
|
+
end
|
47
|
+
|
40
48
|
def apply_table_filter(tables)
|
41
|
-
return tables unless table_filter
|
42
|
-
|
49
|
+
return tables unless table_filter || exclude_tables
|
50
|
+
|
51
|
+
re = table_filter ? Regexp.new(table_filter) : nil
|
43
52
|
if tables.kind_of?(Hash)
|
44
53
|
ntables = {}
|
45
54
|
tables.each do |t, d|
|
46
|
-
|
55
|
+
if !exclude_tables.include?(t.to_s) && (!re || !re.match(t.to_s).nil?)
|
47
56
|
ntables[t] = d
|
48
57
|
end
|
49
58
|
end
|
50
59
|
ntables
|
51
60
|
else
|
52
|
-
tables.reject { |t| re.match(t.to_s).nil? }
|
61
|
+
tables.reject { |t| exclude_tables.include?(t.to_s) || (re && re.match(t.to_s).nil?) }
|
53
62
|
end
|
54
63
|
end
|
55
64
|
|
@@ -236,13 +245,13 @@ class Pull < Operation
|
|
236
245
|
def run
|
237
246
|
catch_errors do
|
238
247
|
unless resuming?
|
239
|
-
pull_schema
|
240
|
-
pull_indexes if indexes_first?
|
248
|
+
pull_schema if !skip_schema?
|
249
|
+
pull_indexes if indexes_first? && !skip_schema?
|
241
250
|
end
|
242
251
|
setup_signal_trap
|
243
252
|
pull_partial_data if resuming?
|
244
253
|
pull_data
|
245
|
-
pull_indexes
|
254
|
+
pull_indexes if !indexes_first? && !skip_schema?
|
246
255
|
pull_reset_sequences
|
247
256
|
end
|
248
257
|
end
|
@@ -395,13 +404,13 @@ class Push < Operation
|
|
395
404
|
def run
|
396
405
|
catch_errors do
|
397
406
|
unless resuming?
|
398
|
-
push_schema
|
399
|
-
push_indexes if indexes_first?
|
407
|
+
push_schema if !skip_schema?
|
408
|
+
push_indexes if indexes_first? && !skip_schema?
|
400
409
|
end
|
401
410
|
setup_signal_trap
|
402
411
|
push_partial_data if resuming?
|
403
412
|
push_data
|
404
|
-
push_indexes
|
413
|
+
push_indexes if !indexes_first? && !skip_schema?
|
405
414
|
push_reset_sequences
|
406
415
|
end
|
407
416
|
end
|
@@ -480,23 +489,32 @@ class Push < Operation
|
|
480
489
|
|
481
490
|
begin
|
482
491
|
chunksize = Taps::Utils.calculate_chunksize(chunksize) do |c|
|
483
|
-
stream.state[:chunksize] = c
|
484
|
-
encoded_data, row_size, elapsed_time = stream.fetch
|
492
|
+
stream.state[:chunksize] = c.to_i
|
493
|
+
encoded_data, row_size, elapsed_time = nil
|
494
|
+
d1 = c.time_delta do
|
495
|
+
encoded_data, row_size, elapsed_time = stream.fetch
|
496
|
+
end
|
485
497
|
break if stream.complete?
|
486
498
|
|
487
|
-
data =
|
488
|
-
|
489
|
-
|
490
|
-
|
499
|
+
data = nil
|
500
|
+
d2 = c.time_delta do
|
501
|
+
data = {
|
502
|
+
:state => stream.to_hash,
|
503
|
+
:checksum => Taps::Utils.checksum(encoded_data).to_s
|
504
|
+
}
|
505
|
+
end
|
491
506
|
|
492
507
|
begin
|
493
|
-
content, content_type =
|
494
|
-
|
495
|
-
|
496
|
-
:
|
497
|
-
|
498
|
-
|
499
|
-
:
|
508
|
+
content, content_type = nil
|
509
|
+
d3 = c.time_delta do
|
510
|
+
content, content_type = Taps::Multipart.create do |r|
|
511
|
+
r.attach :name => :encoded_data,
|
512
|
+
:payload => encoded_data,
|
513
|
+
:content_type => 'application/octet-stream'
|
514
|
+
r.attach :name => :json,
|
515
|
+
:payload => data.to_json,
|
516
|
+
:content_type => 'application/json'
|
517
|
+
end
|
500
518
|
end
|
501
519
|
session_resource['push/table'].post(content, http_headers(:content_type => content_type))
|
502
520
|
self.stream_state = stream.to_hash
|
@@ -504,6 +522,8 @@ class Push < Operation
|
|
504
522
|
Taps::Utils.reraise_server_exception(e)
|
505
523
|
end
|
506
524
|
|
525
|
+
c.idle_secs = (d1 + d2 + d3)
|
526
|
+
|
507
527
|
elapsed_time
|
508
528
|
end
|
509
529
|
rescue Taps::CorruptedData => e
|
data/lib/taps/utils.rb
CHANGED
@@ -2,8 +2,10 @@ require 'zlib'
|
|
2
2
|
require 'stringio'
|
3
3
|
require 'time'
|
4
4
|
require 'tempfile'
|
5
|
+
require 'rest_client'
|
5
6
|
|
6
7
|
require 'taps/errors'
|
8
|
+
require 'taps/chunksize'
|
7
9
|
|
8
10
|
module Taps
|
9
11
|
module Utils
|
@@ -93,42 +95,23 @@ Data : #{data}
|
|
93
95
|
end
|
94
96
|
|
95
97
|
def calculate_chunksize(old_chunksize)
|
96
|
-
|
98
|
+
c = Taps::Chunksize.new(old_chunksize)
|
97
99
|
|
98
|
-
retries = 0
|
99
|
-
time_in_db = 0
|
100
100
|
begin
|
101
|
-
|
102
|
-
time_in_db = yield
|
103
|
-
time_in_db = time_in_db.to_f rescue 0
|
101
|
+
c.start_time = Time.now
|
102
|
+
c.time_in_db = yield c
|
104
103
|
rescue Errno::EPIPE, RestClient::RequestFailed, RestClient::RequestTimeout
|
105
|
-
retries += 1
|
106
|
-
raise if retries > 2
|
104
|
+
c.retries += 1
|
105
|
+
raise if c.retries > 2
|
107
106
|
|
108
107
|
# we got disconnected, the chunksize could be too large
|
109
|
-
#
|
110
|
-
|
111
|
-
|
108
|
+
# reset the chunksize based on the number of retries
|
109
|
+
c.reset_chunksize
|
112
110
|
retry
|
113
111
|
end
|
114
112
|
|
115
|
-
|
116
|
-
|
117
|
-
diff = t2 - t1 - time_in_db
|
118
|
-
|
119
|
-
new_chunksize = if retries > 0
|
120
|
-
chunksize
|
121
|
-
elsif diff > 3.0
|
122
|
-
(chunksize / 3).ceil
|
123
|
-
elsif diff > 1.1
|
124
|
-
chunksize - 100
|
125
|
-
elsif diff < 0.8
|
126
|
-
chunksize * 2
|
127
|
-
else
|
128
|
-
chunksize + 100
|
129
|
-
end
|
130
|
-
new_chunksize = 1 if new_chunksize < 1
|
131
|
-
new_chunksize
|
113
|
+
c.end_time = Time.now
|
114
|
+
c.calc_new_chunksize
|
132
115
|
end
|
133
116
|
|
134
117
|
def load_schema(database_url, schema_data)
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/base'
|
2
|
+
require 'taps/utils'
|
3
|
+
|
4
|
+
describe Taps::Chunksize do
|
5
|
+
it "scales chunksize down slowly when the time delta of the block is just over a second" do
|
6
|
+
Time.stubs(:now).returns(10.0).returns(11.5)
|
7
|
+
Taps::Utils.calculate_chunksize(1000) { |c| }.should == 900
|
8
|
+
end
|
9
|
+
|
10
|
+
it "scales chunksize down fast when the time delta of the block is over 3 seconds" do
|
11
|
+
Time.stubs(:now).returns(10.0).returns(15.0)
|
12
|
+
Taps::Utils.calculate_chunksize(3000) { |c| }.should == 1000
|
13
|
+
end
|
14
|
+
|
15
|
+
it "scales up chunksize fast when the time delta of the block is under 0.8 seconds" do
|
16
|
+
Time.stubs(:now).returns(10.0).returns(10.7)
|
17
|
+
Taps::Utils.calculate_chunksize(1000) { |c| }.should == 2000
|
18
|
+
end
|
19
|
+
|
20
|
+
it "scales up chunksize slow when the time delta of the block is between 0.8 and 1.1 seconds" do
|
21
|
+
Time.stubs(:now).returns(10.0).returns(10.8)
|
22
|
+
Taps::Utils.calculate_chunksize(1000) { |c| }.should == 1100
|
23
|
+
|
24
|
+
Time.stubs(:now).returns(10.0).returns(11.1)
|
25
|
+
Taps::Utils.calculate_chunksize(1000) { |c| }.should == 1100
|
26
|
+
end
|
27
|
+
|
28
|
+
it "will reset the chunksize to a small value if we got a broken pipe exception" do
|
29
|
+
Taps::Utils.calculate_chunksize(1000) do |c|
|
30
|
+
raise Errno::EPIPE if c.chunksize == 1000
|
31
|
+
c.chunksize.should == 10
|
32
|
+
end.should == 10
|
33
|
+
end
|
34
|
+
|
35
|
+
it "will reset the chunksize to a small value if we got a broken pipe exception a second time" do
|
36
|
+
Taps::Utils.calculate_chunksize(1000) do |c|
|
37
|
+
raise Errno::EPIPE if c.chunksize == 1000 || c.chunksize == 10
|
38
|
+
c.chunksize.should == 1
|
39
|
+
end.should == 1
|
40
|
+
end
|
41
|
+
end
|
data/spec/cli_spec.rb
CHANGED
@@ -7,4 +7,10 @@ describe Taps::Cli do
|
|
7
7
|
opts = @cli.clientoptparse(:pull)
|
8
8
|
opts[:table_filter].should == "(^mytable1$|^logs$)"
|
9
9
|
end
|
10
|
+
|
11
|
+
it "translates a list of tables to exclude into a regex that can be used in table_filter" do
|
12
|
+
@cli = Taps::Cli.new(["-e", "mytable1,logs", "sqlite://tmp.db", "http://x:y@localhost:5000"])
|
13
|
+
opts = @cli.clientoptparse(:pull)
|
14
|
+
opts[:exclude_tables].should == ['mytable1','logs']
|
15
|
+
end
|
10
16
|
end
|
data/spec/operation_spec.rb
CHANGED
@@ -16,6 +16,16 @@ describe Taps::Operation do
|
|
16
16
|
@op.apply_table_filter({ 'abc' => 1, 'def' => 2 }).should == { 'abc' => 1 }
|
17
17
|
end
|
18
18
|
|
19
|
+
it "returns an array of tables without the exclude_tables tables" do
|
20
|
+
@op = Taps::Operation.new('dummy://localhost', 'http://x:y@localhost:5000', :exclude_tables => ['abc', 'ghi', 'jkl'])
|
21
|
+
@op.apply_table_filter(['abc', 'def', 'ghi', 'jkl', 'mno']).should == ['def', 'mno']
|
22
|
+
end
|
23
|
+
|
24
|
+
it "returns a hash of tables without the exclude_tables tables" do
|
25
|
+
@op = Taps::Operation.new('dummy://localhost', 'http://x:y@localhost:5000', :exclude_tables => ['abc', 'ghi', 'jkl'])
|
26
|
+
@op.apply_table_filter({ 'abc' => 1, 'def' => 2, 'ghi' => 3, 'jkl' => 4, 'mno' => 5 }).should == { 'def' => 2, 'mno' => 5 }
|
27
|
+
end
|
28
|
+
|
19
29
|
it "masks a url's password" do
|
20
30
|
@op.safe_url("mysql://root:password@localhost/mydb").should == "mysql://root:[hidden]@localhost/mydb"
|
21
31
|
end
|
data/spec/utils_spec.rb
CHANGED
@@ -18,37 +18,6 @@ describe Taps::Utils do
|
|
18
18
|
lambda { Taps::Utils.format_data(data, :schema => schema) }.should.raise(Taps::InvalidData)
|
19
19
|
end
|
20
20
|
|
21
|
-
it "scales chunksize down slowly when the time delta of the block is just over a second" do
|
22
|
-
Time.stubs(:now).returns(10.0).returns(11.5)
|
23
|
-
Taps::Utils.calculate_chunksize(1000) { }.should == 900
|
24
|
-
end
|
25
|
-
|
26
|
-
it "scales chunksize down fast when the time delta of the block is over 3 seconds" do
|
27
|
-
Time.stubs(:now).returns(10.0).returns(15.0)
|
28
|
-
Taps::Utils.calculate_chunksize(3000) { }.should == 1000
|
29
|
-
end
|
30
|
-
|
31
|
-
it "scales up chunksize fast when the time delta of the block is under 0.8 seconds" do
|
32
|
-
Time.stubs(:now).returns(10.0).returns(10.7)
|
33
|
-
Taps::Utils.calculate_chunksize(1000) { }.should == 2000
|
34
|
-
end
|
35
|
-
|
36
|
-
it "scales up chunksize slow when the time delta of the block is between 0.8 and 1.1 seconds" do
|
37
|
-
Time.stubs(:now).returns(10.0).returns(10.8)
|
38
|
-
Taps::Utils.calculate_chunksize(1000) { }.should == 1100
|
39
|
-
|
40
|
-
Time.stubs(:now).returns(10.0).returns(11.1)
|
41
|
-
Taps::Utils.calculate_chunksize(1000) { }.should == 1100
|
42
|
-
end
|
43
|
-
|
44
|
-
it "will reset the chunksize to a small value if we got a broken pipe exception" do
|
45
|
-
Taps::Utils.calculate_chunksize(1000) { |c| raise Errno::EPIPE if c == 1000; c.should == 10 }.should == 10
|
46
|
-
end
|
47
|
-
|
48
|
-
it "will reset the chunksize to a small value if we got a broken pipe exception a second time" do
|
49
|
-
Taps::Utils.calculate_chunksize(1000) { |c| raise Errno::EPIPE if c == 1000 || c == 10; c.should == 1 }.should == 1
|
50
|
-
end
|
51
|
-
|
52
21
|
it "returns a list of columns that are text fields if the database is mysql" do
|
53
22
|
@db = mock("db", :url => "mysql://localhost/mydb")
|
54
23
|
@db.stubs(:schema).with(:mytable).returns([
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: taps
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 49
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 3
|
9
|
-
- 15
|
10
|
-
version: 0.3.15
|
9
|
+
- 17
|
10
|
+
version: 0.3.17
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Ricardo Chimal, Jr.
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-
|
18
|
+
date: 2011-02-25 00:00:00 -08:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -26,12 +26,12 @@ dependencies:
|
|
26
26
|
requirements:
|
27
27
|
- - ~>
|
28
28
|
- !ruby/object:Gem::Version
|
29
|
-
hash:
|
29
|
+
hash: 1
|
30
30
|
segments:
|
31
31
|
- 1
|
32
|
-
-
|
33
|
-
-
|
34
|
-
version: 1.
|
32
|
+
- 5
|
33
|
+
- 1
|
34
|
+
version: 1.5.1
|
35
35
|
type: :runtime
|
36
36
|
version_requirements: *id001
|
37
37
|
- !ruby/object:Gem::Dependency
|
@@ -82,12 +82,12 @@ dependencies:
|
|
82
82
|
requirements:
|
83
83
|
- - ~>
|
84
84
|
- !ruby/object:Gem::Version
|
85
|
-
hash:
|
85
|
+
hash: 87
|
86
86
|
segments:
|
87
87
|
- 3
|
88
|
-
-
|
88
|
+
- 20
|
89
89
|
- 0
|
90
|
-
version: 3.
|
90
|
+
version: 3.20.0
|
91
91
|
type: :runtime
|
92
92
|
version_requirements: *id004
|
93
93
|
- !ruby/object:Gem::Dependency
|
@@ -204,6 +204,7 @@ files:
|
|
204
204
|
- bin/schema
|
205
205
|
- bin/schema.cmd
|
206
206
|
- bin/taps
|
207
|
+
- lib/taps/chunksize.rb
|
207
208
|
- lib/taps/cli.rb
|
208
209
|
- lib/taps/config.rb
|
209
210
|
- lib/taps/data_stream.rb
|
@@ -220,6 +221,7 @@ files:
|
|
220
221
|
- lib/taps/version.rb
|
221
222
|
- README.rdoc
|
222
223
|
- spec/base.rb
|
224
|
+
- spec/chunksize_spec.rb
|
223
225
|
- spec/cli_spec.rb
|
224
226
|
- spec/data_stream_spec.rb
|
225
227
|
- spec/operation_spec.rb
|