taps-taps 0.3.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.rdoc +51 -0
- data/VERSION.yml +5 -0
- data/bin/schema +54 -0
- data/bin/schema.cmd +6 -0
- data/bin/taps +6 -0
- data/lib/taps/chunksize.rb +52 -0
- data/lib/taps/cli.rb +196 -0
- data/lib/taps/config.rb +32 -0
- data/lib/taps/data_stream.rb +343 -0
- data/lib/taps/db_session.rb +20 -0
- data/lib/taps/errors.rb +15 -0
- data/lib/taps/log.rb +15 -0
- data/lib/taps/monkey.rb +21 -0
- data/lib/taps/multipart.rb +73 -0
- data/lib/taps/operation.rb +577 -0
- data/lib/taps/progress_bar.rb +236 -0
- data/lib/taps/schema.rb +82 -0
- data/lib/taps/server.rb +210 -0
- data/lib/taps/utils.rb +182 -0
- data/lib/taps/version.rb +18 -0
- data/lib/vendor/okjson.rb +555 -0
- data/spec/base.rb +26 -0
- data/spec/chunksize_spec.rb +41 -0
- data/spec/cli_spec.rb +16 -0
- data/spec/data_stream_spec.rb +23 -0
- data/spec/operation_spec.rb +42 -0
- data/spec/server_spec.rb +40 -0
- data/spec/utils_spec.rb +30 -0
- metadata +234 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
---
SHA1:
  metadata.gz: 1bc580dc61808bdb17b0de45c3fa27ce6edc445d
  data.tar.gz: d3b1faefee6bf8828d20f71c73e809f54ab5ef8d
SHA512:
  metadata.gz: 11057870f0e66186550ec1aceae822a7f3f2907417b134215e44cad8b7defd4fb82bf4c5f7d2721670600e525246cbf5770d24da515e513f3e308f7f39c4d6b8
  data.tar.gz: bf6c27c267e777ae54f97d51106dfd7d8ee0fcdf8e6d1fc402f9ef50513e418f0d6a6775d24337f18def51b6471a0b61f66da7fa8243b0e31a86bf8e5b3b4729
data/README.rdoc
ADDED
@@ -0,0 +1,51 @@
= Taps -- simple database import/export app

A simple database agnostic import/export app to transfer data to/from a remote database.

== Usage: Server

Here's how you start a taps server

  $ taps server postgres://localdbuser:localdbpass@localhost/dbname httpuser httppassword

You can also specify an encoding in the database url

  $ taps server mysql://localdbuser:localdbpass@localhost/dbname?encoding=latin1 httpuser httppassword

== Usage: Client

When you want to pull down a database from a taps server

  $ taps pull postgres://dbuser:dbpassword@localhost/dbname http://httpuser:httppassword@example.com:5000

or when you want to push a local database to a taps server

  $ taps push postgres://dbuser:dbpassword@localhost/dbname http://httpuser:httppassword@example.com:5000

or when you want to transfer a list of tables

  $ taps push postgres://dbuser:dbpassword@localhost/dbname http://httpuser:httppassword@example.com:5000 --tables logs,tags

or when you want to transfer tables that start with a word

  $ taps push postgres://dbuser:dbpassword@localhost/dbname http://httpuser:httppassword@example.com:5000 --filter '^log_'

== Known Issues

* Foreign key constraints get lost in the schema transfer
* Tables without primary keys will be incredibly slow to transfer. This is due to it being inefficient having large offset values in queries.
* Multiple schemas are currently not supported

== Meta

Maintained by Ricardo Chimal, Jr. (ricardo at heroku dot com)

Written by Ricardo Chimal, Jr. (ricardo at heroku dot com) and Adam Wiggins (adam at heroku dot com)

Early research and inspiration by Blake Mizerany

Released under the MIT License: http://www.opensource.org/licenses/mit-license.php

http://github.com/ricardochimal/taps

Special Thanks to Sequel for making this tool possible http://sequel.rubyforge.org/
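The second known issue above is worth a concrete illustration. Below is a minimal Sequel sketch, not part of the gem (the logs table, its columns, and the in-memory SQLite connection are invented for illustration), contrasting the offset-based chunking taps must fall back to for tables without a single integer primary key with keyed chunking:

  require 'sequel'

  db = Sequel.sqlite  # in-memory database; assumes the sqlite3 gem is installed

  # Offset-based chunk: the database has to produce and discard 500,000 rows
  # before it can return the 1,000 we asked for, so later chunks keep getting slower.
  puts db[:logs].order(:created_at).limit(1000, 500_000).sql

  # Keyed chunk (what DataStreamKeyed does): the primary key index lets the
  # database seek straight past the rows that were already transferred.
  puts db[:logs].where { id > 500_000 }.order(:id).limit(1000).sql

Each offset query rescans everything it skips, so total transfer time grows roughly quadratically with table size, while the keyed form stays roughly linear.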
data/VERSION.yml
ADDED
data/bin/schema
ADDED
@@ -0,0 +1,54 @@
#!/usr/bin/env ruby

require 'rubygems'
gem 'sequel', '~> 3.20.0'

$:.unshift File.dirname(__FILE__) + '/../lib'

require 'taps/schema'

cmd = ARGV.shift.strip rescue ''
database_url = ARGV.shift.strip rescue ''

def show_usage_and_exit
  puts <<EOTXT
schema console <database_url>
schema dump <database_url>
schema dump_table <database_url> <table>
schema indexes <database_url>
schema indexes_individual <database_url>
schema reset_db_sequences <database_url>
schema load <database_url> <schema_file>
schema load_indexes <database_url> <indexes_file>
EOTXT
  exit(1)
end

case cmd
when 'dump'
  puts Taps::Schema.dump(database_url)
when 'dump_table'
  table = ARGV.shift.strip
  puts Taps::Schema.dump_table(database_url, table)
when 'indexes'
  puts Taps::Schema.indexes(database_url)
when 'indexes_individual'
  puts Taps::Schema.indexes_individual(database_url)
when 'load_indexes'
  filename = ARGV.shift.strip rescue ''
  indexes = File.read(filename) rescue show_usage_and_exit
  Taps::Schema.load_indexes(database_url, indexes)
when 'load'
  filename = ARGV.shift.strip rescue ''
  schema = File.read(filename) rescue show_usage_and_exit
  Taps::Schema.load(database_url, schema)
when 'reset_db_sequences'
  Taps::Schema.reset_db_sequences(database_url)
when 'console'
  $db = Sequel.connect(database_url)
  require 'irb'
  require 'irb/completion'
  IRB.start
else
  show_usage_and_exit
end
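As a usage sketch, the same operations the script dispatches to can also be called directly from Ruby; the database URL below is a placeholder:

  require 'taps/schema'

  url = 'postgres://localdbuser:localdbpass@localhost/dbname'
  puts Taps::Schema.dump(url)      # equivalent to: schema dump <database_url>
  puts Taps::Schema.indexes(url)   # equivalent to: schema indexes <database_url>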
data/bin/schema.cmd
ADDED
data/bin/taps
ADDED
data/lib/taps/chunksize.rb
ADDED
@@ -0,0 +1,52 @@
require 'taps/errors'

class Taps::Chunksize
  attr_accessor :idle_secs, :time_in_db, :start_time, :end_time, :retries
  attr_reader :chunksize

  def initialize(chunksize)
    @chunksize = chunksize
    @idle_secs = 0.0
    @retries = 0
  end

  def to_i
    chunksize
  end

  def reset_chunksize
    @chunksize = (retries <= 1) ? 10 : 1
  end

  def diff
    end_time - start_time - time_in_db - idle_secs
  end

  def time_in_db=(t)
    @time_in_db = t
    @time_in_db = @time_in_db.to_f rescue 0.0
  end

  def time_delta
    t1 = Time.now
    yield if block_given?
    t2 = Time.now
    t2 - t1
  end

  def calc_new_chunksize
    new_chunksize = if retries > 0
      chunksize
    elsif diff > 3.0
      (chunksize / 3).ceil
    elsif diff > 1.1
      chunksize - 100
    elsif diff < 0.8
      chunksize * 2
    else
      chunksize + 100
    end
    new_chunksize = 1 if new_chunksize < 1
    new_chunksize
  end
end
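A small sketch, not part of the gem, of how the adaptive chunk sizing behaves; the timing values are invented. A round trip that spends much longer outside the database than expected shrinks the next chunk, while a fast one grows it:

  require 'taps/chunksize'

  c = Taps::Chunksize.new(1000)
  c.start_time = Time.now
  c.time_in_db = 0.5                 # portion of the round trip spent in the database
  c.end_time   = c.start_time + 5.0  # whole round trip took 5 seconds

  c.diff                # => 4.5 (round trip minus db and idle time)
  c.calc_new_chunksize  # => 333, i.e. (1000 / 3).ceil because diff > 3.0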
data/lib/taps/cli.rb
ADDED
@@ -0,0 +1,196 @@
require 'optparse'
require 'tempfile'
require 'taps/monkey'
require 'taps/config'
require 'taps/log'
require 'vendor/okjson'

Taps::Config.taps_database_url = ENV['TAPS_DATABASE_URL'] || begin
  # this is dirty but it solves a weird problem where the tempfile disappears mid-process
  require 'sqlite3'
  $__taps_database = Tempfile.new('taps.db')
  $__taps_database.open()
  "sqlite://#{$__taps_database.path}"
end

module Taps
  class Cli
    attr_accessor :argv

    def initialize(argv)
      @argv = argv
    end

    def run
      method = (argv.shift || 'help').to_sym
      if [:pull, :push, :server, :version].include? method
        send(method)
      else
        help
      end
    end

    def pull
      opts = clientoptparse(:pull)
      Taps.log.level = Logger::DEBUG if opts[:debug]
      if opts[:resume_filename]
        clientresumexfer(:pull, opts)
      else
        clientxfer(:pull, opts)
      end
    end

    def push
      opts = clientoptparse(:push)
      Taps.log.level = Logger::DEBUG if opts[:debug]
      if opts[:resume_filename]
        clientresumexfer(:push, opts)
      else
        clientxfer(:push, opts)
      end
    end

    def server
      opts = serveroptparse
      Taps.log.level = Logger::DEBUG if opts[:debug]
      Taps::Config.database_url = opts[:database_url]
      Taps::Config.login = opts[:login]
      Taps::Config.password = opts[:password]

      Taps::Config.verify_database_url
      require 'taps/server'
      Taps::Server.run!({
        :port => opts[:port],
        :environment => :production,
        :logging => true,
        :dump_errors => true,
      })
    end

    def version
      puts Taps.version
    end

    def help
      puts <<EOHELP
Options
=======
server    Start a taps database import/export server
pull      Pull a database from a taps server
push      Push a database to a taps server
version   Taps version

Add '-h' to any command to see their usage
EOHELP
    end

    def serveroptparse
      opts={:port => 5000, :database_url => nil, :login => nil, :password => nil, :debug => false}
      OptionParser.new do |o|
        o.banner = "Usage: #{File.basename($0)} server [OPTIONS] <local_database_url> <login> <password>"
        o.define_head "Start a taps database import/export server"

        o.on("-p", "--port=N", "Server Port") { |v| opts[:port] = v.to_i if v.to_i > 0 }
        o.on("-d", "--debug", "Enable Debug Messages") { |v| opts[:debug] = true }
        o.parse!(argv)

        opts[:database_url] = argv.shift
        opts[:login] = argv.shift
        opts[:password] = argv.shift

        if opts[:database_url].nil?
          $stderr.puts "Missing Database URL"
          puts o
          exit 1
        end
        if opts[:login].nil?
          $stderr.puts "Missing Login"
          puts o
          exit 1
        end
        if opts[:password].nil?
          $stderr.puts "Missing Password"
          puts o
          exit 1
        end
      end
      opts
    end

    def clientoptparse(cmd)
      opts={:default_chunksize => 1000, :database_url => nil, :remote_url => nil, :debug => false, :resume_filename => nil, :disable_compresion => false, :indexes_first => false}
      OptionParser.new do |o|
        o.banner = "Usage: #{File.basename($0)} #{cmd} [OPTIONS] <local_database_url> <remote_url>"

        case cmd
        when :pull
          o.define_head "Pull a database from a taps server"
        when :push
          o.define_head "Push a database to a taps server"
        end

        o.on("-s", "--skip-schema", "Don't transfer the schema, just data") { |v| opts[:skip_schema] = true }
        o.on("-i", "--indexes-first", "Transfer indexes first before data") { |v| opts[:indexes_first] = true }
        o.on("-r", "--resume=file", "Resume a Taps Session from a stored file") { |v| opts[:resume_filename] = v }
        o.on("-c", "--chunksize=N", "Initial Chunksize") { |v| opts[:default_chunksize] = (v.to_i < 10 ? 10 : v.to_i) }
        o.on("-g", "--disable-compression", "Disable Compression") { |v| opts[:disable_compression] = true }
        o.on("-f", "--filter=regex", "Regex Filter for tables") { |v| opts[:table_filter] = v }
        o.on("-t", "--tables=A,B,C", Array, "Shortcut to filter on a list of tables") do |v|
          r_tables = v.collect { |t| "^#{t}$" }.join("|")
          opts[:table_filter] = "(#{r_tables})"
        end
        o.on("-e", "--exclude_tables=A,B,C", Array, "Shortcut to exclude a list of tables") { |v| opts[:exclude_tables] = v }
        o.on("-d", "--debug", "Enable Debug Messages") { |v| opts[:debug] = true }
        o.parse!(argv)

        opts[:database_url] = argv.shift
        opts[:remote_url] = argv.shift

        if opts[:database_url].nil?
          $stderr.puts "Missing Database URL"
          puts o
          exit 1
        end
        if opts[:remote_url].nil?
          $stderr.puts "Missing Remote Taps URL"
          puts o
          exit 1
        end
      end

      opts
    end

    def clientxfer(method, opts)
      database_url = opts.delete(:database_url)
      remote_url = opts.delete(:remote_url)

      Taps::Config.verify_database_url(database_url)

      require 'taps/operation'

      Taps::Operation.factory(method, database_url, remote_url, opts).run
    end

    def clientresumexfer(method, opts)
      session = OkJson.decode(File.read(opts.delete(:resume_filename)))
      session.symbolize_recursively!

      database_url = opts.delete(:database_url)
      remote_url = opts.delete(:remote_url) || session.delete(:remote_url)

      Taps::Config.verify_database_url(database_url)

      require 'taps/operation'

      newsession = session.merge({
        :default_chunksize => opts[:default_chunksize],
        :disable_compression => opts[:disable_compression],
        :resume => true,
      })

      Taps::Operation.factory(method, database_url, remote_url, newsession).run
    end

  end
end
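A sketch of driving the same transfer through Taps::Cli instead of the taps executable; the URLs are placeholders, and the sqlite3 gem is assumed to be available for the scratch database set up above when TAPS_DATABASE_URL is unset:

  require 'taps/cli'

  # Equivalent to: taps pull <local_database_url> <remote_url> --tables logs,tags
  argv = %w[pull postgres://dbuser:dbpassword@localhost/dbname
            http://httpuser:httppassword@example.com:5000 --tables logs,tags]
  Taps::Cli.new(argv).run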
data/lib/taps/config.rb
ADDED
@@ -0,0 +1,32 @@
require 'sequel'
require 'taps/version'

Sequel.datetime_class = DateTime

module Taps
  def self.exiting=(val)
    @@exiting = val
  end

  def exiting?
    (@@exiting ||= false) == true
  end

  class Config
    class << self
      attr_accessor :taps_database_url
      attr_accessor :login, :password, :database_url, :remote_url
      attr_accessor :chunksize

      def verify_database_url(db_url=nil)
        db_url ||= self.database_url
        db = Sequel.connect(db_url)
        db.tables
        db.disconnect
      rescue Object => e
        puts "Failed to connect to database:\n #{e.class} -> #{e}"
        exit 1
      end
    end
  end
end
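A trivial usage sketch (the URL is a placeholder): verify_database_url connects, lists the tables, and disconnects, printing a message and exiting the process if anything fails.

  require 'taps/config'

  Taps::Config.verify_database_url('postgres://localdbuser:localdbpass@localhost/dbname')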
data/lib/taps/data_stream.rb
ADDED
@@ -0,0 +1,343 @@
require 'taps/monkey'
require 'taps/multipart'
require 'taps/utils'
require 'taps/log'
require 'taps/errors'
require 'vendor/okjson'

module Taps

class DataStream
  DEFAULT_CHUNKSIZE = 1000

  attr_reader :db, :state

  def initialize(db, state)
    @db = db
    @state = {
      :offset => 0,
      :avg_chunksize => 0,
      :num_chunksize => 0,
      :total_chunksize => 0,
    }.merge(state)
    @state[:chunksize] ||= DEFAULT_CHUNKSIZE
    @complete = false
  end

  def log
    Taps.log
  end

  def error=(val)
    state[:error] = val
  end

  def error
    state[:error] || false
  end

  def table_name
    state[:table_name].to_sym
  end

  def table_name_sql
    table_name.identifier
  end

  def to_hash
    state.merge(:klass => self.class.to_s)
  end

  def to_json
    OkJson.encode(to_hash)
  end

  def string_columns
    @string_columns ||= Taps::Utils.incorrect_blobs(db, table_name)
  end

  def table
    @table ||= db[table_name_sql]
  end

  def order_by(name=nil)
    @order_by ||= begin
      name ||= table_name
      Taps::Utils.order_by(db, name)
    end
  end

  def increment(row_count)
    state[:offset] += row_count
  end

  # keep a record of the average chunksize within the first few hundred thousand records, after chunksize
  # goes below 100 or maybe if offset is > 1000
  def fetch_rows
    state[:chunksize] = fetch_chunksize
    ds = table.order(*order_by).limit(state[:chunksize], state[:offset])
    log.debug "DataStream#fetch_rows SQL -> #{ds.sql}"
    rows = Taps::Utils.format_data(ds.all,
      :string_columns => string_columns,
      :schema => db.schema(table_name),
      :table => table_name
    )
    update_chunksize_stats
    rows
  end

  def max_chunksize_training
    20
  end

  def fetch_chunksize
    chunksize = state[:chunksize]
    return chunksize if state[:num_chunksize] < max_chunksize_training
    return chunksize if state[:avg_chunksize] == 0
    return chunksize if state[:error]
    state[:avg_chunksize] > chunksize ? state[:avg_chunksize] : chunksize
  end

  def update_chunksize_stats
    return if state[:num_chunksize] >= max_chunksize_training
    state[:total_chunksize] += state[:chunksize]
    state[:num_chunksize] += 1
    state[:avg_chunksize] = state[:total_chunksize] / state[:num_chunksize] rescue state[:chunksize]
  end

  def encode_rows(rows)
    Taps::Utils.base64encode(Marshal.dump(rows))
  end

  def fetch
    log.debug "DataStream#fetch state -> #{state.inspect}"

    t1 = Time.now
    rows = fetch_rows
    encoded_data = encode_rows(rows)
    t2 = Time.now
    elapsed_time = t2 - t1

    @complete = rows == { }

    [encoded_data, (@complete ? 0 : rows[:data].size), elapsed_time]
  end

  def complete?
    @complete
  end

  def fetch_remote(resource, headers)
    params = fetch_from_resource(resource, headers)
    encoded_data = params[:encoded_data]
    json = params[:json]

    rows = parse_encoded_data(encoded_data, json[:checksum])
    @complete = rows == { }

    # update local state
    state.merge!(json[:state].merge(:chunksize => state[:chunksize]))

    unless @complete
      import_rows(rows)
      rows[:data].size
    else
      0
    end
  end

  # this one is used inside the server process
  def fetch_remote_in_server(params)
    json = self.class.parse_json(params[:json])
    encoded_data = params[:encoded_data]

    rows = parse_encoded_data(encoded_data, json[:checksum])
    @complete = rows == { }

    unless @complete
      import_rows(rows)
      rows[:data].size
    else
      0
    end
  end

  def fetch_from_resource(resource, headers)
    res = nil
    log.debug "DataStream#fetch_from_resource state -> #{state.inspect}"
    state[:chunksize] = Taps::Utils.calculate_chunksize(state[:chunksize]) do |c|
      state[:chunksize] = c.to_i
      res = resource.post({:state => OkJson.encode(self.to_hash)}, headers)
    end

    begin
      params = Taps::Multipart.parse(res)
      params[:json] = self.class.parse_json(params[:json]) if params.has_key?(:json)
      return params
    rescue OkJson::Parser
      raise Taps::CorruptedData.new("Invalid OkJson Received")
    end
  end

  def self.parse_json(json)
    hash = OkJson.decode(json).symbolize_keys
    hash[:state].symbolize_keys! if hash.has_key?(:state)
    hash
  end

  def parse_encoded_data(encoded_data, checksum)
    raise Taps::CorruptedData.new("Checksum Failed") unless Taps::Utils.valid_data?(encoded_data, checksum)

    begin
      return Marshal.load(Taps::Utils.base64decode(encoded_data))
    rescue Object => e
      unless ENV['NO_DUMP_MARSHAL_ERRORS']
        puts "Error encountered loading data, wrote the data chunk to dump.#{Process.pid}.dat"
        File.open("dump.#{Process.pid}.dat", "w") { |f| f.write(encoded_data) }
      end
      raise
    end
  end

  def import_rows(rows)
    table.import(rows[:header], rows[:data])
    state[:offset] += rows[:data].size
  rescue Exception => ex
    case ex.message
    when /integer out of range/ then
      raise Taps::InvalidData, <<-ERROR, []
\nDetected integer data that exceeds the maximum allowable size for an integer type.
This generally occurs when importing from SQLite due to the fact that SQLite does
not enforce maximum values on integer types.
      ERROR
    else raise ex
    end
  end

  def verify_stream
    state[:offset] = table.count
  end

  def verify_remote_stream(resource, headers)
    json_raw = resource.post({:state => OkJson.encode(self)}, headers).to_s
    json = self.class.parse_json(json_raw)

    self.class.new(db, json[:state])
  end

  def self.factory(db, state)
    if defined?(Sequel::MySQL) && Sequel::MySQL.respond_to?(:convert_invalid_date_time=)
      Sequel::MySQL.convert_invalid_date_time = :nil
    end

    if state.has_key?(:klass)
      return eval(state[:klass]).new(db, state)
    end

    if Taps::Utils.single_integer_primary_key(db, state[:table_name].to_sym)
      DataStreamKeyed.new(db, state)
    else
      DataStream.new(db, state)
    end
  end
end


class DataStreamKeyed < DataStream
  attr_accessor :buffer

  def initialize(db, state)
    super(db, state)
    @state = { :primary_key => order_by(state[:table_name]).first, :filter => 0 }.merge(state)
    @state[:chunksize] ||= DEFAULT_CHUNKSIZE
    @buffer = []
  end

  def primary_key
    state[:primary_key].to_sym
  end

  def buffer_limit
    if state[:last_fetched] and state[:last_fetched] < state[:filter] and self.buffer.size == 0
      state[:last_fetched]
    else
      state[:filter]
    end
  end

  def calc_limit(chunksize)
    # we want to not fetch more than is needed while we're
    # inside sinatra but locally we can select more than
    # is strictly needed
    if defined?(Sinatra)
      (chunksize * 1.1).ceil
    else
      (chunksize * 3).ceil
    end
  end

  def load_buffer(chunksize)
    # make sure BasicObject is not polluted by subsequent requires
    Sequel::BasicObject.remove_methods!

    num = 0
    loop do
      limit = calc_limit(chunksize)
      # we have to use local variables in order for the virtual row filter to work correctly
      key = primary_key
      buf_limit = buffer_limit
      ds = table.order(*order_by).filter { key.sql_number > buf_limit }.limit(limit)
      log.debug "DataStreamKeyed#load_buffer SQL -> #{ds.sql}"
      data = ds.all
      self.buffer += data
      num += data.size
      if data.size > 0
        # keep a record of the last primary key value in the buffer
        state[:filter] = self.buffer.last[ primary_key ]
      end

      break if num >= chunksize or data.size == 0
    end
  end

  def fetch_buffered(chunksize)
    load_buffer(chunksize) if self.buffer.size < chunksize
    rows = buffer.slice(0, chunksize)
    state[:last_fetched] = if rows.size > 0
      rows.last[ primary_key ]
    else
      nil
    end
    rows
  end

  def import_rows(rows)
    table.import(rows[:header], rows[:data])
  end

  def fetch_rows
    chunksize = state[:chunksize]
    Taps::Utils.format_data(fetch_buffered(chunksize) || [],
      :string_columns => string_columns)
  end

  def increment(row_count)
    # pop the rows we just successfully sent off the buffer
    @buffer.slice!(0, row_count)
  end

  def verify_stream
    key = primary_key
    ds = table.order(*order_by)
    current_filter = ds.max(key.sql_number)

    # set the current filter to the max of the primary key
    state[:filter] = current_filter
    # clear out the last_fetched value so it can restart from scratch
    state[:last_fetched] = nil

    log.debug "DataStreamKeyed#verify_stream -> state: #{state.inspect}"
  end
end

end
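A sketch, not part of the gem, of how a DataStream is typically driven; the URL and table name are placeholders, and the real driver loop lives in taps/operation.rb (not shown here), so the exact call sequence may differ. factory picks DataStreamKeyed when the table has a single integer primary key and the plain offset-based DataStream otherwise:

  require 'sequel'
  require 'taps/data_stream'

  db = Sequel.connect('postgres://localdbuser:localdbpass@localhost/dbname')
  stream = Taps::DataStream.factory(db, :table_name => 'logs', :chunksize => 1000)

  until stream.complete?
    encoded_data, row_count, elapsed_time = stream.fetch
    stream.increment(row_count)   # advance the offset / pop the sent rows off the buffer
    puts "fetched #{row_count} rows in #{elapsed_time.round(2)}s"
  end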