taps-taps 0.3.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 1bc580dc61808bdb17b0de45c3fa27ce6edc445d
4
+ data.tar.gz: d3b1faefee6bf8828d20f71c73e809f54ab5ef8d
5
+ SHA512:
6
+ metadata.gz: 11057870f0e66186550ec1aceae822a7f3f2907417b134215e44cad8b7defd4fb82bf4c5f7d2721670600e525246cbf5770d24da515e513f3e308f7f39c4d6b8
7
+ data.tar.gz: bf6c27c267e777ae54f97d51106dfd7d8ee0fcdf8e6d1fc402f9ef50513e418f0d6a6775d24337f18def51b6471a0b61f66da7fa8243b0e31a86bf8e5b3b4729
@@ -0,0 +1,51 @@
1
+ = Taps -- simple database import/export app
2
+
3
+ A simple database agnostic import/export app to transfer data to/from a remote database.
4
+
5
+ == Usage: Server
6
+
7
+ Here's how you start a taps server
8
+
9
+ $ taps server postgres://localdbuser:localdbpass@localhost/dbname httpuser httppassword
10
+
11
+ You can also specify an encoding in the database URL
12
+
13
+ $ taps server mysql://localdbuser:localdbpass@localhost/dbname?encoding=latin1 httpuser httppassword
14
+
15
+ == Usage: Client
16
+
17
+ When you want to pull down a database from a taps server
18
+
19
+ $ taps pull postgres://dbuser:dbpassword@localhost/dbname http://httpuser:httppassword@example.com:5000
20
+
21
+ or when you want to push a local database to a taps server
22
+
23
+ $ taps push postgres://dbuser:dbpassword@localhost/dbname http://httpuser:httppassword@example.com:5000
24
+
25
+ or when you want to transfer a list of tables
26
+
27
+ $ taps push postgres://dbuser:dbpassword@localhost/dbname http://httpuser:httppassword@example.com:5000 --tables logs,tags
28
+
29
+ or when you want to transfer tables that start with a word
30
+
31
+ $ taps push postgres://dbuser:dbpassword@localhost/dbname http://httpuser:httppassword@example.com:5000 --filter '^log_'
32
+
33
+ == Known Issues
34
+
35
+ * Foreign key constraints get lost in the schema transfer
36
+ * Tables without primary keys will be incredibly slow to transfer. This is because queries with large OFFSET values become increasingly expensive as the offset grows (see the pagination sketch after this README)
37
+ * Multiple schemas are currently not supported
38
+
39
+ == Meta
40
+
41
+ Maintained by Ricardo Chimal, Jr. (ricardo at heroku dot com)
42
+
43
+ Written by Ricardo Chimal, Jr. (ricardo at heroku dot com) and Adam Wiggins (adam at heroku dot com)
44
+
45
+ Early research and inspiration by Blake Mizerany
46
+
47
+ Released under the MIT License: http://www.opensource.org/licenses/mit-license.php
48
+
49
+ http://github.com/ricardochimal/taps
50
+
51
+ Special Thanks to Sequel for making this tool possible http://sequel.rubyforge.org/
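The Known Issues entry about missing primary keys comes down to OFFSET-based paging: each chunk makes the database walk past every row it has already handed out, so later chunks get slower and slower. A minimal Sequel sketch of the two access patterns (illustrative only, not part of the gem; the connection URL, the logs table, and its id/created_at columns are assumptions):

  require 'sequel'

  DB = Sequel.connect('postgres://localdbuser:localdbpass@localhost/dbname')

  # OFFSET paging: the skipped rows are still scanned, so the cost of each
  # chunk grows with the offset. This is the fallback for tables that have
  # no usable primary key.
  slow_chunk = DB[:logs].order(:created_at).limit(1000, 500_000).all

  # Keyed paging (what DataStreamKeyed does when a table has a single integer
  # primary key): seek straight past the last key sent, so cost stays flat.
  last_id = 500_000
  fast_chunk = DB[:logs].where { id > last_id }.order(:id).limit(1000).all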
@@ -0,0 +1,5 @@
1
+ ---
2
+ :build:
3
+ :major: 0
4
+ :minor: 3
5
+ :patch: 24
@@ -0,0 +1,54 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rubygems'
4
+ gem 'sequel', '~> 3.20.0'
5
+
6
+ $:.unshift File.dirname(__FILE__) + '/../lib'
7
+
8
+ require 'taps/schema'
9
+
10
+ cmd = ARGV.shift.strip rescue ''
11
+ database_url = ARGV.shift.strip rescue ''
12
+
13
+ def show_usage_and_exit
14
+ puts <<EOTXT
15
+ schema console <database_url>
16
+ schema dump <database_url>
17
+ schema dump_table <database_url> <table>
18
+ schema indexes <database_url>
19
+ schema indexes_individual <database_url>
20
+ schema reset_db_sequences <database_url>
21
+ schema load <database_url> <schema_file>
22
+ schema load_indexes <database_url> <indexes_file>
23
+ EOTXT
24
+ exit(1)
25
+ end
26
+
27
+ case cmd
28
+ when 'dump'
29
+ puts Taps::Schema.dump(database_url)
30
+ when 'dump_table'
31
+ table = ARGV.shift.strip rescue show_usage_and_exit
32
+ puts Taps::Schema.dump_table(database_url, table)
33
+ when 'indexes'
34
+ puts Taps::Schema.indexes(database_url)
35
+ when 'indexes_individual'
36
+ puts Taps::Schema.indexes_individual(database_url)
37
+ when 'load_indexes'
38
+ filename = ARGV.shift.strip rescue ''
39
+ indexes = File.read(filename) rescue show_usage_and_exit
40
+ Taps::Schema.load_indexes(database_url, indexes)
41
+ when 'load'
42
+ filename = ARGV.shift.strip rescue ''
43
+ schema = File.read(filename) rescue show_usage_and_exit
44
+ Taps::Schema.load(database_url, schema)
45
+ when 'reset_db_sequences'
46
+ Taps::Schema.reset_db_sequences(database_url)
47
+ when 'console'
48
+ $db = Sequel.connect(database_url)
49
+ require 'irb'
50
+ require 'irb/completion'
51
+ IRB.start
52
+ else
53
+ show_usage_and_exit
54
+ end
@@ -0,0 +1,6 @@
1
+ @ECHO OFF
2
+ IF NOT "%~f0" == "~f0" GOTO :WinNT
3
+ @"ruby.exe" "./schema" %1 %2 %3 %4 %5 %6 %7 %8 %9
4
+ GOTO :EOF
5
+ :WinNT
6
+ @"ruby.exe" "%~dpn0" %*
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ $:.unshift File.dirname(__FILE__) + '/../lib'
4
+ require 'taps/cli'
5
+
6
+ Taps::Cli.new(ARGV.dup).run
@@ -0,0 +1,52 @@
1
+ require 'taps/errors'
2
+
3
+ class Taps::Chunksize
4
+ attr_accessor :idle_secs, :time_in_db, :start_time, :end_time, :retries
5
+ attr_reader :chunksize
6
+
7
+ def initialize(chunksize)
8
+ @chunksize = chunksize
9
+ @idle_secs = 0.0
10
+ @retries = 0
11
+ end
12
+
13
+ def to_i
14
+ chunksize
15
+ end
16
+
17
+ def reset_chunksize
18
+ @chunksize = (retries <= 1) ? 10 : 1
19
+ end
20
+
21
+ def diff
22
+ end_time - start_time - time_in_db - idle_secs
23
+ end
24
+
25
+ def time_in_db=(t)
26
+ @time_in_db = t
27
+ @time_in_db = @time_in_db.to_f rescue 0.0
28
+ end
29
+
30
+ def time_delta
31
+ t1 = Time.now
32
+ yield if block_given?
33
+ t2 = Time.now
34
+ t2 - t1
35
+ end
36
+
37
+ def calc_new_chunksize
38
+ new_chunksize = if retries > 0
39
+ chunksize
40
+ elsif diff > 3.0
41
+ (chunksize / 3).ceil
42
+ elsif diff > 1.1
43
+ chunksize - 100
44
+ elsif diff < 0.8
45
+ chunksize * 2
46
+ else
47
+ chunksize + 100
48
+ end
49
+ new_chunksize = 1 if new_chunksize < 1
50
+ new_chunksize
51
+ end
52
+ end
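Taps::Chunksize is the adaptive sizing helper: the caller times one round trip, subtracts the time the other side reports spending in the database, and calc_new_chunksize nudges the next request up or down so a single chunk stays near the roughly 0.8-1.1 second band encoded in the thresholds. A rough usage sketch (editorial, not gem code; in the gem this is driven from Taps::Utils.calculate_chunksize, and the 0.4s database time here is made up):

  require 'taps/chunksize'

  c = Taps::Chunksize.new(1000)
  c.start_time = Time.now
  # ... transfer one chunk of c.to_i rows here ...
  c.time_in_db = 0.4              # time the remote side reports spending in the DB
  c.end_time = Time.now

  next_size = c.calc_new_chunksize   # shrinks if the chunk took too long, grows if it was quick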
@@ -0,0 +1,196 @@
1
+ require 'optparse'
2
+ require 'tempfile'
3
+ require 'taps/monkey'
4
+ require 'taps/config'
5
+ require 'taps/log'
6
+ require 'vendor/okjson'
7
+
8
+ Taps::Config.taps_database_url = ENV['TAPS_DATABASE_URL'] || begin
9
+ # this is dirty but it solves a weird problem where the tempfile disappears mid-process
10
+ require 'sqlite3'
11
+ $__taps_database = Tempfile.new('taps.db')
12
+ $__taps_database.open()
13
+ "sqlite://#{$__taps_database.path}"
14
+ end
15
+
16
+ module Taps
17
+ class Cli
18
+ attr_accessor :argv
19
+
20
+ def initialize(argv)
21
+ @argv = argv
22
+ end
23
+
24
+ def run
25
+ method = (argv.shift || 'help').to_sym
26
+ if [:pull, :push, :server, :version].include? method
27
+ send(method)
28
+ else
29
+ help
30
+ end
31
+ end
32
+
33
+ def pull
34
+ opts = clientoptparse(:pull)
35
+ Taps.log.level = Logger::DEBUG if opts[:debug]
36
+ if opts[:resume_filename]
37
+ clientresumexfer(:pull, opts)
38
+ else
39
+ clientxfer(:pull, opts)
40
+ end
41
+ end
42
+
43
+ def push
44
+ opts = clientoptparse(:push)
45
+ Taps.log.level = Logger::DEBUG if opts[:debug]
46
+ if opts[:resume_filename]
47
+ clientresumexfer(:push, opts)
48
+ else
49
+ clientxfer(:push, opts)
50
+ end
51
+ end
52
+
53
+ def server
54
+ opts = serveroptparse
55
+ Taps.log.level = Logger::DEBUG if opts[:debug]
56
+ Taps::Config.database_url = opts[:database_url]
57
+ Taps::Config.login = opts[:login]
58
+ Taps::Config.password = opts[:password]
59
+
60
+ Taps::Config.verify_database_url
61
+ require 'taps/server'
62
+ Taps::Server.run!({
63
+ :port => opts[:port],
64
+ :environment => :production,
65
+ :logging => true,
66
+ :dump_errors => true,
67
+ })
68
+ end
69
+
70
+ def version
71
+ puts Taps.version
72
+ end
73
+
74
+ def help
75
+ puts <<EOHELP
76
+ Options
77
+ =======
78
+ server Start a taps database import/export server
79
+ pull Pull a database from a taps server
80
+ push Push a database to a taps server
81
+ version Taps version
82
+
83
+ Add '-h' to any command to see its usage
84
+ EOHELP
85
+ end
86
+
87
+ def serveroptparse
88
+ opts={:port => 5000, :database_url => nil, :login => nil, :password => nil, :debug => false}
89
+ OptionParser.new do |o|
90
+ o.banner = "Usage: #{File.basename($0)} server [OPTIONS] <local_database_url> <login> <password>"
91
+ o.define_head "Start a taps database import/export server"
92
+
93
+ o.on("-p", "--port=N", "Server Port") { |v| opts[:port] = v.to_i if v.to_i > 0 }
94
+ o.on("-d", "--debug", "Enable Debug Messages") { |v| opts[:debug] = true }
95
+ o.parse!(argv)
96
+
97
+ opts[:database_url] = argv.shift
98
+ opts[:login] = argv.shift
99
+ opts[:password] = argv.shift
100
+
101
+ if opts[:database_url].nil?
102
+ $stderr.puts "Missing Database URL"
103
+ puts o
104
+ exit 1
105
+ end
106
+ if opts[:login].nil?
107
+ $stderr.puts "Missing Login"
108
+ puts o
109
+ exit 1
110
+ end
111
+ if opts[:password].nil?
112
+ $stderr.puts "Missing Password"
113
+ puts o
114
+ exit 1
115
+ end
116
+ end
117
+ opts
118
+ end
119
+
120
+ def clientoptparse(cmd)
121
+ opts={:default_chunksize => 1000, :database_url => nil, :remote_url => nil, :debug => false, :resume_filename => nil, :disable_compression => false, :indexes_first => false}
122
+ OptionParser.new do |o|
123
+ o.banner = "Usage: #{File.basename($0)} #{cmd} [OPTIONS] <local_database_url> <remote_url>"
124
+
125
+ case cmd
126
+ when :pull
127
+ o.define_head "Pull a database from a taps server"
128
+ when :push
129
+ o.define_head "Push a database to a taps server"
130
+ end
131
+
132
+ o.on("-s", "--skip-schema", "Don't transfer the schema, just data") { |v| opts[:skip_schema] = true }
133
+ o.on("-i", "--indexes-first", "Transfer indexes first before data") { |v| opts[:indexes_first] = true }
134
+ o.on("-r", "--resume=file", "Resume a Taps Session from a stored file") { |v| opts[:resume_filename] = v }
135
+ o.on("-c", "--chunksize=N", "Initial Chunksize") { |v| opts[:default_chunksize] = (v.to_i < 10 ? 10 : v.to_i) }
136
+ o.on("-g", "--disable-compression", "Disable Compression") { |v| opts[:disable_compression] = true }
137
+ o.on("-f", "--filter=regex", "Regex Filter for tables") { |v| opts[:table_filter] = v }
138
+ o.on("-t", "--tables=A,B,C", Array, "Shortcut to filter on a list of tables") do |v|
139
+ r_tables = v.collect { |t| "^#{t}$" }.join("|")
140
+ opts[:table_filter] = "(#{r_tables})"
141
+ end
142
+ o.on("-e", "--exclude_tables=A,B,C", Array, "Shortcut to exclude a list of tables") { |v| opts[:exclude_tables] = v }
143
+ o.on("-d", "--debug", "Enable Debug Messages") { |v| opts[:debug] = true }
144
+ o.parse!(argv)
145
+
146
+ opts[:database_url] = argv.shift
147
+ opts[:remote_url] = argv.shift
148
+
149
+ if opts[:database_url].nil?
150
+ $stderr.puts "Missing Database URL"
151
+ puts o
152
+ exit 1
153
+ end
154
+ if opts[:remote_url].nil?
155
+ $stderr.puts "Missing Remote Taps URL"
156
+ puts o
157
+ exit 1
158
+ end
159
+ end
160
+
161
+ opts
162
+ end
163
+
164
+ def clientxfer(method, opts)
165
+ database_url = opts.delete(:database_url)
166
+ remote_url = opts.delete(:remote_url)
167
+
168
+ Taps::Config.verify_database_url(database_url)
169
+
170
+ require 'taps/operation'
171
+
172
+ Taps::Operation.factory(method, database_url, remote_url, opts).run
173
+ end
174
+
175
+ def clientresumexfer(method, opts)
176
+ session = OkJson.decode(File.read(opts.delete(:resume_filename)))
177
+ session.symbolize_recursively!
178
+
179
+ database_url = opts.delete(:database_url)
180
+ remote_url = opts.delete(:remote_url) || session.delete(:remote_url)
181
+
182
+ Taps::Config.verify_database_url(database_url)
183
+
184
+ require 'taps/operation'
185
+
186
+ newsession = session.merge({
187
+ :default_chunksize => opts[:default_chunksize],
188
+ :disable_compression => opts[:disable_compression],
189
+ :resume => true,
190
+ })
191
+
192
+ Taps::Operation.factory(method, database_url, remote_url, newsession).run
193
+ end
194
+
195
+ end
196
+ end
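One detail worth calling out from clientoptparse: -t/--tables is only sugar over the -f/--filter regex. Each table name is anchored and the alternatives are joined with |, so --tables logs,tags behaves like --filter '(^logs$|^tags$)'. A quick illustration of the value that ends up in opts[:table_filter] (editorial example, not gem code):

  tables = ['logs', 'tags']
  r_tables = tables.collect { |t| "^#{t}$" }.join('|')
  table_filter = "(#{r_tables})"    # => "(^logs$|^tags$)"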
@@ -0,0 +1,32 @@
1
+ require 'sequel'
2
+ require 'taps/version'
3
+
4
+ Sequel.datetime_class = DateTime
5
+
6
+ module Taps
7
+ def self.exiting=(val)
8
+ @@exiting = val
9
+ end
10
+
11
+ def self.exiting?
12
+ (@@exiting ||= false) == true
13
+ end
14
+
15
+ class Config
16
+ class << self
17
+ attr_accessor :taps_database_url
18
+ attr_accessor :login, :password, :database_url, :remote_url
19
+ attr_accessor :chunksize
20
+
21
+ def verify_database_url(db_url=nil)
22
+ db_url ||= self.database_url
23
+ db = Sequel.connect(db_url)
24
+ db.tables
25
+ db.disconnect
26
+ rescue Object => e
27
+ puts "Failed to connect to database:\n #{e.class} -> #{e}"
28
+ exit 1
29
+ end
30
+ end
31
+ end
32
+ end
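Taps::Config.verify_database_url is the fail-fast check both the server and client paths run before doing any work: it opens the URL with Sequel, lists the tables, and exits the process with status 1 if anything raises. A minimal standalone use (sketch only; the URL is an assumption):

  require 'taps/config'

  # Prints "Failed to connect to database: ..." and exits 1 on any error;
  # returns quietly when Sequel can connect and list tables.
  Taps::Config.verify_database_url('postgres://dbuser:dbpassword@localhost/dbname')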
@@ -0,0 +1,343 @@
1
+ require 'taps/monkey'
2
+ require 'taps/multipart'
3
+ require 'taps/utils'
4
+ require 'taps/log'
5
+ require 'taps/errors'
6
+ require 'vendor/okjson'
7
+
8
+ module Taps
9
+
10
+ class DataStream
11
+ DEFAULT_CHUNKSIZE = 1000
12
+
13
+ attr_reader :db, :state
14
+
15
+ def initialize(db, state)
16
+ @db = db
17
+ @state = {
18
+ :offset => 0,
19
+ :avg_chunksize => 0,
20
+ :num_chunksize => 0,
21
+ :total_chunksize => 0,
22
+ }.merge(state)
23
+ @state[:chunksize] ||= DEFAULT_CHUNKSIZE
24
+ @complete = false
25
+ end
26
+
27
+ def log
28
+ Taps.log
29
+ end
30
+
31
+ def error=(val)
32
+ state[:error] = val
33
+ end
34
+
35
+ def error
36
+ state[:error] || false
37
+ end
38
+
39
+ def table_name
40
+ state[:table_name].to_sym
41
+ end
42
+
43
+ def table_name_sql
44
+ table_name.identifier
45
+ end
46
+
47
+ def to_hash
48
+ state.merge(:klass => self.class.to_s)
49
+ end
50
+
51
+ def to_json
52
+ OkJson.encode(to_hash)
53
+ end
54
+
55
+ def string_columns
56
+ @string_columns ||= Taps::Utils.incorrect_blobs(db, table_name)
57
+ end
58
+
59
+ def table
60
+ @table ||= db[table_name_sql]
61
+ end
62
+
63
+ def order_by(name=nil)
64
+ @order_by ||= begin
65
+ name ||= table_name
66
+ Taps::Utils.order_by(db, name)
67
+ end
68
+ end
69
+
70
+ def increment(row_count)
71
+ state[:offset] += row_count
72
+ end
73
+
74
+ # keep a record of the average chunksize within the first few hundred thousand records, after chunksize
75
+ # goes below 100 or maybe if offset is > 1000
76
+ def fetch_rows
77
+ state[:chunksize] = fetch_chunksize
78
+ ds = table.order(*order_by).limit(state[:chunksize], state[:offset])
79
+ log.debug "DataStream#fetch_rows SQL -> #{ds.sql}"
80
+ rows = Taps::Utils.format_data(ds.all,
81
+ :string_columns => string_columns,
82
+ :schema => db.schema(table_name),
83
+ :table => table_name
84
+ )
85
+ update_chunksize_stats
86
+ rows
87
+ end
88
+
89
+ def max_chunksize_training
90
+ 20
91
+ end
92
+
93
+ def fetch_chunksize
94
+ chunksize = state[:chunksize]
95
+ return chunksize if state[:num_chunksize] < max_chunksize_training
96
+ return chunksize if state[:avg_chunksize] == 0
97
+ return chunksize if state[:error]
98
+ state[:avg_chunksize] > chunksize ? state[:avg_chunksize] : chunksize
99
+ end
100
+
101
+ def update_chunksize_stats
102
+ return if state[:num_chunksize] >= max_chunksize_training
103
+ state[:total_chunksize] += state[:chunksize]
104
+ state[:num_chunksize] += 1
105
+ state[:avg_chunksize] = state[:total_chunksize] / state[:num_chunksize] rescue state[:chunksize]
106
+ end
107
+
108
+ def encode_rows(rows)
109
+ Taps::Utils.base64encode(Marshal.dump(rows))
110
+ end
111
+
112
+ def fetch
113
+ log.debug "DataStream#fetch state -> #{state.inspect}"
114
+
115
+ t1 = Time.now
116
+ rows = fetch_rows
117
+ encoded_data = encode_rows(rows)
118
+ t2 = Time.now
119
+ elapsed_time = t2 - t1
120
+
121
+ @complete = rows == { }
122
+
123
+ [encoded_data, (@complete ? 0 : rows[:data].size), elapsed_time]
124
+ end
125
+
126
+ def complete?
127
+ @complete
128
+ end
129
+
130
+ def fetch_remote(resource, headers)
131
+ params = fetch_from_resource(resource, headers)
132
+ encoded_data = params[:encoded_data]
133
+ json = params[:json]
134
+
135
+ rows = parse_encoded_data(encoded_data, json[:checksum])
136
+ @complete = rows == { }
137
+
138
+ # update local state
139
+ state.merge!(json[:state].merge(:chunksize => state[:chunksize]))
140
+
141
+ unless @complete
142
+ import_rows(rows)
143
+ rows[:data].size
144
+ else
145
+ 0
146
+ end
147
+ end
148
+
149
+ # this one is used inside the server process
150
+ def fetch_remote_in_server(params)
151
+ json = self.class.parse_json(params[:json])
152
+ encoded_data = params[:encoded_data]
153
+
154
+ rows = parse_encoded_data(encoded_data, json[:checksum])
155
+ @complete = rows == { }
156
+
157
+ unless @complete
158
+ import_rows(rows)
159
+ rows[:data].size
160
+ else
161
+ 0
162
+ end
163
+ end
164
+
165
+ def fetch_from_resource(resource, headers)
166
+ res = nil
167
+ log.debug "DataStream#fetch_from_resource state -> #{state.inspect}"
168
+ state[:chunksize] = Taps::Utils.calculate_chunksize(state[:chunksize]) do |c|
169
+ state[:chunksize] = c.to_i
170
+ res = resource.post({:state => OkJson.encode(self.to_hash)}, headers)
171
+ end
172
+
173
+ begin
174
+ params = Taps::Multipart.parse(res)
175
+ params[:json] = self.class.parse_json(params[:json]) if params.has_key?(:json)
176
+ return params
177
+ rescue OkJson::Parser
178
+ raise Taps::CorruptedData.new("Invalid OkJson Received")
179
+ end
180
+ end
181
+
182
+ def self.parse_json(json)
183
+ hash = OkJson.decode(json).symbolize_keys
184
+ hash[:state].symbolize_keys! if hash.has_key?(:state)
185
+ hash
186
+ end
187
+
188
+ def parse_encoded_data(encoded_data, checksum)
189
+ raise Taps::CorruptedData.new("Checksum Failed") unless Taps::Utils.valid_data?(encoded_data, checksum)
190
+
191
+ begin
192
+ return Marshal.load(Taps::Utils.base64decode(encoded_data))
193
+ rescue Object => e
194
+ unless ENV['NO_DUMP_MARSHAL_ERRORS']
195
+ puts "Error encountered loading data, wrote the data chunk to dump.#{Process.pid}.dat"
196
+ File.open("dump.#{Process.pid}.dat", "w") { |f| f.write(encoded_data) }
197
+ end
198
+ raise
199
+ end
200
+ end
201
+
202
+ def import_rows(rows)
203
+ table.import(rows[:header], rows[:data])
204
+ state[:offset] += rows[:data].size
205
+ rescue Exception => ex
206
+ case ex.message
207
+ when /integer out of range/ then
208
+ raise Taps::InvalidData, <<-ERROR, []
209
+ \nDetected integer data that exceeds the maximum allowable size for an integer type.
210
+ This generally occurs when importing from SQLite due to the fact that SQLite does
211
+ not enforce maximum values on integer types.
212
+ ERROR
213
+ else raise ex
214
+ end
215
+ end
216
+
217
+ def verify_stream
218
+ state[:offset] = table.count
219
+ end
220
+
221
+ def verify_remote_stream(resource, headers)
222
+ json_raw = resource.post({:state => OkJson.encode(self)}, headers).to_s
223
+ json = self.class.parse_json(json_raw)
224
+
225
+ self.class.new(db, json[:state])
226
+ end
227
+
228
+ def self.factory(db, state)
229
+ if defined?(Sequel::MySQL) && Sequel::MySQL.respond_to?(:convert_invalid_date_time=)
230
+ Sequel::MySQL.convert_invalid_date_time = :nil
231
+ end
232
+
233
+ if state.has_key?(:klass)
234
+ return eval(state[:klass]).new(db, state)
235
+ end
236
+
237
+ if Taps::Utils.single_integer_primary_key(db, state[:table_name].to_sym)
238
+ DataStreamKeyed.new(db, state)
239
+ else
240
+ DataStream.new(db, state)
241
+ end
242
+ end
243
+ end
244
+
245
+
246
+ class DataStreamKeyed < DataStream
247
+ attr_accessor :buffer
248
+
249
+ def initialize(db, state)
250
+ super(db, state)
251
+ @state = { :primary_key => order_by(state[:table_name]).first, :filter => 0 }.merge(state)
252
+ @state[:chunksize] ||= DEFAULT_CHUNKSIZE
253
+ @buffer = []
254
+ end
255
+
256
+ def primary_key
257
+ state[:primary_key].to_sym
258
+ end
259
+
260
+ def buffer_limit
261
+ if state[:last_fetched] and state[:last_fetched] < state[:filter] and self.buffer.size == 0
262
+ state[:last_fetched]
263
+ else
264
+ state[:filter]
265
+ end
266
+ end
267
+
268
+ def calc_limit(chunksize)
269
+ # we want to not fetch more than is needed while we're
270
+ # inside sinatra but locally we can select more than
271
+ # is strictly needed
272
+ if defined?(Sinatra)
273
+ (chunksize * 1.1).ceil
274
+ else
275
+ (chunksize * 3).ceil
276
+ end
277
+ end
278
+
279
+ def load_buffer(chunksize)
280
+ # make sure BasicObject is not polluted by subsequent requires
281
+ Sequel::BasicObject.remove_methods!
282
+
283
+ num = 0
284
+ loop do
285
+ limit = calc_limit(chunksize)
286
+ # we have to use local variables in order for the virtual row filter to work correctly
287
+ key = primary_key
288
+ buf_limit = buffer_limit
289
+ ds = table.order(*order_by).filter { key.sql_number > buf_limit }.limit(limit)
290
+ log.debug "DataStreamKeyed#load_buffer SQL -> #{ds.sql}"
291
+ data = ds.all
292
+ self.buffer += data
293
+ num += data.size
294
+ if data.size > 0
295
+ # keep a record of the last primary key value in the buffer
296
+ state[:filter] = self.buffer.last[ primary_key ]
297
+ end
298
+
299
+ break if num >= chunksize or data.size == 0
300
+ end
301
+ end
302
+
303
+ def fetch_buffered(chunksize)
304
+ load_buffer(chunksize) if self.buffer.size < chunksize
305
+ rows = buffer.slice(0, chunksize)
306
+ state[:last_fetched] = if rows.size > 0
307
+ rows.last[ primary_key ]
308
+ else
309
+ nil
310
+ end
311
+ rows
312
+ end
313
+
314
+ def import_rows(rows)
315
+ table.import(rows[:header], rows[:data])
316
+ end
317
+
318
+ def fetch_rows
319
+ chunksize = state[:chunksize]
320
+ Taps::Utils.format_data(fetch_buffered(chunksize) || [],
321
+ :string_columns => string_columns)
322
+ end
323
+
324
+ def increment(row_count)
325
+ # pop the rows we just successfully sent off the buffer
326
+ @buffer.slice!(0, row_count)
327
+ end
328
+
329
+ def verify_stream
330
+ key = primary_key
331
+ ds = table.order(*order_by)
332
+ current_filter = ds.max(key.sql_number)
333
+
334
+ # set the current filter to the max of the primary key
335
+ state[:filter] = current_filter
336
+ # clear out the last_fetched value so it can restart from scratch
337
+ state[:last_fetched] = nil
338
+
339
+ log.debug "DataStreamKeyed#verify_stream -> state: #{state.inspect}"
340
+ end
341
+ end
342
+
343
+ end
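The loop that actually drives these streams lives in taps/operation, which is not part of this diff. As a rough sketch of the local (push-side) half only, assuming a readable source database and ignoring the HTTP transport entirely: DataStream.factory picks DataStreamKeyed when the table has a single integer primary key, and fetch/increment repeat until complete? flips.

  require 'sequel'
  require 'taps/data_stream'

  db = Sequel.connect('sqlite://source.db')   # assumed local source database
  stream = Taps::DataStream.factory(db, :table_name => 'logs', :chunksize => 1000)

  until stream.complete?
    encoded, row_count, elapsed = stream.fetch   # Marshal'd, base64'd chunk of rows
    # in the gem, taps/operation posts `encoded` to the server at this point;
    # once a chunk is accepted, advance past the rows that were sent
    stream.increment(row_count) if row_count > 0
  end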