taps 0.3.24

@@ -0,0 +1,7 @@
+ ---
+ SHA1:
+   metadata.gz: 1bc580dc61808bdb17b0de45c3fa27ce6edc445d
+   data.tar.gz: d3b1faefee6bf8828d20f71c73e809f54ab5ef8d
+ SHA512:
+   metadata.gz: 11057870f0e66186550ec1aceae822a7f3f2907417b134215e44cad8b7defd4fb82bf4c5f7d2721670600e525246cbf5770d24da515e513f3e308f7f39c4d6b8
+   data.tar.gz: bf6c27c267e777ae54f97d51106dfd7d8ee0fcdf8e6d1fc402f9ef50513e418f0d6a6775d24337f18def51b6471a0b61f66da7fa8243b0e31a86bf8e5b3b4729
@@ -0,0 +1,51 @@
+ = Taps -- simple database import/export app
+
+ A simple, database-agnostic import/export app to transfer data to/from a remote database.
+
+ == Usage: Server
+
+ Here's how you start a taps server:
+
+   $ taps server postgres://localdbuser:localdbpass@localhost/dbname httpuser httppassword
+
+ You can also specify an encoding in the database URL:
+
+   $ taps server mysql://localdbuser:localdbpass@localhost/dbname?encoding=latin1 httpuser httppassword
+
+ == Usage: Client
+
+ When you want to pull down a database from a taps server:
+
+   $ taps pull postgres://dbuser:dbpassword@localhost/dbname http://httpuser:httppassword@example.com:5000
+
+ Or when you want to push a local database to a taps server:
+
+   $ taps push postgres://dbuser:dbpassword@localhost/dbname http://httpuser:httppassword@example.com:5000
+
+ Or when you want to transfer only a list of tables:
+
+   $ taps push postgres://dbuser:dbpassword@localhost/dbname http://httpuser:httppassword@example.com:5000 --tables logs,tags
+
+ Or when you want to transfer only tables whose names start with a given prefix:
+
+   $ taps push postgres://dbuser:dbpassword@localhost/dbname http://httpuser:httppassword@example.com:5000 --filter '^log_'
+
+ == Known Issues
+
+ * Foreign key constraints get lost in the schema transfer
+ * Tables without primary keys will be incredibly slow to transfer, because each chunk is fetched with a query that uses a large OFFSET, and those queries get more expensive as the offset grows (see the sketch after this README)
+ * Multiple schemas are currently not supported
+
+ == Meta
+
+ Maintained by Ricardo Chimal, Jr. (ricardo at heroku dot com)
+
+ Written by Ricardo Chimal, Jr. (ricardo at heroku dot com) and Adam Wiggins (adam at heroku dot com)
+
+ Early research and inspiration by Blake Mizerany
+
+ Released under the MIT License: http://www.opensource.org/licenses/mit-license.php
+
+ http://github.com/ricardochimal/taps
+
+ Special thanks to Sequel for making this tool possible: http://sequel.rubyforge.org/
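The offset issue above comes from the way Taps::DataStream pages through a table that has no usable key (see DataStream#fetch_rows later in this diff): each chunk is selected with a LIMIT/OFFSET query, so the database has to skip over every previously transferred row on each fetch. A minimal Sequel sketch of that query shape; the table, column, and sizes are placeholders:

  # Sketch only: the shape of query DataStream issues for an un-keyed table.
  # With a 500_000-row offset the database walks half a million rows before
  # returning the next 1000, and that cost keeps growing as the transfer proceeds.
  ds = db[:logs].order(:created_at).limit(1000, 500_000)
  ds.sql  # => "SELECT * FROM logs ORDER BY created_at LIMIT 1000 OFFSET 500000"
          #    (exact quoting depends on the database adapter)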
@@ -0,0 +1,5 @@
+ ---
+ :build:
+ :major: 0
+ :minor: 3
+ :patch: 24
@@ -0,0 +1,54 @@
+ #!/usr/bin/env ruby
+
+ require 'rubygems'
+ gem 'sequel', '~> 3.20.0'
+
+ $:.unshift File.dirname(__FILE__) + '/../lib'
+
+ require 'taps/schema'
+
+ cmd = ARGV.shift.strip rescue ''
+ database_url = ARGV.shift.strip rescue ''
+
+ def show_usage_and_exit
+   puts <<EOTXT
+ schema console <database_url>
+ schema dump <database_url>
+ schema dump_table <database_url> <table>
+ schema indexes <database_url>
+ schema indexes_individual <database_url>
+ schema reset_db_sequences <database_url>
+ schema load <database_url> <schema_file>
+ schema load_indexes <database_url> <indexes_file>
+ EOTXT
+   exit(1)
+ end
+
+ case cmd
+ when 'dump'
+   puts Taps::Schema.dump(database_url)
+ when 'dump_table'
+   table = ARGV.shift.strip
+   puts Taps::Schema.dump_table(database_url, table)
+ when 'indexes'
+   puts Taps::Schema.indexes(database_url)
+ when 'indexes_individual'
+   puts Taps::Schema.indexes_individual(database_url)
+ when 'load_indexes'
+   filename = ARGV.shift.strip rescue ''
+   indexes = File.read(filename) rescue show_usage_and_exit
+   Taps::Schema.load_indexes(database_url, indexes)
+ when 'load'
+   filename = ARGV.shift.strip rescue ''
+   schema = File.read(filename) rescue show_usage_and_exit
+   Taps::Schema.load(database_url, schema)
+ when 'reset_db_sequences'
+   Taps::Schema.reset_db_sequences(database_url)
+ when 'console'
+   $db = Sequel.connect(database_url)
+   require 'irb'
+   require 'irb/completion'
+   IRB.start
+ else
+   show_usage_and_exit
+ end
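A hypothetical round trip using the subcommands listed in the usage text above; the database URLs and file names are placeholders, and the dump files simply capture whatever Taps::Schema.dump and Taps::Schema.indexes print:

  $ schema dump postgres://user:pass@localhost/source_db > schema_dump
  $ schema indexes postgres://user:pass@localhost/source_db > indexes_dump
  $ schema load postgres://user:pass@localhost/target_db schema_dump
  $ schema load_indexes postgres://user:pass@localhost/target_db indexes_dump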
@@ -0,0 +1,6 @@
+ @ECHO OFF
+ IF NOT "%~f0" == "~f0" GOTO :WinNT
+ @"ruby.exe" "./schema" %1 %2 %3 %4 %5 %6 %7 %8 %9
+ GOTO :EOF
+ :WinNT
+ @"ruby.exe" "%~dpn0" %*
@@ -0,0 +1,6 @@
+ #!/usr/bin/env ruby
+
+ $:.unshift File.dirname(__FILE__) + '/../lib'
+ require 'taps/cli'
+
+ Taps::Cli.new(ARGV.dup).run
@@ -0,0 +1,52 @@
+ require 'taps/errors'
+
+ class Taps::Chunksize
+   attr_accessor :idle_secs, :time_in_db, :start_time, :end_time, :retries
+   attr_reader :chunksize
+
+   def initialize(chunksize)
+     @chunksize = chunksize
+     @idle_secs = 0.0
+     @retries = 0
+   end
+
+   def to_i
+     chunksize
+   end
+
+   def reset_chunksize
+     @chunksize = (retries <= 1) ? 10 : 1
+   end
+
+   def diff
+     end_time - start_time - time_in_db - idle_secs
+   end
+
+   def time_in_db=(t)
+     @time_in_db = t
+     @time_in_db = @time_in_db.to_f rescue 0.0
+   end
+
+   def time_delta
+     t1 = Time.now
+     yield if block_given?
+     t2 = Time.now
+     t2 - t1
+   end
+
+   def calc_new_chunksize
+     new_chunksize = if retries > 0
+       chunksize
+     elsif diff > 3.0
+       (chunksize / 3).ceil
+     elsif diff > 1.1
+       chunksize - 100
+     elsif diff < 0.8
+       chunksize * 2
+     else
+       chunksize + 100
+     end
+     new_chunksize = 1 if new_chunksize < 1
+     new_chunksize
+   end
+ end
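A hypothetical walk through Taps::Chunksize#calc_new_chunksize with made-up timings: diff is the request's wall-clock time minus the time the server spent in the database and any idle time, and the chunk size is nudged up or down depending on where that lands.

  # Illustration only; the timings are invented.
  c = Taps::Chunksize.new(1000)
  c.start_time = Time.now - 2.0   # request started two seconds ago
  c.end_time   = Time.now
  c.time_in_db = 0.5              # time the server reported spending in the database
  c.idle_secs  = 0.0
  c.diff                  # => ~1.5 seconds spent on transfer/encoding overhead
  c.calc_new_chunksize    # => 900; 1.1 < diff <= 3.0 shrinks the chunk by 100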
@@ -0,0 +1,196 @@
+ require 'optparse'
+ require 'tempfile'
+ require 'taps/monkey'
+ require 'taps/config'
+ require 'taps/log'
+ require 'vendor/okjson'
+
+ Taps::Config.taps_database_url = ENV['TAPS_DATABASE_URL'] || begin
+   # this is dirty but it solves a weird problem where the tempfile disappears mid-process
+   require 'sqlite3'
+   $__taps_database = Tempfile.new('taps.db')
+   $__taps_database.open()
+   "sqlite://#{$__taps_database.path}"
+ end
+
+ module Taps
+   class Cli
+     attr_accessor :argv
+
+     def initialize(argv)
+       @argv = argv
+     end
+
+     def run
+       method = (argv.shift || 'help').to_sym
+       if [:pull, :push, :server, :version].include? method
+         send(method)
+       else
+         help
+       end
+     end
+
+     def pull
+       opts = clientoptparse(:pull)
+       Taps.log.level = Logger::DEBUG if opts[:debug]
+       if opts[:resume_filename]
+         clientresumexfer(:pull, opts)
+       else
+         clientxfer(:pull, opts)
+       end
+     end
+
+     def push
+       opts = clientoptparse(:push)
+       Taps.log.level = Logger::DEBUG if opts[:debug]
+       if opts[:resume_filename]
+         clientresumexfer(:push, opts)
+       else
+         clientxfer(:push, opts)
+       end
+     end
+
+     def server
+       opts = serveroptparse
+       Taps.log.level = Logger::DEBUG if opts[:debug]
+       Taps::Config.database_url = opts[:database_url]
+       Taps::Config.login = opts[:login]
+       Taps::Config.password = opts[:password]
+
+       Taps::Config.verify_database_url
+       require 'taps/server'
+       Taps::Server.run!({
+         :port => opts[:port],
+         :environment => :production,
+         :logging => true,
+         :dump_errors => true,
+       })
+     end
+
+     def version
+       puts Taps.version
+     end
+
+     def help
+       puts <<EOHELP
+ Options
+ =======
+ server    Start a taps database import/export server
+ pull      Pull a database from a taps server
+ push      Push a database to a taps server
+ version   Taps version
+
+ Add '-h' to any command to see their usage
+ EOHELP
+     end
+
+     def serveroptparse
+       opts={:port => 5000, :database_url => nil, :login => nil, :password => nil, :debug => false}
+       OptionParser.new do |o|
+         o.banner = "Usage: #{File.basename($0)} server [OPTIONS] <local_database_url> <login> <password>"
+         o.define_head "Start a taps database import/export server"
+
+         o.on("-p", "--port=N", "Server Port") { |v| opts[:port] = v.to_i if v.to_i > 0 }
+         o.on("-d", "--debug", "Enable Debug Messages") { |v| opts[:debug] = true }
+         o.parse!(argv)
+
+         opts[:database_url] = argv.shift
+         opts[:login] = argv.shift
+         opts[:password] = argv.shift
+
+         if opts[:database_url].nil?
+           $stderr.puts "Missing Database URL"
+           puts o
+           exit 1
+         end
+         if opts[:login].nil?
+           $stderr.puts "Missing Login"
+           puts o
+           exit 1
+         end
+         if opts[:password].nil?
+           $stderr.puts "Missing Password"
+           puts o
+           exit 1
+         end
+       end
+       opts
+     end
+
+     def clientoptparse(cmd)
+       opts={:default_chunksize => 1000, :database_url => nil, :remote_url => nil, :debug => false, :resume_filename => nil, :disable_compression => false, :indexes_first => false}
+       OptionParser.new do |o|
+         o.banner = "Usage: #{File.basename($0)} #{cmd} [OPTIONS] <local_database_url> <remote_url>"
+
+         case cmd
+         when :pull
+           o.define_head "Pull a database from a taps server"
+         when :push
+           o.define_head "Push a database to a taps server"
+         end
+
+         o.on("-s", "--skip-schema", "Don't transfer the schema, just data") { |v| opts[:skip_schema] = true }
+         o.on("-i", "--indexes-first", "Transfer indexes first before data") { |v| opts[:indexes_first] = true }
+         o.on("-r", "--resume=file", "Resume a Taps Session from a stored file") { |v| opts[:resume_filename] = v }
+         o.on("-c", "--chunksize=N", "Initial Chunksize") { |v| opts[:default_chunksize] = (v.to_i < 10 ? 10 : v.to_i) }
+         o.on("-g", "--disable-compression", "Disable Compression") { |v| opts[:disable_compression] = true }
+         o.on("-f", "--filter=regex", "Regex Filter for tables") { |v| opts[:table_filter] = v }
+         o.on("-t", "--tables=A,B,C", Array, "Shortcut to filter on a list of tables") do |v|
+           r_tables = v.collect { |t| "^#{t}$" }.join("|")
+           opts[:table_filter] = "(#{r_tables})"
+         end
+         o.on("-e", "--exclude_tables=A,B,C", Array, "Shortcut to exclude a list of tables") { |v| opts[:exclude_tables] = v }
+         o.on("-d", "--debug", "Enable Debug Messages") { |v| opts[:debug] = true }
+         o.parse!(argv)
+
+         opts[:database_url] = argv.shift
+         opts[:remote_url] = argv.shift
+
+         if opts[:database_url].nil?
+           $stderr.puts "Missing Database URL"
+           puts o
+           exit 1
+         end
+         if opts[:remote_url].nil?
+           $stderr.puts "Missing Remote Taps URL"
+           puts o
+           exit 1
+         end
+       end
+
+       opts
+     end
+
+     def clientxfer(method, opts)
+       database_url = opts.delete(:database_url)
+       remote_url = opts.delete(:remote_url)
+
+       Taps::Config.verify_database_url(database_url)
+
+       require 'taps/operation'
+
+       Taps::Operation.factory(method, database_url, remote_url, opts).run
+     end
+
+     def clientresumexfer(method, opts)
+       session = OkJson.decode(File.read(opts.delete(:resume_filename)))
+       session.symbolize_recursively!
+
+       database_url = opts.delete(:database_url)
+       remote_url = opts.delete(:remote_url) || session.delete(:remote_url)
+
+       Taps::Config.verify_database_url(database_url)
+
+       require 'taps/operation'
+
+       newsession = session.merge({
+         :default_chunksize => opts[:default_chunksize],
+         :disable_compression => opts[:disable_compression],
+         :resume => true,
+       })
+
+       Taps::Operation.factory(method, database_url, remote_url, newsession).run
+     end
+
+   end
+ end
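For reference, the -t/--tables shortcut in clientoptparse above is just sugar over --filter: the table list is turned into an anchored alternation regex. A small illustration with placeholder table names:

  # Illustration only: what the --tables handler builds for `--tables logs,tags`.
  tables = %w[logs tags]
  r_tables = tables.collect { |t| "^#{t}$" }.join("|")
  table_filter = "(#{r_tables})"   # => "(^logs$|^tags$)"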
@@ -0,0 +1,32 @@
+ require 'sequel'
+ require 'taps/version'
+
+ Sequel.datetime_class = DateTime
+
+ module Taps
+   def self.exiting=(val)
+     @@exiting = val
+   end
+
+   def self.exiting?
+     (@@exiting ||= false) == true
+   end
+
+   class Config
+     class << self
+       attr_accessor :taps_database_url
+       attr_accessor :login, :password, :database_url, :remote_url
+       attr_accessor :chunksize
+
+       def verify_database_url(db_url=nil)
+         db_url ||= self.database_url
+         db = Sequel.connect(db_url)
+         db.tables
+         db.disconnect
+       rescue Object => e
+         puts "Failed to connect to database:\n #{e.class} -> #{e}"
+         exit 1
+       end
+     end
+   end
+ end
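Note that Taps::Config.verify_database_url is an all-or-nothing startup check: it opens a Sequel connection, lists the tables, and exits the whole process with status 1 if anything fails, so it suits CLI startup rather than library use. A minimal sketch with a placeholder URL:

  # Sketch only: exits the process on failure instead of raising.
  Taps::Config.verify_database_url('postgres://dbuser:dbpassword@localhost/dbname')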
@@ -0,0 +1,343 @@
+ require 'taps/monkey'
+ require 'taps/multipart'
+ require 'taps/utils'
+ require 'taps/log'
+ require 'taps/errors'
+ require 'vendor/okjson'
+
+ module Taps
+
+ class DataStream
+   DEFAULT_CHUNKSIZE = 1000
+
+   attr_reader :db, :state
+
+   def initialize(db, state)
+     @db = db
+     @state = {
+       :offset => 0,
+       :avg_chunksize => 0,
+       :num_chunksize => 0,
+       :total_chunksize => 0,
+     }.merge(state)
+     @state[:chunksize] ||= DEFAULT_CHUNKSIZE
+     @complete = false
+   end
+
+   def log
+     Taps.log
+   end
+
+   def error=(val)
+     state[:error] = val
+   end
+
+   def error
+     state[:error] || false
+   end
+
+   def table_name
+     state[:table_name].to_sym
+   end
+
+   def table_name_sql
+     table_name.identifier
+   end
+
+   def to_hash
+     state.merge(:klass => self.class.to_s)
+   end
+
+   def to_json
+     OkJson.encode(to_hash)
+   end
+
+   def string_columns
+     @string_columns ||= Taps::Utils.incorrect_blobs(db, table_name)
+   end
+
+   def table
+     @table ||= db[table_name_sql]
+   end
+
+   def order_by(name=nil)
+     @order_by ||= begin
+       name ||= table_name
+       Taps::Utils.order_by(db, name)
+     end
+   end
+
+   def increment(row_count)
+     state[:offset] += row_count
+   end
+
+   # keep a record of the average chunksize within the first few hundred thousand records, after chunksize
+   # goes below 100 or maybe if offset is > 1000
+   def fetch_rows
+     state[:chunksize] = fetch_chunksize
+     ds = table.order(*order_by).limit(state[:chunksize], state[:offset])
+     log.debug "DataStream#fetch_rows SQL -> #{ds.sql}"
+     rows = Taps::Utils.format_data(ds.all,
+       :string_columns => string_columns,
+       :schema => db.schema(table_name),
+       :table => table_name
+     )
+     update_chunksize_stats
+     rows
+   end
+
+   def max_chunksize_training
+     20
+   end
+
+   def fetch_chunksize
+     chunksize = state[:chunksize]
+     return chunksize if state[:num_chunksize] < max_chunksize_training
+     return chunksize if state[:avg_chunksize] == 0
+     return chunksize if state[:error]
+     state[:avg_chunksize] > chunksize ? state[:avg_chunksize] : chunksize
+   end
+
+   def update_chunksize_stats
+     return if state[:num_chunksize] >= max_chunksize_training
+     state[:total_chunksize] += state[:chunksize]
+     state[:num_chunksize] += 1
+     state[:avg_chunksize] = state[:total_chunksize] / state[:num_chunksize] rescue state[:chunksize]
+   end
+
+   def encode_rows(rows)
+     Taps::Utils.base64encode(Marshal.dump(rows))
+   end
+
+   def fetch
+     log.debug "DataStream#fetch state -> #{state.inspect}"
+
+     t1 = Time.now
+     rows = fetch_rows
+     encoded_data = encode_rows(rows)
+     t2 = Time.now
+     elapsed_time = t2 - t1
+
+     @complete = rows == { }
+
+     [encoded_data, (@complete ? 0 : rows[:data].size), elapsed_time]
+   end
+
+   def complete?
+     @complete
+   end
+
+   def fetch_remote(resource, headers)
+     params = fetch_from_resource(resource, headers)
+     encoded_data = params[:encoded_data]
+     json = params[:json]
+
+     rows = parse_encoded_data(encoded_data, json[:checksum])
+     @complete = rows == { }
+
+     # update local state
+     state.merge!(json[:state].merge(:chunksize => state[:chunksize]))
+
+     unless @complete
+       import_rows(rows)
+       rows[:data].size
+     else
+       0
+     end
+   end
+
+   # this one is used inside the server process
+   def fetch_remote_in_server(params)
+     json = self.class.parse_json(params[:json])
+     encoded_data = params[:encoded_data]
+
+     rows = parse_encoded_data(encoded_data, json[:checksum])
+     @complete = rows == { }
+
+     unless @complete
+       import_rows(rows)
+       rows[:data].size
+     else
+       0
+     end
+   end
+
+   def fetch_from_resource(resource, headers)
+     res = nil
+     log.debug "DataStream#fetch_from_resource state -> #{state.inspect}"
+     state[:chunksize] = Taps::Utils.calculate_chunksize(state[:chunksize]) do |c|
+       state[:chunksize] = c.to_i
+       res = resource.post({:state => OkJson.encode(self.to_hash)}, headers)
+     end
+
+     begin
+       params = Taps::Multipart.parse(res)
+       params[:json] = self.class.parse_json(params[:json]) if params.has_key?(:json)
+       return params
+     rescue OkJson::Parser
+       raise Taps::CorruptedData.new("Invalid OkJson Received")
+     end
+   end
+
+   def self.parse_json(json)
+     hash = OkJson.decode(json).symbolize_keys
+     hash[:state].symbolize_keys! if hash.has_key?(:state)
+     hash
+   end
+
+   def parse_encoded_data(encoded_data, checksum)
+     raise Taps::CorruptedData.new("Checksum Failed") unless Taps::Utils.valid_data?(encoded_data, checksum)
+
+     begin
+       return Marshal.load(Taps::Utils.base64decode(encoded_data))
+     rescue Object => e
+       unless ENV['NO_DUMP_MARSHAL_ERRORS']
+         puts "Error encountered loading data, wrote the data chunk to dump.#{Process.pid}.dat"
+         File.open("dump.#{Process.pid}.dat", "w") { |f| f.write(encoded_data) }
+       end
+       raise
+     end
+   end
+
+   def import_rows(rows)
+     table.import(rows[:header], rows[:data])
+     state[:offset] += rows[:data].size
+   rescue Exception => ex
+     case ex.message
+     when /integer out of range/ then
+       raise Taps::InvalidData, <<-ERROR, []
+ \nDetected integer data that exceeds the maximum allowable size for an integer type.
+ This generally occurs when importing from SQLite due to the fact that SQLite does
+ not enforce maximum values on integer types.
+       ERROR
+     else raise ex
+     end
+   end
+
+   def verify_stream
+     state[:offset] = table.count
+   end
+
+   def verify_remote_stream(resource, headers)
+     json_raw = resource.post({:state => OkJson.encode(self)}, headers).to_s
+     json = self.class.parse_json(json_raw)
+
+     self.class.new(db, json[:state])
+   end
+
+   def self.factory(db, state)
+     if defined?(Sequel::MySQL) && Sequel::MySQL.respond_to?(:convert_invalid_date_time=)
+       Sequel::MySQL.convert_invalid_date_time = :nil
+     end
+
+     if state.has_key?(:klass)
+       return eval(state[:klass]).new(db, state)
+     end
+
+     if Taps::Utils.single_integer_primary_key(db, state[:table_name].to_sym)
+       DataStreamKeyed.new(db, state)
+     else
+       DataStream.new(db, state)
+     end
+   end
+ end
+
+
+ class DataStreamKeyed < DataStream
+   attr_accessor :buffer
+
+   def initialize(db, state)
+     super(db, state)
+     @state = { :primary_key => order_by(state[:table_name]).first, :filter => 0 }.merge(state)
+     @state[:chunksize] ||= DEFAULT_CHUNKSIZE
+     @buffer = []
+   end
+
+   def primary_key
+     state[:primary_key].to_sym
+   end
+
+   def buffer_limit
+     if state[:last_fetched] and state[:last_fetched] < state[:filter] and self.buffer.size == 0
+       state[:last_fetched]
+     else
+       state[:filter]
+     end
+   end
+
+   def calc_limit(chunksize)
+     # we want to not fetch more than is needed while we're
+     # inside sinatra but locally we can select more than
+     # is strictly needed
+     if defined?(Sinatra)
+       (chunksize * 1.1).ceil
+     else
+       (chunksize * 3).ceil
+     end
+   end
+
+   def load_buffer(chunksize)
+     # make sure BasicObject is not polluted by subsequent requires
+     Sequel::BasicObject.remove_methods!
+
+     num = 0
+     loop do
+       limit = calc_limit(chunksize)
+       # we have to use local variables in order for the virtual row filter to work correctly
+       key = primary_key
+       buf_limit = buffer_limit
+       ds = table.order(*order_by).filter { key.sql_number > buf_limit }.limit(limit)
+       log.debug "DataStreamKeyed#load_buffer SQL -> #{ds.sql}"
+       data = ds.all
+       self.buffer += data
+       num += data.size
+       if data.size > 0
+         # keep a record of the last primary key value in the buffer
+         state[:filter] = self.buffer.last[ primary_key ]
+       end
+
+       break if num >= chunksize or data.size == 0
+     end
+   end
+
+   def fetch_buffered(chunksize)
+     load_buffer(chunksize) if self.buffer.size < chunksize
+     rows = buffer.slice(0, chunksize)
+     state[:last_fetched] = if rows.size > 0
+       rows.last[ primary_key ]
+     else
+       nil
+     end
+     rows
+   end
+
+   def import_rows(rows)
+     table.import(rows[:header], rows[:data])
+   end
+
+   def fetch_rows
+     chunksize = state[:chunksize]
+     Taps::Utils.format_data(fetch_buffered(chunksize) || [],
+       :string_columns => string_columns)
+   end
+
+   def increment(row_count)
+     # pop the rows we just successfully sent off the buffer
+     @buffer.slice!(0, row_count)
+   end
+
+   def verify_stream
+     key = primary_key
+     ds = table.order(*order_by)
+     current_filter = ds.max(key.sql_number)
+
+     # set the current filter to the max of the primary key
+     state[:filter] = current_filter
+     # clear out the last_fetched value so it can restart from scratch
+     state[:last_fetched] = nil
+
+     log.debug "DataStreamKeyed#verify_stream -> state: #{state.inspect}"
+   end
+ end
+
+ end
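A small, hypothetical illustration of how DataStream.factory above picks a stream class; the connection URL and table name are placeholders. Tables with a single integer primary key get DataStreamKeyed, which pages by key value instead of by OFFSET; everything else falls back to the plain offset-based DataStream:

  # Illustration only; assumes a reachable database at the placeholder URL.
  db = Sequel.connect('postgres://dbuser:dbpassword@localhost/dbname')

  stream = Taps::DataStream.factory(db, :table_name => 'logs')
  stream.class    # => Taps::DataStreamKeyed if `logs` has a single integer primary key,
                  #    Taps::DataStream otherwise

  # Either class snapshots its progress as JSON (including :klass), which is what
  # makes resumable transfers possible.
  stream.to_json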