taps 0.2.26 → 0.3.0

data/README.rdoc CHANGED
@@ -22,11 +22,15 @@ or when you want to push a local database to a taps server
 
   $ taps push postgres://dbuser:dbpassword@localhost/dbname http://httpuser:httppassword@example.com:5000
 
+or when you want to transfer only one table
+
+  $ taps push postgres://dbuser:dbpassword@localhost/dbname http://httpuser:httppassword@example.com:5000 --filter mytable
+
 == Known Issues
 
-* Blob data may not transfer properly, I suspect that SQLite3 is modifying some native ruby objects.
 * Foreign Keys get lost in the schema transfer
-* Large tables (>1 million rows with a large number of columns) get slower as the offset gets larger. This is due to it being inefficient having large offset values in queries.
+* Tables without primary keys will be incredibly slow to transfer. This is because queries with large OFFSET values are inefficient.
+* Multiple schemas are currently not supported
 
 == Meta
 
@@ -39,3 +43,5 @@ Early research and inspiration by Blake Mizerany
 Released under the MIT License: http://www.opensource.org/licenses/mit-license.php
 
 http://github.com/ricardochimal/taps
+
+Special Thanks to Sequel for making this tool possible http://sequel.rubyforge.org/
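The `--filter` option shown above is parsed as a regular expression (see the new `-f, --filter=regex` option in lib/taps/cli.rb below), so `mytable` is simply a pattern that happens to match one table. As an illustrative variant, with the README's placeholder URLs, a pattern can select several tables at once:

  $ taps push postgres://dbuser:dbpassword@localhost/dbname http://httpuser:httppassword@example.com:5000 --filter '^(users|orders)$'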
data/Rakefile CHANGED
@@ -6,17 +6,18 @@ begin
     s.email = "ricardo@heroku.com"
     s.homepage = "http://github.com/ricardochimal/taps"
     s.description = "A simple database agnostic import/export app to transfer data to/from a remote database."
-    s.authors = ["Ricardo Chimal, Jr.", "Adam Wiggins"]
+    s.authors = ["Ricardo Chimal, Jr."]
 
-    s.add_dependency 'sinatra', '= 0.9.2'
-    s.add_dependency 'activerecord', '= 2.2.2'
-    s.add_dependency 'thor', '= 0.9.9'
-    s.add_dependency 'rest-client', '~> 1.3.0'
-    s.add_dependency 'sequel', '>= 3.0.0', '< 3.1.0'
+    s.rubygems_version = %q{1.3.5}
+
+    s.add_dependency 'json_pure', '~> 1.2.0'
+    s.add_dependency 'sinatra', '~> 1.0.0'
+    s.add_dependency 'rest-client', '~> 1.4.0'
+    s.add_dependency 'sequel', '~> 3.10.0'
     s.add_dependency 'sqlite3-ruby', '~> 1.2.0'
+    s.add_dependency 'rack', '>= 1.0.1'
 
     s.rubyforge_project = "taps"
-    s.rubygems_version = '1.3.1'
 
     s.files = FileList['spec/*.rb'] + FileList['lib/**/*.rb'] + ['README.rdoc', 'LICENSE', 'VERSION.yml', 'Rakefile'] + FileList['bin/*']
     s.executables = ['taps', 'schema']
@@ -29,13 +30,17 @@ rescue LoadError => e
   end
 end
 
-require 'rake/rdoctask'
-Rake::RDocTask.new do |rdoc|
-  rdoc.rdoc_dir = 'rdoc'
-  rdoc.title = 'taps'
-  rdoc.options << '--line-numbers' << '--inline-source'
-  rdoc.rdoc_files.include('README*')
-  rdoc.rdoc_files.include('lib/**/*.rb')
+begin
+  require 'rake/rdoctask'
+  Rake::RDocTask.new do |rdoc|
+    rdoc.rdoc_dir = 'rdoc'
+    rdoc.title = 'taps'
+    rdoc.options << '--line-numbers' << '--inline-source'
+    rdoc.rdoc_files.include('README*')
+    rdoc.rdoc_files.include('lib/**/*.rb')
+  end
+rescue LoadError
+  puts "Rdoc is not available"
 end
 
 begin
@@ -46,7 +51,7 @@ begin
     t.verbose = true
   end
 rescue LoadError
-  puts "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
+  puts "RCov is not available. In order to run rcov, you must: sudo gem install rcov"
 end
 
 desc "Run all specs; requires the bacon gem"
data/VERSION.yml CHANGED
@@ -1,5 +1,5 @@
 ---
-:major: 0
-:minor: 2
 :build:
-:patch: 26
+:minor: 3
+:patch: 0
+:major: 0
data/bin/schema CHANGED
@@ -1,9 +1,11 @@
 #!/usr/bin/env ruby
 
 require 'rubygems'
-gem 'activerecord', '= 2.2.2'
+gem 'sequel', '~> 3.10.0'
 
-require File.dirname(__FILE__) + '/../lib/taps/schema'
+$:.unshift File.dirname(__FILE__) + '/../lib'
+
+require 'taps/schema'
 
 cmd = ARGV.shift.strip rescue ''
 database_url = ARGV.shift.strip rescue ''
@@ -12,7 +14,9 @@ def show_usage_and_exit
   puts <<EOTXT
 schema console <database_url>
 schema dump <database_url>
+schema dump_table <database_url> <table>
 schema indexes <database_url>
+schema indexes_individual <database_url>
 schema reset_db_sequences <database_url>
 schema load <database_url> <schema_file>
 schema load_indexes <database_url> <indexes_file>
@@ -22,9 +26,14 @@ end
 
 case cmd
 when 'dump'
-  puts Taps::Schema.dump_without_indexes(database_url)
+  puts Taps::Schema.dump(database_url)
+when 'dump_table'
+  table = ARGV.shift.strip
+  puts Taps::Schema.dump_table(database_url, table)
 when 'indexes'
   puts Taps::Schema.indexes(database_url)
+when 'indexes_individual'
+  puts Taps::Schema.indexes_individual(database_url)
 when 'load_indexes'
   filename = ARGV.shift.strip rescue ''
   indexes = File.read(filename) rescue show_usage_and_exit
@@ -36,8 +45,7 @@ when 'load'
 when 'reset_db_sequences'
   Taps::Schema.reset_db_sequences(database_url)
 when 'console'
-  Taps::Schema.connection(database_url)
-  $db = ActiveRecord::Base.connection
+  $db = Sequel.connect(database_url)
   require 'irb'
   require 'irb/completion'
   IRB.start
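For reference, the two new subcommands follow the usage text added above; hypothetical invocations (placeholder URL and table name) look like:

  $ schema dump_table postgres://dbuser:dbpassword@localhost/dbname users
  $ schema indexes_individual postgres://dbuser:dbpassword@localhost/dbname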
data/bin/taps CHANGED
@@ -1,13 +1,6 @@
 #!/usr/bin/env ruby
 
-require 'rubygems'
-gem 'activerecord', '= 2.2.2'
-gem 'thor', '= 0.9.9'
-gem 'rest-client', '~> 1.3.0'
-gem 'sinatra', '= 0.9.2'
-gem 'sequel', '>= 3.0.0', '< 3.1.0'
-gem 'sqlite3-ruby', '~> 1.2.0'
+$:.unshift File.dirname(__FILE__) + '/../lib'
+require 'taps/cli'
 
-require File.dirname(__FILE__) + '/../lib/taps/cli'
-
-Taps::Cli.start
+Taps::Cli.new(ARGV.dup).run
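The slimmed-down executable defers everything to Taps::Cli#run (defined in lib/taps/cli.rb below), which shifts the first argument off argv and dispatches to pull, push, server, or version, printing help otherwise. A minimal sketch of driving it from Ruby instead of the shell, with placeholder URLs:

  require 'taps/cli'

  Taps::Cli.new(['version']).run            # prints Taps.version
  # Taps::Cli.new(['pull', 'sqlite://local.db',
  #                'http://user:pass@example.com:5000']).run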
data/lib/taps/cli.rb CHANGED
@@ -1,63 +1,183 @@
-require 'thor'
+require 'optparse'
 require 'tempfile'
-require File.dirname(__FILE__) + '/config'
+require 'json'
+require 'taps/monkey'
+require 'taps/config'
+require 'taps/log'
 
 Taps::Config.taps_database_url = ENV['TAPS_DATABASE_URL'] || "sqlite://#{Tempfile.new('taps.db').path}"
 
 module Taps
-class Cli < Thor
-  desc "server <local_database_url> <login> <password>", "Start a taps database import/export server"
-  method_options(:port => :numeric)
-  def server(database_url, login, password)
-    Taps::Config.database_url = database_url
-    Taps::Config.login = login
-    Taps::Config.password = password
+class Cli
+  attr_accessor :argv
 
-    port = options[:port] || 5000
+  def initialize(argv)
+    @argv = argv
+  end
 
-    Taps::Config.verify_database_url
+  def run
+    method = (argv.shift || 'help').to_sym
+    if [:pull, :push, :server, :version].include? method
+      send(method)
+    else
+      help
+    end
+  end
 
-    require File.dirname(__FILE__) + '/server'
-    Taps::Server.run!({
-      :port => port,
-      :environment => :production,
-      :logging => true
-    })
+  def pull
+    opts = clientoptparse(:pull)
+    Taps.log.level = Logger::DEBUG if opts[:debug]
+    if opts[:resume_filename]
+      clientresumexfer(:pull, opts)
+    else
+      clientxfer(:pull, opts)
+    end
   end
 
-  desc "pull <local_database_url> <remote_url>", "Pull a database from a taps server"
-  method_options(:chunksize => :numeric)
-  def pull(database_url, remote_url)
-    clientxfer(:cmd_receive, database_url, remote_url)
+  def push
+    opts = clientoptparse(:push)
+    Taps.log.level = Logger::DEBUG if opts[:debug]
+    if opts[:resume_filename]
+      clientresumexfer(:push, opts)
+    else
+      clientxfer(:push, opts)
+    end
   end
 
-  desc "push <local_database_url> <remote_url>", "Push a database to a taps server"
-  method_options(:chunksize => :numeric)
-  def push(database_url, remote_url)
-    clientxfer(:cmd_send, database_url, remote_url)
+  def server
+    opts = serveroptparse
+    Taps.log.level = Logger::DEBUG if opts[:debug]
+    Taps::Config.database_url = opts[:database_url]
+    Taps::Config.login = opts[:login]
+    Taps::Config.password = opts[:password]
+
+    Taps::Config.verify_database_url
+    require 'taps/server'
+    Taps::Server.run!({
+      :port => opts[:port],
+      :environment => :production,
+      :logging => true,
+      :dump_errors => true,
+    })
   end
 
-  desc "version", "Taps version"
   def version
     puts Taps.version
   end
 
-  def clientxfer(method, database_url, remote_url)
-    if options[:chunksize]
-      Taps::Config.chunksize = options[:chunksize] < 100 ? 100 : options[:chunksize]
-    else
-      Taps::Config.chunksize = 1000
+  def help
+    puts <<EOHELP
+Options
+=======
+server    Start a taps database import/export server
+pull      Pull a database from a taps server
+push      Push a database to a taps server
+version   Taps version
+
+Add '-h' to any command to see their usage
+EOHELP
+  end
+
+  def serveroptparse
+    opts={:port => 5000, :database_url => nil, :login => nil, :password => nil, :debug => false}
+    OptionParser.new do |o|
+      o.banner = "Usage: #{File.basename($0)} server [OPTIONS] <local_database_url> <login> <password>"
+      o.define_head "Start a taps database import/export server"
+
+      o.on("-p", "--port=N", "Server Port") { |v| opts[:port] = v.to_i if v.to_i > 0 }
+      o.on("-d", "--debug", "Enable Debug Messages") { |v| opts[:debug] = true }
+      o.parse!(argv)
+
+      opts[:database_url] = argv.shift
+      opts[:login] = argv.shift
+      opts[:password] = argv.shift
+
+      if opts[:database_url].nil?
+        $stderr.puts "Missing Database URL"
+        puts o
+        exit 1
+      end
+      if opts[:login].nil?
+        $stderr.puts "Missing Login"
+        puts o
+        exit 1
+      end
+      if opts[:password].nil?
+        $stderr.puts "Missing Password"
+        puts o
+        exit 1
+      end
     end
-    Taps::Config.database_url = database_url
-    Taps::Config.remote_url = remote_url
+    opts
+  end
 
-    Taps::Config.verify_database_url
+  def clientoptparse(cmd)
+    opts={:default_chunksize => 1000, :database_url => nil, :remote_url => nil, :debug => false, :resume_filename => nil, :disable_compression => false}
+    OptionParser.new do |o|
+      o.banner = "Usage: #{File.basename($0)} #{cmd} [OPTIONS] <local_database_url> <remote_url>"
+
+      case cmd
+      when :pull
+        o.define_head "Pull a database from a taps server"
+      when :push
+        o.define_head "Push a database to a taps server"
+      end
+
+      o.on("-r", "--resume=file", "Resume a Taps Session from a stored file") { |v| opts[:resume_filename] = v }
+      o.on("-c", "--chunksize=N", "Initial Chunksize") { |v| opts[:default_chunksize] = (v.to_i < 10 ? 10 : v.to_i) }
+      o.on("-g", "--disable-compression", "Disable Compression") { |v| opts[:disable_compression] = true }
+      o.on("-f", "--filter=regex", "Regex Filter for tables") { |v| opts[:table_filter] = v }
+      o.on("-d", "--debug", "Enable Debug Messages") { |v| opts[:debug] = true }
+      o.parse!(argv)
 
-    require File.dirname(__FILE__) + '/client_session'
+      opts[:database_url] = argv.shift
+      opts[:remote_url] = argv.shift
 
-    Taps::ClientSession.quickstart do |session|
-      session.send(method)
+      if opts[:database_url].nil?
+        $stderr.puts "Missing Database URL"
+        puts o
+        exit 1
+      end
+      if opts[:remote_url].nil?
+        $stderr.puts "Missing Remote Taps URL"
+        puts o
+        exit 1
+      end
     end
+
+    opts
   end
+
+  def clientxfer(method, opts)
+    database_url = opts.delete(:database_url)
+    remote_url = opts.delete(:remote_url)
+
+    Taps::Config.verify_database_url(database_url)
+
+    require 'taps/operation'
+
+    Taps::Operation.factory(method, database_url, remote_url, opts).run
+  end
+
+  def clientresumexfer(method, opts)
+    session = JSON.parse(File.read(opts.delete(:resume_filename)))
+    session.symbolize_recursively!
+
+    database_url = opts.delete(:database_url)
+    remote_url = opts.delete(:remote_url) || session.delete(:remote_url)
+
+    Taps::Config.verify_database_url(database_url)
+
+    require 'taps/operation'
+
+    newsession = session.merge({
+      :default_chunksize => opts[:default_chunksize],
+      :disable_compression => opts[:disable_compression],
+      :resume => true,
+    })
+
+    Taps::Operation.factory(method, database_url, remote_url, newsession).run
+  end
+
 end
 end
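A minimal sketch of the new resume flow in clientresumexfer, assuming a session file that an interrupted run wrote out as JSON (the filename and chunksize here are illustrative; symbolize_recursively! is added by taps/monkey):

  require 'json'
  require 'taps/monkey'

  session = JSON.parse(File.read('taps_session.json'))   # hypothetical file
  session.symbolize_recursively!

  # fresh command-line options overlay the saved state
  newsession = session.merge(
    :default_chunksize   => 1000,
    :disable_compression => false,
    :resume              => true
  )
  # Taps::Operation.factory(:pull, database_url, remote_url, newsession).run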
data/lib/taps/config.rb CHANGED
@@ -8,21 +8,32 @@ module Taps
   end
 
   def self.version
-    "#{version_yml[:major]}.#{version_yml[:minor]}.#{version_yml[:patch]}"
+    version = "#{version_yml[:major]}.#{version_yml[:minor]}.#{version_yml[:patch]}"
+    version += ".#{version_yml[:build]}" if version_yml[:build]
+    version
   end
 
   def self.compatible_version
     "#{version_yml[:major]}.#{version_yml[:minor]}"
   end
 
+  def self.exiting=(val)
+    @@exiting = val
+  end
+
+  def self.exiting?
+    (@@exiting ||= false) == true
+  end
+
   class Config
     class << self
       attr_accessor :taps_database_url
       attr_accessor :login, :password, :database_url, :remote_url
      attr_accessor :chunksize
 
-      def verify_database_url
-        db = Sequel.connect(self.database_url)
+      def verify_database_url(db_url=nil)
+        db_url ||= self.database_url
+        db = Sequel.connect(db_url)
         db.tables
         db.disconnect
       rescue Object => e
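A worked example of the new Taps.version logic: with VERSION.yml parsed into the hash below, the optional :build segment is appended only when present.

  version_yml = { :major => 0, :minor => 3, :patch => 0, :build => nil }
  version = "#{version_yml[:major]}.#{version_yml[:minor]}.#{version_yml[:patch]}"
  version += ".#{version_yml[:build]}" if version_yml[:build]
  version   # => "0.3.0"; a :build of "pre1" would yield "0.3.0.pre1"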
data/lib/taps/data_stream.rb ADDED
@@ -0,0 +1,299 @@
+require 'taps/monkey'
+require 'taps/multipart'
+require 'taps/utils'
+require 'taps/log'
+require 'json'
+
+module Taps
+
+class DataStream
+  class CorruptedData < Exception; end
+
+  attr_reader :db, :state
+
+  def initialize(db, state)
+    @db = db
+    @state = {
+      :offset => 0,
+      :avg_chunksize => 0,
+      :num_chunksize => 0,
+      :total_chunksize => 0,
+    }.merge(state)
+    @complete = false
+  end
+
+  def log
+    Taps.log
+  end
+
+  def error=(val)
+    state[:error] = val
+  end
+
+  def error
+    state[:error] || false
+  end
+
+  def table_name
+    state[:table_name].to_sym
+  end
+
+  def to_hash
+    state.merge(:klass => self.class.to_s)
+  end
+
+  def to_json
+    to_hash.to_json
+  end
+
+  def string_columns
+    @string_columns ||= Taps::Utils.incorrect_blobs(db, table_name)
+  end
+
+  def table
+    @table ||= db[table_name]
+  end
+
+  def order_by(name=nil)
+    @order_by ||= begin
+      name ||= table_name
+      Taps::Utils.order_by(db, name)
+    end
+  end
+
+  def increment(row_count)
+    state[:offset] += row_count
+  end
+
+  # keep a record of the average chunksize within the first few hundred thousand records, after chunksize
+  # goes below 100 or maybe if offset is > 1000
+  def fetch_rows
+    state[:chunksize] = fetch_chunksize
+    ds = table.order(*order_by).limit(state[:chunksize], state[:offset])
+    log.debug "DataStream#fetch_rows SQL -> #{ds.sql}"
+    rows = Taps::Utils.format_data(ds.all,
+      :string_columns => string_columns)
+    update_chunksize_stats
+    rows
+  end
+
+  def max_chunksize_training
+    20
+  end
+
+  def fetch_chunksize
+    chunksize = state[:chunksize]
+    return chunksize if state[:num_chunksize] < max_chunksize_training
+    return chunksize if state[:avg_chunksize] == 0
+    return chunksize if state[:error]
+    state[:avg_chunksize] > chunksize ? state[:avg_chunksize] : chunksize
+  end
+
+  def update_chunksize_stats
+    return if state[:num_chunksize] >= max_chunksize_training
+    state[:total_chunksize] += state[:chunksize]
+    state[:num_chunksize] += 1
+    state[:avg_chunksize] = state[:total_chunksize] / state[:num_chunksize] rescue state[:chunksize]
+  end
+
+  def encode_rows(rows)
+    Taps::Utils.base64encode(Marshal.dump(rows))
+  end
+
+  def fetch
+    log.debug "DataStream#fetch state -> #{state.inspect}"
+
+    t1 = Time.now
+    rows = fetch_rows
+    encoded_data = encode_rows(rows)
+    t2 = Time.now
+    elapsed_time = t2 - t1
+
+    @complete = rows == { }
+
+    [encoded_data, (@complete ? 0 : rows[:data].size), elapsed_time]
+  end
+
+  def complete?
+    @complete
+  end
+
+  def fetch_remote(resource, headers)
+    params = fetch_from_resource(resource, headers)
+    encoded_data = params[:encoded_data]
+    json = params[:json]
+
+    rows = parse_encoded_data(encoded_data, json[:checksum])
+    @complete = rows == { }
+
+    # update local state
+    state.merge!(json[:state].merge(:chunksize => state[:chunksize]))
+
+    unless @complete
+      import_rows(rows)
+      rows[:data].size
+    else
+      0
+    end
+  end
+
+  # this one is used inside the server process
+  def fetch_remote_in_server(params)
+    json = self.class.parse_json(params[:json])
+    encoded_data = params[:encoded_data]
+
+    rows = parse_encoded_data(encoded_data, json[:checksum])
+    @complete = rows == { }
+
+    unless @complete
+      import_rows(rows)
+      rows[:data].size
+    else
+      0
+    end
+  end
+
+  def fetch_from_resource(resource, headers)
+    res = nil
+    log.debug "DataStream#fetch_from_resource state -> #{state.inspect}"
+    state[:chunksize] = Taps::Utils.calculate_chunksize(state[:chunksize]) do |c|
+      state[:chunksize] = c
+      res = resource.post({:state => self.to_json}, headers)
+    end
+
+    begin
+      params = Taps::Multipart.parse(res)
+      params[:json] = self.class.parse_json(params[:json]) if params.has_key?(:json)
+      return params
+    rescue JSON::ParserError
+      raise DataStream::CorruptedData.new("Invalid JSON Received")
+    end
+  end
+
+  def self.parse_json(json)
+    hash = JSON.parse(json).symbolize_keys
+    hash[:state].symbolize_keys! if hash.has_key?(:state)
+    hash
+  end
+
+  def parse_encoded_data(encoded_data, checksum)
+    raise DataStream::CorruptedData.new("Checksum Failed") unless Taps::Utils.valid_data?(encoded_data, checksum)
+
+    begin
+      return Marshal.load(Taps::Utils.base64decode(encoded_data))
+    rescue Object => e
+      unless ENV['NO_DUMP_MARSHAL_ERRORS']
+        puts "Error encountered loading data, wrote the data chunk to dump.#{Process.pid}.dat"
+        File.open("dump.#{Process.pid}.dat", "w") { |f| f.write(encoded_data) }
+      end
+      raise
+    end
+  end
+
+  def import_rows(rows)
+    table.import(rows[:header], rows[:data])
+    state[:offset] += rows[:data].size
+  end
+
+  def self.factory(db, state)
+    if defined?(Sequel::MySQL) && Sequel::MySQL.respond_to?(:convert_invalid_date_time=)
+      Sequel::MySQL.convert_invalid_date_time = :nil
+    end
+
+    if state.has_key?(:klass)
+      return eval(state[:klass]).new(db, state)
+    end
+
+    if Taps::Utils.single_integer_primary_key(db, state[:table_name].to_sym)
+      DataStreamKeyed.new(db, state)
+    else
+      DataStream.new(db, state)
+    end
+  end
+end
+
+
+class DataStreamKeyed < DataStream
+  attr_accessor :buffer
+
+  def initialize(db, state)
+    super(db, state)
+    @state = { :primary_key => order_by(state[:table_name]).first, :filter => 0 }.merge(state)
+    @buffer = []
+  end
+
+  def primary_key
+    state[:primary_key].to_sym
+  end
+
+  def buffer_limit
+    if state[:last_fetched] and state[:last_fetched] < state[:filter] and self.buffer.size == 0
+      state[:last_fetched]
+    else
+      state[:filter]
+    end
+  end
+
+  def calc_limit(chunksize)
+    # we want to not fetch more than is needed while we're
+    # inside sinatra but locally we can select more than
+    # is strictly needed
+    if defined?(Sinatra)
+      (chunksize * 1.1).ceil
+    else
+      (chunksize * 3).ceil
+    end
+  end
+
+  def load_buffer(chunksize)
+    # make sure BasicObject is not polluted by subsequent requires
+    Sequel::BasicObject.remove_methods!
+
+    num = 0
+    loop do
+      limit = calc_limit(chunksize)
+      # we have to use local variables in order for the virtual row filter to work correctly
+      key = primary_key
+      buf_limit = buffer_limit
+      ds = table.order(*order_by).filter { key > buf_limit }.limit(limit)
+      log.debug "DataStreamKeyed#load_buffer SQL -> #{ds.sql}"
+      data = ds.all
+      self.buffer += data
+      num += data.size
+      if data.size > 0
+        # keep a record of the last primary key value in the buffer
+        state[:filter] = self.buffer.last[ primary_key ]
+      end
+
+      break if num >= chunksize or data.size == 0
+    end
+  end
+
+  def fetch_buffered(chunksize)
+    load_buffer(chunksize) if self.buffer.size < chunksize
+    rows = buffer.slice(0, chunksize)
+    state[:last_fetched] = if rows.size > 0
+      rows.last[ primary_key ]
+    else
+      nil
+    end
+    rows
+  end
+
+  def import_rows(rows)
+    table.import(rows[:header], rows[:data])
+  end
+
+  def fetch_rows
+    chunksize = state[:chunksize]
+    Taps::Utils.format_data(fetch_buffered(chunksize) || [],
+      :string_columns => string_columns)
+  end
+
+  def increment(row_count)
+    # pop the rows we just successfully sent off the buffer
+    @buffer.slice!(0, row_count)
+  end
+end
+
+end