taps 0.2.26 → 0.3.0

This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
data/README.rdoc CHANGED
@@ -22,11 +22,15 @@ or when you want to push a local database to a taps server
 
   $ taps push postgres://dbuser:dbpassword@localhost/dbname http://httpuser:httppassword@example.com:5000
 
+or when you want to transfer only 1 table
+
+  $ taps push postgres://dbuser:dbpassword@localhost/dbname http://httpuser:httppassword@example.com:5000 --filter mytable
+
 == Known Issues
 
-* Blob data may not transfer properly, I suspect that SQLite3 is modifying some native ruby objects.
 * Foreign Keys get lost in the schema transfer
-* Large tables (>1 million rows with a large number of columns) get slower as the offset gets larger. This is due to it being inefficient having large offset values in queries.
+* Tables without primary keys will be incredibly slow to transfer. This is due to it being inefficient having large offset values in queries.
+* Multiple schemas are currently not supported
 
 == Meta
 
@@ -39,3 +43,5 @@ Early research and inspiration by Blake Mizerany
 Released under the MIT License: http://www.opensource.org/licenses/mit-license.php
 
 http://github.com/ricardochimal/taps
+
+Special Thanks to Sequel for making this tool possible http://sequel.rubyforge.org/
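
The new "tables without primary keys" caveat above is the flip side of how taps pages data: with a single integer primary key it can page by key range, without one it falls back to LIMIT/OFFSET, which degrades as the offset grows. A minimal Sequel sketch of the difference, not taken from the taps codebase (the users table, row counts, and the sqlite3 gem are assumptions for illustration):

    require 'sequel'

    db = Sequel.sqlite # throwaway in-memory database (assumes the sqlite3 gem)
    db.create_table(:users) { primary_key :id; String :name }
    db[:users].import([:name], (1..10_000).map { |i| ["user#{i}"] })

    # Offset paging: the database must step over all 9,000 skipped rows
    # before it can return the chunk, so later chunks keep getting slower.
    chunk = db[:users].order(:id).limit(1_000, 9_000).all

    # Keyed paging: an indexed range scan that costs roughly the same no
    # matter how deep into the table the transfer has progressed.
    last_seen_id = 9_000
    chunk = db[:users].order(:id).where { id > last_seen_id }.limit(1_000).all
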
data/Rakefile CHANGED
@@ -6,17 +6,18 @@ begin
     s.email = "ricardo@heroku.com"
     s.homepage = "http://github.com/ricardochimal/taps"
     s.description = "A simple database agnostic import/export app to transfer data to/from a remote database."
-    s.authors = ["Ricardo Chimal, Jr.", "Adam Wiggins"]
+    s.authors = ["Ricardo Chimal, Jr."]
 
-    s.add_dependency 'sinatra', '= 0.9.2'
-    s.add_dependency 'activerecord', '= 2.2.2'
-    s.add_dependency 'thor', '= 0.9.9'
-    s.add_dependency 'rest-client', '~> 1.3.0'
-    s.add_dependency 'sequel', '>= 3.0.0', '< 3.1.0'
+    s.rubygems_version = %q{1.3.5}
+
+    s.add_dependency 'json_pure', '~> 1.2.0'
+    s.add_dependency 'sinatra', '~> 1.0.0'
+    s.add_dependency 'rest-client', '~> 1.4.0'
+    s.add_dependency 'sequel', '~> 3.10.0'
     s.add_dependency 'sqlite3-ruby', '~> 1.2.0'
+    s.add_dependency 'rack', '>= 1.0.1'
 
     s.rubyforge_project = "taps"
-    s.rubygems_version = '1.3.1'
 
     s.files = FileList['spec/*.rb'] + FileList['lib/**/*.rb'] + ['README.rdoc', 'LICENSE', 'VERSION.yml', 'Rakefile'] + FileList['bin/*']
     s.executables = ['taps', 'schema']
@@ -29,13 +30,17 @@ rescue LoadError => e
   end
 end
 
-require 'rake/rdoctask'
-Rake::RDocTask.new do |rdoc|
-  rdoc.rdoc_dir = 'rdoc'
-  rdoc.title = 'taps'
-  rdoc.options << '--line-numbers' << '--inline-source'
-  rdoc.rdoc_files.include('README*')
-  rdoc.rdoc_files.include('lib/**/*.rb')
+begin
+  require 'rake/rdoctask'
+  Rake::RDocTask.new do |rdoc|
+    rdoc.rdoc_dir = 'rdoc'
+    rdoc.title = 'taps'
+    rdoc.options << '--line-numbers' << '--inline-source'
+    rdoc.rdoc_files.include('README*')
+    rdoc.rdoc_files.include('lib/**/*.rb')
+  end
+rescue LoadError
+  puts "Rdoc is not available"
 end
 
 begin
@@ -46,7 +51,7 @@ begin
     t.verbose = true
   end
 rescue LoadError
-  puts "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
+  puts "RCov is not available. In order to run rcov, you must: sudo gem install rcov"
 end
 
 desc "Run all specs; requires the bacon gem"
data/VERSION.yml CHANGED
@@ -1,5 +1,5 @@
 ---
-:major: 0
-:minor: 2
 :build:
-:patch: 26
+:minor: 3
+:patch: 0
+:major: 0
data/bin/schema CHANGED
@@ -1,9 +1,11 @@
 #!/usr/bin/env ruby
 
 require 'rubygems'
-gem 'activerecord', '= 2.2.2'
+gem 'sequel', '~> 3.10.0'
 
-require File.dirname(__FILE__) + '/../lib/taps/schema'
+$:.unshift File.dirname(__FILE__) + '/../lib'
+
+require 'taps/schema'
 
 cmd = ARGV.shift.strip rescue ''
 database_url = ARGV.shift.strip rescue ''
@@ -12,7 +14,9 @@ def show_usage_and_exit
   puts <<EOTXT
 schema console <database_url>
 schema dump <database_url>
+schema dump_table <database_url> <table>
 schema indexes <database_url>
+schema indexes_individual <database_url>
 schema reset_db_sequences <database_url>
 schema load <database_url> <schema_file>
 schema load_indexes <database_url> <indexes_file>
@@ -22,9 +26,14 @@ end
 
 case cmd
 when 'dump'
-  puts Taps::Schema.dump_without_indexes(database_url)
+  puts Taps::Schema.dump(database_url)
+when 'dump_table'
+  table = ARGV.shift.strip
+  puts Taps::Schema.dump_table(database_url, table)
 when 'indexes'
   puts Taps::Schema.indexes(database_url)
+when 'indexes_individual'
+  puts Taps::Schema.indexes_individual(database_url)
 when 'load_indexes'
   filename = ARGV.shift.strip rescue ''
   indexes = File.read(filename) rescue show_usage_and_exit
@@ -36,8 +45,7 @@ when 'load'
 when 'reset_db_sequences'
   Taps::Schema.reset_db_sequences(database_url)
 when 'console'
-  Taps::Schema.connection(database_url)
-  $db = ActiveRecord::Base.connection
+  $db = Sequel.connect(database_url)
   require 'irb'
   require 'irb/completion'
   IRB.start
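
The two new subcommands map straight onto the Taps::Schema calls above. A hedged sketch of driving them from Ruby instead of the shell, assuming taps' lib directory is on the load path; the URL and table name are placeholders:

    require 'rubygems'
    require 'taps/schema'

    url = 'postgres://dbuser:dbpassword@localhost/dbname' # placeholder URL

    # Mirrors `schema dump_table <database_url> <table>`
    puts Taps::Schema.dump_table(url, 'mytable')

    # Mirrors `schema indexes_individual <database_url>`
    puts Taps::Schema.indexes_individual(url)
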
data/bin/taps CHANGED
@@ -1,13 +1,6 @@
 #!/usr/bin/env ruby
 
-require 'rubygems'
-gem 'activerecord', '= 2.2.2'
-gem 'thor', '= 0.9.9'
-gem 'rest-client', '~> 1.3.0'
-gem 'sinatra', '= 0.9.2'
-gem 'sequel', '>= 3.0.0', '< 3.1.0'
-gem 'sqlite3-ruby', '~> 1.2.0'
+$:.unshift File.dirname(__FILE__) + '/../lib'
+require 'taps/cli'
 
-require File.dirname(__FILE__) + '/../lib/taps/cli'
-
-Taps::Cli.start
+Taps::Cli.new(ARGV.dup).run
data/lib/taps/cli.rb CHANGED
@@ -1,63 +1,183 @@
-require 'thor'
+require 'optparse'
 require 'tempfile'
-require File.dirname(__FILE__) + '/config'
+require 'json'
+require 'taps/monkey'
+require 'taps/config'
+require 'taps/log'
 
 Taps::Config.taps_database_url = ENV['TAPS_DATABASE_URL'] || "sqlite://#{Tempfile.new('taps.db').path}"
 
 module Taps
-  class Cli < Thor
-    desc "server <local_database_url> <login> <password>", "Start a taps database import/export server"
-    method_options(:port => :numeric)
-    def server(database_url, login, password)
-      Taps::Config.database_url = database_url
-      Taps::Config.login = login
-      Taps::Config.password = password
+  class Cli
+    attr_accessor :argv
 
-      port = options[:port] || 5000
+    def initialize(argv)
+      @argv = argv
+    end
 
-      Taps::Config.verify_database_url
+    def run
+      method = (argv.shift || 'help').to_sym
+      if [:pull, :push, :server, :version].include? method
+        send(method)
+      else
+        help
+      end
+    end
 
-      require File.dirname(__FILE__) + '/server'
-      Taps::Server.run!({
-        :port => port,
-        :environment => :production,
-        :logging => true
-      })
+    def pull
+      opts = clientoptparse(:pull)
+      Taps.log.level = Logger::DEBUG if opts[:debug]
+      if opts[:resume_filename]
+        clientresumexfer(:pull, opts)
+      else
+        clientxfer(:pull, opts)
+      end
     end
 
-    desc "pull <local_database_url> <remote_url>", "Pull a database from a taps server"
-    method_options(:chunksize => :numeric)
-    def pull(database_url, remote_url)
-      clientxfer(:cmd_receive, database_url, remote_url)
+    def push
+      opts = clientoptparse(:push)
+      Taps.log.level = Logger::DEBUG if opts[:debug]
+      if opts[:resume_filename]
+        clientresumexfer(:push, opts)
+      else
+        clientxfer(:push, opts)
+      end
     end
 
-    desc "push <local_database_url> <remote_url>", "Push a database to a taps server"
-    method_options(:chunksize => :numeric)
-    def push(database_url, remote_url)
-      clientxfer(:cmd_send, database_url, remote_url)
+    def server
+      opts = serveroptparse
+      Taps.log.level = Logger::DEBUG if opts[:debug]
+      Taps::Config.database_url = opts[:database_url]
+      Taps::Config.login = opts[:login]
+      Taps::Config.password = opts[:password]
+
+      Taps::Config.verify_database_url
+      require 'taps/server'
+      Taps::Server.run!({
+        :port => opts[:port],
+        :environment => :production,
+        :logging => true,
+        :dump_errors => true,
+      })
     end
 
-    desc "version", "Taps version"
     def version
       puts Taps.version
     end
 
-    def clientxfer(method, database_url, remote_url)
-      if options[:chunksize]
-        Taps::Config.chunksize = options[:chunksize] < 100 ? 100 : options[:chunksize]
-      else
-        Taps::Config.chunksize = 1000
+    def help
+      puts <<EOHELP
+Options
+=======
+server    Start a taps database import/export server
+pull      Pull a database from a taps server
+push      Push a database to a taps server
+version   Taps version
+
+Add '-h' to any command to see their usage
+EOHELP
+    end
+
+    def serveroptparse
+      opts={:port => 5000, :database_url => nil, :login => nil, :password => nil, :debug => false}
+      OptionParser.new do |o|
+        o.banner = "Usage: #{File.basename($0)} server [OPTIONS] <local_database_url> <login> <password>"
+        o.define_head "Start a taps database import/export server"
+
+        o.on("-p", "--port=N", "Server Port") { |v| opts[:port] = v.to_i if v.to_i > 0 }
+        o.on("-d", "--debug", "Enable Debug Messages") { |v| opts[:debug] = true }
+        o.parse!(argv)
+
+        opts[:database_url] = argv.shift
+        opts[:login] = argv.shift
+        opts[:password] = argv.shift
+
+        if opts[:database_url].nil?
+          $stderr.puts "Missing Database URL"
+          puts o
+          exit 1
+        end
+        if opts[:login].nil?
+          $stderr.puts "Missing Login"
+          puts o
+          exit 1
+        end
+        if opts[:password].nil?
+          $stderr.puts "Missing Password"
+          puts o
+          exit 1
+        end
       end
-      Taps::Config.database_url = database_url
-      Taps::Config.remote_url = remote_url
+      opts
+    end
 
-      Taps::Config.verify_database_url
+    def clientoptparse(cmd)
+      opts={:default_chunksize => 1000, :database_url => nil, :remote_url => nil, :debug => false, :resume_filename => nil, :disable_compression => false}
+      OptionParser.new do |o|
+        o.banner = "Usage: #{File.basename($0)} #{cmd} [OPTIONS] <local_database_url> <remote_url>"
+
+        case cmd
+        when :pull
+          o.define_head "Pull a database from a taps server"
+        when :push
+          o.define_head "Push a database to a taps server"
+        end
+
+        o.on("-r", "--resume=file", "Resume a Taps Session from a stored file") { |v| opts[:resume_filename] = v }
+        o.on("-c", "--chunksize=N", "Initial Chunksize") { |v| opts[:default_chunksize] = (v.to_i < 10 ? 10 : v.to_i) }
+        o.on("-g", "--disable-compression", "Disable Compression") { |v| opts[:disable_compression] = true }
+        o.on("-f", "--filter=regex", "Regex Filter for tables") { |v| opts[:table_filter] = v }
+        o.on("-d", "--debug", "Enable Debug Messages") { |v| opts[:debug] = true }
+        o.parse!(argv)
 
-      require File.dirname(__FILE__) + '/client_session'
+        opts[:database_url] = argv.shift
+        opts[:remote_url] = argv.shift
 
-      Taps::ClientSession.quickstart do |session|
-        session.send(method)
+        if opts[:database_url].nil?
+          $stderr.puts "Missing Database URL"
+          puts o
+          exit 1
+        end
+        if opts[:remote_url].nil?
+          $stderr.puts "Missing Remote Taps URL"
+          puts o
+          exit 1
+        end
       end
+
+      opts
     end
+
+    def clientxfer(method, opts)
+      database_url = opts.delete(:database_url)
+      remote_url = opts.delete(:remote_url)
+
+      Taps::Config.verify_database_url(database_url)
+
+      require 'taps/operation'
+
+      Taps::Operation.factory(method, database_url, remote_url, opts).run
+    end
+
+    def clientresumexfer(method, opts)
+      session = JSON.parse(File.read(opts.delete(:resume_filename)))
+      session.symbolize_recursively!
+
+      database_url = opts.delete(:database_url)
+      remote_url = opts.delete(:remote_url) || session.delete(:remote_url)
+
+      Taps::Config.verify_database_url(database_url)
+
+      require 'taps/operation'
+
+      newsession = session.merge({
+        :default_chunksize => opts[:default_chunksize],
+        :disable_compression => opts[:disable_compression],
+        :resume => true,
+      })
+
+      Taps::Operation.factory(method, database_url, remote_url, newsession).run
+    end
+
   end
 end
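
Since bin/taps now just hands a copy of ARGV to this class, the CLI can also be driven programmatically. A sketch using the new flags, equivalent to `taps pull -c 500 -f mytable <local> <remote>`; it assumes taps' lib directory is on the load path, the URLs are placeholders, and running it would start an actual transfer:

    require 'taps/cli'

    argv = ['pull',
            '--chunksize=500',     # initial chunksize; values below 10 are clamped to 10
            '--filter', 'mytable', # regex matched against table names
            'sqlite://local.db',   # placeholder local database URL
            'http://user:pass@example.com:5000'] # placeholder remote taps server
    Taps::Cli.new(argv).run
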
data/lib/taps/config.rb CHANGED
@@ -8,21 +8,32 @@ module Taps
   end
 
   def self.version
-    "#{version_yml[:major]}.#{version_yml[:minor]}.#{version_yml[:patch]}"
+    version = "#{version_yml[:major]}.#{version_yml[:minor]}.#{version_yml[:patch]}"
+    version += ".#{version_yml[:build]}" if version_yml[:build]
+    version
   end
 
   def self.compatible_version
     "#{version_yml[:major]}.#{version_yml[:minor]}"
   end
 
+  def self.exiting=(val)
+    @@exiting = val
+  end
+
+  def exiting?
+    (@@exiting ||= false) == true
+  end
+
   class Config
     class << self
       attr_accessor :taps_database_url
       attr_accessor :login, :password, :database_url, :remote_url
       attr_accessor :chunksize
 
-      def verify_database_url
-        db = Sequel.connect(self.database_url)
+      def verify_database_url(db_url=nil)
+        db_url ||= self.database_url
+        db = Sequel.connect(db_url)
         db.tables
         db.disconnect
       rescue Object => e
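
With this release's VERSION.yml (major 0, minor 3, patch 0, no build), Taps.version returns "0.3.0"; a build entry such as "pre1" (hypothetical, none is set here) would yield "0.3.0.pre1". The reworked verify_database_url can likewise be pointed at an arbitrary URL rather than only Config.database_url:

    require 'taps/config'

    puts Taps.version # => "0.3.0"
    Taps::Config.verify_database_url('sqlite://taps_test.db') # placeholder URL
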
data/lib/taps/data_stream.rb ADDED
@@ -0,0 +1,299 @@
+require 'taps/monkey'
+require 'taps/multipart'
+require 'taps/utils'
+require 'taps/log'
+require 'json'
+
+module Taps
+
+  class DataStream
+    class CorruptedData < Exception; end
+
+    attr_reader :db, :state
+
+    def initialize(db, state)
+      @db = db
+      @state = {
+        :offset => 0,
+        :avg_chunksize => 0,
+        :num_chunksize => 0,
+        :total_chunksize => 0,
+      }.merge(state)
+      @complete = false
+    end
+
+    def log
+      Taps.log
+    end
+
+    def error=(val)
+      state[:error] = val
+    end
+
+    def error
+      state[:error] || false
+    end
+
+    def table_name
+      state[:table_name].to_sym
+    end
+
+    def to_hash
+      state.merge(:klass => self.class.to_s)
+    end
+
+    def to_json
+      to_hash.to_json
+    end
+
+    def string_columns
+      @string_columns ||= Taps::Utils.incorrect_blobs(db, table_name)
+    end
+
+    def table
+      @table ||= db[table_name]
+    end
+
+    def order_by(name=nil)
+      @order_by ||= begin
+        name ||= table_name
+        Taps::Utils.order_by(db, name)
+      end
+    end
+
+    def increment(row_count)
+      state[:offset] += row_count
+    end
+
+    # keep a record of the average chunksize within the first few hundred thousand records, after chunksize
+    # goes below 100 or maybe if offset is > 1000
+    def fetch_rows
+      state[:chunksize] = fetch_chunksize
+      ds = table.order(*order_by).limit(state[:chunksize], state[:offset])
+      log.debug "DataStream#fetch_rows SQL -> #{ds.sql}"
+      rows = Taps::Utils.format_data(ds.all,
+        :string_columns => string_columns)
+      update_chunksize_stats
+      rows
+    end
+
+    def max_chunksize_training
+      20
+    end
+
+    def fetch_chunksize
+      chunksize = state[:chunksize]
+      return chunksize if state[:num_chunksize] < max_chunksize_training
+      return chunksize if state[:avg_chunksize] == 0
+      return chunksize if state[:error]
+      state[:avg_chunksize] > chunksize ? state[:avg_chunksize] : chunksize
+    end
+
+    def update_chunksize_stats
+      return if state[:num_chunksize] >= max_chunksize_training
+      state[:total_chunksize] += state[:chunksize]
+      state[:num_chunksize] += 1
+      state[:avg_chunksize] = state[:total_chunksize] / state[:num_chunksize] rescue state[:chunksize]
+    end
+
+    def encode_rows(rows)
+      Taps::Utils.base64encode(Marshal.dump(rows))
+    end
+
+    def fetch
+      log.debug "DataStream#fetch state -> #{state.inspect}"
+
+      t1 = Time.now
+      rows = fetch_rows
+      encoded_data = encode_rows(rows)
+      t2 = Time.now
+      elapsed_time = t2 - t1
+
+      @complete = rows == { }
+
+      [encoded_data, (@complete ? 0 : rows[:data].size), elapsed_time]
+    end
+
+    def complete?
+      @complete
+    end
+
+    def fetch_remote(resource, headers)
+      params = fetch_from_resource(resource, headers)
+      encoded_data = params[:encoded_data]
+      json = params[:json]
+
+      rows = parse_encoded_data(encoded_data, json[:checksum])
+      @complete = rows == { }
+
+      # update local state
+      state.merge!(json[:state].merge(:chunksize => state[:chunksize]))
+
+      unless @complete
+        import_rows(rows)
+        rows[:data].size
+      else
+        0
+      end
+    end
+
+    # this one is used inside the server process
+    def fetch_remote_in_server(params)
+      json = self.class.parse_json(params[:json])
+      encoded_data = params[:encoded_data]
+
+      rows = parse_encoded_data(encoded_data, json[:checksum])
+      @complete = rows == { }
+
+      unless @complete
+        import_rows(rows)
+        rows[:data].size
+      else
+        0
+      end
+    end
+
+    def fetch_from_resource(resource, headers)
+      res = nil
+      log.debug "DataStream#fetch_from_resource state -> #{state.inspect}"
+      state[:chunksize] = Taps::Utils.calculate_chunksize(state[:chunksize]) do |c|
+        state[:chunksize] = c
+        res = resource.post({:state => self.to_json}, headers)
+      end
+
+      begin
+        params = Taps::Multipart.parse(res)
+        params[:json] = self.class.parse_json(params[:json]) if params.has_key?(:json)
+        return params
+      rescue JSON::ParserError
+        raise DataStream::CorruptedData.new("Invalid JSON Received")
+      end
+    end
+
+    def self.parse_json(json)
+      hash = JSON.parse(json).symbolize_keys
+      hash[:state].symbolize_keys! if hash.has_key?(:state)
+      hash
+    end
+
+    def parse_encoded_data(encoded_data, checksum)
+      raise DataStream::CorruptedData.new("Checksum Failed") unless Taps::Utils.valid_data?(encoded_data, checksum)
+
+      begin
+        return Marshal.load(Taps::Utils.base64decode(encoded_data))
+      rescue Object => e
+        unless ENV['NO_DUMP_MARSHAL_ERRORS']
+          puts "Error encountered loading data, wrote the data chunk to dump.#{Process.pid}.dat"
+          File.open("dump.#{Process.pid}.dat", "w") { |f| f.write(encoded_data) }
+        end
+        raise
+      end
+    end
+
+    def import_rows(rows)
+      table.import(rows[:header], rows[:data])
+      state[:offset] += rows[:data].size
+    end
+
+    def self.factory(db, state)
+      if defined?(Sequel::MySQL) && Sequel::MySQL.respond_to?(:convert_invalid_date_time=)
+        Sequel::MySQL.convert_invalid_date_time = :nil
+      end
+
+      if state.has_key?(:klass)
+        return eval(state[:klass]).new(db, state)
+      end
+
+      if Taps::Utils.single_integer_primary_key(db, state[:table_name].to_sym)
+        DataStreamKeyed.new(db, state)
+      else
+        DataStream.new(db, state)
+      end
+    end
+  end
+
+
+  class DataStreamKeyed < DataStream
+    attr_accessor :buffer
+
+    def initialize(db, state)
+      super(db, state)
+      @state = { :primary_key => order_by(state[:table_name]).first, :filter => 0 }.merge(state)
+      @buffer = []
+    end
+
+    def primary_key
+      state[:primary_key].to_sym
+    end
+
+    def buffer_limit
+      if state[:last_fetched] and state[:last_fetched] < state[:filter] and self.buffer.size == 0
+        state[:last_fetched]
+      else
+        state[:filter]
+      end
+    end
+
+    def calc_limit(chunksize)
+      # we want to not fetch more than is needed while we're
+      # inside sinatra but locally we can select more than
+      # is strictly needed
+      if defined?(Sinatra)
+        (chunksize * 1.1).ceil
+      else
+        (chunksize * 3).ceil
+      end
+    end
+
+    def load_buffer(chunksize)
+      # make sure BasicObject is not polluted by subsequent requires
+      Sequel::BasicObject.remove_methods!
+
+      num = 0
+      loop do
+        limit = calc_limit(chunksize)
+        # we have to use local variables in order for the virtual row filter to work correctly
+        key = primary_key
+        buf_limit = buffer_limit
+        ds = table.order(*order_by).filter { key > buf_limit }.limit(limit)
+        log.debug "DataStreamKeyed#load_buffer SQL -> #{ds.sql}"
+        data = ds.all
+        self.buffer += data
+        num += data.size
+        if data.size > 0
+          # keep a record of the last primary key value in the buffer
+          state[:filter] = self.buffer.last[ primary_key ]
+        end
+
+        break if num >= chunksize or data.size == 0
+      end
+    end
+
+    def fetch_buffered(chunksize)
+      load_buffer(chunksize) if self.buffer.size < chunksize
+      rows = buffer.slice(0, chunksize)
+      state[:last_fetched] = if rows.size > 0
+        rows.last[ primary_key ]
+      else
+        nil
+      end
+      rows
+    end
+
+    def import_rows(rows)
+      table.import(rows[:header], rows[:data])
+    end
+
+    def fetch_rows
+      chunksize = state[:chunksize]
+      Taps::Utils.format_data(fetch_buffered(chunksize) || [],
+        :string_columns => string_columns)
+    end
+
+    def increment(row_count)
+      # pop the rows we just successfully sent off the buffer
+      @buffer.slice!(0, row_count)
+    end
+  end
+
+end
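
The factory above is the seam between the two strategies: tables with a single integer primary key stream through DataStreamKeyed (key-range queries), everything else through the offset-based DataStream. A minimal local sketch of driving a stream to completion, assuming taps' lib directory is on the load path and an existing sqlite database with a users table (both placeholders); a real client would ship each encoded chunk to the server instead of discarding it:

    require 'sequel'
    require 'taps/data_stream'

    db = Sequel.connect('sqlite://taps_demo.db') # placeholder URL

    stream = Taps::DataStream.factory(db, :table_name => 'users', :chunksize => 200)

    until stream.complete?
      encoded, row_count, elapsed = stream.fetch # [base64 data, rows in chunk, seconds]
      stream.increment(row_count)                # advance past the rows just handled
    end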