taps 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2008 Ricardo Chimal, Jr
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,35 @@
1
+ = Taps -- simple database import/export app
2
+
3
+ A simple database agnostic import/export app to transfer data to/from a remote database.
4
+
5
+ == Usage: Server
6
+
7
+ $ taps server postgres://localdbuser:localdbpass@localhost/dbname httpuser httppassword
8
+
9
+ == Usage: Client
10
+
11
+ When you want to pull down a database from a taps server
12
+
13
+ $ taps pull postgres://dbuser:dbpassword@localhost/dbname http://httpuser:httppassword@example.com:5000
14
+
15
+ or when you want to push a local database to a taps server
16
+
17
+ $ taps push postgres://dbuser:dbpassword@localhost/dbname http://httpuser:httppassword@example.com:5000
18
+
19
+ == Known Issues
20
+
21
+ * Blob Data does not transfer properly.
22
+ * Foreign keys get lost in the schema transfer.
23
+ * Large tables (>1 million rows with a large number of columns) get slower as the offset gets larger, because queries with large offset values are inefficient.
24
+
25
+ == Meta
26
+
27
+ Maintained by Ricardo Chimal, Jr. (ricardo at heroku dot com)
28
+
29
+ Written by Ricardo Chimal, Jr. (ricardo at heroku dot com) and Adam Wiggins (adam at heroku dot com)
30
+
31
+ Early research and inspiration by Blake Mizerany
32
+
33
+ Released under the MIT License: http://www.opensource.org/licenses/mit-license.php
34
+
35
+ http://github.com/ricardochimal/taps
@@ -0,0 +1,53 @@
1
# Gem packaging tasks. Jeweler is optional: when it is not installed the
# remaining tasks still load and a hint is printed instead.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |s|
    s.name = "taps"
    s.summary = %Q{simple database import/export app}
    s.email = "ricardo@heroku.com"
    s.homepage = "http://github.com/ricardochimal/taps"
    s.description = "A simple database agnostic import/export app to transfer data to/from a remote database."
    s.authors = ["Ricardo Chimal, Jr.", "Adam Wiggins"]

    s.add_dependency 'sinatra', '~> 0.9.0'
    s.add_dependency 'activerecord', '= 2.2.2'
    s.add_dependency 'thor', '= 0.9.9'
    s.add_dependency 'rest-client', '~> 0.9.0'
    s.add_dependency 'sequel', '~> 2.10.0'
    s.add_dependency 'sqlite3-ruby', '~> 1.2.0'

    s.rubyforge_project = "taps"
    s.rubygems_version = '1.3.1'

    s.files = FileList['spec/*.rb'] + FileList['lib/**/*.rb'] + ['README.rdoc', 'LICENSE', 'VERSION.yml', 'Rakefile']
    s.executables = ['taps', 'schema']
  end
rescue LoadError
  puts "Jeweler not available. Install it with: sudo gem install technicalpickles-jeweler -s http://gems.github.com"
end

# RDoc generation for the README and everything under lib/.
require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = 'taps'
  rdoc.options << '--line-numbers' << '--inline-source'
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end

# Coverage task. RCov is optional, same as Jeweler above.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |t|
    t.libs << 'spec'
    t.test_files = FileList['spec/*_spec.rb']
    t.verbose = true
  end
rescue LoadError
  puts "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
end

desc "Run all specs"
task :spec do
  # Kernel#system returns false on a non-zero exit status and nil when the
  # command could not be run at all. Either way the task must fail, otherwise
  # `rake spec` (and the default task) would report success on broken specs.
  specs_passed = system "bacon #{File.dirname(__FILE__)}/spec/*_spec.rb"
  abort "Specs failed" unless specs_passed
end

task :default => :spec
@@ -0,0 +1,4 @@
1
+ ---
2
+ :major: 0
3
+ :minor: 2
4
+ :patch: 1
@@ -0,0 +1,38 @@
1
#!/usr/bin/env ruby

# Command-line front end for Taps::Schema: dumps or loads a database schema
# and its indexes, and resets database sequences.

require 'rubygems'
gem 'activerecord', '= 2.2.2'

require File.dirname(__FILE__) + '/../lib/taps/schema'

# Prints the supported sub-commands and terminates with exit status 1.
def show_usage_and_exit
  puts <<EOTXT
schema dump <database_url>
schema indexes <database_url>
schema reset_db_sequences <database_url>
schema load <database_url> <schema_file>
schema load_indexes <database_url> <indexes_file>
EOTXT
  exit(1)
end

# Takes the next command-line argument as a file name and returns the file's
# contents. A missing argument or an unreadable file shows the usage text and
# exits. Only file-system errors are rescued, so unrelated failures are no
# longer silently turned into a usage message.
def read_file_argument
  filename = (ARGV.shift || '').strip
  File.read(filename)
rescue SystemCallError, IOError
  show_usage_and_exit
end

# ARGV.shift returns nil when the argument is absent; treat that as ''.
cmd = (ARGV.shift || '').strip
database_url = (ARGV.shift || '').strip

case cmd
when 'dump'
  puts Taps::Schema.dump_without_indexes(database_url)
when 'indexes'
  puts Taps::Schema.indexes(database_url)
when 'load_indexes'
  Taps::Schema.load_indexes(database_url, read_file_argument)
when 'load'
  Taps::Schema.load(database_url, read_file_argument)
when 'reset_db_sequences'
  Taps::Schema.reset_db_sequences(database_url)
else
  show_usage_and_exit
end
@@ -0,0 +1,13 @@
1
#!/usr/bin/env ruby

# Executable entry point for the taps command-line interface.

require 'rubygems'

# Gem version pins, activated in the listed order before the CLI is loaded.
[
  ['activerecord', '= 2.2.2'],
  ['thor',         '= 0.9.9'],
  ['rest-client',  '~> 0.9.0'],
  ['sinatra',      '~> 0.9.0'],
  ['sequel',       '~> 2.10.0'],
  ['sqlite3-ruby', '~> 1.2.0']
].each do |gem_name, requirement|
  gem gem_name, requirement
end

require File.dirname(__FILE__) + '/../lib/taps/cli'

Taps::Cli.start
@@ -0,0 +1,57 @@
1
require 'thor'
require File.dirname(__FILE__) + '/config'

# Set taps' own database URL before any command runs.
Taps::Config.taps_database_url = 'sqlite://taps.db'

module Taps
  # Thor-based command line interface exposing the server, pull and push
  # commands.
  class Cli < Thor
    desc "server <local_database_url> <login> <password>", "Start a taps database import/export server"
    method_options(:port => :numeric)
    # Stores the database URL and HTTP credentials in Taps::Config, verifies
    # the database URL and starts Taps::Server (port 5000 unless --port is
    # given).
    def server(database_url, login, password)
      Taps::Config.database_url = database_url
      Taps::Config.login = login
      Taps::Config.password = password

      listen_port = options[:port] || 5000

      Taps::Config.verify_database_url

      # The server code is loaded lazily, only when this command is used.
      require File.dirname(__FILE__) + '/server'
      Taps::Server.run!(:port => listen_port, :environment => :production, :logging => true)
    end

    desc "pull <local_database_url> <remote_url>", "Pull a database from a taps server"
    method_options(:chunksize => :numeric)
    # Runs a client session that receives data from the remote server.
    def pull(database_url, remote_url)
      clientxfer(:cmd_receive, database_url, remote_url)
    end

    desc "push <local_database_url> <remote_url>", "Push a database to a taps server"
    method_options(:chunksize => :numeric)
    # Runs a client session that sends the local database to the remote server.
    def push(database_url, remote_url)
      clientxfer(:cmd_send, database_url, remote_url)
    end

    # Shared implementation of pull and push: records the settings in
    # Taps::Config, verifies the database URL, then invokes +method+
    # (:cmd_receive or :cmd_send) on a freshly started client session.
    def clientxfer(method, database_url, remote_url)
      requested_chunksize = options[:chunksize]
      # A requested chunk size below 100 is raised to 100; the default is 1000.
      Taps::Config.chunksize =
        if requested_chunksize
          requested_chunksize < 100 ? 100 : requested_chunksize
        else
          1000
        end
      Taps::Config.database_url = database_url
      Taps::Config.remote_url = remote_url

      Taps::Config.verify_database_url

      # The client code is loaded lazily, only when pull/push is used.
      require File.dirname(__FILE__) + '/client_session'

      Taps::ClientSession.quickstart { |session| session.send(method) }
    end
  end
end
@@ -0,0 +1,237 @@
1
+ require 'rest_client'
2
+ require 'sequel'
3
+ require 'zlib'
4
+
5
+ require File.dirname(__FILE__) + '/progress_bar'
6
+ require File.dirname(__FILE__) + '/config'
7
+ require File.dirname(__FILE__) + '/utils'
8
+
9
module Taps
  # Client side of a taps transfer. A session talks to a remote taps server
  # over HTTP (RestClient) and to the local database through Sequel, and can
  # either send the local database (cmd_send) or receive the remote one
  # (cmd_receive).
  class ClientSession
    # Raised by fetch_table_rows when a received chunk fails checksum
    # validation. Derives from StandardError so that it does not sit alongside
    # SystemExit/SignalException directly under Exception.
    class CorruptedData < StandardError; end

    attr_reader :database_url, :remote_url, :default_chunksize

    # database_url::      URL of the local database.
    # remote_url::        URL of the remote taps server.
    # default_chunksize:: initial number of rows transferred per request.
    def initialize(database_url, remote_url, default_chunksize)
      @database_url = database_url
      @remote_url = remote_url
      @default_chunksize = default_chunksize
    end

    # Builds a session, yields it to the block and always closes the remote
    # session afterwards, including when the block raises.
    def self.start(database_url, remote_url, default_chunksize, &block)
      s = new(database_url, remote_url, default_chunksize)
      begin
        yield s
      ensure
        s.close_session
      end
    end

    # Same as start, with all settings taken from Taps::Config.
    def self.quickstart(&block)
      start(Taps::Config.database_url, Taps::Config.remote_url, Taps::Config.chunksize) do |s|
        yield s
      end
    end

    # Memoized Sequel connection to the local database.
    def db
      @db ||= Sequel.connect(database_url)
    end

    # Memoized REST resource for the remote server root.
    def server
      @server ||= RestClient::Resource.new(remote_url)
    end

    # Memoized REST resource of the remote session; opened on first use.
    def session_resource
      @session_resource ||= open_session
    end

    # POSTs to 'sessions' and returns the resource at the URI the server
    # answered with.
    def open_session
      uri = server['sessions'].post('', :taps_version => Taps::VERSION)
      server[uri]
    end

    # Deletes the remote session, if one was ever opened.
    def close_session
      @session_resource.delete(:taps_version => Taps::VERSION) if @session_resource
    end

    # Pushes schema, data, indexes and sequence resets to the remote server.
    def cmd_send
      verify_server
      cmd_send_schema
      cmd_send_data
      cmd_send_indexes
      cmd_send_reset_sequences
    end

    # Posts the output of `bin/schema indexes` to the remote session.
    def cmd_send_indexes
      puts "Sending schema indexes to remote taps server #{remote_url} from local database #{database_url}"

      index_data = run_schema_command('indexes')
      session_resource['indexes'].post(index_data, :taps_version => Taps::VERSION)
    end

    # Posts the output of `bin/schema dump` to the remote session.
    def cmd_send_schema
      puts "Sending schema to remote taps server #{remote_url} from local database #{database_url}"

      schema_data = run_schema_command('dump')
      session_resource['schema'].post(schema_data, :taps_version => Taps::VERSION)
    end

    # Asks the remote server to reset its database sequences.
    def cmd_send_reset_sequences
      puts "Resetting db sequences in remote taps server at #{remote_url}"

      session_resource["reset_sequences"].post('', :taps_version => Taps::VERSION)
    end

    # Sends every local table to the remote server in gzipped, marshalled
    # chunks. A chunk rejected with HTTP 412 is sent again from the same
    # offset instead of being skipped.
    def cmd_send_data
      puts "Sending schema and data from local database #{database_url} to remote taps server at #{remote_url}"

      db.tables.each do |table_name|
        table = db[table_name]
        count = table.count
        columns = table.columns
        # Order by :id when the table has one so paging is stable.
        order = columns.include?(:id) ? :id : columns.first
        chunksize = default_chunksize

        progress = ProgressBar.new(table_name.to_s, count)

        offset = 0
        loop do
          rows = Taps::Utils.format_data(table.order(order).limit(chunksize, offset).all)
          # An empty hash is taken to mean the table is exhausted.
          break if rows == { }

          gzip_data = Taps::Utils.gzip(Marshal.dump(rows))

          corrupted = false
          chunksize = Taps::Utils.calculate_chunksize(chunksize) do
            begin
              session_resource["tables/#{table_name}"].post(gzip_data,
                :taps_version => Taps::VERSION,
                :content_type => 'application/octet-stream',
                :taps_checksum => Taps::Utils.checksum(gzip_data).to_s)
            rescue RestClient::RequestFailed => e
              # 412: the data got corrupted somehow. Anything else is fatal.
              raise unless e.http_code == 412
              corrupted = true
            end
          end

          # Retry the same data: leave offset and progress untouched. A `next`
          # inside the block above would only end that block and let the
          # offset advance past the chunk that was never stored.
          next if corrupted

          progress.inc(rows[:data].size)
          offset += rows[:data].size
        end

        progress.finish
      end
    end

    # Pulls schema, data and indexes from the remote server, then resets the
    # local database sequences.
    def cmd_receive
      verify_server
      cmd_receive_schema
      cmd_receive_data
      cmd_receive_indexes
      cmd_reset_sequences
    end

    # Downloads every remote table chunk by chunk and inserts the rows locally.
    def cmd_receive_data
      puts "Receiving data from remote taps server #{remote_url} into local database #{database_url}"

      tables_with_counts, record_count = fetch_tables_info

      puts "#{tables_with_counts.size} tables, #{format_number(record_count)} records"

      tables_with_counts.each do |table_name, count|
        table = db[table_name.to_sym]
        chunksize = default_chunksize

        progress = ProgressBar.new(table_name.to_s, count)

        offset = 0
        loop do
          begin
            chunksize, rows = fetch_table_rows(table_name, chunksize, offset)
          rescue CorruptedData
            # Fetch the same offset again.
            # NOTE(review): this retry is unbounded, as in the original.
            next
          end
          break if rows == { }

          table.multi_insert(rows[:header], rows[:data])

          progress.inc(rows[:data].size)
          offset += rows[:data].size
        end

        progress.finish
      end
    end

    # Fetches one chunk of +table_name+ starting at +offset+.
    # Returns [new_chunksize, rows].
    # Raises CorruptedData when the body does not match the taps_checksum
    # response header.
    def fetch_table_rows(table_name, chunksize, offset)
      response = nil
      chunksize = Taps::Utils.calculate_chunksize(chunksize) do
        response = session_resource["tables/#{table_name}/#{chunksize}?offset=#{offset}"].get(:taps_version => Taps::VERSION)
      end
      raise CorruptedData unless Taps::Utils.valid_data?(response.to_s, response.headers[:taps_checksum])

      # NOTE(review): Marshal.load runs on data supplied by the remote server;
      # only connect to servers you trust.
      rows = Marshal.load(Taps::Utils.gunzip(response.to_s))
      [chunksize, rows]
    end

    # Returns [tables_with_counts, record_count] as reported by the remote
    # server. The request is retried once on RestClient errors; after that a
    # message is printed and the process exits with status 1.
    def fetch_tables_info
      retries = 0
      max_retries = 1
      begin
        # NOTE(review): Marshal.load on a server response; see fetch_table_rows.
        tables_with_counts = Marshal.load(session_resource['tables'].get(:taps_version => Taps::VERSION))
        record_count = tables_with_counts.values.inject(0) { |sum, c| sum + c }
      rescue RestClient::Exception
        retries += 1
        retry if retries <= max_retries
        puts "Unable to fetch tables information from #{remote_url}. Please check the server log."
        exit(1)
      end

      [ tables_with_counts, record_count ]
    end

    # Downloads the remote schema and loads it into the local database.
    def cmd_receive_schema
      puts "Receiving schema from remote taps server #{remote_url} into local database #{database_url}"

      schema_data = session_resource['schema'].get(:taps_version => Taps::VERSION)
      output = Taps::Utils.load_schema(database_url, schema_data)
      puts output if output
    end

    # Downloads the remote index definitions and loads them locally.
    def cmd_receive_indexes
      puts "Receiving schema indexes from remote taps server #{remote_url} into local database #{database_url}"

      index_data = session_resource['indexes'].get(:taps_version => Taps::VERSION)

      puts Taps::Utils.load_indexes(database_url, index_data)
    end

    # Runs `bin/schema reset_db_sequences` against the local database.
    def cmd_reset_sequences
      puts "Resetting db sequences in #{database_url}"

      puts run_schema_command('reset_db_sequences')
    end

    # Formats an integer with thousands separators, e.g. 1234567 -> "1,234,567".
    def format_number(num)
      num.to_s.gsub(/(\d)(?=(\d\d\d)+(?!\d))/, "\\1,")
    end

    # Checks that the remote server is reachable, accepts the credentials and
    # runs a compatible taps version. Each of those failures prints a message
    # and exits with status 1; any other request failure is re-raised.
    def verify_server
      server['/'].get(:taps_version => Taps::VERSION)
    rescue RestClient::Unauthorized
      # Listed before RequestFailed so it still wins should Unauthorized be a
      # RequestFailed subclass in the installed rest-client.
      puts "Bad credentials given for #{remote_url}"
      exit(1)
    rescue RestClient::RequestFailed => e
      raise unless e.http_code == 417
      puts "#{remote_url} is running a different version of taps."
      puts "#{e.response.body}"
      exit(1)
    rescue Errno::ECONNREFUSED
      puts "Can't connect to #{remote_url}. Please check that it's running"
      exit(1)
    end

    private

    # Builds the shell command line for the bundled bin/schema script. Every
    # word is shell-escaped: the database URL carries user-supplied
    # credentials that may contain shell metacharacters.
    def schema_command_line(command)
      require 'shellwords'
      script = "#{File.dirname(__FILE__)}/../../bin/schema"
      [script, command, database_url].map { |word| Shellwords.escape(word.to_s) }.join(' ')
    end

    # Runs bin/schema with +command+ against the local database and returns
    # whatever the script wrote to standard output.
    def run_schema_command(command)
      `#{schema_command_line(command)}`
    end
  end
end