matthewtodd-taps 0.2.19

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2008 Ricardo Chimal, Jr
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,41 @@
1
+ = Taps -- simple database import/export app
2
+
3
+ A simple, database-agnostic import/export app to transfer data to/from a remote database.
4
+
5
+ == Usage: Server
6
+
7
+ Here's how you start a taps server
8
+
9
+ $ taps server postgres://localdbuser:localdbpass@localhost/dbname httpuser httppassword
10
+
11
+ You can also specify an encoding in the database url
12
+
13
+ $ taps server mysql://localdbuser:localdbpass@localhost/dbname?encoding=latin1 httpuser httppassword
14
+
15
+ == Usage: Client
16
+
17
+ When you want to pull down a database from a taps server
18
+
19
+ $ taps pull postgres://dbuser:dbpassword@localhost/dbname http://httpuser:httppassword@example.com:5000
20
+
21
+ or when you want to push a local database to a taps server
22
+
23
+ $ taps push postgres://dbuser:dbpassword@localhost/dbname http://httpuser:httppassword@example.com:5000
24
+
25
+ == Known Issues
26
+
27
+ * Blob data may not transfer properly; I suspect that SQLite3 is modifying some native Ruby objects.
28
+ * Foreign Keys get lost in the schema transfer
29
+ * Large tables (>1 million rows with a large number of columns) get slower as the offset gets larger. This is because queries with large offset values are inefficient.
30
+
31
+ == Meta
32
+
33
+ Maintained by Ricardo Chimal, Jr. (ricardo at heroku dot com)
34
+
35
+ Written by Ricardo Chimal, Jr. (ricardo at heroku dot com) and Adam Wiggins (adam at heroku dot com)
36
+
37
+ Early research and inspiration by Blake Mizerany
38
+
39
+ Released under the MIT License: http://www.opensource.org/licenses/mit-license.php
40
+
41
+ http://github.com/ricardochimal/taps
@@ -0,0 +1,61 @@
1
# Gem packaging tasks. They are only defined when the jeweler gem can be
# loaded; otherwise a hint is printed and the remaining tasks still load.
begin
  require 'jeweler'

  Jeweler::Tasks.new do |gem|
    gem.name        = "matthewtodd-taps"
    gem.summary     = "simple database import/export app"
    gem.email       = "ricardo@heroku.com"
    gem.homepage    = "http://github.com/matthewtodd/taps"
    gem.description = "A simple database agnostic import/export app to transfer data to/from a remote database."
    gem.authors     = ["Ricardo Chimal, Jr.", "Adam Wiggins"]

    # Runtime dependencies and their version constraints.
    gem.add_dependency 'sinatra', '= 0.9.2'
    gem.add_dependency 'activerecord', '~> 2.3.4'
    gem.add_dependency 'thor', '= 0.9.9'
    gem.add_dependency 'rest-client', '>= 1.0.1', '< 1.1.0'
    gem.add_dependency 'sequel', '>= 3.0.0', '< 3.1.0'
    gem.add_dependency 'sqlite3-ruby', '~> 1.2.0'

    gem.rubyforge_project = "taps"
    gem.rubygems_version  = '1.3.1'

    # Files shipped in the gem: specs, library code, top-level docs and
    # the executables.
    gem.files = FileList['spec/*.rb'] +
                FileList['lib/**/*.rb'] +
                ['README.rdoc', 'LICENSE', 'VERSION.yml', 'Rakefile'] +
                FileList['bin/*']
    gem.executables = ['taps', 'schema']
  end
rescue LoadError => load_error
  # Distinguish "jeweler itself is missing" from a LoadError raised by
  # something jeweler tried to load.
  case load_error.message
  when /jeweler/
    puts "Jeweler not available. Install it with: sudo gem install technicalpickles-jeweler -s http://gems.github.com"
  else
    puts "#{load_error.message} -- while loading jeweler."
  end
end

# RDoc generation: README* plus everything under lib/, written to ./rdoc.
require 'rake/rdoctask'
Rake::RDocTask.new do |doc|
  doc.rdoc_dir = 'rdoc'
  doc.title    = 'taps'
  doc.options << '--line-numbers' << '--inline-source'
  doc.rdoc_files.include('README*')
  doc.rdoc_files.include('lib/**/*.rb')
end

# Coverage task; optional, only defined when rcov is installed.
begin
  require 'rcov/rcovtask'

  Rcov::RcovTask.new do |coverage|
    coverage.libs << 'spec'
    coverage.test_files = FileList['spec/*_spec.rb']
    coverage.verbose    = true
  end
rescue LoadError
  puts "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
end

desc "Run all specs; requires the bacon gem"
task :spec do
  # `which` prints nothing when the executable is not on the PATH.
  bacon_missing = `which bacon`.empty?

  if bacon_missing
    puts "bacon is not available. In order to run the specs, you must: sudo gem install bacon."
  else
    system "bacon #{File.dirname(__FILE__)}/spec/*_spec.rb"
  end
end

task :default => :spec
@@ -0,0 +1,4 @@
1
+ ---
2
+ :patch: 19
3
+ :major: 0
4
+ :minor: 2
@@ -0,0 +1,42 @@
1
#!/usr/bin/env ruby

require 'taps/schema'

# Prints the supported sub-commands and terminates with exit status 1.
def show_usage_and_exit
  puts <<EOTXT
schema console <database_url>
schema dump <database_url>
schema indexes <database_url>
schema reset_db_sequences <database_url>
schema load <database_url> <schema_file>
schema load_indexes <database_url> <indexes_file>
EOTXT
  exit(1)
end

# Removes and returns the next command-line argument, stripped of
# surrounding whitespace; returns an empty string when none is left.
def next_argument
  (ARGV.shift || '').strip
end

# Reads the file named by the next command-line argument. Any failure to
# read it (including a missing argument) ends in the usage message.
def read_file_argument
  File.read(next_argument)
rescue StandardError
  show_usage_and_exit
end

cmd          = next_argument
database_url = next_argument

case cmd
when 'dump'
  puts Taps::Schema.dump_without_indexes(database_url)
when 'indexes'
  puts Taps::Schema.indexes(database_url)
when 'load_indexes'
  indexes = read_file_argument
  Taps::Schema.load_indexes(database_url, indexes)
when 'load'
  schema = read_file_argument
  Taps::Schema.load(database_url, schema)
when 'reset_db_sequences'
  Taps::Schema.reset_db_sequences(database_url)
when 'console'
  # Connect, expose the connection as $db for interactive use, then hand
  # control to IRB.
  Taps::Schema.connection(database_url)
  $db = ActiveRecord::Base.connection
  require 'irb'
  require 'irb/completion'
  IRB.start
else
  show_usage_and_exit
end
@@ -0,0 +1,6 @@
1
+ @ECHO OFF
2
+ IF NOT "%~f0" == "~f0" GOTO :WinNT
3
+ @"ruby.exe" "./schema" %1 %2 %3 %4 %5 %6 %7 %8 %9
4
+ GOTO :EOF
5
+ :WinNT
6
+ @"ruby.exe" "%~dpn0" %*
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'taps/cli'
4
+
5
+ Taps::Cli.start
@@ -0,0 +1,21 @@
1
module Taps
  # Registry and loader for the files under taps/adapter_hacks.
  module AdapterHacks
    extend self

    # Hack file names (relative to taps/adapter_hacks) keyed by adapter
    # name. The :all entry is loaded for every adapter. Frozen so the
    # registry cannot be modified by accident at runtime.
    LIST = {
      :all        => ['non_rails_schema_dump'].freeze,
      :mysql      => ['invalid_text_limit', 'mysql_invalid_primary_key'].freeze,
      :postgresql => ['invalid_text_limit', 'invalid_binary_limit'].freeze
    }.freeze

    # Requires the hacks registered under :all, followed by those
    # registered for +adapter+ (if any).
    #
    # adapter - adapter name as a Symbol or String, e.g. :mysql or
    #           "postgresql". Names without an entry in LIST only get
    #           the :all hacks.
    #
    # Returns the Array of hack names that were required.
    def load(adapter)
      hacks = LIST[:all] + (LIST[adapter.to_sym] || [])
      hacks.each { |hack| require "taps/adapter_hacks/#{hack}" }
    end
  end
end
@@ -0,0 +1,13 @@
1
module ActiveRecord
  module ConnectionAdapters
    class TableDefinition
      # Keep the stock implementation reachable under another name.
      alias_method :original_binary, :binary

      # Declares one 'binary' column per given name, discarding any
      # :limit option before the columns are defined.
      #
      # args - column names, optionally followed by an options Hash.
      def binary(*args)
        options = args.extract_options!
        options.delete(:limit)

        args.each do |column_name|
          column(column_name, 'binary', options)
        end
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
module ActiveRecord
  module ConnectionAdapters
    class TableDefinition
      # Keep the stock implementation reachable under another name.
      alias_method :original_text, :text

      # Declares one 'text' column per given name, discarding any
      # :limit option before the columns are defined.
      #
      # args - column names, optionally followed by an options Hash.
      def text(*args)
        options = args.extract_options!
        options.delete(:limit)

        args.each do |column_name|
          column(column_name, 'text', options)
        end
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
module ActiveRecord
  module ConnectionAdapters
    class MysqlAdapter < AbstractAdapter
      # Keep the stock implementation reachable under another name.
      alias_method :orig_pk_and_sequence_for, :pk_and_sequence_for

      # mysql accepts varchar as a primary key but most others do not.
      # Only say that a field is a primary key if mysql says so
      # and the field is a kind of integer.
      #
      # table - name of the table to inspect.
      #
      # Returns [primary_key_name, nil] when the table has exactly one
      # integer-typed primary key column, otherwise nil.
      def pk_and_sequence_for(table)
        keys   = []
        result = execute("describe #{quote_table_name(table)}")

        begin
          result.each_hash do |column|
            keys << column["Field"] if column["Key"] == "PRI" && column["Type"] =~ /int/
          end
        ensure
          # Release the result set explicitly instead of leaving it to
          # the garbage collector.
          result.free
        end

        keys.length == 1 ? [keys.first, nil] : nil
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
module ActiveRecord
  class SchemaDumper
    private

    # Writes the opening line of the dump: a bare
    # ActiveRecord::Schema.define block with no arguments.
    def header(stream)
      stream.puts "ActiveRecord::Schema.define do"
    end

    # Dumps every table reported by the connection, in sorted name
    # order, by delegating each one to #table.
    def tables(stream)
      sorted_names = @connection.tables.sort
      sorted_names.each { |table_name| table(table_name, stream) }
    end
  end
end
@@ -0,0 +1,62 @@
1
+ require 'thor'
2
+ require 'taps/config'
3
+
4
+ Taps::Config.taps_database_url = ENV['TAPS_DATABASE_URL'] || 'sqlite://taps.db'
5
+
6
module Taps
  # Thor command set behind the `taps` executable: server, pull, push
  # and version.
  class Cli < Thor
    # Stores the database URL and HTTP credentials in Taps::Config,
    # verifies the database URL and runs Taps::Server on the requested
    # port (5000 when no port option is given).
    desc "server <local_database_url> <login> <password>", "Start a taps database import/export server"
    method_options(:port => :numeric)
    def server(database_url, login, password)
      Taps::Config.database_url = database_url
      Taps::Config.login        = login
      Taps::Config.password     = password

      listen_port = options[:port] || 5000

      Taps::Config.verify_database_url

      # Loaded here rather than at the top of the file, so the client
      # commands do not load the server code.
      require 'taps/server'
      Taps::Server.run!({
        :port        => listen_port,
        :environment => :production,
        :logging     => true
      })
    end

    desc "pull <local_database_url> <remote_url>", "Pull a database from a taps server"
    method_options(:chunksize => :numeric)
    def pull(database_url, remote_url)
      clientxfer(:cmd_receive, database_url, remote_url)
    end

    desc "push <local_database_url> <remote_url>", "Push a database to a taps server"
    method_options(:chunksize => :numeric)
    def push(database_url, remote_url)
      clientxfer(:cmd_send, database_url, remote_url)
    end

    desc "version", "Taps version"
    def version
      puts Taps.version
    end

    # Shared implementation of pull and push.
    #
    # method       - name of the ClientSession method to invoke
    #                (:cmd_receive or :cmd_send).
    # database_url - URL of the local database.
    # remote_url   - URL of the taps server.
    #
    # The chunk size defaults to 1000; a requested value below 100 is
    # raised to 100.
    def clientxfer(method, database_url, remote_url)
      requested = options[:chunksize]

      Taps::Config.chunksize =
        if requested
          requested < 100 ? 100 : requested
        else
          1000
        end

      Taps::Config.database_url = database_url
      Taps::Config.remote_url   = remote_url

      Taps::Config.verify_database_url

      require 'taps/client_session'

      Taps::ClientSession.quickstart { |session| session.send(method) }
    end
  end
end
@@ -0,0 +1,306 @@
1
+ require 'rest_client'
2
+ require 'sequel'
3
+ require 'zlib'
4
+
5
+ require 'taps/progress_bar'
6
+ require 'taps/config'
7
+ require 'taps/utils'
8
+ require 'taps/schema'
9
+
10
module Taps
  # Client side of a taps transfer. Talks to the local database through
  # Sequel and to the remote taps server through a RestClient resource,
  # and implements the push (cmd_send) and pull (cmd_receive) sequences.
  class ClientSession
    # Raised when a chunk fetched from the server does not match the
    # checksum the server sent along with it.
    class CorruptedData < StandardError; end

    # How many times one chunk is posted before a repeated 412 response
    # is treated as fatal.
    MAX_SEND_ATTEMPTS = 3

    attr_reader :database_url, :remote_url, :default_chunksize

    # database_url      - URL of the local database (passed to Sequel).
    # remote_url        - URL of the taps server, including credentials.
    # default_chunksize - number of rows requested/sent per chunk at the
    #                     start of each table.
    def initialize(database_url, remote_url, default_chunksize)
      @database_url = database_url
      @remote_url = remote_url
      @default_chunksize = default_chunksize
    end

    # Builds a session, yields it to the block and closes the remote
    # session once the block has returned.
    def self.start(database_url, remote_url, default_chunksize, &block)
      s = new(database_url, remote_url, default_chunksize)
      yield s
      s.close_session
    end

    # Same as start, taking all three settings from Taps::Config.
    def self.quickstart(&block)
      start(Taps::Config.database_url, Taps::Config.remote_url, Taps::Config.chunksize) do |s|
        yield s
      end
    end

    # Memoized Sequel connection to the local database.
    def db
      @db ||= Sequel.connect(database_url)
    end

    # Memoized RestClient resource for the server root.
    def server
      @server ||= RestClient::Resource.new(remote_url)
    end

    # Resource of the current remote session; opened on first use.
    def session_resource
      @session_resource ||= open_session
    end

    # Creates a session on the server and returns the resource for the
    # URI the server answered with.
    def open_session
      uri = server['sessions'].post('', http_headers)
      server[uri]
    end

    # Attaches this client to an already existing remote session.
    def set_session(uri)
      @session_resource = server[uri]
    end

    # Deletes the remote session, if one was opened.
    def close_session
      @session_resource.delete(http_headers) if @session_resource
    end

    # Returns +url+ with the password of its user:password@ section
    # replaced by "[hidden]". The match is confined to the authority
    # part (nothing past the first "/" after "//"), so URLs without
    # credentials are returned unchanged even when their path contains
    # an "@".
    def safe_url(url)
      url.sub(%r{//([^/:@]*):([^/]*)@}, '//\1:[hidden]@')
    end

    def safe_remote_url
      safe_url(remote_url)
    end

    def safe_database_url
      safe_url(database_url)
    end

    # Headers sent with every request: the compatible taps version plus
    # any request-specific extras.
    def http_headers(extra = {})
      { :taps_version => Taps.compatible_version }.merge(extra)
    end

    # Pushes the local database to the server: schema, data, indexes,
    # then a sequence reset on the server side.
    def cmd_send
      with_server_error_reporting do
        verify_server
        cmd_send_schema
        cmd_send_data
        cmd_send_indexes
        cmd_send_reset_sequences
      end
    end

    def cmd_send_indexes
      puts "Sending indexes"

      index_data = Taps::Schema.indexes(database_url)
      session_resource['indexes'].post(index_data, http_headers)
    end

    def cmd_send_schema
      puts "Sending schema"

      schema_data = Taps::Schema.dump_without_indexes(database_url)
      session_resource['schema'].post(schema_data, http_headers)
    end

    def cmd_send_reset_sequences
      puts "Resetting sequences"

      session_resource["reset_sequences"].post('', http_headers)
    end

    # Sends every local table to the server in chunks. Each chunk is
    # marshalled, gzipped and posted together with its checksum.
    def cmd_send_data
      puts "Sending data"

      tables_with_counts, record_count = fetch_tables_info

      puts "#{tables_with_counts.size} tables, #{format_number(record_count)} records"

      db.tables.each do |table_name|
        table = db[table_name]
        count = table.count
        order = Taps::Utils.order_by(db, table_name)
        chunksize = self.default_chunksize
        string_columns = Taps::Utils.incorrect_blobs(db, table_name)

        progress = ProgressBar.new(table_name.to_s, count)

        offset = 0
        loop do
          row_size = 0
          # calculate_chunksize yields the chunk size to use and returns
          # the size for the next round (defined in Taps::Utils, not
          # shown here).
          chunksize = Taps::Utils.calculate_chunksize(chunksize) do |c|
            rows = Taps::Utils.format_data(table.order(*order).limit(c, offset).all, string_columns)
            # No rows left: leave calculate_chunksize; row_size stays 0,
            # which ends the outer loop below.
            break if rows == { }

            row_size = rows[:data].size
            gzip_data = Taps::Utils.gzip(Marshal.dump(rows))

            attempts = 0
            begin
              attempts += 1
              session_resource["tables/#{table_name}"].post(gzip_data, http_headers({
                :content_type => 'application/octet-stream',
                :taps_checksum => Taps::Utils.checksum(gzip_data).to_s}))
            rescue RestClient::RequestFailed => e
              # 412: the data got corrupted somehow, so post the very
              # same chunk again. `retry` re-runs the begin block; a
              # `next` here would merely leave the enclosing block and
              # the offset would advance past a chunk the server never
              # accepted.
              retry if e.http_code == 412 && attempts < MAX_SEND_ATTEMPTS
              raise
            end
          end

          progress.inc(row_size)
          offset += row_size

          break if row_size == 0
        end

        progress.finish
      end
    end

    # Returns [counts_by_table, total_records] for the local database.
    def fetch_tables_info
      record_count = 0
      tables = db.tables
      tables_with_counts = tables.inject({}) do |accum, table|
        accum[table] = db[table].count
        record_count += accum[table]
        accum
      end

      [ tables_with_counts, record_count ]
    end

    # Pulls the remote database: schema, data, indexes, then a sequence
    # reset on the local database.
    def cmd_receive
      with_server_error_reporting do
        verify_server
        cmd_receive_schema
        cmd_receive_data
        cmd_receive_indexes
        cmd_reset_sequences
      end
    end

    # Fetches every remote table in chunks and imports the rows locally.
    def cmd_receive_data
      puts "Receiving data"

      tables_with_counts, record_count = fetch_remote_tables_info

      puts "#{tables_with_counts.size} tables, #{format_number(record_count)} records"

      tables_with_counts.each do |table_name, count|
        table = db[table_name.to_sym]
        chunksize = default_chunksize

        progress = ProgressBar.new(table_name.to_s, count)

        offset = 0
        loop do
          begin
            chunksize, rows = fetch_table_rows(table_name, chunksize, offset)
          rescue CorruptedData
            # Checksum mismatch: request the same offset again. This
            # retry is not bounded.
            next
          end
          break if rows == { }

          table.import(rows[:header], rows[:data])

          progress.inc(rows[:data].size)
          offset += rows[:data].size
        end

        progress.finish
      end
    end

    # Fetches one chunk of +table_name+ starting at +offset+.
    #
    # Returns [next_chunksize, rows]. The loop in cmd_receive_data
    # treats an empty Hash as the end of the table.
    #
    # Raises CorruptedData when the payload fails checksum validation.
    # Any error while unpacking the payload is re-raised after the raw
    # chunk has been written to dump.<pid>.gz.
    def fetch_table_rows(table_name, chunksize, offset)
      response = nil
      chunksize = Taps::Utils.calculate_chunksize(chunksize) do |c|
        response = session_resource["tables/#{table_name}/#{c}?offset=#{offset}"].get(http_headers)
      end
      raise CorruptedData unless Taps::Utils.valid_data?(response.to_s, response.headers[:taps_checksum])

      begin
        # NOTE(review): Marshal.load runs on data supplied by the remote
        # server; only pull from servers you trust.
        rows = Marshal.load(Taps::Utils.gunzip(response.to_s))
      rescue StandardError
        puts "Error encountered loading data, wrote the data chunk to dump.#{Process.pid}.gz"
        # Binary mode: the chunk is gzip data and must not go through
        # newline translation.
        File.open("dump.#{Process.pid}.gz", "wb") { |f| f.write(response.to_s) }
        raise
      end
      [chunksize, rows]
    end

    # Returns [counts_by_table, total_records] as reported by the
    # server. The request is retried once; if it still fails a message
    # is printed and the process exits with status 1.
    def fetch_remote_tables_info
      retries = 0
      max_retries = 1
      begin
        # NOTE(review): Marshal.load on a server response; see
        # fetch_table_rows.
        tables_with_counts = Marshal.load(session_resource['tables'].get(http_headers))
        record_count = tables_with_counts.values.inject(0) { |sum, count| sum + count }
      rescue RestClient::Exception
        retries += 1
        retry if retries <= max_retries
        puts "Unable to fetch tables information from #{remote_url}. Please check the server log."
        exit(1)
      end

      [ tables_with_counts, record_count ]
    end

    def cmd_receive_schema
      puts "Receiving schema"

      schema_data = session_resource['schema'].get(http_headers)
      output = Taps::Schema.load(database_url, schema_data)
      puts output if output
    end

    def cmd_receive_indexes
      puts "Receiving indexes"

      index_data = session_resource['indexes'].get(http_headers)

      output = Taps::Schema.load_indexes(database_url, index_data)
      puts output if output
    end

    def cmd_reset_sequences
      puts "Resetting sequences"

      output = Taps::Schema.reset_db_sequences(database_url)
      puts output if output
    end

    # Formats an integer with thousands separators: 1234567 -> "1,234,567".
    def format_number(num)
      num.to_s.gsub(/(\d)(?=(\d\d\d)+(?!\d))/, "\\1,")
    end

    # Checks that the server is reachable, accepts the credentials and
    # runs a compatible taps version. Prints a message and exits with
    # status 1 on bad credentials, a refused connection or a 417
    # response; other request failures are re-raised.
    def verify_server
      begin
        server['/'].get(http_headers)
      rescue RestClient::Unauthorized
        puts "Bad credentials given for #{safe_remote_url}"
        exit(1)
      rescue RestClient::RequestFailed => e
        if e.http_code == 417
          puts "#{safe_remote_url} is running a different minor version of taps."
          puts "#{e.response.body}"
          exit(1)
        else
          raise
        end
      rescue Errno::ECONNREFUSED
        puts "Can't connect to #{safe_remote_url}. Please check that it's running"
        exit(1)
      end
    end

    private

    # Runs the block. A RestClient exception that carries a response
    # has its body printed and ends the process with status 1; one
    # without a response is re-raised unchanged.
    def with_server_error_reporting
      yield
    rescue RestClient::Exception => e
      raise unless e.respond_to?(:response)

      puts "!!! Caught Server Exception"
      puts "#{e.response.body}"
      exit(1)
    end
  end
end