matthewtodd-taps 0.2.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2008 Ricardo Chimal, Jr
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,41 @@
1
+ = Taps -- simple database import/export app
2
+
3
+ A simple database agnostic import/export app to transfer data to/from a remote database.
4
+
5
+ == Usage: Server
6
+
7
+ Here's how you start a taps server
8
+
9
+ $ taps server postgres://localdbuser:localdbpass@localhost/dbname httpuser httppassword
10
+
11
+ You can also specify an encoding in the database url
12
+
13
+ $ taps server mysql://localdbuser:localdbpass@localhost/dbname?encoding=latin1 httpuser httppassword
14
+
15
+ == Usage: Client
16
+
17
+ When you want to pull down a database from a taps server
18
+
19
+ $ taps pull postgres://dbuser:dbpassword@localhost/dbname http://httpuser:httppassword@example.com:5000
20
+
21
+ or when you want to push a local database to a taps server
22
+
23
+ $ taps push postgres://dbuser:dbpassword@localhost/dbname http://httpuser:httppassword@example.com:5000
24
+
25
+ == Known Issues
26
+
27
+ * Blob data may not transfer properly; I suspect that SQLite3 is modifying some native Ruby objects.
28
+ * Foreign Keys get lost in the schema transfer
29
+ * Large tables (>1 million rows with a large number of columns) get slower as the offset gets larger, because queries with large offset values are inefficient.
30
+
31
+ == Meta
32
+
33
+ Maintained by Ricardo Chimal, Jr. (ricardo at heroku dot com)
34
+
35
+ Written by Ricardo Chimal, Jr. (ricardo at heroku dot com) and Adam Wiggins (adam at heroku dot com)
36
+
37
+ Early research and inspiration by Blake Mizerany
38
+
39
+ Released under the MIT License: http://www.opensource.org/licenses/mit-license.php
40
+
41
+ http://github.com/ricardochimal/taps
@@ -0,0 +1,61 @@
1
# Gem packaging tasks. They are only defined when the jeweler gem can be
# loaded; otherwise a hint is printed and the remaining tasks still load.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |spec|
    spec.name = "matthewtodd-taps"
    spec.summary = "simple database import/export app"
    spec.email = "ricardo@heroku.com"
    spec.homepage = "http://github.com/matthewtodd/taps"
    spec.description = "A simple database agnostic import/export app to transfer data to/from a remote database."
    spec.authors = ["Ricardo Chimal, Jr.", "Adam Wiggins"]

    spec.add_dependency 'sinatra', '= 0.9.2'
    spec.add_dependency 'activerecord', '~> 2.3.4'
    spec.add_dependency 'thor', '= 0.9.9'
    spec.add_dependency 'rest-client', '>= 1.0.1', '< 1.1.0'
    spec.add_dependency 'sequel', '>= 3.0.0', '< 3.1.0'
    spec.add_dependency 'sqlite3-ruby', '~> 1.2.0'

    spec.rubyforge_project = "taps"
    spec.rubygems_version = '1.3.1'

    spec.files = FileList['spec/*.rb'] + FileList['lib/**/*.rb'] + ['README.rdoc', 'LICENSE', 'VERSION.yml', 'Rakefile'] + FileList['bin/*']
    spec.executables = ['taps', 'schema']
  end
rescue LoadError => e
  # Distinguish "jeweler itself is missing" from "something jeweler needs is missing".
  hint =
    if e.message =~ /jeweler/
      "Jeweler not available. Install it with: sudo gem install technicalpickles-jeweler -s http://gems.github.com"
    else
      "#{e.message} -- while loading jeweler."
    end
  puts hint
end

# API documentation built from the README and everything under lib/.
require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = 'taps'
  rdoc.options << '--line-numbers' << '--inline-source'
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end

# Coverage task over the spec files; optional, only defined when rcov is installed.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |coverage|
    coverage.libs << 'spec'
    coverage.test_files = FileList['spec/*_spec.rb']
    coverage.verbose = true
  end
rescue LoadError
  puts "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
end

desc "Run all specs; requires the bacon gem"
task :spec do
  # `which` prints nothing when the bacon executable is not on the PATH.
  if `which bacon`.empty?
    puts "bacon is not available. In order to run the specs, you must: sudo gem install bacon."
    next
  end

  system "bacon #{File.dirname(__FILE__)}/spec/*_spec.rb"
end

task :default => :spec
@@ -0,0 +1,4 @@
1
+ ---
2
+ :patch: 19
3
+ :major: 0
4
+ :minor: 2
@@ -0,0 +1,42 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'taps/schema'
4
+
5
# First two command line arguments: the sub-command and the database URL.
# A missing argument becomes an empty string (nil.to_s == ''), which then
# falls through to the usage message or is handed to Taps::Schema as-is.
cmd = ARGV.shift.to_s.strip
database_url = ARGV.shift.to_s.strip

# Prints the list of supported sub-commands and terminates with exit status 1.
def show_usage_and_exit
  puts <<EOTXT
schema console <database_url>
schema dump <database_url>
schema indexes <database_url>
schema reset_db_sequences <database_url>
schema load <database_url> <schema_file>
schema load_indexes <database_url> <indexes_file>
EOTXT
  exit(1)
end

# Takes the next command line argument as a file name and returns that file's
# contents. A missing or unreadable file shows the usage text and exits; only
# file-system errors are rescued so unrelated failures are not hidden.
def read_file_argument_or_exit
  filename = ARGV.shift.to_s.strip
  File.read(filename)
rescue SystemCallError, IOError
  show_usage_and_exit
end

case cmd
when 'dump'
  puts Taps::Schema.dump_without_indexes(database_url)
when 'indexes'
  puts Taps::Schema.indexes(database_url)
when 'load_indexes'
  indexes = read_file_argument_or_exit
  Taps::Schema.load_indexes(database_url, indexes)
when 'load'
  schema = read_file_argument_or_exit
  Taps::Schema.load(database_url, schema)
when 'reset_db_sequences'
  Taps::Schema.reset_db_sequences(database_url)
when 'console'
  # Open an IRB session with the ActiveRecord connection exposed as $db.
  Taps::Schema.connection(database_url)
  $db = ActiveRecord::Base.connection
  require 'irb'
  require 'irb/completion'
  IRB.start
else
  show_usage_and_exit
end
@@ -0,0 +1,6 @@
1
+ @ECHO OFF
2
+ IF NOT "%~f0" == "~f0" GOTO :WinNT
3
+ @"ruby.exe" "./schema" %1 %2 %3 %4 %5 %6 %7 %8 %9
4
+ GOTO :EOF
5
+ :WinNT
6
+ @"ruby.exe" "%~dpn0" %*
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'taps/cli'
4
+
5
+ Taps::Cli.start
@@ -0,0 +1,21 @@
1
module Taps
  # Registry of the monkey patches ("hacks") that are required for a given
  # database adapter.
  module AdapterHacks
    extend self

    # Hack file names (under taps/adapter_hacks/) keyed by adapter name.
    # The :all entry is loaded for every adapter.
    LIST = {
      :all => ['non_rails_schema_dump'],
      :mysql => ['invalid_text_limit', 'mysql_invalid_primary_key'],
      :postgresql => ['invalid_text_limit', 'invalid_binary_limit']
    }

    # Requires the hacks shared by all adapters, then the ones registered for
    # +adapter+ (anything responding to #to_sym). Adapters without an entry in
    # LIST only get the shared hacks. Returns the adapter-specific list.
    def load(adapter)
      LIST[:all].each { |hack| require "taps/adapter_hacks/#{hack}" }

      adapter_specific = LIST.fetch(adapter.to_sym) { [] }
      adapter_specific.each { |hack| require "taps/adapter_hacks/#{hack}" }
    end
  end
end
@@ -0,0 +1,13 @@
1
module ActiveRecord
  module ConnectionAdapters
    class TableDefinition
      # Keep the stock implementation reachable under another name.
      alias_method :original_binary, :binary

      # Declares one 'binary' column per given name, passing the trailing
      # options hash through with any :limit entry removed.
      def binary(*args)
        options = args.extract_options!
        options.delete(:limit)
        args.each do |column_name|
          column(column_name, 'binary', options)
        end
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
module ActiveRecord
  module ConnectionAdapters
    class TableDefinition
      # Keep the stock implementation reachable under another name.
      alias_method :original_text, :text

      # Declares one 'text' column per given name, passing the trailing
      # options hash through with any :limit entry removed.
      def text(*args)
        options = args.extract_options!
        options.delete(:limit)
        args.each do |column_name|
          column(column_name, 'text', options)
        end
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
module ActiveRecord
  module ConnectionAdapters
    class MysqlAdapter < AbstractAdapter
      # Keep the stock implementation reachable under another name.
      alias_method :orig_pk_and_sequence_for, :pk_and_sequence_for

      # MySQL accepts varchar primary keys, but most other databases do not.
      # A column is therefore only reported as the primary key when MySQL
      # flags it as "PRI" and its type is some kind of integer.
      #
      # Returns [column_name, nil] when exactly one such column exists and
      # nil otherwise.
      def pk_and_sequence_for(table)
        integer_keys = []
        execute("describe #{quote_table_name(table)}").each_hash do |column|
          next unless column["Key"] == "PRI"
          next if (column["Type"] =~ /int/).nil?

          integer_keys << column["Field"]
        end

        [integer_keys.first, nil] if integer_keys.length == 1
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
module ActiveRecord
  # Overrides of the schema dumper's header and table listing.
  class SchemaDumper
    private

    # Writes the opening line of the schema definition to +stream+.
    def header(stream)
      stream.puts("ActiveRecord::Schema.define do")
    end

    # Dumps every table reported by the connection, in sorted name order,
    # by delegating each one to #table.
    def tables(stream)
      sorted_names = @connection.tables.sort
      sorted_names.each { |name| table(name, stream) }
    end
  end
end
@@ -0,0 +1,62 @@
1
+ require 'thor'
2
+ require 'taps/config'
3
+
4
# Default location of taps' own database unless TAPS_DATABASE_URL overrides it.
Taps::Config.taps_database_url = ENV.fetch('TAPS_DATABASE_URL', 'sqlite://taps.db')

module Taps
  # Thor command line interface: `server`, `pull`, `push` and `version`.
  class Cli < Thor
    desc "server <local_database_url> <login> <password>", "Start a taps database import/export server"
    method_options(:port => :numeric)
    # Stores the database URL and HTTP credentials in Taps::Config, verifies
    # the URL, then runs Taps::Server on --port (5000 when not given).
    def server(database_url, login, password)
      config = Taps::Config
      config.database_url = database_url
      config.login = login
      config.password = password

      server_options = {
        :port => options[:port] || 5000,
        :environment => :production,
        :logging => true
      }

      config.verify_database_url

      # Loaded here rather than at the top so only this command pulls it in.
      require 'taps/server'
      Taps::Server.run!(server_options)
    end

    desc "pull <local_database_url> <remote_url>", "Pull a database from a taps server"
    method_options(:chunksize => :numeric)
    def pull(database_url, remote_url)
      clientxfer(:cmd_receive, database_url, remote_url)
    end

    desc "push <local_database_url> <remote_url>", "Push a database to a taps server"
    method_options(:chunksize => :numeric)
    def push(database_url, remote_url)
      clientxfer(:cmd_send, database_url, remote_url)
    end

    desc "version", "Taps version"
    def version
      puts Taps.version
    end

    # Shared implementation of pull and push: configures the chunk size
    # (1000 by default, never below 100) and both URLs, verifies the
    # database URL and invokes +method+ on a quick-started client session.
    def clientxfer(method, database_url, remote_url)
      requested = options[:chunksize]
      Taps::Config.chunksize =
        if !requested
          1000
        elsif requested < 100
          100
        else
          requested
        end
      Taps::Config.database_url = database_url
      Taps::Config.remote_url = remote_url

      Taps::Config.verify_database_url

      require 'taps/client_session'

      Taps::ClientSession.quickstart { |session| session.send(method) }
    end
  end
end
@@ -0,0 +1,306 @@
1
+ require 'rest_client'
2
+ require 'sequel'
3
+ require 'zlib'
4
+
5
+ require 'taps/progress_bar'
6
+ require 'taps/config'
7
+ require 'taps/utils'
8
+ require 'taps/schema'
9
+
10
module Taps
  # Client side of a taps transfer. It talks to a remote taps server through
  # a RestClient resource and to the local database through Sequel, and can
  # either push the local database to the server (#cmd_send) or pull the
  # remote one down (#cmd_receive).
  class ClientSession
    # Raised by #fetch_table_rows when a downloaded chunk fails validation
    # against the checksum header sent with it. Derives from StandardError so
    # that it behaves like an ordinary application error.
    class CorruptedData < StandardError; end

    # How many times a chunk the server rejected with HTTP 412 is posted
    # again before the error is allowed to propagate.
    MAX_CHUNK_RESENDS = 5

    attr_reader :database_url, :remote_url, :default_chunksize

    # database_url::      Sequel connection URL of the local database
    # remote_url::        base URL of the taps server
    # default_chunksize:: number of rows to start each table transfer with
    def initialize(database_url, remote_url, default_chunksize)
      @database_url = database_url
      @remote_url = remote_url
      @default_chunksize = default_chunksize
    end

    # Builds a session, yields it, and closes the remote session afterwards.
    # Returns the result of #close_session.
    #
    # If the block raises (or calls exit) the session is still closed on a
    # best-effort basis and the original exception is re-raised, so a failed
    # transfer does not leave its session open on the server.
    def self.start(database_url, remote_url, default_chunksize, &block)
      session = new(database_url, remote_url, default_chunksize)
      begin
        yield session
      rescue Exception
        begin
          session.close_session
        rescue StandardError
          # A cleanup failure must not replace the error that got us here.
        end
        raise
      end
      session.close_session
    end

    # Same as .start, with all three settings taken from Taps::Config.
    def self.quickstart(&block)
      start(Taps::Config.database_url, Taps::Config.remote_url, Taps::Config.chunksize) do |s|
        yield s
      end
    end

    # Memoized Sequel connection to the local database.
    def db
      @db ||= Sequel.connect(database_url)
    end

    # Memoized RestClient resource for the taps server.
    def server
      @server ||= RestClient::Resource.new(remote_url)
    end

    # Resource of the current remote session; opened on first use.
    def session_resource
      @session_resource ||= open_session
    end

    # Creates a session on the server and returns the resource for the URI
    # the server answered with.
    def open_session
      uri = server['sessions'].post('', http_headers)
      server[uri]
    end

    # Attaches this client to an already existing session URI.
    def set_session(uri)
      @session_resource = server[uri]
    end

    # Deletes the remote session, if one was opened or set.
    def close_session
      @session_resource.delete(http_headers) if @session_resource
    end

    # Returns +url+ with the password of its credentials part replaced by
    # "[hidden]", suitable for printing.
    def safe_url(url)
      url.sub(/\/\/(.+?)?:(.*?)@/, '//\1:[hidden]@')
    end

    def safe_remote_url
      safe_url(remote_url)
    end

    def safe_database_url
      safe_url(database_url)
    end

    # Headers sent with every request: the compatible taps version plus
    # whatever is given in +extra+.
    def http_headers(extra = {})
      { :taps_version => Taps.compatible_version }.merge(extra)
    end

    # Pushes the local database to the server: schema, data, indexes and
    # finally a sequence reset. Server errors are printed and end the
    # process with exit status 1.
    def cmd_send
      report_server_errors do
        verify_server
        cmd_send_schema
        cmd_send_data
        cmd_send_indexes
        cmd_send_reset_sequences
      end
    end

    def cmd_send_indexes
      puts "Sending indexes"

      index_data = Taps::Schema.indexes(database_url)
      session_resource['indexes'].post(index_data, http_headers)
    end

    def cmd_send_schema
      puts "Sending schema"

      schema_data = Taps::Schema.dump_without_indexes(database_url)
      session_resource['schema'].post(schema_data, http_headers)
    end

    def cmd_send_reset_sequences
      puts "Resetting sequences"

      session_resource["reset_sequences"].post('', http_headers)
    end

    # Sends every local table to the server in chunks. Each chunk is
    # marshalled, gzipped and posted together with its checksum; the chunk
    # size is re-tuned by Taps::Utils.calculate_chunksize after every chunk.
    def cmd_send_data
      puts "Sending data"

      tables_with_counts, record_count = fetch_tables_info

      puts "#{tables_with_counts.size} tables, #{format_number(record_count)} records"

      db.tables.each do |table_name|
        table = db[table_name]
        count = table.count
        order = Taps::Utils.order_by(db, table_name)
        chunksize = default_chunksize
        string_columns = Taps::Utils.incorrect_blobs(db, table_name)

        progress = ProgressBar.new(table_name.to_s, count)

        offset = 0
        loop do
          row_size = 0
          chunksize = Taps::Utils.calculate_chunksize(chunksize) do |c|
            rows = Taps::Utils.format_data(table.order(*order).limit(c, offset).all, string_columns)
            # An empty result means the table is exhausted; row_size stays 0
            # and the surrounding loop ends below.
            break if rows == { }

            row_size = rows[:data].size
            gzip_data = Taps::Utils.gzip(Marshal.dump(rows))

            # Rejected chunks are re-posted inside the helper, so the offset
            # only ever advances past data the server accepted.
            post_table_chunk(table_name, gzip_data)
          end

          progress.inc(row_size)
          offset += row_size

          break if row_size == 0
        end

        progress.finish
      end
    end

    # Returns [{ table_name => row_count }, total_row_count] for the local
    # database.
    def fetch_tables_info
      record_count = 0
      tables = db.tables
      tables_with_counts = tables.inject({}) do |accum, table|
        accum[table] = db[table].count
        record_count += accum[table]
        accum
      end

      [ tables_with_counts, record_count ]
    end

    # Pulls the remote database: schema, data, indexes and finally a local
    # sequence reset. Server errors are printed and end the process with
    # exit status 1.
    def cmd_receive
      report_server_errors do
        verify_server
        cmd_receive_schema
        cmd_receive_data
        cmd_receive_indexes
        cmd_reset_sequences
      end
    end

    # Downloads every remote table chunk by chunk and imports the rows into
    # the local table of the same name.
    def cmd_receive_data
      puts "Receiving data"

      tables_with_counts, record_count = fetch_remote_tables_info

      puts "#{tables_with_counts.size} tables, #{format_number(record_count)} records"

      tables_with_counts.each do |table_name, count|
        table = db[table_name.to_sym]
        chunksize = default_chunksize

        progress = ProgressBar.new(table_name.to_s, count)

        offset = 0
        loop do
          begin
            chunksize, rows = fetch_table_rows(table_name, chunksize, offset)
          rescue CorruptedData
            # The offset has not moved, so the next iteration requests the
            # same chunk again.
            next
          end
          break if rows == { }

          table.import(rows[:header], rows[:data])

          progress.inc(rows[:data].size)
          offset += rows[:data].size
        end

        progress.finish
      end
    end

    # Fetches one chunk of +table_name+ starting at +offset+ and returns
    # [new_chunksize, rows].
    #
    # Raises CorruptedData when the body does not validate against the
    # taps_checksum response header. If the body cannot be unpacked it is
    # written to dump.<pid>.gz for inspection and the error is re-raised.
    def fetch_table_rows(table_name, chunksize, offset)
      response = nil
      chunksize = Taps::Utils.calculate_chunksize(chunksize) do |c|
        response = session_resource["tables/#{table_name}/#{c}?offset=#{offset}"].get(http_headers)
      end
      raise CorruptedData unless Taps::Utils.valid_data?(response.to_s, response.headers[:taps_checksum])

      begin
        # NOTE(review): Marshal.load runs on bytes supplied by the remote
        # server; only connect to servers you trust.
        rows = Marshal.load(Taps::Utils.gunzip(response.to_s))
      rescue StandardError
        puts "Error encountered loading data, wrote the data chunk to dump.#{Process.pid}.gz"
        # Binary mode: the chunk is gzip data and must be written unmodified.
        File.open("dump.#{Process.pid}.gz", "wb") { |f| f.write(response.to_s) }
        raise
      end
      [chunksize, rows]
    end

    # Returns [{ table_name => row_count }, total_row_count] as reported by
    # the server. The request is retried once; after that a message is
    # printed and the process exits with status 1.
    def fetch_remote_tables_info
      retries = 0
      max_retries = 1
      begin
        # NOTE(review): Marshal.load on a server response, see #fetch_table_rows.
        tables_with_counts = Marshal.load(session_resource['tables'].get(http_headers))
        record_count = tables_with_counts.values.inject(0) { |a, c| a + c }
      rescue RestClient::Exception
        retries += 1
        retry if retries <= max_retries
        puts "Unable to fetch tables information from #{remote_url}. Please check the server log."
        exit(1)
      end

      [ tables_with_counts, record_count ]
    end

    def cmd_receive_schema
      puts "Receiving schema"

      schema_data = session_resource['schema'].get(http_headers)
      output = Taps::Schema.load(database_url, schema_data)
      puts output if output
    end

    def cmd_receive_indexes
      puts "Receiving indexes"

      index_data = session_resource['indexes'].get(http_headers)

      output = Taps::Schema.load_indexes(database_url, index_data)
      puts output if output
    end

    def cmd_reset_sequences
      puts "Resetting sequences"

      output = Taps::Schema.reset_db_sequences(database_url)
      puts output if output
    end

    # Formats an integer with thousands separators, e.g. 1234567 -> "1,234,567".
    def format_number(num)
      num.to_s.gsub(/(\d)(?=(\d\d\d)+(?!\d))/, "\\1,")
    end

    # Checks that the server is reachable, accepts the credentials and runs
    # a compatible taps version. Each of those failures prints a message and
    # exits with status 1; any other request failure is re-raised.
    def verify_server
      begin
        server['/'].get(http_headers)
      rescue RestClient::Unauthorized
        # Listed before RequestFailed so this branch stays reachable even
        # with RestClient versions where Unauthorized is a RequestFailed.
        puts "Bad credentials given for #{safe_remote_url}"
        exit(1)
      rescue RestClient::RequestFailed => e
        if e.http_code == 417
          puts "#{safe_remote_url} is running a different minor version of taps."
          puts "#{e.response.body}"
          exit(1)
        else
          raise
        end
      rescue Errno::ECONNREFUSED
        puts "Can't connect to #{safe_remote_url}. Please check that it's running"
        exit(1)
      end
    end

    private

    # Runs the block and turns a RestClient exception that carries a server
    # response into a printed message followed by exit(1). Exceptions
    # without a response are re-raised untouched.
    def report_server_errors
      yield
    rescue RestClient::Exception => e
      if e.respond_to?(:response)
        puts "!!! Caught Server Exception"
        puts "#{e.response.body}"
        exit(1)
      else
        raise
      end
    end

    # Posts one gzipped chunk for +table_name+ together with its checksum.
    # When the server answers HTTP 412 (the data got corrupted on the way)
    # the very same chunk is posted again, at most MAX_CHUNK_RESENDS times;
    # every other failure, and a 412 beyond that limit, is re-raised.
    def post_table_chunk(table_name, gzip_data)
      headers = http_headers({
        :content_type => 'application/octet-stream',
        :taps_checksum => Taps::Utils.checksum(gzip_data).to_s})
      resends = 0
      begin
        session_resource["tables/#{table_name}"].post(gzip_data, headers)
      rescue RestClient::RequestFailed => e
        resends += 1
        retry if e.http_code == 412 && resends <= MAX_CHUNK_RESENDS
        raise
      end
    end
  end
end