wyrm 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/.rvmrc +1 -0
- data/Gemfile +8 -0
- data/LICENSE.txt +22 -0
- data/README.md +71 -0
- data/Rakefile +1 -0
- data/lib/wyrm.rb +8 -0
- data/lib/wyrm/db_pump.rb +410 -0
- data/lib/wyrm/dump_schema.rb +111 -0
- data/lib/wyrm/other_schema.rb +6 -0
- data/lib/wyrm/restore_schema.rb +64 -0
- data/lib/wyrm/transferer.rb +32 -0
- data/lib/wyrm/version.rb +3 -0
- data/wyrm.gemspec +23 -0
- metadata +86 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: dc95033a6576cd33f78d345b18f08564317c0bc1
+  data.tar.gz: 2c046fa329dc3cef258d1185ab390b3106e9fc1a
+SHA512:
+  metadata.gz: 07a728780ea1121ca6784ef83059a0fad6c5218c793d3a8e5033203a6ba60fb435587c1e65f5ff1e3a184a18a2d6ad87c4bf1b2a1e300bc056ded1662ff4f225
+  data.tar.gz: 94a9579c0070a612439a428e4379874671b4aee41d89bb1bb7c6e1a930838fa278f7a178435fc41a6e3cbf8ad98d16824c168f1e8edcc33576d158b92e206a2f
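These are the standard RubyGems digests: SHA1 and SHA512 of the metadata.gz and data.tar.gz entries inside the .gem archive. A minimal sketch (not part of the gem) of reproducing them locally, assuming the gem has already been unpacked with `tar -xf wyrm-0.1.0.gem`:

```ruby
require 'digest'

# metadata.gz and data.tar.gz come from unpacking wyrm-0.1.0.gem,
# which is a plain tar archive containing both entries.
%w[metadata.gz data.tar.gz].each do |entry|
  bytes = File.binread(entry)
  puts "#{entry} SHA1:   #{Digest::SHA1.hexdigest(bytes)}"
  puts "#{entry} SHA512: #{Digest::SHA512.hexdigest(bytes)}"
end
```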
data/.gitignore
ADDED
data/.rvmrc
ADDED
@@ -0,0 +1 @@
+rvm 2.0.0@wyrm --create
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
+Copyright (c) 2013 John Anderson
+
+MIT License
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md
ADDED
@@ -0,0 +1,71 @@
+# Wyrm
+
+Transfer data from one database to another. Has been used to dump > 100M dbs,
+and one 850G db. Should theoretically work for any dbs supported by Sequel.
+
+Currently transfers tables and views only. Does not attempt to transfer
+stored procs, permissions, triggers etc.
+
+Works best for tables that have single numeric primary keys, but should also
+handle compound primary keys and tables without primary keys.
+
+Wyrm because:
+
+- I like dragons
+- I can have a Wyrm::Hole to transfer data through :-D
+
+## Installation
+
+Add this line to your application's Gemfile:
+
+    gem 'wyrm'
+
+And then execute:
+
+    $ bundle
+
+Or install it yourself as:
+
+    $ gem install wyrm
+
+Make sure you install the db gems, typically
+
+    $ gem install pg mysql2
+
+## Usage
+
+This is mostly a toolkit right now. To transfer from mysql to postgres do:
+```ruby
+require 'sequel'
+require 'pathname'
+
+# on the source host
+# dump tables from mysql
+require 'gbump/dump_schema'
+src_db = Sequel.connect "mysql2://localhost/lots"
+ds = DumpSchema.new src_db, Pathname('/tmp/lots')
+ds.dump_schema
+
+# this might take a while ;-)
+ds.dump_tables
+
+# transfer data. Already compressed, so no -z
+# rsync -var /tmp/lots user@host:/var/data/
+
+# on the destination host
+# restore tables to postgres
+require 'gbump/restore_schema'
+dst_db = Sequel.connect "postgres://localhost/lots"
+rs = RestoreSchema.new dst_db, Pathname('/var/data/lots')
+rs.create
+rs.restore_tables
+rs.index
+```
+
+## Contributing
+
+1. Fork it
+2. Create your feature branch (`git checkout -b my-new-feature`)
+3. Commit your changes (`git commit -am 'Add some feature'`)
+4. Push to the branch (`git push origin my-new-feature`)
+5. Create new Pull Request
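The README leaves the database adapters up to you; the gemspec further down declares no runtime dependencies, so Sequel and the adapters have to be added explicitly. A minimal Gemfile sketch for the mysql-to-postgres example above (adapter choice is an assumption, pick whatever matches your source and destination):

```ruby
# Gemfile -- sketch only, not shipped with the gem
source 'https://rubygems.org'

gem 'wyrm'
gem 'sequel'
gem 'mysql2'  # source:      mysql2://localhost/lots
gem 'pg'      # destination: postgres://localhost/lots
```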
data/Rakefile
ADDED
@@ -0,0 +1 @@
+require "bundler/gem_tasks"
data/lib/wyrm.rb
ADDED
data/lib/wyrm/db_pump.rb
ADDED
@@ -0,0 +1,410 @@
+require 'sequel'
+require 'yaml'
+require 'ostruct'
+require 'logger'
+require 'fastandand'
+
+Sequel.extension :migration, :schema_dumper, :pagination
+
+
+# TODO possibly use Gem::Package::TarWriter to write tar files
+# TODO when restoring, could use a SizeQueue to make sure the db is kept busy
+
+# TODO need to version the dumps, or something like that.
+class DbPump
+  class RespondsTo
+    def initialize( *methods )
+      @methods = methods
+    end
+
+    def ===( instance )
+      @methods.all?{|m| instance.respond_to? m}
+    end
+  end
+
+  def initialize( codec = :marshal )
+    @codec =
+      case codec
+      when :yaml; YamlCodec.new
+      when :marshal; MarshalCodec.new
+      when Class
+        codec.new
+      when RespondsTo.new( :encode, :decode )
+        codec
+      else
+        raise "unknown codec #{codec}"
+      end
+  end
+
+  attr_reader :codec
+
+  # TODO could use msgpack as serialization here, but its API is unpleasant.
+
+  class MarshalCodec
+    def encode( obj, io )
+      Marshal.dump obj, io
+    end
+
+    def decode( io, &block )
+      obj = Marshal.load(io)
+      yield obj if block_given?
+      obj
+    end
+  end
+
+  class MsgPackCodec
+    def encode( obj, io )
+      Marshal.dump obj, io
+    end
+
+    def decode( io, &block )
+      obj = Marshal.load(io)
+      yield obj if block_given?
+      obj
+    end
+  end
+
+  class YamlCodec
+    def encode( obj, io )
+      YAML.dump obj, io
+    end
+
+    def decode( io, &block )
+      obj = YAML.load(io)
+      yield obj if block_given?
+      obj
+    end
+  end
+
+  def logger
+    @logger ||= Logger.new STDERR
+  end
+
+  def primary_keys( db, table_name )
+    db.schema(table_name).select{|df| df.last[:primary_key]}.map{|df| df.first}
+  end
+
+  # TODO possibly use select from outer / inner join to
+  # http://www.numerati.com/2012/06/26/reading-large-result-sets-with-hibernate-and-mysql/
+  # because mysql is useless
+  def paginated_dump( table_name, options = {} )
+    options = OpenStruct.new( {io: STDOUT, page_size: 10000, dry_run: false}.merge( options.to_h ) )
+    pk = primary_keys options.db, table_name
+    options.db[table_name].order(*pk).each_page(options[:page_size]) do |page|
+      logger.info page.sql
+      page.each do |row|
+        unless options[:dry_run]
+          codec.encode row.values, options.io
+        end
+      end
+    end
+    options.io.flush
+  end
+
+  # have to use this for non-integer pks
+  # The idea is that large offsets are expensive in the db because the db server has to read
+  # through the data set to reach the required offset. So make that only ids, and then
+  # do the main select from the limited id list.
+  # TODO could speed this up by have a query thread which runs the next page-query while
+  # the current one is being written/compressed.
+  # select * from massive as full
+  #   inner join (select id from massive order by whatever limit m, n) limit
+  #   on full.id = limit.id
+  # order by full.whatever
+  def inner_dump( table_name, options = {} )
+    options = OpenStruct.new( {io: STDOUT, page_size: 10000, dry_run: false}.merge( options.to_h ) )
+    pk = primary_keys options.db, table_name
+
+    table_dataset = options.db[table_name]
+    # could possibly overrride Dataset#paginate(page_no, page_size, record_count=nil)
+    0.step(table_dataset.count, options.page_size).each do |offset|
+      limit_dataset = table_dataset.select( *pk ).limit( options.page_size, offset ).order( *pk )
+      page = table_dataset.join( limit_dataset, Hash[ pk.map{|f| [f,f]} ] ).order( *pk ).qualify_to(table_name)
+      logger.info page.sql
+      page.each do |row|
+        unless options[:dry_run]
+          codec.encode row.values, options.io
+        end
+      end
+    end
+    options.io.flush
+  end
+
+  # TODO need to also dump a first row containing useful stuff:
+  # - source table name
+  # - number of rows
+  # - source db url
+  # - permissions?
+  # These should all be in one object that can be Marshall.load-ed easily.
+  def dump( table_name, options = {} )
+    pk = primary_keys options[:db], table_name
+    case
+    when pk.empty?
+      paginated_dump( table_name, options )
+    when pk.all?{|i| i == :id }
+      min_max_dump( table_name, options )
+    else
+      inner_dump( table_name, options )
+    end
+  end
+
+  # could use this for integer pks
+  def min_max_dump( table_name, options = {} )
+    # select max(id), min(id) from patents
+    # and then split that up into 10000 size chunks. Not really important if there aren't exactly 10000
+    options = OpenStruct.new( {io: STDOUT, page_size: 10000, dry_run: false}.merge( options.to_h ) )
+    pk = primary_keys options.db, table_name
+
+    table_dataset = options.db[table_name]
+    min, max = table_dataset.select{[min(id), max(id)]}.first.values
+    return unless min && max
+    # could possibly overrride Dataset#paginate(page_no, page_size, record_count=nil)
+    # TODO definitely need to refactor this
+
+    # will always include the last item because
+    (min..max).step(options.page_size).each do |offset|
+      page = table_dataset.where( id: offset...(offset+options.page_size) )
+      logger.info page.sql
+      page.each do |row|
+        unless options[:dry_run]
+          codec.encode row.values, options.io
+        end
+      end
+    end
+    options.io.flush
+  end
+
+  # TODO possible memory issues here if the rows are big. May need to fork this.
+  # TODO lazy evaluation
+  def restore( table_name, options = {} )
+    logger.info "restoring #{table_name}"
+    options = OpenStruct.new( {io: STDIN, page_size: 10000, start_row: 0, dry_run: false}.merge( options ) )
+    dataset = options.db[table_name.to_sym]
+    # destination db should be same structure as incoming data
+    column_names = options.db.schema(table_name.to_sym).map( &:first )
+    first = ->(row){raise "schema mismatch" if row.size != column_names.size}
+
+    rows_restored = 0
+
+    # skip this many rows
+    options.start_row.times do
+      codec.decode( options.io ) {|row|}
+    end
+
+    # copy rows into db
+    while !options.io.eof?
+      # fetch a page of rows
+      rows_ary = []
+      begin
+        options.page_size.times do |i|
+          codec.decode( options.io ) do |row|
+            rows_ary << row
+          end
+          rows_restored += 1
+        end
+      rescue EOFError => e
+        # ran out of rows, so just use the ones we have so far
+      end
+
+      # insert to db. Hopeful db support bulk insert, which Sequel will figure out
+      options.db.transaction do
+        dataset.import column_names, rows_ary
+        yield rows_restored if block_given?
+        logger.info "restored #{rows_restored}"
+      end
+    end
+
+    rows_restored
+  end
+
+  def from_bz2( filename, db, table_name, options = {} )
+    IO.popen( "pbzip2 -d -c #{filename}" ) do |io|
+      restore table_name, options.merge( io: io, db: db )
+    end
+  end
+end
+
+# There are actually 2 sources for this:
+# one is the src db, the other is the dumped files
+# And the one that transfers live is another version
+class Schema
+  def initialize( src_db, dst_db = nil )
+    @src_db = src_db
+    @dst_db = dst_db
+  end
+
+  def schema_migration
+    @schema_migration ||= src_db.dump_schema_migration(:indexes=>false, :same_db => same_db)
+  end
+
+  def index_migration
+    @index_migration ||= src_db.dump_indexes_migration(:same_db => same_db)
+  end
+
+  def fk_migration
+    @fk_migration ||= src_db.dump_foreign_key_migration(:same_db => same_db)
+  end
+
+  def restore_migration
+    <<-EOF
+      require 'restore_migration'
+      Sequel.migration do
+        def db_pump
+        end
+
+        up do
+          restore_tables
+        end
+
+        down do
+          # from each table clear table
+          each_table do |table_name|
+            db_pump.restore table_name, io: io, db: db
+          end
+        end
+      end
+    EOF
+  end
+
+  attr_accessor :dst_db
+  attr_reader :src_db
+
+  def same_db
+    @dst_db.andand.database_type == @src_db.andand.database_type
+  end
+
+  def logger
+    @logger ||= Logger.new STDERR
+  end
+
+  # create the destination schema
+  def create
+    eval( @schema_migration ).apply dst_db, :up
+  end
+
+  # create indexes and foreign keys, and reset sequences
+  def index
+    logger.info "creating indexes"
+    eval(@index_migration).apply dst, :up
+    logger.info "creating foreign keys"
+    eval(@fk_migration).apply dst, :up
+
+    if dst.database_type == :postgres
+      logger.info "reset primary key sequences"
+      dst.tables.each{|t| dst.reset_primary_key_sequence(t)}
+      logger.info "Primary key sequences reset successfully"
+    end
+  end
+
+  def transfer_table( table_name, options = {} )
+    options = OpenStruct.new( {page_size: 10000, dry_run: false}.merge( options ) )
+    total_records = @src_db[table_name].count
+    logger.info "transferring #{total_records}"
+    column_names = @src_db.schema(table_name.to_sym).map( &:first )
+
+    @src_db[table_name].each_page(options.page_size) do |page|
+      logger.info "#{page.sql} of #{total_records}"
+      unless options.dry_run
+        @dst_db.transaction do
+          rows_ary = []
+          page.each do |row_hash|
+            rows_ary << row_hash.values
+          end
+          @dst_db[table_name.to_sym].import column_names, rows_ary
+        end
+      end
+    end
+  end
+
+  # copy the data in the tables
+  def transfer
+    create
+    transfer_tables
+    index
+  end
+
+  def dump_schema( container, options = {codec: :marshal} )
+    (container + '001_schema.rb').open('w') do |io|
+      io.write schema_migration
+    end
+
+    (container + '002_populate_tables.rb').open('w') do |io|
+      io.write restore_migration
+    end
+
+    (container + '003_indexes.rb').open('w') do |io|
+      io.write index_migration
+    end
+
+    (container + '004_foreign keys.rb').open('w') do |io|
+      io.write fk_migration
+    end
+  end
+
+  def load_migrations( container )
+    @schema_migration = eval (container + '001_schema.rb').read
+    @index_migration = eval (container + '003_indexes.rb').read
+    @fk_migration = eval (container + '004_foreign keys.rb').read
+  end
+
+  def dump_one_table( table_name, pathname, db_pump )
+    logger.info "dumping #{table_name} to #{pathname}"
+    fio = pathname.open('w')
+    # open subprocess in read-write mode
+    zio = IO.popen( "pbzip2 -z", 'r+' )
+    copier = Thread.new do
+      begin
+        IO.copy_stream zio, fio
+        logger.debug "finished stream copy"
+      ensure
+        fio.close
+      end
+    end
+
+    # generate the dump
+    db_pump.dump table_name, db: src_db, io: zio
+
+    # signal the copier thread to stop
+    zio.close_write
+    logger.debug 'finished dumping'
+    # wait for copier thread to
+    copier.join
+    logger.debug 'stream copy thread finished'
+  ensure
+    zio.close unless zio.closed?
+    fio.close unless fio.closed?
+  end
+
+  def dump_tables( container, options = {:codec => :marshal} )
+    container = Pathname(container)
+    db_pump = DbPump.new( options[:codec] )
+
+    src_db.tables.each do |table_name|
+      filename = container + "#{table_name}.dbp.bz2"
+      dump_one_table table_name, filename, db_pump
+    end
+  end
+
+  def restore_one_table( table_file, db_pump )
+    logger.info "restoring from #{table_file}"
+    table_name = table_file.basename.sub_ext('').sub_ext('').to_s.to_sym
+    # check if table has been restored already, and has the correct rows,
+    # otherwise pass in a start row.
+    db_pump.from_bz2 table_file, dst_db, table_name
+  end
+
+  def restore_tables( container, options = {:codec => :marshal} )
+    db_pump = DbPump.new( options[:codec] )
+    table_files = Pathname.glob Pathname(container) + '*dbp.bz2'
+    table_files.each{|table_file| restore_one_table table_file, db_pump}
+  end
+
+  def restore_tables( container, options = {:codec => :marshal} )
+    container = Pathname(container)
+    container.children
+  end
+
+  def self.transfer( src_db, dst_db )
+    new( src_db, dst_db ).transfer
+  end
+end
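The comment block above `inner_dump` describes the limited-id-list technique: page through only the primary key with a cheap subquery, then join back to fetch the full rows, so the server never has to scan past a large OFFSET on the wide rows. A standalone sketch (not part of the gem) of the dataset that approach builds, using an illustrative `:massive` table and an in-memory SQLite db purely to show the generated SQL (requires the sqlite3 gem):

```ruby
require 'sequel'

db = Sequel.sqlite  # in-memory db, just to inspect the SQL
db.create_table(:massive) { primary_key :id; String :whatever }

page_size, offset = 10_000, 20_000
limit_ds = db[:massive].select(:id).order(:id).limit(page_size, offset)
page = db[:massive]
         .join(limit_ds, id: :id)
         .select_all(:massive)
         .order(Sequel.qualify(:massive, :id))
puts page.sql
# roughly: SELECT massive.* FROM massive INNER JOIN
#          (SELECT id FROM massive ORDER BY id LIMIT 10000 OFFSET 20000) AS t1
#          ON (t1.id = massive.id) ORDER BY massive.id
```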
data/lib/wyrm/dump_schema.rb
ADDED
@@ -0,0 +1,111 @@
+# Dump a schema and compressed data from a db to a set of files
+# src_db = Sequel.connect "postgres://localhost:5454/lots"
+# ds = DumpSchema.new src_db, Pathname('/var/data/lots')
+# ds.dump_schema
+# ds.dump_tables
+class DumpSchema
+  def initialize( src_db, container = nil, options = {} )
+    @options = {:codec => :marshal}.merge( options )
+
+    @src_db = src_db
+    @container = Pathname(container)
+  end
+
+  attr_reader :src_db, :container, :codec
+
+  def schema_migration
+    @schema_migration ||= src_db.dump_schema_migration(:indexes=>false, :same_db => same_db)
+  end
+
+  def index_migration
+    @index_migration ||= src_db.dump_indexes_migration(:same_db => same_db)
+  end
+
+  def fk_migration
+    @fk_migration ||= src_db.dump_foreign_key_migration(:same_db => same_db)
+  end
+
+  def restore_migration
+    <<-EOF
+      require 'restore_migration'
+      Sequel.migration do
+        def db_pump
+        end
+
+        up do
+          restore_tables
+        end
+
+        down do
+          # from each table clear table
+          each_table do |table_name|
+            db_pump.restore table_name, io: io, db: db
+          end
+        end
+      end
+    EOF
+  end
+
+  def same_db
+    false
+  end
+
+  def logger
+    @logger ||= Logger.new STDERR
+  end
+
+  def dump_schema
+    (container + '001_schema.rb').open('w') do |io|
+      io.write schema_migration
+    end
+
+    (container + '002_populate_tables.rb').open('w') do |io|
+      io.write restore_migration
+    end
+
+    (container + '003_indexes.rb').open('w') do |io|
+      io.write index_migration
+    end
+
+    (container + '004_foreign keys.rb').open('w') do |io|
+      io.write fk_migration
+    end
+  end
+
+  def dump_one_table( table_name, pathname, db_pump )
+    logger.info "dumping #{table_name} to #{pathname}"
+    fio = pathname.open('w')
+    # open subprocess in read-write mode
+    zio = IO.popen( "pbzip2 -z", 'r+' )
+    copier = Thread.new do
+      begin
+        IO.copy_stream zio, fio
+        logger.debug "finished stream copy"
+      ensure
+        fio.close
+      end
+    end
+
+    # generate the dump
+    db_pump.dump table_name, db: src_db, io: zio
+
+    # signal the copier thread to stop
+    zio.close_write
+    logger.debug 'finished dumping'
+    # wait for copier thread to
+    copier.join
+    logger.debug 'stream copy thread finished'
+  ensure
+    zio.close unless zio.closed?
+    fio.close unless fio.closed?
+  end
+
+  def dump_tables
+    db_pump = DbPump.new( @options[:codec] )
+
+    src_db.tables.each do |table_name|
+      filename = container + "#{table_name}.dbp.bz2"
+      dump_one_table table_name, filename, db_pump
+    end
+  end
+end
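DumpSchema ties the pieces together: `dump_schema` writes the four numbered migration files into the container directory, and `dump_tables` streams each table through a `pbzip2` subprocess into `<table>.dbp.bz2`. A small usage sketch with a non-default codec, which `initialize` merges over the `:marshal` default; the connection URL and container path are illustrative, the require paths assume the gem's lib/ layout, and `pbzip2` must be on the PATH:

```ruby
require 'sequel'
require 'pathname'
require 'wyrm/db_pump'
require 'wyrm/dump_schema'

src_db = Sequel.connect 'postgres://localhost/lots'
ds = DumpSchema.new src_db, Pathname('/tmp/lots'), codec: :yaml  # instead of the default :marshal

ds.dump_schema   # writes 001_schema.rb, 002_populate_tables.rb, 003_indexes.rb, 004_foreign keys.rb
ds.dump_tables   # writes /tmp/lots/<table>.dbp.bz2 via the pbzip2 subprocess
```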
data/lib/wyrm/restore_schema.rb
ADDED
@@ -0,0 +1,64 @@
+require 'logger'
+
+# Load a schema from a set of dump files (from DumpSchema)
+# and restore the table data
+# dst_db = Sequel.connect "postgres://localhost:5454/lots"
+# rs = RestoreSchema.new dst_db, Pathname('/var/data/lots')
+# rs.create
+# rs.restore_tables
+class RestoreSchema
+  def initialize( dst_db, container )
+    @container = container
+    @dst_db = dst_db
+    @options = {:codec => :marshal}
+    load_migrations @container
+  end
+
+  attr_reader :dst_db
+  attr_reader :options
+  attr_reader :container
+  attr_reader :schema_migration, :index_migration
+
+  def logger
+    @logger ||= Logger.new STDERR
+  end
+
+  def load_migrations( container )
+    @schema_migration = (container + '001_schema.rb').read
+    @index_migration = (container + '003_indexes.rb').read
+    @fk_migration = (container + '004_foreign keys.rb').read
+  end
+
+  # create indexes and foreign keys, and reset sequences
+  def index
+    logger.info "creating indexes"
+    eval( index_migration ).apply dst_db, :up
+    logger.info "creating foreign keys"
+    eval( fk_migration ).apply dst_db, :up
+
+    if dst_db.database_type == :postgres
+      logger.info "reset primary key sequences"
+      dst_db.tables.each{|t| dst_db.reset_primary_key_sequence(t)}
+      logger.info "Primary key sequences reset successfully"
+    end
+  end
+
+  # create the destination schema
+  def create
+    eval( schema_migration ).apply dst_db, :up
+  end
+
+  def restore_one_table( table_file, db_pump )
+    logger.info "restoring from #{table_file}"
+    table_name = table_file.basename.sub_ext('').sub_ext('').to_s.to_sym
+    # check if table has been restored already, and has the correct rows,
+    # otherwise pass in a start row.
+    db_pump.from_bz2 table_file, dst_db, table_name
+  end
+
+  def restore_tables
+    db_pump = DbPump.new( options[:codec] )
+    table_files = Pathname.glob Pathname(container) + '*dbp.bz2'
+    table_files.sort_by{|tf| tf.stat.size}.each{|table_file| restore_one_table table_file, db_pump}
+  end
+end
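RestoreSchema restores the smallest dump files first and delegates the row copying to `DbPump#from_bz2`. The comment in `restore_one_table` mentions resuming a partially loaded table; a hypothetical sketch of doing that by hand with the `start_row` option that `DbPump#restore` accepts (the table name and dump path are illustrative):

```ruby
require 'sequel'
require 'wyrm/db_pump'

dst_db  = Sequel.connect 'postgres://localhost/lots'
db_pump = DbPump.new :marshal

# rows already present from an interrupted run; skip that many records in the dump
already_loaded = dst_db[:orders].count
db_pump.from_bz2 '/var/data/lots/orders.dbp.bz2', dst_db, :orders, start_row: already_loaded
```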
data/lib/wyrm/transferer.rb
ADDED
@@ -0,0 +1,32 @@
+class Transferer
+  def transfer_table( table_name, options = {} )
+    options = OpenStruct.new( {page_size: 10000, dry_run: false}.merge( options ) )
+    total_records = @src_db[table_name].count
+    logger.info "transferring #{total_records}"
+    column_names = @src_db.schema(table_name.to_sym).map( &:first )
+
+    @src_db[table_name].each_page(options.page_size) do |page|
+      logger.info "#{page.sql} of #{total_records}"
+      unless options.dry_run
+        @dst_db.transaction do
+          rows_ary = []
+          page.each do |row_hash|
+            rows_ary << row_hash.values
+          end
+          @dst_db[table_name.to_sym].import column_names, rows_ary
+        end
+      end
+    end
+  end
+
+  # copy the data in the tables
+  def transfer
+    create
+    transfer_tables
+    index
+  end
+
+  def self.transfer( src_db, dst_db )
+    new( src_db, dst_db ).transfer
+  end
+end
data/lib/wyrm/version.rb
ADDED
data/wyrm.gemspec
ADDED
@@ -0,0 +1,23 @@
+# coding: utf-8
+lib = File.expand_path('../lib', __FILE__)
+$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+require 'wyrm/version'
+
+Gem::Specification.new do |spec|
+  spec.name          = "wyrm"
+  spec.version       = Wyrm::VERSION
+  spec.authors       = ["John Anderson"]
+  spec.email         = ["panic@semiosix.com"]
+  spec.description   = %q{Transfer from one SQL database to another}
+  spec.summary       = %q{Transfer from one SQL database to another}
+  spec.homepage      = "http://djellemah.com"
+  spec.license       = "MIT"
+
+  spec.files         = `git ls-files`.split($/)
+  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
+  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
+  spec.require_paths = ["lib"]
+
+  spec.add_development_dependency "bundler", "~> 1.3"
+  spec.add_development_dependency "rake"
+end
metadata
ADDED
@@ -0,0 +1,86 @@
+--- !ruby/object:Gem::Specification
+name: wyrm
+version: !ruby/object:Gem::Version
+  version: 0.1.0
+platform: ruby
+authors:
+- John Anderson
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2013-05-13 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: bundler
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '1.3'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '1.3'
+- !ruby/object:Gem::Dependency
+  name: rake
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+description: Transfer from one SQL database to another
+email:
+- panic@semiosix.com
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- .gitignore
+- .rvmrc
+- Gemfile
+- LICENSE.txt
+- README.md
+- Rakefile
+- lib/wyrm.rb
+- lib/wyrm/db_pump.rb
+- lib/wyrm/dump_schema.rb
+- lib/wyrm/other_schema.rb
+- lib/wyrm/restore_schema.rb
+- lib/wyrm/transferer.rb
+- lib/wyrm/version.rb
+- wyrm.gemspec
+homepage: http://djellemah.com
+licenses:
+- MIT
+metadata: {}
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 2.0.0.rc.2
+signing_key:
+specification_version: 4
+summary: Transfer from one SQL database to another
+test_files: []