wyrm 0.1.0

checksums.yaml.gz ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: dc95033a6576cd33f78d345b18f08564317c0bc1
4
+ data.tar.gz: 2c046fa329dc3cef258d1185ab390b3106e9fc1a
5
+ SHA512:
6
+ metadata.gz: 07a728780ea1121ca6784ef83059a0fad6c5218c793d3a8e5033203a6ba60fb435587c1e65f5ff1e3a184a18a2d6ad87c4bf1b2a1e300bc056ded1662ff4f225
7
+ data.tar.gz: 94a9579c0070a612439a428e4379874671b4aee41d89bb1bb7c6e1a930838fa278f7a178435fc41a6e3cbf8ad98d16824c168f1e8edcc33576d158b92e206a2f
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/.rvmrc ADDED
@@ -0,0 +1 @@
1
+ rvm 2.0.0@wyrm --create
data/Gemfile ADDED
@@ -0,0 +1,8 @@
1
+ source 'https://rubygems.org'
2
+ # source 'file:///var/cache/rubygems'
3
+
4
+ gem 'sequel'
5
+ gem 'fastandand'
6
+
7
+ # Specify your gem's dependencies in wyrm.gemspec
8
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 John Anderson
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,71 @@
1
+ # Wyrm
2
+
3
+ Transfer data from one database to another. It has been used to dump > 100M dbs,
4
+ and one 850G db, and should theoretically work for any db supported by Sequel.
5
+
6
+ Currently transfers tables and views only. Does not attempt to transfer
7
+ stored procs, permissions, triggers etc.
8
+
9
+ Works best for tables that have single numeric primary keys, but should also
10
+ handle compound primary keys and tables without primary keys.
11
+
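+ Under the hood, `DbPump#dump` picks a paging strategy from the table's primary
+ key (paraphrased from the source):
+
+ ```ruby
+ # paraphrased from DbPump#dump; pk is the list of primary key column names
+ case
+ when pk.empty?              then paginated_dump table_name, options # plain offset/limit pages
+ when pk.all?{|i| i == :id } then min_max_dump   table_name, options # id range scans
+ else                             inner_dump     table_name, options # join against a keys-only page
+ end
+ ```
+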
12
+ Wyrm because:
13
+
14
+ - I like dragons
15
+ - I can have a Wyrm::Hole to transfer data through :-D
16
+
17
+ ## Installation
18
+
19
+ Add this line to your application's Gemfile:
20
+
21
+ gem 'wyrm'
22
+
23
+ And then execute:
24
+
25
+ $ bundle
26
+
27
+ Or install it yourself as:
28
+
29
+ $ gem install wyrm
30
+
31
+ Make sure you install the db gems, typically
32
+
33
+ $ gem install pg mysql2
34
+
35
+ ## Usage
36
+
37
+ This is mostly a toolkit right now. To transfer from MySQL to PostgreSQL, do:
38
+ ```ruby
39
+ require 'sequel'
40
+ require 'pathname'
41
+
42
+ # on the source host
43
+ # dump tables from mysql
44
+ require 'wyrm/dump_schema'
45
+ src_db = Sequel.connect "mysql2://localhost/lots"
46
+ ds = DumpSchema.new src_db, Pathname('/tmp/lots')
47
+ ds.dump_schema
48
+
49
+ # this might take a while ;-)
50
+ ds.dump_tables
51
+
52
+ # transfer data. Already compressed, so no -z
53
+ # rsync -var /tmp/lots user@host:/var/data/
54
+
55
+ # on the destination host
56
+ # restore tables to postgres
57
+ require 'wyrm/restore_schema'
58
+ dst_db = Sequel.connect "postgres://localhost/lots"
59
+ rs = RestoreSchema.new dst_db, Pathname('/var/data/lots')
60
+ rs.create
61
+ rs.restore_tables
62
+ rs.index
63
+ ```
64
+
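+ Each table is dumped to its own `table.dbp.bz2` file, so a single table can also
+ be reloaded on its own. A minimal sketch, assuming `pbzip2` is installed (the
+ table name here is made up):
+
+ ```ruby
+ require 'sequel'
+ require 'wyrm/db_pump'
+
+ db = Sequel.connect "postgres://localhost/lots"
+ # pipes the file through pbzip2 -d and bulk-inserts the rows
+ DbPump.new.from_bz2 '/var/data/lots/widgets.dbp.bz2', db, :widgets
+ ```
+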
65
+ ## Contributing
66
+
67
+ 1. Fork it
68
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
69
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
70
+ 4. Push to the branch (`git push origin my-new-feature`)
71
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/lib/wyrm.rb ADDED
@@ -0,0 +1,8 @@
1
+ require "wyrm/version"
2
+ require "wyrm/db_pump.rb"
3
+ require "wyrm/restore_schema.rb"
4
+ require "wyrm/dump_schema.rb"
5
+
6
+ module Wyrm
7
+ # Your code goes here...
8
+ end
data/lib/wyrm/db_pump.rb ADDED
@@ -0,0 +1,410 @@
1
+ require 'sequel'
2
+ require 'yaml'
3
+ require 'ostruct'
4
+ require 'logger'
5
+ require 'fastandand'
6
+
7
+ Sequel.extension :migration, :schema_dumper, :pagination
8
+
9
+
10
+ # TODO possibly use Gem::Package::TarWriter to write tar files
11
+ # TODO when restoring, could use a SizeQueue to make sure the db is kept busy
12
+
13
+ # TODO need to version the dumps, or something like that.
14
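+ # Streams table rows between a Sequel db and an IO through a pluggable codec,
+ # paging the source table so very large tables can be dumped and restored.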
+ class DbPump
15
+ class RespondsTo
16
+ def initialize( *methods )
17
+ @methods = methods
18
+ end
19
+
20
+ def ===( instance )
21
+ @methods.all?{|m| instance.respond_to? m}
22
+ end
23
+ end
24
+
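+ # codec can be :marshal, :yaml, a codec class, or any object that responds to
+ # encode(obj, io) and decode(io){|obj| ... } (duck-typed via RespondsTo above)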
25
+ def initialize( codec = :marshal )
26
+ @codec =
27
+ case codec
28
+ when :yaml; YamlCodec.new
29
+ when :marshal; MarshalCodec.new
30
+ when Class
31
+ codec.new
32
+ when RespondsTo.new( :encode, :decode )
33
+ codec
34
+ else
35
+ raise "unknown codec #{codec}"
36
+ end
37
+ end
38
+
39
+ attr_reader :codec
40
+
41
+ # TODO could use msgpack as serialization here, but its API is unpleasant.
42
+
43
+ class MarshalCodec
44
+ def encode( obj, io )
45
+ Marshal.dump obj, io
46
+ end
47
+
48
+ def decode( io, &block )
49
+ obj = Marshal.load(io)
50
+ yield obj if block_given?
51
+ obj
52
+ end
53
+ end
54
+
55
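+ # NOTE: placeholder only; currently identical to MarshalCodec (see the msgpack TODO above)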
+ class MsgPackCodec
56
+ def encode( obj, io )
57
+ Marshal.dump obj, io
58
+ end
59
+
60
+ def decode( io, &block )
61
+ obj = Marshal.load(io)
62
+ yield obj if block_given?
63
+ obj
64
+ end
65
+ end
66
+
67
+ class YamlCodec
68
+ def encode( obj, io )
69
+ YAML.dump obj, io
70
+ end
71
+
72
+ def decode( io, &block )
73
+ obj = YAML.load(io)
74
+ yield obj if block_given?
75
+ obj
76
+ end
77
+ end
78
+
79
+ def logger
80
+ @logger ||= Logger.new STDERR
81
+ end
82
+
83
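+ # the primary key column names for table_name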
+ def primary_keys( db, table_name )
84
+ db.schema(table_name).select{|df| df.last[:primary_key]}.map{|df| df.first}
85
+ end
86
+
87
+ # TODO possibly use select from outer / inner join to
88
+ # http://www.numerati.com/2012/06/26/reading-large-result-sets-with-hibernate-and-mysql/
89
+ # because mysql is useless
90
+ def paginated_dump( table_name, options = {} )
91
+ options = OpenStruct.new( {io: STDOUT, page_size: 10000, dry_run: false}.merge( options.to_h ) )
92
+ pk = primary_keys options.db, table_name
93
+ options.db[table_name].order(*pk).each_page(options[:page_size]) do |page|
94
+ logger.info page.sql
95
+ page.each do |row|
96
+ unless options[:dry_run]
97
+ codec.encode row.values, options.io
98
+ end
99
+ end
100
+ end
101
+ options.io.flush
102
+ end
103
+
104
+ # have to use this for non-integer pks
105
+ # The idea is that large offsets are expensive in the db because the db server has to read
106
+ # through the data set to reach the required offset. So make that only ids, and then
107
+ # do the main select from the limited id list.
108
+ # TODO could speed this up by have a query thread which runs the next page-query while
109
+ # the current one is being written/compressed.
110
+ # select * from massive as full
111
+ # inner join (select id from massive order by whatever limit m, n) limit
112
+ # on full.id = limit.id
113
+ # order by full.whatever
114
+ def inner_dump( table_name, options = {} )
115
+ options = OpenStruct.new( {io: STDOUT, page_size: 10000, dry_run: false}.merge( options.to_h ) )
116
+ pk = primary_keys options.db, table_name
117
+
118
+ table_dataset = options.db[table_name]
119
+ # could possibly override Dataset#paginate(page_no, page_size, record_count=nil)
120
+ 0.step(table_dataset.count, options.page_size).each do |offset|
121
+ limit_dataset = table_dataset.select( *pk ).limit( options.page_size, offset ).order( *pk )
122
+ page = table_dataset.join( limit_dataset, Hash[ pk.map{|f| [f,f]} ] ).order( *pk ).qualify_to(table_name)
123
+ logger.info page.sql
124
+ page.each do |row|
125
+ unless options[:dry_run]
126
+ codec.encode row.values, options.io
127
+ end
128
+ end
129
+ end
130
+ options.io.flush
131
+ end
132
+
133
+ # TODO need to also dump a first row containing useful stuff:
134
+ # - source table name
135
+ # - number of rows
136
+ # - source db url
137
+ # - permissions?
138
+ # These should all be in one object that can be Marshall.load-ed easily.
139
+ def dump( table_name, options = {} )
140
+ pk = primary_keys options[:db], table_name
141
+ case
142
+ when pk.empty?
143
+ paginated_dump( table_name, options )
144
+ when pk.all?{|i| i == :id }
145
+ min_max_dump( table_name, options )
146
+ else
147
+ inner_dump( table_name, options )
148
+ end
149
+ end
150
+
151
+ # could use this for integer pks
152
+ def min_max_dump( table_name, options = {} )
153
+ # select max(id), min(id) from patents
154
+ # and then split that up into 10000 size chunks. Not really important if there aren't exactly 10000
155
+ options = OpenStruct.new( {io: STDOUT, page_size: 10000, dry_run: false}.merge( options.to_h ) )
156
+ pk = primary_keys options.db, table_name
157
+
158
+ table_dataset = options.db[table_name]
159
+ min, max = table_dataset.select{[min(id), max(id)]}.first.values
160
+ return unless min && max
161
+ # could possibly override Dataset#paginate(page_no, page_size, record_count=nil)
162
+ # TODO definitely need to refactor this
163
+
164
+ # will always include the last item, because the exclusive range below still reaches max
165
+ (min..max).step(options.page_size).each do |offset|
166
+ page = table_dataset.where( id: offset...(offset+options.page_size) )
167
+ logger.info page.sql
168
+ page.each do |row|
169
+ unless options[:dry_run]
170
+ codec.encode row.values, options.io
171
+ end
172
+ end
173
+ end
174
+ options.io.flush
175
+ end
176
+
177
+ # TODO possible memory issues here if the rows are big. May need to fork this.
178
+ # TODO lazy evaluation
179
+ def restore( table_name, options = {} )
180
+ logger.info "restoring #{table_name}"
181
+ options = OpenStruct.new( {io: STDIN, page_size: 10000, start_row: 0, dry_run: false}.merge( options ) )
182
+ dataset = options.db[table_name.to_sym]
183
+ # destination db should be same structure as incoming data
184
+ column_names = options.db.schema(table_name.to_sym).map( &:first )
185
+ first = ->(row){raise "schema mismatch" if row.size != column_names.size}
186
+
187
+ rows_restored = 0
188
+
189
+ # skip this many rows
190
+ options.start_row.times do
191
+ codec.decode( options.io ) {|row|}
192
+ end
193
+
194
+ # copy rows into db
195
+ while !options.io.eof?
196
+ # fetch a page of rows
197
+ rows_ary = []
198
+ begin
199
+ options.page_size.times do |i|
200
+ codec.decode( options.io ) do |row|
201
+ rows_ary << row
202
+ end
203
+ rows_restored += 1
204
+ end
205
+ rescue EOFError => e
206
+ # ran out of rows, so just use the ones we have so far
207
+ end
208
+
209
+ # insert into db. Hopefully the db supports bulk insert, which Sequel will figure out
210
+ options.db.transaction do
211
+ dataset.import column_names, rows_ary
212
+ yield rows_restored if block_given?
213
+ logger.info "restored #{rows_restored}"
214
+ end
215
+ end
216
+
217
+ rows_restored
218
+ end
219
+
220
+ def from_bz2( filename, db, table_name, options = {} )
221
+ IO.popen( "pbzip2 -d -c #{filename}" ) do |io|
222
+ restore table_name, options.merge( io: io, db: db )
223
+ end
224
+ end
225
+ end
226
+
227
+ # There are actually 2 sources for this:
228
+ # one is the src db, the other is the dumped files
229
+ # And the one that transfers live is another version
230
+ class Schema
231
+ def initialize( src_db, dst_db = nil )
232
+ @src_db = src_db
233
+ @dst_db = dst_db
234
+ end
235
+
236
+ def schema_migration
237
+ @schema_migration ||= src_db.dump_schema_migration(:indexes=>false, :same_db => same_db)
238
+ end
239
+
240
+ def index_migration
241
+ @index_migration ||= src_db.dump_indexes_migration(:same_db => same_db)
242
+ end
243
+
244
+ def fk_migration
245
+ @fk_migration ||= src_db.dump_foreign_key_migration(:same_db => same_db)
246
+ end
247
+
248
+ def restore_migration
249
+ <<-EOF
250
+ require 'restore_migration'
251
+ Sequel.migration do
252
+ def db_pump
253
+ end
254
+
255
+ up do
256
+ restore_tables
257
+ end
258
+
259
+ down do
260
+ # from each table clear table
261
+ each_table do |table_name|
262
+ db_pump.restore table_name, io: io, db: db
263
+ end
264
+ end
265
+ end
266
+ EOF
267
+ end
268
+
269
+ attr_accessor :dst_db
270
+ attr_reader :src_db
271
+
272
+ def same_db
273
+ @dst_db.andand.database_type == @src_db.andand.database_type
274
+ end
275
+
276
+ def logger
277
+ @logger ||= Logger.new STDERR
278
+ end
279
+
280
+ # create the destination schema
281
+ def create
282
+ eval( schema_migration ).apply dst_db, :up
283
+ end
284
+
285
+ # create indexes and foreign keys, and reset sequences
286
+ def index
287
+ logger.info "creating indexes"
288
+ eval( index_migration ).apply dst_db, :up
289
+ logger.info "creating foreign keys"
290
+ eval( fk_migration ).apply dst_db, :up
291
+
292
+ if dst_db.database_type == :postgres
293
+ logger.info "reset primary key sequences"
294
+ dst_db.tables.each{|t| dst_db.reset_primary_key_sequence(t)}
295
+ logger.info "Primary key sequences reset successfully"
296
+ end
297
+ end
298
+
299
+ def transfer_table( table_name, options = {} )
300
+ options = OpenStruct.new( {page_size: 10000, dry_run: false}.merge( options ) )
301
+ total_records = @src_db[table_name].count
302
+ logger.info "transferring #{total_records}"
303
+ column_names = @src_db.schema(table_name.to_sym).map( &:first )
304
+
305
+ @src_db[table_name].each_page(options.page_size) do |page|
306
+ logger.info "#{page.sql} of #{total_records}"
307
+ unless options.dry_run
308
+ @dst_db.transaction do
309
+ rows_ary = []
310
+ page.each do |row_hash|
311
+ rows_ary << row_hash.values
312
+ end
313
+ @dst_db[table_name.to_sym].import column_names, rows_ary
314
+ end
315
+ end
316
+ end
317
+ end
318
+
319
+ # copy the data in the tables
320
+ def transfer
321
+ create
322
+ transfer_tables
323
+ index
324
+ end
325
+
326
+ def dump_schema( container, options = {codec: :marshal} )
327
+ (container + '001_schema.rb').open('w') do |io|
328
+ io.write schema_migration
329
+ end
330
+
331
+ (container + '002_populate_tables.rb').open('w') do |io|
332
+ io.write restore_migration
333
+ end
334
+
335
+ (container + '003_indexes.rb').open('w') do |io|
336
+ io.write index_migration
337
+ end
338
+
339
+ (container + '004_foreign keys.rb').open('w') do |io|
340
+ io.write fk_migration
341
+ end
342
+ end
343
+
344
+ def load_migrations( container )
345
+ @schema_migration = (container + '001_schema.rb').read
346
+ @index_migration = (container + '003_indexes.rb').read
347
+ @fk_migration = (container + '004_foreign keys.rb').read
348
+ end
349
+
350
+ def dump_one_table( table_name, pathname, db_pump )
351
+ logger.info "dumping #{table_name} to #{pathname}"
352
+ fio = pathname.open('w')
353
+ # open subprocess in read-write mode
354
+ zio = IO.popen( "pbzip2 -z", 'r+' )
355
+ copier = Thread.new do
356
+ begin
357
+ IO.copy_stream zio, fio
358
+ logger.debug "finished stream copy"
359
+ ensure
360
+ fio.close
361
+ end
362
+ end
363
+
364
+ # generate the dump
365
+ db_pump.dump table_name, db: src_db, io: zio
366
+
367
+ # signal the copier thread to stop
368
+ zio.close_write
369
+ logger.debug 'finished dumping'
370
+ # wait for copier thread to finish
371
+ copier.join
372
+ logger.debug 'stream copy thread finished'
373
+ ensure
374
+ zio.close unless zio.closed?
375
+ fio.close unless fio.closed?
376
+ end
377
+
378
+ def dump_tables( container, options = {:codec => :marshal} )
379
+ container = Pathname(container)
380
+ db_pump = DbPump.new( options[:codec] )
381
+
382
+ src_db.tables.each do |table_name|
383
+ filename = container + "#{table_name}.dbp.bz2"
384
+ dump_one_table table_name, filename, db_pump
385
+ end
386
+ end
387
+
388
+ def restore_one_table( table_file, db_pump )
389
+ logger.info "restoring from #{table_file}"
390
+ table_name = table_file.basename.sub_ext('').sub_ext('').to_s.to_sym
391
+ # check if table has been restored already, and has the correct rows,
392
+ # otherwise pass in a start row.
393
+ db_pump.from_bz2 table_file, dst_db, table_name
394
+ end
395
+
396
+ def restore_tables( container, options = {:codec => :marshal} )
397
+ db_pump = DbPump.new( options[:codec] )
398
+ table_files = Pathname.glob Pathname(container) + '*dbp.bz2'
399
+ table_files.each{|table_file| restore_one_table table_file, db_pump}
400
+ end
401
+
402
+ # NOTE: unfinished duplicate of restore_tables above, commented out so that it
+ # does not override the working definition
+ # def restore_tables( container, options = {:codec => :marshal} )
403
+ #   container = Pathname(container)
404
+ #   container.children
405
+ # end
406
+
407
+ def self.transfer( src_db, dst_db )
408
+ new( src_db, dst_db ).transfer
409
+ end
410
+ end
data/lib/wyrm/dump_schema.rb ADDED
@@ -0,0 +1,111 @@
1
+ # Dump a schema and compressed data from a db to a set of files
2
+ # src_db = Sequel.connect "postgres://localhost:5454/lots"
3
+ # ds = DumpSchema.new src_db, Pathname('/var/data/lots')
4
+ # ds.dump_schema
5
+ # ds.dump_tables
6
+ class DumpSchema
7
+ def initialize( src_db, container = nil, options = {} )
8
+ @options = {:codec => :marshal}.merge( options )
9
+
10
+ @src_db = src_db
11
+ @container = Pathname(container)
12
+ end
13
+
14
+ attr_reader :src_db, :container, :codec
15
+
16
+ def schema_migration
17
+ @schema_migration ||= src_db.dump_schema_migration(:indexes=>false, :same_db => same_db)
18
+ end
19
+
20
+ def index_migration
21
+ @index_migration ||= src_db.dump_indexes_migration(:same_db => same_db)
22
+ end
23
+
24
+ def fk_migration
25
+ @fk_migration ||= src_db.dump_foreign_key_migration(:same_db => same_db)
26
+ end
27
+
28
+ def restore_migration
29
+ <<-EOF
30
+ require 'restore_migration'
31
+ Sequel.migration do
32
+ def db_pump
33
+ end
34
+
35
+ up do
36
+ restore_tables
37
+ end
38
+
39
+ down do
40
+ # from each table clear table
41
+ each_table do |table_name|
42
+ db_pump.restore table_name, io: io, db: db
43
+ end
44
+ end
45
+ end
46
+ EOF
47
+ end
48
+
49
+ def same_db
50
+ false
51
+ end
52
+
53
+ def logger
54
+ @logger ||= Logger.new STDERR
55
+ end
56
+
57
+ def dump_schema
58
+ (container + '001_schema.rb').open('w') do |io|
59
+ io.write schema_migration
60
+ end
61
+
62
+ (container + '002_populate_tables.rb').open('w') do |io|
63
+ io.write restore_migration
64
+ end
65
+
66
+ (container + '003_indexes.rb').open('w') do |io|
67
+ io.write index_migration
68
+ end
69
+
70
+ (container + '004_foreign keys.rb').open('w') do |io|
71
+ io.write fk_migration
72
+ end
73
+ end
74
+
75
+ def dump_one_table( table_name, pathname, db_pump )
76
+ logger.info "dumping #{table_name} to #{pathname}"
77
+ fio = pathname.open('w')
78
+ # open subprocess in read-write mode
79
+ zio = IO.popen( "pbzip2 -z", 'r+' )
80
+ copier = Thread.new do
81
+ begin
82
+ IO.copy_stream zio, fio
83
+ logger.debug "finished stream copy"
84
+ ensure
85
+ fio.close
86
+ end
87
+ end
88
+
89
+ # generate the dump
90
+ db_pump.dump table_name, db: src_db, io: zio
91
+
92
+ # signal the copier thread to stop
93
+ zio.close_write
94
+ logger.debug 'finished dumping'
95
+ # wait for copier thread to finish
96
+ copier.join
97
+ logger.debug 'stream copy thread finished'
98
+ ensure
99
+ zio.close unless zio.closed?
100
+ fio.close unless fio.closed?
101
+ end
102
+
103
+ def dump_tables
104
+ db_pump = DbPump.new( @options[:codec] )
105
+
106
+ src_db.tables.each do |table_name|
107
+ filename = container + "#{table_name}.dbp.bz2"
108
+ dump_one_table table_name, filename, db_pump
109
+ end
110
+ end
111
+ end
data/lib/wyrm/other_schema.rb ADDED
@@ -0,0 +1,6 @@
1
+ # Place for stuff that I'm not sure about yet
2
+ class OtherSchema
3
+ def same_db
4
+ @dst_db.andand.database_type == @src_db.andand.database_type
5
+ end
6
+ end
data/lib/wyrm/restore_schema.rb ADDED
@@ -0,0 +1,64 @@
1
+ require 'logger'
2
+
3
+ # Load a schema from a set of dump files (from DumpSchema)
4
+ # and restore the table data
5
+ # dst_db = Sequel.connect "postgres://localhost:5454/lots"
6
+ # rs = RestoreSchema.new dst_db, Pathname('/var/data/lots')
7
+ # rs.create
8
+ # rs.restore_tables
9
+ class RestoreSchema
10
+ def initialize( dst_db, container )
11
+ @container = container
12
+ @dst_db = dst_db
13
+ @options = {:codec => :marshal}
14
+ load_migrations @container
15
+ end
16
+
17
+ attr_reader :dst_db
18
+ attr_reader :options
19
+ attr_reader :container
20
+ attr_reader :schema_migration, :index_migration
21
+
22
+ def logger
23
+ @logger ||= Logger.new STDERR
24
+ end
25
+
26
+ def load_migrations( container )
27
+ @schema_migration = (container + '001_schema.rb').read
28
+ @index_migration = (container + '003_indexes.rb').read
29
+ @fk_migration = (container + '004_foreign keys.rb').read
30
+ end
31
+
32
+ # create indexes and foreign keys, and reset sequences
33
+ def index
34
+ logger.info "creating indexes"
35
+ eval( index_migration ).apply dst_db, :up
36
+ logger.info "creating foreign keys"
37
+ eval( fk_migration ).apply dst_db, :up
38
+
39
+ if dst_db.database_type == :postgres
40
+ logger.info "reset primary key sequences"
41
+ dst_db.tables.each{|t| dst_db.reset_primary_key_sequence(t)}
42
+ logger.info "Primary key sequences reset successfully"
43
+ end
44
+ end
45
+
46
+ # create the destination schema
47
+ def create
48
+ eval( schema_migration ).apply dst_db, :up
49
+ end
50
+
51
+ def restore_one_table( table_file, db_pump )
52
+ logger.info "restoring from #{table_file}"
53
+ table_name = table_file.basename.sub_ext('').sub_ext('').to_s.to_sym
54
+ # check if table has been restored already, and has the correct rows,
55
+ # otherwise pass in a start row.
56
+ db_pump.from_bz2 table_file, dst_db, table_name
57
+ end
58
+
59
+ def restore_tables
60
+ db_pump = DbPump.new( options[:codec] )
61
+ table_files = Pathname.glob Pathname(container) + '*dbp.bz2'
62
+ table_files.sort_by{|tf| tf.stat.size}.each{|table_file| restore_one_table table_file, db_pump}
63
+ end
64
+ end
data/lib/wyrm/transferer.rb ADDED
@@ -0,0 +1,32 @@
1
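+ # Work in progress: extracted from the Schema class in db_pump.rb. Not yet usable
+ # on its own (expects @src_db, @dst_db, logger and a transfer_tables method).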
+ class Transferer
2
+ def transfer_table( table_name, options = {} )
3
+ options = OpenStruct.new( {page_size: 10000, dry_run: false}.merge( options ) )
4
+ total_records = @src_db[table_name].count
5
+ logger.info "transferring #{total_records}"
6
+ column_names = @src_db.schema(table_name.to_sym).map( &:first )
7
+
8
+ @src_db[table_name].each_page(options.page_size) do |page|
9
+ logger.info "#{page.sql} of #{total_records}"
10
+ unless options.dry_run
11
+ @dst_db.transaction do
12
+ rows_ary = []
13
+ page.each do |row_hash|
14
+ rows_ary << row_hash.values
15
+ end
16
+ @dst_db[table_name.to_sym].import column_names, rows_ary
17
+ end
18
+ end
19
+ end
20
+ end
21
+
22
+ # copy the data in the tables
23
+ def transfer
24
+ create
25
+ transfer_tables
26
+ index
27
+ end
28
+
29
+ def self.transfer( src_db, dst_db )
30
+ new( src_db, dst_db ).transfer
31
+ end
32
+ end
data/lib/wyrm/version.rb ADDED
@@ -0,0 +1,3 @@
1
+ module Wyrm
2
+ VERSION = "0.1.0"
3
+ end
data/wyrm.gemspec ADDED
@@ -0,0 +1,23 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'wyrm/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "wyrm"
8
+ spec.version = Wyrm::VERSION
9
+ spec.authors = ["John Anderson"]
10
+ spec.email = ["panic@semiosix.com"]
11
+ spec.description = %q{Transfer from one SQL database to another}
12
+ spec.summary = %q{Transfer from one SQL database to another}
13
+ spec.homepage = "http://djellemah.com"
14
+ spec.license = "MIT"
15
+
16
+ spec.files = `git ls-files`.split($/)
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib"]
20
+
21
+ spec.add_development_dependency "bundler", "~> 1.3"
22
+ spec.add_development_dependency "rake"
23
+ end
metadata ADDED
@@ -0,0 +1,86 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: wyrm
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - John Anderson
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-05-13 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.3'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description: Transfer from one SQL database to another
42
+ email:
43
+ - panic@semiosix.com
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - .gitignore
49
+ - .rvmrc
50
+ - Gemfile
51
+ - LICENSE.txt
52
+ - README.md
53
+ - Rakefile
54
+ - lib/wyrm.rb
55
+ - lib/wyrm/db_pump.rb
56
+ - lib/wyrm/dump_schema.rb
57
+ - lib/wyrm/other_schema.rb
58
+ - lib/wyrm/restore_schema.rb
59
+ - lib/wyrm/transferer.rb
60
+ - lib/wyrm/version.rb
61
+ - wyrm.gemspec
62
+ homepage: http://djellemah.com
63
+ licenses:
64
+ - MIT
65
+ metadata: {}
66
+ post_install_message:
67
+ rdoc_options: []
68
+ require_paths:
69
+ - lib
70
+ required_ruby_version: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - '>='
73
+ - !ruby/object:Gem::Version
74
+ version: '0'
75
+ required_rubygems_version: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - '>='
78
+ - !ruby/object:Gem::Version
79
+ version: '0'
80
+ requirements: []
81
+ rubyforge_project:
82
+ rubygems_version: 2.0.0.rc.2
83
+ signing_key:
84
+ specification_version: 4
85
+ summary: Transfer from one SQL database to another
86
+ test_files: []