wyrm 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml.gz ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: dc95033a6576cd33f78d345b18f08564317c0bc1
4
+ data.tar.gz: 2c046fa329dc3cef258d1185ab390b3106e9fc1a
5
+ SHA512:
6
+ metadata.gz: 07a728780ea1121ca6784ef83059a0fad6c5218c793d3a8e5033203a6ba60fb435587c1e65f5ff1e3a184a18a2d6ad87c4bf1b2a1e300bc056ded1662ff4f225
7
+ data.tar.gz: 94a9579c0070a612439a428e4379874671b4aee41d89bb1bb7c6e1a930838fa278f7a178435fc41a6e3cbf8ad98d16824c168f1e8edcc33576d158b92e206a2f
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/.rvmrc ADDED
@@ -0,0 +1 @@
1
+ rvm 2.0.0@wyrm --create
data/Gemfile ADDED
@@ -0,0 +1,8 @@
1
+ source 'https://rubygems.org'
2
+ # source 'file:///var/cache/rubygems' # local mirror used during development
3
+
4
+ gem 'sequel'
5
+ gem 'fastandand'
6
+
7
+ # Specify your gem's dependencies in wyrm.gemspec
8
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 John Anderson
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,71 @@
1
+ # Wyrm
2
+
3
+ Transfer data from one database to another. Has been used to dump > 100M dbs,
4
+ and one 850G db. Should theoretically work for any dbs supported by Sequel.
5
+
6
+ Currently transfers tables and views only. Does not attempt to transfer
7
+ stored procs, permissions, triggers etc.
8
+
9
+ Works best for tables that have single numeric primary keys, but should also
10
+ handle compound primary keys and tables without primary keys.
11
+
12
+ Wyrm because:
13
+
14
+ - I like dragons
15
+ - I can have a Wyrm::Hole to transfer data through :-D
16
+
17
+ ## Installation
18
+
19
+ Add this line to your application's Gemfile:
20
+
21
+ gem 'wyrm'
22
+
23
+ And then execute:
24
+
25
+ $ bundle
26
+
27
+ Or install it yourself as:
28
+
29
+ $ gem install wyrm
30
+
31
+ Make sure you install the db gems, typically
32
+
33
+ $ gem install pg mysql2
34
+
35
+ ## Usage
36
+
37
+ This is mostly a toolkit right now. To transfer from MySQL to PostgreSQL:
38
+ ```ruby
39
+ require 'sequel'
40
+ require 'pathname'
41
+
42
+ # on the source host
43
+ # dump tables from mysql
44
+ require 'wyrm/dump_schema'
45
+ src_db = Sequel.connect "mysql2://localhost/lots"
46
+ ds = DumpSchema.new src_db, Pathname('/tmp/lots')
47
+ ds.dump_schema
48
+
49
+ # this might take a while ;-)
50
+ ds.dump_tables
51
+
52
+ # transfer data. Already compressed, so no -z
53
+ # rsync -var /tmp/lots user@host:/var/data/
54
+
55
+ # on the destination host
56
+ # restore tables to postgres
57
+ require 'wyrm/restore_schema'
58
+ dst_db = Sequel.connect "postgres://localhost/lots"
59
+ rs = RestoreSchema.new dst_db, Pathname('/var/data/lots')
60
+ rs.create
61
+ rs.restore_tables
62
+ rs.index
63
+ ```
64
+
65
+ ## Contributing
66
+
67
+ 1. Fork it
68
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
69
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
70
+ 4. Push to the branch (`git push origin my-new-feature`)
71
+ 5. Create a new Pull Request
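The README shows the file-based dump/restore flow. The same classes also support a direct, table-by-table copy with no intermediate files: `Schema` (which in this version lives in `wyrm/db_pump` alongside `DbPump`) can create the destination schema, copy a table, then add indexes. A minimal sketch, assuming the sequel, fastandand and adapter gems are installed, and using hypothetical connection strings and table name:

```ruby
require 'sequel'
require 'wyrm/db_pump'   # in this version Schema lives alongside DbPump

# hypothetical connection strings and table name
src_db = Sequel.connect "mysql2://localhost/lots"
dst_db = Sequel.connect "postgres://localhost/lots"

schema = Schema.new src_db, dst_db
schema.create                  # build the destination tables
schema.transfer_table :users   # copy one table directly, no intermediate files
schema.index                   # indexes, foreign keys, sequence resets
```

`Schema.transfer src_db, dst_db` wraps the same steps for every table in the source.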
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/lib/wyrm.rb ADDED
@@ -0,0 +1,8 @@
1
+ require "wyrm/version"
2
+ require "wyrm/db_pump.rb"
3
+ require "wyrm/restore_schema.rb"
4
+ require "wyrm/dump_schema.rb"
5
+
6
+ module Wyrm
7
+ # Your code goes here...
8
+ end
data/lib/wyrm/db_pump.rb ADDED
@@ -0,0 +1,410 @@
1
+ require 'sequel'
2
+ require 'yaml'
3
+ require 'ostruct'
4
+ require 'logger'
5
+ require 'pathname'
+ require 'fastandand'
6
+
7
+ Sequel.extension :migration, :schema_dumper, :pagination
8
+
9
+
10
+ # TODO possibly use Gem::Package::TarWriter to write tar files
11
+ # TODO when restoring, could use a SizeQueue to make sure the db is kept busy
12
+
13
+ # TODO need to version the dumps, or something like that.
14
+ class DbPump
15
+ class RespondsTo
16
+ def initialize( *methods )
17
+ @methods = methods
18
+ end
19
+
20
+ def ===( instance )
21
+ @methods.all?{|m| instance.respond_to? m}
22
+ end
23
+ end
24
+
25
+ def initialize( codec = :marshal )
26
+ @codec =
27
+ case codec
28
+ when :yaml; YamlCodec.new
29
+ when :marshal; MarshalCodec.new
30
+ when Class
31
+ codec.new
32
+ when RespondsTo.new( :encode, :decode )
33
+ codec
34
+ else
35
+ raise "unknown codec #{codec}"
36
+ end
37
+ end
38
+
39
+ attr_reader :codec
40
+
41
+ # TODO could use msgpack as serialization here, but its API is unpleasant.
42
+
43
+ class MarshalCodec
44
+ def encode( obj, io )
45
+ Marshal.dump obj, io
46
+ end
47
+
48
+ def decode( io, &block )
49
+ obj = Marshal.load(io)
50
+ yield obj if block_given?
51
+ obj
52
+ end
53
+ end
54
+
55
+ # Placeholder: currently identical to MarshalCodec (see TODO above).
+ class MsgPackCodec
56
+ def encode( obj, io )
57
+ Marshal.dump obj, io
58
+ end
59
+
60
+ def decode( io, &block )
61
+ obj = Marshal.load(io)
62
+ yield obj if block_given?
63
+ obj
64
+ end
65
+ end
66
+
67
+ class YamlCodec
68
+ def encode( obj, io )
69
+ YAML.dump obj, io
70
+ end
71
+
72
+ def decode( io, &block )
73
+ obj = YAML.load(io)
74
+ yield obj if block_given?
75
+ obj
76
+ end
77
+ end
78
+
79
+ def logger
80
+ @logger ||= Logger.new STDERR
81
+ end
82
+
83
+ def primary_keys( db, table_name )
84
+ db.schema(table_name).select{|df| df.last[:primary_key]}.map{|df| df.first}
85
+ end
86
+
87
+ # TODO possibly use select from outer / inner join to
88
+ # http://www.numerati.com/2012/06/26/reading-large-result-sets-with-hibernate-and-mysql/
89
+ # because mysql is useless
90
+ def paginated_dump( table_name, options = {} )
91
+ options = OpenStruct.new( {io: STDOUT, page_size: 10000, dry_run: false}.merge( options.to_h ) )
92
+ pk = primary_keys options.db, table_name
93
+ options.db[table_name].order(*pk).each_page(options[:page_size]) do |page|
94
+ logger.info page.sql
95
+ page.each do |row|
96
+ unless options[:dry_run]
97
+ codec.encode row.values, options.io
98
+ end
99
+ end
100
+ end
101
+ options.io.flush
102
+ end
103
+
104
+ # have to use this for non-integer pks
105
+ # The idea is that large offsets are expensive in the db because the db server has to read
106
+ # through the data set to reach the required offset. So make that only ids, and then
107
+ # do the main select from the limited id list.
108
+ # TODO could speed this up by have a query thread which runs the next page-query while
109
+ # the current one is being written/compressed.
110
+ # select * from massive as full
111
+ # inner join (select id from massive order by whatever limit m, n) limit
112
+ # on full.id = limit.id
113
+ # order by full.whatever
114
+ def inner_dump( table_name, options = {} )
115
+ options = OpenStruct.new( {io: STDOUT, page_size: 10000, dry_run: false}.merge( options.to_h ) )
116
+ pk = primary_keys options.db, table_name
117
+
118
+ table_dataset = options.db[table_name]
119
+ # could possibly overrride Dataset#paginate(page_no, page_size, record_count=nil)
120
+ 0.step(table_dataset.count, options.page_size).each do |offset|
121
+ limit_dataset = table_dataset.select( *pk ).limit( options.page_size, offset ).order( *pk )
122
+ page = table_dataset.join( limit_dataset, Hash[ pk.map{|f| [f,f]} ] ).order( *pk ).qualify_to(table_name)
123
+ logger.info page.sql
124
+ page.each do |row|
125
+ unless options[:dry_run]
126
+ codec.encode row.values, options.io
127
+ end
128
+ end
129
+ end
130
+ options.io.flush
131
+ end
132
+
133
+ # TODO need to also dump a first row containing useful stuff:
134
+ # - source table name
135
+ # - number of rows
136
+ # - source db url
137
+ # - permissions?
138
+ # These should all be in one object that can be Marshall.load-ed easily.
139
+ def dump( table_name, options = {} )
140
+ pk = primary_keys options[:db], table_name
141
+ case
142
+ when pk.empty?
143
+ paginated_dump( table_name, options )
144
+ when pk.all?{|i| i == :id }
145
+ min_max_dump( table_name, options )
146
+ else
147
+ inner_dump( table_name, options )
148
+ end
149
+ end
150
+
151
+ # could use this for integer pks
152
+ def min_max_dump( table_name, options = {} )
153
+ # select max(id), min(id) from patents
154
+ # and then split that up into 10000 size chunks. Not really important if there aren't exactly 10000
155
+ options = OpenStruct.new( {io: STDOUT, page_size: 10000, dry_run: false}.merge( options.to_h ) )
156
+ pk = primary_keys options.db, table_name
157
+
158
+ table_dataset = options.db[table_name]
159
+ min, max = table_dataset.select{[min(id), max(id)]}.first.values
160
+ return unless min && max
161
+ # could possibly overrride Dataset#paginate(page_no, page_size, record_count=nil)
162
+ # TODO definitely need to refactor this
163
+
164
+ # will always include the last item because
165
+ (min..max).step(options.page_size).each do |offset|
166
+ page = table_dataset.where( id: offset...(offset+options.page_size) )
167
+ logger.info page.sql
168
+ page.each do |row|
169
+ unless options[:dry_run]
170
+ codec.encode row.values, options.io
171
+ end
172
+ end
173
+ end
174
+ options.io.flush
175
+ end
176
+
177
+ # TODO possible memory issues here if the rows are big. May need to fork this.
178
+ # TODO lazy evaluation
179
+ def restore( table_name, options = {} )
180
+ logger.info "restoring #{table_name}"
181
+ options = OpenStruct.new( {io: STDIN, page_size: 10000, start_row: 0, dry_run: false}.merge( options ) )
182
+ dataset = options.db[table_name.to_sym]
183
+ # destination db should be same structure as incoming data
184
+ column_names = options.db.schema(table_name.to_sym).map( &:first )
185
+ first = ->(row){raise "schema mismatch" if row.size != column_names.size} # TODO not yet called on the first row
186
+
187
+ rows_restored = 0
188
+
189
+ # skip this many rows
190
+ options.start_row.times do
191
+ codec.decode( options.io ) {|row|}
192
+ end
193
+
194
+ # copy rows into db
195
+ while !options.io.eof?
196
+ # fetch a page of rows
197
+ rows_ary = []
198
+ begin
199
+ options.page_size.times do |i|
200
+ codec.decode( options.io ) do |row|
201
+ rows_ary << row
202
+ end
203
+ rows_restored += 1
204
+ end
205
+ rescue EOFError => e
206
+ # ran out of rows, so just use the ones we have so far
207
+ end
208
+
209
+ # insert into db. Hopefully the db supports bulk insert, which Sequel will figure out
210
+ options.db.transaction do
211
+ dataset.import column_names, rows_ary
212
+ yield rows_restored if block_given?
213
+ logger.info "restored #{rows_restored}"
214
+ end
215
+ end
216
+
217
+ rows_restored
218
+ end
219
+
220
+ def from_bz2( filename, db, table_name, options = {} )
221
+ IO.popen( "pbzip2 -d -c #{filename}" ) do |io|
222
+ restore table_name, options.merge( io: io, db: db )
223
+ end
224
+ end
225
+ end
226
+
227
+ # There are actually 2 sources for this:
228
+ # one is the src db, the other is the dumped files
229
+ # The version that transfers live between dbs is yet another variant.
230
+ class Schema
231
+ def initialize( src_db, dst_db = nil )
232
+ @src_db = src_db
233
+ @dst_db = dst_db
234
+ end
235
+
236
+ def schema_migration
237
+ @schema_migration ||= src_db.dump_schema_migration(:indexes=>false, :same_db => same_db)
238
+ end
239
+
240
+ def index_migration
241
+ @index_migration ||= src_db.dump_indexes_migration(:same_db => same_db)
242
+ end
243
+
244
+ def fk_migration
245
+ @fk_migration ||= src_db.dump_foreign_key_migration(:same_db => same_db)
246
+ end
247
+
248
+ def restore_migration
249
+ <<-EOF
250
+ require 'restore_migration'
251
+ Sequel.migration do
252
+ def db_pump
253
+ end
254
+
255
+ up do
256
+ restore_tables
257
+ end
258
+
259
+ down do
260
+ # from each table clear table
261
+ each_table do |table_name|
262
+ db_pump.restore table_name, io: io, db: db
263
+ end
264
+ end
265
+ end
266
+ EOF
267
+ end
268
+
269
+ attr_accessor :dst_db
270
+ attr_reader :src_db
271
+
272
+ def same_db
273
+ @dst_db.andand.database_type == @src_db.andand.database_type
274
+ end
275
+
276
+ def logger
277
+ @logger ||= Logger.new STDERR
278
+ end
279
+
280
+ # create the destination schema
281
+ def create
282
+ eval( schema_migration ).apply dst_db, :up
283
+ end
284
+
285
+ # create indexes and foreign keys, and reset sequences
286
+ def index
287
+ logger.info "creating indexes"
288
+ eval( index_migration ).apply dst_db, :up
289
+ logger.info "creating foreign keys"
290
+ eval( fk_migration ).apply dst_db, :up
291
+
292
+ if dst_db.database_type == :postgres
293
+ logger.info "reset primary key sequences"
294
+ dst_db.tables.each{|t| dst_db.reset_primary_key_sequence(t)}
295
+ logger.info "Primary key sequences reset successfully"
296
+ end
297
+ end
298
+
299
+ def transfer_table( table_name, options = {} )
300
+ options = OpenStruct.new( {page_size: 10000, dry_run: false}.merge( options ) )
301
+ total_records = @src_db[table_name].count
302
+ logger.info "transferring #{total_records}"
303
+ column_names = @src_db.schema(table_name.to_sym).map( &:first )
304
+
305
+ @src_db[table_name].each_page(options.page_size) do |page|
306
+ logger.info "#{page.sql} of #{total_records}"
307
+ unless options.dry_run
308
+ @dst_db.transaction do
309
+ rows_ary = []
310
+ page.each do |row_hash|
311
+ rows_ary << row_hash.values
312
+ end
313
+ @dst_db[table_name.to_sym].import column_names, rows_ary
314
+ end
315
+ end
316
+ end
317
+ end
318
+
319
+ # copy the data in the tables
320
+ def transfer
321
+ create
322
+ src_db.tables.each{|table_name| transfer_table table_name}
323
+ index
324
+ end
325
+
326
+ def dump_schema( container, options = {codec: :marshal} )
327
+ (container + '001_schema.rb').open('w') do |io|
328
+ io.write schema_migration
329
+ end
330
+
331
+ (container + '002_populate_tables.rb').open('w') do |io|
332
+ io.write restore_migration
333
+ end
334
+
335
+ (container + '003_indexes.rb').open('w') do |io|
336
+ io.write index_migration
337
+ end
338
+
339
+ (container + '004_foreign keys.rb').open('w') do |io|
340
+ io.write fk_migration
341
+ end
342
+ end
343
+
344
+ def load_migrations( container )
345
+ @schema_migration = (container + '001_schema.rb').read
346
+ @index_migration = (container + '003_indexes.rb').read
347
+ @fk_migration = (container + '004_foreign keys.rb').read
348
+ end
349
+
350
+ def dump_one_table( table_name, pathname, db_pump )
351
+ logger.info "dumping #{table_name} to #{pathname}"
352
+ fio = pathname.open('w')
353
+ # open subprocess in read-write mode
354
+ zio = IO.popen( "pbzip2 -z", 'r+' )
355
+ copier = Thread.new do
356
+ begin
357
+ IO.copy_stream zio, fio
358
+ logger.debug "finished stream copy"
359
+ ensure
360
+ fio.close
361
+ end
362
+ end
363
+
364
+ # generate the dump
365
+ db_pump.dump table_name, db: src_db, io: zio
366
+
367
+ # signal the copier thread to stop
368
+ zio.close_write
369
+ logger.debug 'finished dumping'
370
+ # wait for copier thread to finish
371
+ copier.join
372
+ logger.debug 'stream copy thread finished'
373
+ ensure
374
+ zio.close unless zio.closed?
375
+ fio.close unless fio.closed?
376
+ end
377
+
378
+ def dump_tables( container, options = {:codec => :marshal} )
379
+ container = Pathname(container)
380
+ db_pump = DbPump.new( options[:codec] )
381
+
382
+ src_db.tables.each do |table_name|
383
+ filename = container + "#{table_name}.dbp.bz2"
384
+ dump_one_table table_name, filename, db_pump
385
+ end
386
+ end
387
+
388
+ def restore_one_table( table_file, db_pump )
389
+ logger.info "restoring from #{table_file}"
390
+ table_name = table_file.basename.sub_ext('').sub_ext('').to_s.to_sym
391
+ # check if table has been restored already, and has the correct rows,
392
+ # otherwise pass in a start row.
393
+ db_pump.from_bz2 table_file, dst_db, table_name
394
+ end
395
+
396
+ def restore_tables( container, options = {:codec => :marshal} )
397
+ db_pump = DbPump.new( options[:codec] )
398
+ container = Pathname(container)
399
+ table_files = Pathname.glob container + '*dbp.bz2'
400
+ table_files.each{|table_file| restore_one_table table_file, db_pump}
401
+ end
406
+
407
+ def self.transfer( src_db, dst_db )
408
+ new( src_db, dst_db ).transfer
409
+ end
410
+ end
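The codec argument to `DbPump.new` is duck-typed: `:marshal`, `:yaml`, a codec class, or any object responding to `encode` and `decode` (checked by the `RespondsTo` matcher) is accepted. A minimal sketch of a custom codec, assuming a hypothetical newline-delimited JSON format that both the dump and restore sides agree on:

```ruby
require 'json'
require 'wyrm/db_pump'

# Hypothetical codec, not part of wyrm: one JSON array per line.
# Values JSON cannot represent natively (Time, BigDecimal, ...) round-trip as strings.
class JsonLinesCodec
  def encode( obj, io )
    io.puts JSON.generate( obj )
  end

  def decode( io, &block )
    obj = JSON.parse( io.readline )  # readline raises EOFError, which restore expects
    yield obj if block_given?
    obj
  end
end

db_pump = DbPump.new JsonLinesCodec.new
# DbPump.new JsonLinesCodec also works: a Class is instantiated by DbPump#initialize
```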
data/lib/wyrm/dump_schema.rb ADDED
@@ -0,0 +1,111 @@
1
+ require 'pathname'
+ require 'logger'
+ require 'wyrm/db_pump'
+
+ # Dump a schema and compressed data from a db to a set of files
2
+ # src_db = Sequel.connect "postgres://localhost:5454/lots"
3
+ # ds = DumpSchema.new src_db, Pathname('/var/data/lots')
4
+ # ds.dump_schema
5
+ # ds.dump_tables
6
+ class DumpSchema
7
+ def initialize( src_db, container = nil, options = {} )
8
+ @options = {:codec => :marshal}.merge( options )
9
+
10
+ @src_db = src_db
11
+ @container = Pathname(container)
12
+ end
13
+
14
+ attr_reader :src_db, :container, :codec
15
+
16
+ def schema_migration
17
+ @schema_migration ||= src_db.dump_schema_migration(:indexes=>false, :same_db => same_db)
18
+ end
19
+
20
+ def index_migration
21
+ @index_migration ||= src_db.dump_indexes_migration(:same_db => same_db)
22
+ end
23
+
24
+ def fk_migration
25
+ @fk_migration ||= src_db.dump_foreign_key_migration(:same_db => same_db)
26
+ end
27
+
28
+ def restore_migration
29
+ <<-EOF
30
+ require 'restore_migration'
31
+ Sequel.migration do
32
+ def db_pump
33
+ end
34
+
35
+ up do
36
+ restore_tables
37
+ end
38
+
39
+ down do
40
+ # from each table clear table
41
+ each_table do |table_name|
42
+ db_pump.restore table_name, io: io, db: db
43
+ end
44
+ end
45
+ end
46
+ EOF
47
+ end
48
+
49
+ def same_db
50
+ false
51
+ end
52
+
53
+ def logger
54
+ @logger ||= Logger.new STDERR
55
+ end
56
+
57
+ def dump_schema
58
+ (container + '001_schema.rb').open('w') do |io|
59
+ io.write schema_migration
60
+ end
61
+
62
+ (container + '002_populate_tables.rb').open('w') do |io|
63
+ io.write restore_migration
64
+ end
65
+
66
+ (container + '003_indexes.rb').open('w') do |io|
67
+ io.write index_migration
68
+ end
69
+
70
+ (container + '004_foreign keys.rb').open('w') do |io|
71
+ io.write fk_migration
72
+ end
73
+ end
74
+
75
+ def dump_one_table( table_name, pathname, db_pump )
76
+ logger.info "dumping #{table_name} to #{pathname}"
77
+ fio = pathname.open('w')
78
+ # open subprocess in read-write mode
79
+ zio = IO.popen( "pbzip2 -z", 'r+' )
80
+ copier = Thread.new do
81
+ begin
82
+ IO.copy_stream zio, fio
83
+ logger.debug "finished stream copy"
84
+ ensure
85
+ fio.close
86
+ end
87
+ end
88
+
89
+ # generate the dump
90
+ db_pump.dump table_name, db: src_db, io: zio
91
+
92
+ # signal the copier thread to stop
93
+ zio.close_write
94
+ logger.debug 'finished dumping'
95
+ # wait for copier thread to finish
96
+ copier.join
97
+ logger.debug 'stream copy thread finished'
98
+ ensure
99
+ zio.close unless zio.closed?
100
+ fio.close unless fio.closed?
101
+ end
102
+
103
+ def dump_tables
104
+ db_pump = DbPump.new( @options[:codec] )
105
+
106
+ src_db.tables.each do |table_name|
107
+ filename = container + "#{table_name}.dbp.bz2"
108
+ dump_one_table table_name, filename, db_pump
109
+ end
110
+ end
111
+ end
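`DumpSchema#dump_tables` walks every table in `src_db.tables`, but the pieces can be driven individually. A sketch of dumping just one table, assuming `pbzip2` is on the PATH, the target directory already exists, and using a hypothetical connection string and table name:

```ruby
require 'sequel'
require 'pathname'
require 'wyrm/dump_schema'

src_db = Sequel.connect 'mysql2://localhost/lots'   # hypothetical source db
ds = DumpSchema.new src_db, Pathname('/tmp/lots')   # /tmp/lots must already exist

ds.dump_schema                                      # writes the 00x_*.rb migration files
ds.dump_one_table :users, ds.container + 'users.dbp.bz2', DbPump.new(:marshal)
```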
data/lib/wyrm/other_schema.rb ADDED
@@ -0,0 +1,6 @@
1
+ # Place for stuff that I'm not sure about yet
2
+ class OtherSchema
3
+ def same_db
4
+ @dst_db.andand.database_type == @src_db.andand.database_type
5
+ end
6
+ end
data/lib/wyrm/restore_schema.rb ADDED
@@ -0,0 +1,64 @@
1
+ require 'logger'
+ require 'pathname'
+ require 'wyrm/db_pump'
2
+
3
+ # Load a schema from a set of dump files (from DumpSchema)
4
+ # and restore the table data
5
+ # dst_db = Sequel.connect "postgres://localhost:5454/lots"
6
+ # rs = RestoreSchema.new dst_db, Pathname('/var/data/lots')
7
+ # rs.create
8
+ # rs.restore_tables
9
+ class RestoreSchema
10
+ def initialize( dst_db, container )
11
+ @container = container
12
+ @dst_db = dst_db
13
+ @options = {:codec => :marshal}
14
+ load_migrations @container
15
+ end
16
+
17
+ attr_reader :dst_db
18
+ attr_reader :options
19
+ attr_reader :container
20
+ attr_reader :schema_migration, :index_migration
21
+
22
+ def logger
23
+ @logger ||= Logger.new STDERR
24
+ end
25
+
26
+ def load_migrations( container )
27
+ @schema_migration = (container + '001_schema.rb').read
28
+ @index_migration = (container + '003_indexes.rb').read
29
+ @fk_migration = (container + '004_foreign keys.rb').read
30
+ end
31
+
32
+ # create indexes and foreign keys, and reset sequences
33
+ def index
34
+ logger.info "creating indexes"
35
+ eval( index_migration ).apply dst_db, :up
36
+ logger.info "creating foreign keys"
37
+ eval( fk_migration ).apply dst_db, :up
38
+
39
+ if dst_db.database_type == :postgres
40
+ logger.info "reset primary key sequences"
41
+ dst_db.tables.each{|t| dst_db.reset_primary_key_sequence(t)}
42
+ logger.info "Primary key sequences reset successfully"
43
+ end
44
+ end
45
+
46
+ # create the destination schema
47
+ def create
48
+ eval( schema_migration ).apply dst_db, :up
49
+ end
50
+
51
+ def restore_one_table( table_file, db_pump )
52
+ logger.info "restoring from #{table_file}"
53
+ table_name = table_file.basename.sub_ext('').sub_ext('').to_s.to_sym
54
+ # check if table has been restored already, and has the correct rows,
55
+ # otherwise pass in a start row.
56
+ db_pump.from_bz2 table_file, dst_db, table_name
57
+ end
58
+
59
+ def restore_tables
60
+ db_pump = DbPump.new( options[:codec] )
61
+ table_files = Pathname.glob Pathname(container) + '*dbp.bz2'
62
+ table_files.sort_by{|tf| tf.stat.size}.each{|table_file| restore_one_table table_file, db_pump}
63
+ end
64
+ end
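`RestoreSchema#restore_tables` restores every `*.dbp.bz2` file it finds, smallest first. For a single table, or to resume one that was interrupted, `DbPump#from_bz2` and its `start_row` option can be used directly. A sketch, assuming the schema was already created with `rs.create`, the dump used the default `:marshal` codec, `pbzip2` is installed, and the file and table names are hypothetical:

```ruby
require 'sequel'
require 'pathname'
require 'wyrm/restore_schema'

dst_db = Sequel.connect 'postgres://localhost/lots'   # hypothetical destination db

# skip however many rows already made it in, then carry on from there
already_loaded = dst_db[:users].count
db_pump = DbPump.new :marshal
db_pump.from_bz2 Pathname('/var/data/lots/users.dbp.bz2'), dst_db, :users, start_row: already_loaded
```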
data/lib/wyrm/transferer.rb ADDED
@@ -0,0 +1,32 @@
1
+ # Work in progress, extracted from Schema: create, index, logger and the
+ # db instance variables are not defined here yet.
+ class Transferer
2
+ def transfer_table( table_name, options = {} )
3
+ options = OpenStruct.new( {page_size: 10000, dry_run: false}.merge( options ) )
4
+ total_records = @src_db[table_name].count
5
+ logger.info "transferring #{total_records}"
6
+ column_names = @src_db.schema(table_name.to_sym).map( &:first )
7
+
8
+ @src_db[table_name].each_page(options.page_size) do |page|
9
+ logger.info "#{page.sql} of #{total_records}"
10
+ unless options.dry_run
11
+ @dst_db.transaction do
12
+ rows_ary = []
13
+ page.each do |row_hash|
14
+ rows_ary << row_hash.values
15
+ end
16
+ @dst_db[table_name.to_sym].import column_names, rows_ary
17
+ end
18
+ end
19
+ end
20
+ end
21
+
22
+ # copy the data in the tables
23
+ def transfer
24
+ create
25
+ transfer_tables
26
+ index
27
+ end
28
+
29
+ def self.transfer( src_db, dst_db )
30
+ new( src_db, dst_db ).transfer
31
+ end
32
+ end
data/lib/wyrm/version.rb ADDED
@@ -0,0 +1,3 @@
1
+ module Wyrm
2
+ VERSION = "0.1.0"
3
+ end
data/wyrm.gemspec ADDED
@@ -0,0 +1,23 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'wyrm/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "wyrm"
8
+ spec.version = Wyrm::VERSION
9
+ spec.authors = ["John Anderson"]
10
+ spec.email = ["panic@semiosix.com"]
11
+ spec.description = %q{Transfer from one SQL database to another}
12
+ spec.summary = %q{Transfer from one SQL database to another}
13
+ spec.homepage = "http://djellemah.com"
14
+ spec.license = "MIT"
15
+
16
+ spec.files = `git ls-files`.split($/)
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib"]
20
+
21
+ spec.add_development_dependency "bundler", "~> 1.3"
22
+ spec.add_development_dependency "rake"
23
+ end
metadata ADDED
@@ -0,0 +1,86 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: wyrm
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - John Anderson
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-05-13 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.3'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description: Transfer from one SQL database to another
42
+ email:
43
+ - panic@semiosix.com
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - .gitignore
49
+ - .rvmrc
50
+ - Gemfile
51
+ - LICENSE.txt
52
+ - README.md
53
+ - Rakefile
54
+ - lib/wyrm.rb
55
+ - lib/wyrm/db_pump.rb
56
+ - lib/wyrm/dump_schema.rb
57
+ - lib/wyrm/other_schema.rb
58
+ - lib/wyrm/restore_schema.rb
59
+ - lib/wyrm/transferer.rb
60
+ - lib/wyrm/version.rb
61
+ - wyrm.gemspec
62
+ homepage: http://djellemah.com
63
+ licenses:
64
+ - MIT
65
+ metadata: {}
66
+ post_install_message:
67
+ rdoc_options: []
68
+ require_paths:
69
+ - lib
70
+ required_ruby_version: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - '>='
73
+ - !ruby/object:Gem::Version
74
+ version: '0'
75
+ required_rubygems_version: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - '>='
78
+ - !ruby/object:Gem::Version
79
+ version: '0'
80
+ requirements: []
81
+ rubyforge_project:
82
+ rubygems_version: 2.0.0.rc.2
83
+ signing_key:
84
+ specification_version: 4
85
+ summary: Transfer from one SQL database to another
86
+ test_files: []