wyrm 0.2.1 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.rvmrc +1 -1
- data/Gemfile +0 -3
- data/README.md +18 -11
- data/bin/wyrm +40 -24
- data/bin/wyrm-view +34 -0
- data/lib/wyrm.rb +3 -5
- data/lib/wyrm/cli.rb +9 -0
- data/lib/wyrm/core_extensions.rb +10 -0
- data/lib/wyrm/{dump_schema.rb → dump.rb} +22 -21
- data/lib/wyrm/hole.rb +164 -0
- data/lib/wyrm/logger.rb +11 -0
- data/lib/wyrm/module.rb +1 -0
- data/lib/wyrm/{db_pump.rb → pump.rb} +40 -34
- data/lib/wyrm/pump_maker.rb +10 -4
- data/lib/wyrm/{restore_schema.rb → restore.rb} +40 -33
- data/lib/wyrm/schema_tools.rb +91 -0
- data/lib/wyrm/version.rb +1 -1
- data/snippets/console.rb +5 -3
- data/spec/core_extensions_spec.rb +50 -0
- data/spec/hole_mouth_spec.rb +176 -0
- data/spec/pump_spec.rb +62 -0
- data/spec/schema_tools_spec.rb +201 -0
- data/wyrm.gemspec +12 -3
- metadata +135 -23
- data/lib/wyrm/other_schema.rb +0 -6
- data/lib/wyrm/transferer.rb +0 -32
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7c67007e35f84f5542da888c17fc91f6e1fe03cb
|
4
|
+
data.tar.gz: 7434160ebe9385f72d04546b69480d457a13e315
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 11ccf21e5c471e7c06a2a0c27739875d1197570d3c2049a27d284327bd748a0411f3280ab9c89bb865c2c882aa1019a371ecff5105b86baab81c01ce9be8520c
|
7
|
+
data.tar.gz: 96f35474f2770b85b277b5b1cdb64798b4cba183d9ad6b7753d2ded403f878bd7130c4ca634b69262ee812160147a4c674466ca4347fef42c71c2e99e87f9934
|
data/.gitignore
CHANGED
data/.rvmrc
CHANGED
@@ -1 +1 @@
|
|
1
|
-
rvm 2.
|
1
|
+
rvm 2.1.1@wyrm --create
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -1,13 +1,18 @@
|
|
1
1
|
# Wyrm
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/wyrm.png)](http://badge.fury.io/rb/wyrm)
|
4
|
+
|
5
|
+
Transfer a database from one rdbms to another (eg mysql to postgres). Either via
|
6
|
+
a set of files, or direct from one db server to another.
|
7
|
+
|
8
|
+
Has been used to dump dbs larger than 100M, and one 850G db.
|
9
|
+
Should theoretically work for any rdbms supported by [Sequel](http://sequel.jeremyevans.net/).
|
5
10
|
|
6
11
|
Dumps are compressed with bz2, using pbzip2. Fast *and* small :-D For example:
|
7
12
|
mysqldump | bzip2 for a certain 850G db comes to 127G. With wyrm it
|
8
13
|
comes to 134G.
|
9
14
|
|
10
|
-
|
15
|
+
Transfers tables and views only. Does not attempt to transfer
|
11
16
|
stored procs, permissions, triggers etc.
|
12
17
|
|
13
18
|
Handles tables with a single numeric key, single non-numeric key, and no
|
@@ -19,7 +24,7 @@ Will use result set streaming if available.
|
|
19
24
|
Wyrm because:
|
20
25
|
|
21
26
|
- I like dragons
|
22
|
-
- I can
|
27
|
+
- I can have a Wyrm::Hole to transfer data ;-)
|
23
28
|
|
24
29
|
## Dependencies
|
25
30
|
|
@@ -29,7 +34,6 @@ on your path.
|
|
29
34
|
|
30
35
|
## Installation
|
31
36
|
|
32
|
-
|
33
37
|
Add this line to your application's Gemfile:
|
34
38
|
|
35
39
|
gem 'wyrm'
|
@@ -52,6 +56,11 @@ Make sure you install the db gems, typically
|
|
52
56
|
|
53
57
|
Very basic cli at this point.
|
54
58
|
|
59
|
+
#### For direct db-to-db transfer
|
60
|
+
|
61
|
+
$ wyrm mysql2://localhost/beeg_data_bays postgres://localhost/betta_dee_bee
|
62
|
+
|
63
|
+
#### Via files
|
55
64
|
From the source db to the file system
|
56
65
|
|
57
66
|
$ wyrm mysql2://localhost/beeg_data_bays /tmp/lots_fs_space
|
@@ -70,20 +79,18 @@ For restoring. dump will be similar.
|
|
70
79
|
|
71
80
|
``` ruby
|
72
81
|
require 'wyrm/restore'
|
73
|
-
rs =
|
74
|
-
rs.
|
75
|
-
rs.restore_tables
|
76
|
-
rs.index
|
82
|
+
rs = Restore.new '/mnt/disk/wyrm', 'postgres://postgres@localhost/your_db'
|
83
|
+
rs.call
|
77
84
|
```
|
78
85
|
|
79
86
|
Or for the lower-level stuff
|
80
87
|
|
81
88
|
``` ruby
|
82
89
|
require 'sequel'
|
83
|
-
require 'wyrm/
|
90
|
+
require 'wyrm/pump'
|
84
91
|
|
85
92
|
db = Sequel.connect 'postgres://postgres@localhost/other_db'
|
86
|
-
dbp =
|
93
|
+
dbp = Wyrm::Pump.new db, :things
|
87
94
|
dbp.io = IO.popen 'pbzip2 -d -c /mnt/disk/wyrm/things.dbp.bz2'
|
88
95
|
dbp.each_row do |row|
|
89
96
|
puts row.inspect
|
data/bin/wyrm
CHANGED
@@ -1,36 +1,52 @@
|
|
1
1
|
#! /usr/bin/env ruby
|
2
2
|
|
3
|
-
require 'pathname'
|
4
3
|
require 'uri'
|
5
4
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
rs.index
|
12
|
-
end
|
5
|
+
if ARGV.size != 2
|
6
|
+
puts <<EOF
|
7
|
+
Usage: #{$0} src_db|dirname dst_db|dirname
|
8
|
+
|
9
|
+
dirname contains a set of wyrm files, or will soon.
|
13
10
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
11
|
+
sample db strings:
|
12
|
+
postgres://localhost/lotsa_datsa
|
13
|
+
mysql://root:pwned@localhost/lotsa_datsa
|
14
|
+
mysql2://root:pwned@localhost/lotsa_fastsa_datsa
|
15
|
+
EOF
|
16
|
+
exit(1)
|
19
17
|
end
|
20
18
|
|
21
|
-
|
22
|
-
|
23
|
-
|
19
|
+
module FsPath
|
20
|
+
def fs_path?
|
21
|
+
scheme == 'file' || scheme.nil?
|
22
|
+
end
|
24
23
|
end
|
25
24
|
|
26
|
-
src, dst = ARGV.map{|arg| URI.parse
|
25
|
+
src, dst = ARGV.map{|arg| URI.parse(arg).extend(FsPath)}
|
26
|
+
|
27
|
+
require 'wyrm/cli'
|
28
|
+
Wyrm.sanity_check_pbzip2
|
29
|
+
|
30
|
+
include Wyrm
|
31
|
+
|
32
|
+
case
|
33
|
+
when src.fs_path? && dst.fs_path?
|
34
|
+
puts "No point copying one directory to another. Just use filesystem tools. It's faster."
|
35
|
+
exit(1)
|
36
|
+
|
37
|
+
when !src.fs_path? && dst.fs_path?
|
38
|
+
# src is a url, and dst is a path, so dump to file system
|
39
|
+
require 'wyrm/dump'
|
40
|
+
Dump.new( src.to_s, dst.path ).call
|
41
|
+
|
42
|
+
when src.fs_path? && !dst.fs_path?
|
43
|
+
# src is a path and dst is a url, so restore to db
|
44
|
+
require 'wyrm/restore'
|
45
|
+
Restore.new(src.path, dst.to_s, drop_tables: true).call
|
27
46
|
|
28
|
-
if src.scheme && Pathname(dst.to_s).exist?
|
29
|
-
# src is a db path, so dump from it
|
30
|
-
dump( src.to_s, dst.to_s )
|
31
|
-
elsif dst.scheme && Pathname(src.to_s).exist?
|
32
|
-
# dst is a path and src is a url, so restore
|
33
|
-
restore( dst.to_s, src.to_s )
|
34
47
|
else
|
35
|
-
|
48
|
+
# both db urls, so transfer
|
49
|
+
require 'wyrm/hole.rb'
|
50
|
+
Hole.new( src.to_s, dst.to_s ).call
|
51
|
+
|
36
52
|
end
|
data/bin/wyrm-view
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
|
3
|
+
if ARGV.empty? || ARGV.first =~ /^-+(\?|h(elp)?)$/
|
4
|
+
puts <<EOF
|
5
|
+
Usage: #{$0} dbp_file.dbp[.bz2]
|
6
|
+
|
7
|
+
Display contents of dbp file, optionally compressed with bz2.
|
8
|
+
EOF
|
9
|
+
exit(0)
|
10
|
+
end
|
11
|
+
|
12
|
+
require 'wyrm/cli'
|
13
|
+
Wyrm.sanity_check_pbzip2
|
14
|
+
|
15
|
+
require 'pathname'
|
16
|
+
require 'yaml'
|
17
|
+
|
18
|
+
out_block = lambda do |io|
|
19
|
+
record_count = 1
|
20
|
+
until io.eof?
|
21
|
+
puts "# record #{record_count}"
|
22
|
+
puts Marshal.load(io).to_yaml
|
23
|
+
record_count += 1
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
ARGV.map{|a| Pathname(a)}.each do |path|
|
28
|
+
puts "# source: #{path}"
|
29
|
+
if path.extname == '.bz2'
|
30
|
+
IO.popen "pbzip2 -d -c #{path}", &out_block
|
31
|
+
else
|
32
|
+
path.open &out_block
|
33
|
+
end
|
34
|
+
end
|
data/lib/wyrm.rb
CHANGED
data/lib/wyrm/cli.rb
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
# would be nice to use refinements here, but that breaks 2.0 compatibility
|
2
|
+
class Method
|
3
|
+
def kwargs_as_hash( invocation_binding )
|
4
|
+
named_locals = parameters. \
|
5
|
+
select{|type,_| type == :key}. \
|
6
|
+
flat_map{|_,name| [name,invocation_binding.eval(name.to_s)]}
|
7
|
+
|
8
|
+
Hash[ *named_locals ]
|
9
|
+
end
|
10
|
+
end
|
@@ -1,18 +1,25 @@
|
|
1
|
-
require '
|
1
|
+
require 'pathname'
|
2
|
+
|
3
|
+
require 'wyrm/module'
|
2
4
|
require 'wyrm/pump_maker'
|
5
|
+
require 'wyrm/schema_tools'
|
6
|
+
require 'wyrm/logger'
|
3
7
|
|
4
8
|
# Dump a schema and compressed data from a db to a set of files
|
5
9
|
# src_db = Sequel.connect "postgres://localhost:5454/lots"
|
6
10
|
# ds = Wyrm::Dump.new src_db, Pathname('/var/data/lots')
|
7
|
-
# ds.
|
8
|
-
# ds.dump_tables
|
11
|
+
# ds.call
|
9
12
|
# TODO possibly use Gem::Package::TarWriter to write tar files
|
10
|
-
class
|
13
|
+
class Wyrm::Dump
|
11
14
|
include PumpMaker
|
15
|
+
include SchemaTools
|
16
|
+
include Wyrm::Logger
|
12
17
|
|
13
18
|
def initialize( src_db, container = nil, pump: nil )
|
19
|
+
@container = Pathname.new container || '.'
|
20
|
+
raise "#{@container} does not exist" unless @container.exist?
|
21
|
+
|
14
22
|
@src_db = maybe_deebe src_db
|
15
|
-
@container = Pathname.new container
|
16
23
|
@pump = make_pump( @src_db, pump )
|
17
24
|
|
18
25
|
@src_db.extension :schema_dumper
|
@@ -20,33 +27,21 @@ class DumpSchema
|
|
20
27
|
|
21
28
|
attr_reader :src_db, :container, :pump
|
22
29
|
|
23
|
-
def schema_migration
|
24
|
-
@schema_migration ||= src_db.dump_schema_migration(:indexes=>false, :same_db => same_db)
|
25
|
-
end
|
26
|
-
|
27
|
-
def index_migration
|
28
|
-
@index_migration ||= src_db.dump_indexes_migration(:same_db => same_db)
|
29
|
-
end
|
30
|
-
|
31
|
-
def fk_migration
|
32
|
-
@fk_migration ||= src_db.dump_foreign_key_migration(:same_db => same_db)
|
33
|
-
end
|
34
|
-
|
35
30
|
def same_db
|
36
31
|
false
|
37
32
|
end
|
38
33
|
|
39
|
-
def
|
40
|
-
@
|
34
|
+
def numbering
|
35
|
+
@numbering ||= '000'
|
41
36
|
end
|
42
37
|
|
43
38
|
def dump_schema
|
44
|
-
numbering = '000'
|
45
|
-
|
46
39
|
(container + "#{numbering.next!}_schema.rb").open('w') do |io|
|
47
40
|
io.write schema_migration
|
48
41
|
end
|
42
|
+
end
|
49
43
|
|
44
|
+
def dump_indexes
|
50
45
|
(container + "#{numbering.next!}_indexes.rb").open('w') do |io|
|
51
46
|
io.write index_migration
|
52
47
|
end
|
@@ -107,4 +102,10 @@ class DumpSchema
|
|
107
102
|
dump_table table_name
|
108
103
|
end
|
109
104
|
end
|
105
|
+
|
106
|
+
def call
|
107
|
+
dump_schema
|
108
|
+
dump_tables
|
109
|
+
dump_indexes
|
110
|
+
end
|
110
111
|
end
|
data/lib/wyrm/hole.rb
ADDED
@@ -0,0 +1,164 @@
|
|
1
|
+
require 'thread'
|
2
|
+
require 'wyrm/logger'
|
3
|
+
|
4
|
+
require 'wyrm/module'
|
5
|
+
require 'wyrm/pump'
|
6
|
+
require 'wyrm/pump_maker'
|
7
|
+
require 'wyrm/schema_tools'
|
8
|
+
require 'wyrm/core_extensions'
|
9
|
+
|
10
|
+
module Wyrm
|
11
|
+
# This bypasses the need to marshal objects between two pumps.
|
12
|
+
# It uses a queue of the record arrays instead.
|
13
|
+
class Hole
|
14
|
+
include PumpMaker
|
15
|
+
include Logger
|
16
|
+
|
17
|
+
# This is the codec. Named for the mouth of a wormhole. Cos finding a good name for this is hard.
|
18
|
+
#
|
19
|
+
# Connects the two pumps together. Implements Codec, Quacks like IO.
|
20
|
+
class Mouth
|
21
|
+
include Logger
|
22
|
+
|
23
|
+
def initialize
|
24
|
+
@flushed = false
|
25
|
+
end
|
26
|
+
|
27
|
+
# This is a bit weird because io_queue will usually == self
|
28
|
+
def encode( obj, io_queue )
|
29
|
+
io_queue.enq obj
|
30
|
+
end
|
31
|
+
|
32
|
+
# This is a bit weird because io_queue will usually == self
|
33
|
+
def decode( io_queue, &block )
|
34
|
+
obj = io_queue.deq
|
35
|
+
yield obj if block_given?
|
36
|
+
obj
|
37
|
+
end
|
38
|
+
|
39
|
+
def reset
|
40
|
+
# do this first, so any (hopefully not) remaining waiters don't
|
41
|
+
# go into the blocking deq again.
|
42
|
+
@flushed = false
|
43
|
+
|
44
|
+
# clear any poisons, and release any (hopefully not) remaining waiters
|
45
|
+
queue.clear
|
46
|
+
end
|
47
|
+
|
48
|
+
# queue could be empty while producer is generating something,
|
49
|
+
# so only eof after flush has been called.
|
50
|
+
def eof?
|
51
|
+
# queue is not empty if it's been poisoned.
|
52
|
+
@flushed && queue.empty?
|
53
|
+
end
|
54
|
+
|
55
|
+
# use a SizedQueue so we don't run out of memory during a big transfer
|
56
|
+
def queue
|
57
|
+
@queue ||=
|
58
|
+
if RUBY_VERSION == '2.1.0'
|
59
|
+
logger.warn "SizedQueue broken in 2.1.0 (https://bugs.ruby-lang.org/issues/9302). Falling back to Queue, which may run out of memory."
|
60
|
+
Queue.new
|
61
|
+
else
|
62
|
+
SizedQueue.new 5000
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
def enq( value )
|
67
|
+
queue.enq value
|
68
|
+
end
|
69
|
+
|
70
|
+
def deq( *args )
|
71
|
+
rv = queue.deq( *args )
|
72
|
+
if rv == :poison
|
73
|
+
poison_queue
|
74
|
+
raise StopIteration
|
75
|
+
end
|
76
|
+
rv
|
77
|
+
end
|
78
|
+
|
79
|
+
def poison_queue
|
80
|
+
# poison the queue. waiters will have to re-queue this.
|
81
|
+
queue << :poison if queue.empty? && queue.num_waiting > 0
|
82
|
+
end
|
83
|
+
|
84
|
+
# this gets called after dump is finished, by pump
|
85
|
+
def flush
|
86
|
+
# do this first, so any non-poisoned waiters will eof
|
87
|
+
# synchronisation is not really important because
|
88
|
+
@flushed = true
|
89
|
+
poison_queue
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
def initialize( src_db, dst_db, drop_tables: true, queue_size: 5000 )
|
94
|
+
# called only once per run, so not really a performance issue
|
95
|
+
@options = method(__method__).kwargs_as_hash( binding )
|
96
|
+
|
97
|
+
@src_db = maybe_deebe src_db
|
98
|
+
@dst_db = maybe_deebe dst_db
|
99
|
+
|
100
|
+
@src_db.extension :schema_dumper
|
101
|
+
end
|
102
|
+
|
103
|
+
attr_reader :src_db, :dst_db, :options
|
104
|
+
|
105
|
+
def mouth
|
106
|
+
@mouth ||= Mouth.new
|
107
|
+
end
|
108
|
+
|
109
|
+
def src_pump
|
110
|
+
@src_pump ||= Pump.new( {db: src_db, io: mouth, codec: mouth, logger: logger}.merge( options[:pump] ||{} ) )
|
111
|
+
end
|
112
|
+
|
113
|
+
def dst_pump
|
114
|
+
@dst_pump ||= Pump.new( {db: dst_db, io: mouth, codec: mouth, logger: logger}.merge( options[:pump] ||{} ) )
|
115
|
+
end
|
116
|
+
|
117
|
+
def transfer_table( table_name )
|
118
|
+
mouth.reset
|
119
|
+
src_pump.table_name = dst_pump.table_name = table_name
|
120
|
+
|
121
|
+
if src_pump.table_dataset.empty?
|
122
|
+
logger.info "No records in #{table_name}"
|
123
|
+
return
|
124
|
+
end
|
125
|
+
|
126
|
+
# Use threads so the db read/writes aren't waiting for one another.
|
127
|
+
recv_thread = Thread.new{ dst_pump.restore }
|
128
|
+
send_thread = Thread.new{ src_pump.dump }
|
129
|
+
|
130
|
+
send_thread.join
|
131
|
+
recv_thread.join
|
132
|
+
end
|
133
|
+
|
134
|
+
include SchemaTools
|
135
|
+
|
136
|
+
def transfer_schema( &transfer_table_block )
|
137
|
+
create_tables
|
138
|
+
|
139
|
+
# transfer tables here
|
140
|
+
yield self if block_given?
|
141
|
+
|
142
|
+
create_indexes
|
143
|
+
end
|
144
|
+
|
145
|
+
def transfer_tables
|
146
|
+
logger.info "transferring tables"
|
147
|
+
src_db.tables.each do |table_name|
|
148
|
+
transfer_table table_name
|
149
|
+
end
|
150
|
+
end
|
151
|
+
|
152
|
+
def call
|
153
|
+
if options[:drop_tables]
|
154
|
+
logger.info "dropping tables"
|
155
|
+
drop_tables src_db.tables
|
156
|
+
end
|
157
|
+
|
158
|
+
transfer_schema do
|
159
|
+
transfer_tables
|
160
|
+
end
|
161
|
+
end
|
162
|
+
|
163
|
+
end
|
164
|
+
end
|