wyrm 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.rvmrc +1 -1
- data/Gemfile +0 -3
- data/README.md +18 -11
- data/bin/wyrm +40 -24
- data/bin/wyrm-view +34 -0
- data/lib/wyrm.rb +3 -5
- data/lib/wyrm/cli.rb +9 -0
- data/lib/wyrm/core_extensions.rb +10 -0
- data/lib/wyrm/{dump_schema.rb → dump.rb} +22 -21
- data/lib/wyrm/hole.rb +164 -0
- data/lib/wyrm/logger.rb +11 -0
- data/lib/wyrm/module.rb +1 -0
- data/lib/wyrm/{db_pump.rb → pump.rb} +40 -34
- data/lib/wyrm/pump_maker.rb +10 -4
- data/lib/wyrm/{restore_schema.rb → restore.rb} +40 -33
- data/lib/wyrm/schema_tools.rb +91 -0
- data/lib/wyrm/version.rb +1 -1
- data/snippets/console.rb +5 -3
- data/spec/core_extensions_spec.rb +50 -0
- data/spec/hole_mouth_spec.rb +176 -0
- data/spec/pump_spec.rb +62 -0
- data/spec/schema_tools_spec.rb +201 -0
- data/wyrm.gemspec +12 -3
- metadata +135 -23
- data/lib/wyrm/other_schema.rb +0 -6
- data/lib/wyrm/transferer.rb +0 -32
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7c67007e35f84f5542da888c17fc91f6e1fe03cb
|
4
|
+
data.tar.gz: 7434160ebe9385f72d04546b69480d457a13e315
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 11ccf21e5c471e7c06a2a0c27739875d1197570d3c2049a27d284327bd748a0411f3280ab9c89bb865c2c882aa1019a371ecff5105b86baab81c01ce9be8520c
|
7
|
+
data.tar.gz: 96f35474f2770b85b277b5b1cdb64798b4cba183d9ad6b7753d2ded403f878bd7130c4ca634b69262ee812160147a4c674466ca4347fef42c71c2e99e87f9934
|
data/.gitignore
CHANGED
data/.rvmrc
CHANGED
@@ -1 +1 @@
|
|
1
|
-
rvm 2.
|
1
|
+
rvm 2.1.1@wyrm --create
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -1,13 +1,18 @@
|
|
1
1
|
# Wyrm
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/wyrm.png)](http://badge.fury.io/rb/wyrm)
|
4
|
+
|
5
|
+
Transfer a database from one rdbms to another (eg mysql to postgres). Either via
|
6
|
+
a set of files, or direct from one db server to another.
|
7
|
+
|
8
|
+
Has been used to dump > 100M dbs, and one 850G db.
|
9
|
+
Should theoretically work for any rdbms supported by [Sequel](http://sequel.jeremyevans.net/).
|
5
10
|
|
6
11
|
Dumps are compressed with bz2, using pbzip2. Fast *and* small :-D For example:
|
7
12
|
mysqldump | bzip2 for a certain 850G db comes to 127G. With wyrm it
|
8
13
|
comes to 134G.
|
9
14
|
|
10
|
-
|
15
|
+
Transfers tables and views only. Does not attempt to transfer
|
11
16
|
stored procs, permissions, triggers etc.
|
12
17
|
|
13
18
|
Handles tables with a single numeric key, single non-numeric key, and no
|
@@ -19,7 +24,7 @@ Will use result set streaming if available.
|
|
19
24
|
Wyrm because:
|
20
25
|
|
21
26
|
- I like dragons
|
22
|
-
- I can
|
27
|
+
- I can have a Wyrm::Hole to transfer data ;-)
|
23
28
|
|
24
29
|
## Dependencies
|
25
30
|
|
@@ -29,7 +34,6 @@ on your path.
|
|
29
34
|
|
30
35
|
## Installation
|
31
36
|
|
32
|
-
|
33
37
|
Add this line to your application's Gemfile:
|
34
38
|
|
35
39
|
gem 'wyrm'
|
@@ -52,6 +56,11 @@ Make sure you install the db gems, typically
|
|
52
56
|
|
53
57
|
Very basic cli at this point.
|
54
58
|
|
59
|
+
#### For direct db-to-db transfer
|
60
|
+
|
61
|
+
$ wyrm mysql2://localhost/beeg_data_bays postgres://localhost/betta_dee_bee
|
62
|
+
|
63
|
+
#### Via files
|
55
64
|
From the source db to the file system
|
56
65
|
|
57
66
|
$ wyrm mysql2://localhost/beeg_data_bays /tmp/lots_fs_space
|
@@ -70,20 +79,18 @@ For restoring. dump will be similar.
|
|
70
79
|
|
71
80
|
``` ruby
|
72
81
|
require 'wyrm/restore_schema'
|
73
|
-
rs =
|
74
|
-
rs.
|
75
|
-
rs.restore_tables
|
76
|
-
rs.index
|
82
|
+
rs = Restore.new 'postgres://postgres@localhost/your_db', '/mnt/disk/wyrm'
|
83
|
+
rs.call
|
77
84
|
```
|
78
85
|
|
79
86
|
Or for the lower-level stuff
|
80
87
|
|
81
88
|
``` ruby
|
82
89
|
require 'sequel'
|
83
|
-
require 'wyrm/db_pump'
|
90
|
+
require 'wyrm/pump'
|
84
91
|
|
85
92
|
db = Sequel.connect 'postgres://postgres@localhost/other_db'
|
86
|
-
dbp =
|
93
|
+
dbp = Wyrm::Pump.new db, :things
|
87
94
|
dbp.io = IO.popen 'pbzip2 -d -c /mnt/disk/wyrm/things.dbp.bz2'
|
88
95
|
dbp.each_row do |row|
|
89
96
|
puts row.inspect
|
data/bin/wyrm
CHANGED
@@ -1,36 +1,52 @@
|
|
1
1
|
#! /usr/bin/env ruby
|
2
2
|
|
3
|
-
require 'pathname'
|
4
3
|
require 'uri'
|
5
4
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
rs.index
|
12
|
-
end
|
5
|
+
if ARGV.size != 2
|
6
|
+
puts <<EOF
|
7
|
+
Usage: #{$0} src_db|dirname dst_db|dirname
|
8
|
+
|
9
|
+
dirname contains a set of wyrm files, or will soon.
|
13
10
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
11
|
+
sample db strings:
|
12
|
+
postgres://localhost/lotsa_datsa
|
13
|
+
mysql://root:pwned@localhost/lotsa_datsa
|
14
|
+
mysql2://root:pwned@localhost/lotsa_fastsa_datsa
|
15
|
+
EOF
|
16
|
+
exit(1)
|
19
17
|
end
|
20
18
|
|
21
|
-
|
22
|
-
|
23
|
-
|
19
|
+
module FsPath
|
20
|
+
def fs_path?
|
21
|
+
scheme == 'file' || scheme.nil?
|
22
|
+
end
|
24
23
|
end
|
25
24
|
|
26
|
-
src, dst = ARGV.map{|arg| URI.parse
|
25
|
+
src, dst = ARGV.map{|arg| URI.parse(arg).extend(FsPath)}
|
26
|
+
|
27
|
+
require 'wyrm/cli'
|
28
|
+
Wyrm.sanity_check_pbzip2
|
29
|
+
|
30
|
+
include Wyrm
|
31
|
+
|
32
|
+
case
|
33
|
+
when src.fs_path? && dst.fs_path?
|
34
|
+
puts "No point copying one directory to another. Just use filesystem tools. It's faster."
|
35
|
+
exit(1)
|
36
|
+
|
37
|
+
when !src.fs_path? && dst.fs_path?
|
38
|
+
# src is a url, and dst is a path, so dump to file system
|
39
|
+
require 'wyrm/dump'
|
40
|
+
Dump.new( src.to_s, dst.path ).call
|
41
|
+
|
42
|
+
when src.fs_path? && !dst.fs_path?
|
43
|
+
# src is a path and dst is a url, so restore to db
|
44
|
+
require 'wyrm/restore'
|
45
|
+
Restore.new(src.path, dst.to_s, drop_tables: true).call
|
27
46
|
|
28
|
-
if src.scheme && Pathname(dst.to_s).exist?
|
29
|
-
# src is a db path, so dump from it
|
30
|
-
dump( src.to_s, dst.to_s )
|
31
|
-
elsif dst.scheme && Pathname(src.to_s).exist?
|
32
|
-
# dst is a path and src is a url, so restore
|
33
|
-
restore( dst.to_s, src.to_s )
|
34
47
|
else
|
35
|
-
|
48
|
+
# both db urls, so transfer
|
49
|
+
require 'wyrm/hole.rb'
|
50
|
+
Hole.new( src.to_s, dst.to_s ).call
|
51
|
+
|
36
52
|
end
|
data/bin/wyrm-view
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
|
3
|
+
if ARGV.empty? || ARGV.first =~ /^-+(\?|h(elp)?)$/
|
4
|
+
puts <<EOF
|
5
|
+
Usage: #{$0} dbp_file.dbp[.bz2]
|
6
|
+
|
7
|
+
Display contents of dbp file, optionally compressed with bz2.
|
8
|
+
EOF
|
9
|
+
exit(0)
|
10
|
+
end
|
11
|
+
|
12
|
+
require 'wyrm/cli'
|
13
|
+
Wyrm.sanity_check_pbzip2
|
14
|
+
|
15
|
+
require 'pathname'
|
16
|
+
require 'yaml'
|
17
|
+
|
18
|
+
out_block = lambda do |io|
|
19
|
+
record_count = 1
|
20
|
+
until io.eof?
|
21
|
+
puts "# record #{record_count}"
|
22
|
+
puts Marshal.load(io).to_yaml
|
23
|
+
record_count += 1
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
ARGV.map{|a| Pathname(a)}.each do |path|
|
28
|
+
puts "# source: #{path}"
|
29
|
+
if path.extname == '.bz2'
|
30
|
+
IO.popen "pbzip2 -d -c #{path}", &out_block
|
31
|
+
else
|
32
|
+
path.open &out_block
|
33
|
+
end
|
34
|
+
end
|
data/lib/wyrm.rb
CHANGED
data/lib/wyrm/cli.rb
ADDED
data/lib/wyrm/core_extensions.rb
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
# would be nice to use refinements here, but that breaks 2.0 compatibility
|
2
|
+
class Method
|
3
|
+
def kwargs_as_hash( invocation_binding )
|
4
|
+
named_locals = parameters. \
|
5
|
+
select{|type,_| type == :key}. \
|
6
|
+
flat_map{|_,name| [name,invocation_binding.eval(name.to_s)]}
|
7
|
+
|
8
|
+
Hash[ *named_locals ]
|
9
|
+
end
|
10
|
+
end
|
data/lib/wyrm/{dump_schema.rb → dump.rb}
RENAMED
@@ -1,18 +1,25 @@
|
|
1
|
-
require '
|
1
|
+
require 'pathname'
|
2
|
+
|
3
|
+
require 'wyrm/module'
|
2
4
|
require 'wyrm/pump_maker'
|
5
|
+
require 'wyrm/schema_tools'
|
6
|
+
require 'wyrm/logger'
|
3
7
|
|
4
8
|
# Dump a schema and compressed data from a db to a set of files
|
5
9
|
# src_db = Sequel.connect "postgres://localhost:5454/lots"
|
6
10
|
# ds = DumpSchema.new src_db, Pathname('/var/data/lots')
|
7
|
-
# ds.
|
8
|
-
# ds.dump_tables
|
11
|
+
# ds.call
|
9
12
|
# TODO possibly use Gem::Package::TarWriter to write tar files
|
10
|
-
class DumpSchema
|
13
|
+
class Wyrm::Dump
|
11
14
|
include PumpMaker
|
15
|
+
include SchemaTools
|
16
|
+
include Wyrm::Logger
|
12
17
|
|
13
18
|
def initialize( src_db, container = nil, pump: nil )
|
19
|
+
@container = Pathname.new container || '.'
|
20
|
+
raise "#{@container} does not exist" unless @container.exist?
|
21
|
+
|
14
22
|
@src_db = maybe_deebe src_db
|
15
|
-
@container = Pathname.new container
|
16
23
|
@pump = make_pump( @src_db, pump )
|
17
24
|
|
18
25
|
@src_db.extension :schema_dumper
|
@@ -20,33 +27,21 @@ class DumpSchema
|
|
20
27
|
|
21
28
|
attr_reader :src_db, :container, :pump
|
22
29
|
|
23
|
-
def schema_migration
|
24
|
-
@schema_migration ||= src_db.dump_schema_migration(:indexes=>false, :same_db => same_db)
|
25
|
-
end
|
26
|
-
|
27
|
-
def index_migration
|
28
|
-
@index_migration ||= src_db.dump_indexes_migration(:same_db => same_db)
|
29
|
-
end
|
30
|
-
|
31
|
-
def fk_migration
|
32
|
-
@fk_migration ||= src_db.dump_foreign_key_migration(:same_db => same_db)
|
33
|
-
end
|
34
|
-
|
35
30
|
def same_db
|
36
31
|
false
|
37
32
|
end
|
38
33
|
|
39
|
-
def
|
40
|
-
@
|
34
|
+
def numbering
|
35
|
+
@numbering ||= '000'
|
41
36
|
end
|
42
37
|
|
43
38
|
def dump_schema
|
44
|
-
numbering = '000'
|
45
|
-
|
46
39
|
(container + "#{numbering.next!}_schema.rb").open('w') do |io|
|
47
40
|
io.write schema_migration
|
48
41
|
end
|
42
|
+
end
|
49
43
|
|
44
|
+
def dump_indexes
|
50
45
|
(container + "#{numbering.next!}_indexes.rb").open('w') do |io|
|
51
46
|
io.write index_migration
|
52
47
|
end
|
@@ -107,4 +102,10 @@ class DumpSchema
|
|
107
102
|
dump_table table_name
|
108
103
|
end
|
109
104
|
end
|
105
|
+
|
106
|
+
def call
|
107
|
+
dump_schema
|
108
|
+
dump_tables
|
109
|
+
dump_indexes
|
110
|
+
end
|
110
111
|
end
|
data/lib/wyrm/hole.rb
ADDED
@@ -0,0 +1,164 @@
|
|
1
|
+
require 'thread'
|
2
|
+
require 'wyrm/logger'
|
3
|
+
|
4
|
+
require 'wyrm/module'
|
5
|
+
require 'wyrm/pump'
|
6
|
+
require 'wyrm/pump_maker'
|
7
|
+
require 'wyrm/schema_tools'
|
8
|
+
require 'wyrm/core_extensions'
|
9
|
+
|
10
|
+
module Wyrm
|
11
|
+
# This bypasses the need to marshal objects between two pumps.
|
12
|
+
# It uses a queue of the record arrays instead.
|
13
|
+
class Hole
|
14
|
+
include PumpMaker
|
15
|
+
include Logger
|
16
|
+
|
17
|
+
# This is the codec. Named for the mouth of a wormhole. Cos finding a good name for this is hard.
|
18
|
+
#
|
19
|
+
# Connects the two pumps together. Implements Codec, Quacks like IO.
|
20
|
+
class Mouth
|
21
|
+
include Logger
|
22
|
+
|
23
|
+
def initialize
|
24
|
+
@flushed = false
|
25
|
+
end
|
26
|
+
|
27
|
+
# This is a bit weird because io_queue will usually == self
|
28
|
+
def encode( obj, io_queue )
|
29
|
+
io_queue.enq obj
|
30
|
+
end
|
31
|
+
|
32
|
+
# This is a bit weird because io_queue will usually == self
|
33
|
+
def decode( io_queue, &block )
|
34
|
+
obj = io_queue.deq
|
35
|
+
yield obj if block_given?
|
36
|
+
obj
|
37
|
+
end
|
38
|
+
|
39
|
+
def reset
|
40
|
+
# do this first, so any (hopefully not) remaining waiters don't
|
41
|
+
# go into the blocking deq again.
|
42
|
+
@flushed = false
|
43
|
+
|
44
|
+
# clear any poisons, and release any (hopefully not) remaining waiters
|
45
|
+
queue.clear
|
46
|
+
end
|
47
|
+
|
48
|
+
# queue could be empty while producer is generating something,
|
49
|
+
# so only eof after flush has been called.
|
50
|
+
def eof?
|
51
|
+
# queue is not empty if it's been poisoned.
|
52
|
+
@flushed && queue.empty?
|
53
|
+
end
|
54
|
+
|
55
|
+
# use a SizedQueue so we don't run out of memory during a big transfer
|
56
|
+
def queue
|
57
|
+
@queue ||=
|
58
|
+
if RUBY_VERSION == '2.1.0'
|
59
|
+
logger.warn "SizedQueue broken in 2.1.0 (https://bugs.ruby-lang.org/issues/9302). Falling back to Queue, which may run out of memory."
|
60
|
+
Queue.new
|
61
|
+
else
|
62
|
+
SizedQueue.new 5000
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
def enq( value )
|
67
|
+
queue.enq value
|
68
|
+
end
|
69
|
+
|
70
|
+
def deq( *args )
|
71
|
+
rv = queue.deq( *args )
|
72
|
+
if rv == :poison
|
73
|
+
poison_queue
|
74
|
+
raise StopIteration
|
75
|
+
end
|
76
|
+
rv
|
77
|
+
end
|
78
|
+
|
79
|
+
def poison_queue
|
80
|
+
# poison the queue. waiters will have to re-queue this.
|
81
|
+
queue << :poison if queue.empty? && queue.num_waiting > 0
|
82
|
+
end
|
83
|
+
|
84
|
+
# this gets called after dump is finished, by pump
|
85
|
+
def flush
|
86
|
+
# do this first, so any non-poisoned waiters will eof
|
87
|
+
# synchronisation is not really important because
|
88
|
+
@flushed = true
|
89
|
+
poison_queue
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
def initialize( src_db, dst_db, drop_tables: true, queue_size: 5000 )
|
94
|
+
# called only once per run, so not really a performance issue
|
95
|
+
@options = method(__method__).kwargs_as_hash( binding )
|
96
|
+
|
97
|
+
@src_db = maybe_deebe src_db
|
98
|
+
@dst_db = maybe_deebe dst_db
|
99
|
+
|
100
|
+
@src_db.extension :schema_dumper
|
101
|
+
end
|
102
|
+
|
103
|
+
attr_reader :src_db, :dst_db, :options
|
104
|
+
|
105
|
+
def mouth
|
106
|
+
@mouth ||= Mouth.new
|
107
|
+
end
|
108
|
+
|
109
|
+
def src_pump
|
110
|
+
@src_pump ||= Pump.new( {db: src_db, io: mouth, codec: mouth, logger: logger}.merge( options[:pump] ||{} ) )
|
111
|
+
end
|
112
|
+
|
113
|
+
def dst_pump
|
114
|
+
@dst_pump ||= Pump.new( {db: dst_db, io: mouth, codec: mouth, logger: logger}.merge( options[:pump] ||{} ) )
|
115
|
+
end
|
116
|
+
|
117
|
+
def transfer_table( table_name )
|
118
|
+
mouth.reset
|
119
|
+
src_pump.table_name = dst_pump.table_name = table_name
|
120
|
+
|
121
|
+
if src_pump.table_dataset.empty?
|
122
|
+
logger.info "No records in #{table_name}"
|
123
|
+
return
|
124
|
+
end
|
125
|
+
|
126
|
+
# Use threads so the db read/writes aren't waiting for one another.
|
127
|
+
recv_thread = Thread.new{ dst_pump.restore }
|
128
|
+
send_thread = Thread.new{ src_pump.dump }
|
129
|
+
|
130
|
+
send_thread.join
|
131
|
+
recv_thread.join
|
132
|
+
end
|
133
|
+
|
134
|
+
include SchemaTools
|
135
|
+
|
136
|
+
def transfer_schema( &transfer_table_block )
|
137
|
+
create_tables
|
138
|
+
|
139
|
+
# transfer tables here
|
140
|
+
yield self if block_given?
|
141
|
+
|
142
|
+
create_indexes
|
143
|
+
end
|
144
|
+
|
145
|
+
def transfer_tables
|
146
|
+
logger.info "transferring tables"
|
147
|
+
src_db.tables.each do |table_name|
|
148
|
+
transfer_table table_name
|
149
|
+
end
|
150
|
+
end
|
151
|
+
|
152
|
+
def call
|
153
|
+
if options[:drop_tables]
|
154
|
+
logger.info "dropping tables"
|
155
|
+
drop_tables src_db.tables
|
156
|
+
end
|
157
|
+
|
158
|
+
transfer_schema do
|
159
|
+
transfer_tables
|
160
|
+
end
|
161
|
+
end
|
162
|
+
|
163
|
+
end
|
164
|
+
end
|