wyrm 0.2.1 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6c0bb0fe99a301ead2da2ce8a64dc1eb20c925b0
4
- data.tar.gz: 031b66ab01f20c5ebad94dbfa3c50338dbd15cba
3
+ metadata.gz: 7c67007e35f84f5542da888c17fc91f6e1fe03cb
4
+ data.tar.gz: 7434160ebe9385f72d04546b69480d457a13e315
5
5
  SHA512:
6
- metadata.gz: 5feadc5c19a9df8417414cb91270ae55b8d114cf94fc9daa33b01b2f23292e858a109b0d703cfc7474952b963c26876d7e732f12c0802146cd5ef7838e803629
7
- data.tar.gz: a73a7c30e43a430fb05d22552f5b0f10c81cdcd99ec818ff8f838f6ff1a6f59f3c9d71a9d8eacdd26ff4066f1140e82e0236db0e13c2421fba8b1800b7d35710
6
+ metadata.gz: 11ccf21e5c471e7c06a2a0c27739875d1197570d3c2049a27d284327bd748a0411f3280ab9c89bb865c2c882aa1019a371ecff5105b86baab81c01ce9be8520c
7
+ data.tar.gz: 96f35474f2770b85b277b5b1cdb64798b4cba183d9ad6b7753d2ded403f878bd7130c4ca634b69262ee812160147a4c674466ca4347fef42c71c2e99e87f9934
data/.gitignore CHANGED
@@ -15,3 +15,5 @@ spec/reports
15
15
  test/tmp
16
16
  test/version_tmp
17
17
  tmp
18
+ wyrm.sublime-project
19
+ wyrm.sublime-workspace
data/.rvmrc CHANGED
@@ -1 +1 @@
1
- rvm 2.0.0@wyrm --create
1
+ rvm 2.1.1@wyrm --create
data/Gemfile CHANGED
@@ -20,8 +20,5 @@ end
20
20
 
21
21
  preferred_sources.each{|src| source src}
22
22
 
23
- gem 'sequel'
24
- gem 'fastandand'
25
-
26
23
  # Specify your gem's dependencies in wyrm.gemspec
27
24
  gemspec
data/README.md CHANGED
@@ -1,13 +1,18 @@
1
1
  # Wyrm
2
2
 
3
- Transfer data from one database to another. Has been used to dump > 100M dbs,
4
- and one 850G db. Should theoretically work for any dbs supported by Sequel.
3
+ [![Gem Version](https://badge.fury.io/rb/wyrm.png)](http://badge.fury.io/rb/wyrm)
4
+
5
+ Transfer a database from one rdbms to another (e.g. mysql to postgres), either via
6
+ a set of files or directly from one db server to another.
7
+
8
+ Has been used to dump > 100M dbs, and one 850G db.
9
+ Should theoretically work for any rdbms supported by [Sequel](http://sequel.jeremyevans.net/).
5
10
 
6
11
  Dumps are compressed with bz2, using pbzip2. Fast *and* small :-D For example:
7
12
  mysqldump | bzip2 for a certain 850G db comes to 127G. With wyrm it
8
13
  comes to 134G.
9
14
 
10
- Currently transfers tables and views only. Does not attempt to transfer
15
+ Transfers tables and views only. Does not attempt to transfer
11
16
  stored procs, permissions, triggers etc.
12
17
 
13
18
  Handles tables with a single numeric key, single non-numeric key, and no
@@ -19,7 +24,7 @@ Will use result set streaming if available.
19
24
  Wyrm because:
20
25
 
21
26
  - I like dragons
22
- - I can (eventually) have a Wyrm::Hole to transfer data through ;-)
27
+ - I can have a Wyrm::Hole to transfer data ;-)
23
28
 
24
29
  ## Dependencies
25
30
 
@@ -29,7 +34,6 @@ on your path.
29
34
 
30
35
  ## Installation
31
36
 
32
-
33
37
  Add this line to your application's Gemfile:
34
38
 
35
39
  gem 'wyrm'
@@ -52,6 +56,11 @@ Make sure you install the db gems, typically
52
56
 
53
57
  Very basic cli at this point.
54
58
 
59
+ #### For direct db-to-db transfer
60
+
61
+ $ wyrm mysql2://localhost/beeg_data_bays postgres://localhost/betta_dee_bee
62
+
63
+ #### Via files
55
64
  From the source db to the file system
56
65
 
57
66
  $ wyrm mysql2://localhost/beeg_data_bays /tmp/lots_fs_space
@@ -70,20 +79,18 @@ For restoring. dump will be similar.
70
79
 
71
80
  ``` ruby
72
81
  require 'wyrm/restore_schema'
73
- rs = RestoreSchema.new 'postgres://postgres@localhost/your_db', '/mnt/disk/wyrm'
74
- rs.create
75
- rs.restore_tables
76
- rs.index
82
+ rs = Wyrm::Restore.new '/mnt/disk/wyrm', 'postgres://postgres@localhost/your_db'
83
+ rs.call
77
84
  ```
78
85
 
79
86
  Or for the lower-level stuff
80
87
 
81
88
  ``` ruby
82
89
  require 'sequel'
83
- require 'wyrm/db_pump'
90
+ require 'wyrm/pump'
84
91
 
85
92
  db = Sequel.connect 'postgres://postgres@localhost/other_db'
86
- dbp = DbPump.new db, :things
93
+ dbp = Wyrm::Pump.new db, :things
87
94
  dbp.io = IO.popen 'pbzip2 -d -c /mnt/disk/wyrm/things.dbp.bz2'
88
95
  dbp.each_row do |row|
89
96
  puts row.inspect
data/bin/wyrm CHANGED
@@ -1,36 +1,52 @@
1
1
  #! /usr/bin/env ruby
2
2
 
3
- require 'pathname'
4
3
  require 'uri'
5
4
 
6
- def restore( db, directory )
7
- require 'wyrm/restore_schema'
8
- rs = RestoreSchema.new db, directory
9
- rs.create
10
- rs.restore_tables
11
- rs.index
12
- end
5
+ if ARGV.size != 2
6
+ puts <<EOF
7
+ Usage: #{$0} src_db|dirname dst_db|dirname
8
+
9
+ dirname contains a set of wyrm files, or will soon.
13
10
 
14
- def dump( db, directory )
15
- require 'wyrm/dump_schema'
16
- ds = DumpSchema.new db, directory
17
- ds.dump_schema
18
- ds.dump_tables
11
+ sample db strings:
12
+ postgres://localhost/lotsa_datsa
13
+ mysql://root:pwned@localhost/lotsa_datsa
14
+ mysql2://root:pwned@localhost/lotsa_fastsa_datsa
15
+ EOF
16
+ exit(1)
19
17
  end
20
18
 
21
- if ARGV.empty?
22
- puts "Provide source and destination"
23
- puts "Either can be a sequel db string or a directory"
19
+ module FsPath
20
+ def fs_path?
21
+ scheme == 'file' || scheme.nil?
22
+ end
24
23
  end
25
24
 
26
- src, dst = ARGV.map{|arg| URI.parse arg}
25
+ src, dst = ARGV.map{|arg| URI.parse(arg).extend(FsPath)}
26
+
27
+ require 'wyrm/cli'
28
+ Wyrm.sanity_check_pbzip2
29
+
30
+ include Wyrm
31
+
32
+ case
33
+ when src.fs_path? && dst.fs_path?
34
+ puts "No point copying one directory to another. Just use filesystem tools. It's faster."
35
+ exit(1)
36
+
37
+ when !src.fs_path? && dst.fs_path?
38
+ # src is a url, and dst is a path, so dump to file system
39
+ require 'wyrm/dump'
40
+ Dump.new( src.to_s, dst.path ).call
41
+
42
+ when src.fs_path? && !dst.fs_path?
43
+ # src is a path and dst is a url, so restore to db
44
+ require 'wyrm/restore'
45
+ Restore.new(src.path, dst.to_s, drop_tables: true).call
27
46
 
28
- if src.scheme && Pathname(dst.to_s).exist?
29
- # src is a db path, so dump from it
30
- dump( src.to_s, dst.to_s )
31
- elsif dst.scheme && Pathname(src.to_s).exist?
32
- # dst is a path and src is a url, so restore
33
- restore( dst.to_s, src.to_s )
34
47
  else
35
- puts "Don't know how to handle #{src} -> #{dst}"
48
+ # both db urls, so transfer
49
+ require 'wyrm/hole.rb'
50
+ Hole.new( src.to_s, dst.to_s ).call
51
+
36
52
  end
data/bin/wyrm-view ADDED
@@ -0,0 +1,34 @@
#! /usr/bin/env ruby

# Command line viewer for wyrm dump files: prints every marshalled record of
# each file named on the command line as a numbered YAML document.

if ARGV.empty? || ARGV.first =~ /^-+(\?|h(elp)?)$/
  puts <<EOF
Usage: #{$0} dbp_file.dbp[.bz2]

Display contents of dbp file, optionally compressed with bz2.
EOF
  exit(0)
end

require 'wyrm/cli'
Wyrm.sanity_check_pbzip2

require 'pathname'
require 'yaml'

# Reads marshalled objects from io until eof, printing each one as yaml
# preceded by a "# record N" header.
#
# NOTE(review): Marshal.load can instantiate arbitrary objects, so only view
# dump files that come from a trusted source.
out_block = lambda do |io|
  record_count = 1
  until io.eof?
    puts "# record #{record_count}"
    puts Marshal.load(io).to_yaml
    record_count += 1
  end
end

ARGV.map{|a| Pathname(a)}.each do |path|
  puts "# source: #{path}"
  if path.extname == '.bz2'
    # argv form of popen: no shell is involved, so spaces and shell
    # metacharacters in the path reach pbzip2 verbatim.
    IO.popen(['pbzip2', '-d', '-c', path.to_s], 'rb', &out_block)
  else
    # marshalled data is binary, so avoid text-mode translation
    path.open('rb', &out_block)
  end
end
data/lib/wyrm.rb CHANGED
@@ -1,8 +1,6 @@
1
+ require 'wyrm/module'
2
+
1
3
  require "wyrm/version"
2
- require "wyrm/db_pump.rb"
4
+ require "wyrm/pump.rb"
3
5
  require "wyrm/restore_schema.rb"
4
6
  require "wyrm/dump_schema.rb"
5
-
6
- module Wyrm
7
- # Your code goes here...
8
- end
data/lib/wyrm/cli.rb ADDED
@@ -0,0 +1,9 @@
module Wyrm
  # Make sure a pbzip2 executable can be found on PATH.
  #
  # Returns nil when one is found. Otherwise prints a message and terminates
  # the process with exit status 1.
  #
  # PATH is searched directly rather than shelling out to `which`, so the
  # check does not itself depend on another external program being installed.
  def self.sanity_check_pbzip2
    search_dirs = ENV.fetch('PATH', '').split(File::PATH_SEPARATOR)

    installed = search_dirs.any? do |dir|
      # an empty PATH entry conventionally means the current directory
      candidate = File.join(dir.empty? ? '.' : dir, 'pbzip2')
      File.file?(candidate) && File.executable?(candidate)
    end
    return if installed

    puts "\npbzip2 not installed or not in PATH"
    exit(1)
  end
end
@@ -0,0 +1,10 @@
# would be nice to use refinements here, but that breaks 2.0 compatibility
class Method
  # Collect the current values of this method's keyword arguments.
  #
  # invocation_binding:: a Binding taken inside a running invocation of this
  #                      method, ie where the keyword arguments are locals.
  #
  # Returns a Hash of keyword name (Symbol) => current value, covering both
  # optional (:key) and required (:keyreq) keywords. Positional, splat and
  # block parameters are ignored.
  def kwargs_as_hash( invocation_binding )
    # local_variable_get (ruby >= 2.1) also copes with keywords named after
    # reserved words, eg if:. Fall back to eval to stay 2.0 compatible.
    read_local =
      if invocation_binding.respond_to?(:local_variable_get)
        lambda{|name| invocation_binding.local_variable_get(name)}
      else
        lambda{|name| invocation_binding.eval(name.to_s)}
      end

    parameters.each_with_object({}) do |(type, name), kwargs|
      kwargs[name] = read_local.call(name) if type == :key || type == :keyreq
    end
  end
end
@@ -1,18 +1,25 @@
1
- require 'logger'
1
+ require 'pathname'
2
+
3
+ require 'wyrm/module'
2
4
  require 'wyrm/pump_maker'
5
+ require 'wyrm/schema_tools'
6
+ require 'wyrm/logger'
3
7
 
4
8
  # Dump a schema and compressed data from a db to a set of files
5
9
  # src_db = Sequel.connect "postgres://localhost:5454/lots"
6
10
  # ds = Wyrm::Dump.new src_db, Pathname('/var/data/lots')
7
- # ds.dump_schema
8
- # ds.dump_tables
11
+ # ds.call
9
12
  # TODO possibly use Gem::Package::TarWriter to write tar files
10
- class DumpSchema
13
+ class Wyrm::Dump
11
14
  include PumpMaker
15
+ include SchemaTools
16
+ include Wyrm::Logger
12
17
 
13
18
  def initialize( src_db, container = nil, pump: nil )
19
+ @container = Pathname.new container || '.'
20
+ raise "#{@container} does not exist" unless @container.exist?
21
+
14
22
  @src_db = maybe_deebe src_db
15
- @container = Pathname.new container
16
23
  @pump = make_pump( @src_db, pump )
17
24
 
18
25
  @src_db.extension :schema_dumper
@@ -20,33 +27,21 @@ class DumpSchema
20
27
 
21
28
  attr_reader :src_db, :container, :pump
22
29
 
23
- def schema_migration
24
- @schema_migration ||= src_db.dump_schema_migration(:indexes=>false, :same_db => same_db)
25
- end
26
-
27
- def index_migration
28
- @index_migration ||= src_db.dump_indexes_migration(:same_db => same_db)
29
- end
30
-
31
- def fk_migration
32
- @fk_migration ||= src_db.dump_foreign_key_migration(:same_db => same_db)
33
- end
34
-
35
30
  def same_db
36
31
  false
37
32
  end
38
33
 
39
- def logger
40
- @logger ||= Logger.new STDERR
34
+ def numbering
35
+ @numbering ||= '000'
41
36
  end
42
37
 
43
38
  def dump_schema
44
- numbering = '000'
45
-
46
39
  (container + "#{numbering.next!}_schema.rb").open('w') do |io|
47
40
  io.write schema_migration
48
41
  end
42
+ end
49
43
 
44
+ def dump_indexes
50
45
  (container + "#{numbering.next!}_indexes.rb").open('w') do |io|
51
46
  io.write index_migration
52
47
  end
@@ -107,4 +102,10 @@ class DumpSchema
107
102
  dump_table table_name
108
103
  end
109
104
  end
105
+
106
+ def call
107
+ dump_schema
108
+ dump_tables
109
+ dump_indexes
110
+ end
110
111
  end
data/lib/wyrm/hole.rb ADDED
@@ -0,0 +1,164 @@
1
+ require 'thread'
2
+ require 'wyrm/logger'
3
+
4
+ require 'wyrm/module'
5
+ require 'wyrm/pump'
6
+ require 'wyrm/pump_maker'
7
+ require 'wyrm/schema_tools'
8
+ require 'wyrm/core_extensions'
9
+
module Wyrm
  # Transfers tables from src_db straight to dst_db.
  #
  # This bypasses the need to marshal objects between two pumps.
  # It uses a queue of the record arrays instead.
  class Hole
    include PumpMaker
    include Logger

    # This is the codec. Named for the mouth of a wormhole. Cos finding a good name for this is hard.
    #
    # Connects the two pumps together. Implements Codec, Quacks like IO.
    class Mouth
      include Logger

      # Number of records buffered when no queue_size is given.
      DEFAULT_QUEUE_SIZE = 5000

      # queue_size:: upper bound on the number of records held in the queue.
      #              Not applied on ruby 2.1.0, see #queue.
      def initialize( queue_size: DEFAULT_QUEUE_SIZE )
        @queue_size = queue_size || DEFAULT_QUEUE_SIZE
        @flushed = false
      end

      # Append obj to io_queue.
      # This is a bit weird because io_queue will usually == self
      def encode( obj, io_queue )
        io_queue.enq obj
      end

      # Take the next object from io_queue, yield it if a block was given,
      # and return it.
      # This is a bit weird because io_queue will usually == self
      def decode( io_queue, &block )
        obj = io_queue.deq
        yield obj if block_given?
        obj
      end

      # Return to the not-flushed state and empty the queue.
      def reset
        # do this first, so any (hopefully not) remaining waiters don't
        # go into the blocking deq again.
        @flushed = false

        # clear any poisons, and release any (hopefully not) remaining waiters
        queue.clear
      end

      # queue could be empty while producer is generating something,
      # so only eof after flush has been called.
      def eof?
        # queue is not empty if it's been poisoned.
        @flushed && queue.empty?
      end

      # use a SizedQueue so we don't run out of memory during a big transfer
      def queue
        @queue ||=
        if RUBY_VERSION == '2.1.0'
          logger.warn "SizedQueue broken in 2.1.0 (https://bugs.ruby-lang.org/issues/9302). Falling back to Queue, which may run out of memory."
          Queue.new
        else
          SizedQueue.new @queue_size
        end
      end

      # Add value to the queue.
      def enq( value )
        queue.enq value
      end

      # Remove and return the next value from the queue. args are passed
      # through to the queue's deq.
      #
      # Raises StopIteration when the value is the :poison marker, after
      # re-poisoning the queue for any other waiters.
      def deq( *args )
        rv = queue.deq( *args )
        if rv == :poison
          poison_queue
          raise StopIteration
        end
        rv
      end

      # Push the :poison marker, but only when the queue is empty and
      # something is waiting on it.
      def poison_queue
        # poison the queue. waiters will have to re-queue this.
        queue << :poison if queue.empty? && queue.num_waiting > 0
      end

      # this gets called after dump is finished, by pump
      def flush
        # do this first, so any non-poisoned waiters will eof.
        # @flushed is written without a lock; the original note called
        # synchronisation "not really important" here without recording why.
        @flushed = true
        poison_queue
      end
    end

    # src_db, dst_db:: handed to maybe_deebe, so presumably either a db
    #                  connection string or an existing db object - TODO confirm
    # drop_tables::    when true, call drops tables before transferring
    # queue_size::     maximum number of records buffered between the reading
    #                  and writing pumps
    def initialize( src_db, dst_db, drop_tables: true, queue_size: 5000 )
      # called only once per run, so not really a performance issue
      @options = method(__method__).kwargs_as_hash( binding )

      @src_db = maybe_deebe src_db
      @dst_db = maybe_deebe dst_db

      @src_db.extension :schema_dumper
    end

    attr_reader :src_db, :dst_db, :options

    # The queue shared by both pumps, sized by the queue_size option.
    def mouth
      @mouth ||= Mouth.new( queue_size: options[:queue_size] )
    end

    # Pump attached to the source db, using mouth as both io and codec.
    def src_pump
      @src_pump ||= Pump.new( {db: src_db, io: mouth, codec: mouth, logger: logger}.merge( options[:pump] ||{} ) )
    end

    # Pump attached to the destination db, using mouth as both io and codec.
    def dst_pump
      @dst_pump ||= Pump.new( {db: dst_db, io: mouth, codec: mouth, logger: logger}.merge( options[:pump] ||{} ) )
    end

    # Copy the rows of one table from src_db to dst_db.
    # Logs and returns early when the source table has no records.
    def transfer_table( table_name )
      mouth.reset
      src_pump.table_name = dst_pump.table_name = table_name

      if src_pump.table_dataset.empty?
        logger.info "No records in #{table_name}"
        return
      end

      # Use threads so the db read/writes aren't waiting for one another.
      recv_thread = Thread.new{ dst_pump.restore }
      send_thread = Thread.new{ src_pump.dump }

      send_thread.join
      recv_thread.join
    end

    # create_tables, create_indexes and drop_tables are not defined in this
    # class; presumably they come from this mixin - TODO confirm
    include SchemaTools

    # Create the tables, yield self so the caller can move the data, then
    # create the indexes.
    def transfer_schema( &transfer_table_block )
      create_tables

      # transfer tables here
      yield self if block_given?

      create_indexes
    end

    # Transfer every table listed by src_db, one after another.
    def transfer_tables
      logger.info "transferring tables"
      src_db.tables.each do |table_name|
        transfer_table table_name
      end
    end

    # Run the whole transfer: optionally drop tables, then create the schema,
    # copy the data and create the indexes.
    def call
      if options[:drop_tables]
        logger.info "dropping tables"
        # the list of table names is taken from the source db
        drop_tables src_db.tables
      end

      transfer_schema do
        transfer_tables
      end
    end

  end
end