wyrm 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6c0bb0fe99a301ead2da2ce8a64dc1eb20c925b0
4
- data.tar.gz: 031b66ab01f20c5ebad94dbfa3c50338dbd15cba
3
+ metadata.gz: 7c67007e35f84f5542da888c17fc91f6e1fe03cb
4
+ data.tar.gz: 7434160ebe9385f72d04546b69480d457a13e315
5
5
  SHA512:
6
- metadata.gz: 5feadc5c19a9df8417414cb91270ae55b8d114cf94fc9daa33b01b2f23292e858a109b0d703cfc7474952b963c26876d7e732f12c0802146cd5ef7838e803629
7
- data.tar.gz: a73a7c30e43a430fb05d22552f5b0f10c81cdcd99ec818ff8f838f6ff1a6f59f3c9d71a9d8eacdd26ff4066f1140e82e0236db0e13c2421fba8b1800b7d35710
6
+ metadata.gz: 11ccf21e5c471e7c06a2a0c27739875d1197570d3c2049a27d284327bd748a0411f3280ab9c89bb865c2c882aa1019a371ecff5105b86baab81c01ce9be8520c
7
+ data.tar.gz: 96f35474f2770b85b277b5b1cdb64798b4cba183d9ad6b7753d2ded403f878bd7130c4ca634b69262ee812160147a4c674466ca4347fef42c71c2e99e87f9934
data/.gitignore CHANGED
@@ -15,3 +15,5 @@ spec/reports
15
15
  test/tmp
16
16
  test/version_tmp
17
17
  tmp
18
+ wyrm.sublime-project
19
+ wyrm.sublime-workspace
data/.rvmrc CHANGED
@@ -1 +1 @@
1
- rvm 2.0.0@wyrm --create
1
+ rvm 2.1.1@wyrm --create
data/Gemfile CHANGED
@@ -20,8 +20,5 @@ end
20
20
 
21
21
  preferred_sources.each{|src| source src}
22
22
 
23
- gem 'sequel'
24
- gem 'fastandand'
25
-
26
23
  # Specify your gem's dependencies in wyrm.gemspec
27
24
  gemspec
data/README.md CHANGED
@@ -1,13 +1,18 @@
1
1
  # Wyrm
2
2
 
3
- Transfer data from one database to another. Has been used to dump > 100M dbs,
4
- and one 850G db. Should theoretically work for any dbs supported by Sequel.
3
+ [![Gem Version](https://badge.fury.io/rb/wyrm.png)](http://badge.fury.io/rb/wyrm)
4
+
5
+ Transfer a database from one rdbms to another (eg mysql to postgres). Either via
6
+ a set of files, or direct from one db server to another.
7
+
8
+ Has been used to dump > 100M dbs, and one 850G db.
9
+ Should theoretically work for any rdbms supported by [Sequel](http://sequel.jeremyevans.net/).
5
10
 
6
11
  Dumps are compressed with bz2, using pbzip2. Fast *and* small :-D For example:
7
12
  mysqldump | bzip2 for a certain 850G db comes to 127G. With wyrm it
8
13
  comes to 134G.
9
14
 
10
- Currently transfers tables and views only. Does not attempt to transfer
15
+ Transfers tables and views only. Does not attempt to transfer
11
16
  stored procs, permissions, triggers etc.
12
17
 
13
18
  Handles tables with a single numeric key, single non-numeric key, and no
@@ -19,7 +24,7 @@ Will use result set streaming if available.
19
24
  Wyrm because:
20
25
 
21
26
  - I like dragons
22
- - I can (eventually) have a Wyrm::Hole to transfer data through ;-)
27
+ - I can have a Wyrm::Hole to transfer data ;-)
23
28
 
24
29
  ## Dependencies
25
30
 
@@ -29,7 +34,6 @@ on your path.
29
34
 
30
35
  ## Installation
31
36
 
32
-
33
37
  Add this line to your application's Gemfile:
34
38
 
35
39
  gem 'wyrm'
@@ -52,6 +56,11 @@ Make sure you install the db gems, typically
52
56
 
53
57
  Very basic cli at this point.
54
58
 
59
+ #### For direct db-to-db transfer
60
+
61
+ $ wyrm mysql2://localhost/beeg_data_bays postgres://localhost/betta_dee_bee
62
+
63
+ #### Via files
55
64
  From the source db to the file system
56
65
 
57
66
  $ wyrm mysql2://localhost/beeg_data_bays /tmp/lots_fs_space
@@ -70,20 +79,18 @@ For restoring. dump will be similar.
70
79
 
71
80
  ``` ruby
72
81
  require 'wyrm/restore_schema'
73
- rs = RestoreSchema.new 'postgres://postgres@localhost/your_db', '/mnt/disk/wyrm'
74
- rs.create
75
- rs.restore_tables
76
- rs.index
82
+ rs = Restore.new 'postgres://postgres@localhost/your_db', '/mnt/disk/wyrm'
83
+ rs.call
77
84
  ```
78
85
 
79
86
  Or for the lower-level stuff
80
87
 
81
88
  ``` ruby
82
89
  require 'sequel'
83
- require 'wyrm/db_pump'
90
+ require 'wyrm/pump'
84
91
 
85
92
  db = Sequel.connect 'postgres://postgres@localhost/other_db'
86
- dbp = DbPump.new db, :things
93
+ dbp = Wyrm::Pump.new db, :things
87
94
  dbp.io = IO.popen 'pbzip2 -d -c /mnt/disk/wyrm/things.dbp.bz2'
88
95
  dbp.each_row do |row|
89
96
  puts row.inspect
data/bin/wyrm CHANGED
@@ -1,36 +1,52 @@
1
1
  #! /usr/bin/env ruby
2
2
 
3
- require 'pathname'
4
3
  require 'uri'
5
4
 
6
- def restore( db, directory )
7
- require 'wyrm/restore_schema'
8
- rs = RestoreSchema.new db, directory
9
- rs.create
10
- rs.restore_tables
11
- rs.index
12
- end
5
+ if ARGV.size != 2
6
+ puts <<EOF
7
+ Usage: #{$0} src_db|dirname dst_db|dirname
8
+
9
+ dirname contains a set of wyrm files, or will soon.
13
10
 
14
- def dump( db, directory )
15
- require 'wyrm/dump_schema'
16
- ds = DumpSchema.new db, directory
17
- ds.dump_schema
18
- ds.dump_tables
11
+ sample db strings:
12
+ postgres://localhost/lotsa_datsa
13
+ mysql://root:pwned@localhost/lotsa_datsa
14
+ mysql2://root:pwned@localhost/lotsa_fastsa_datsa
15
+ EOF
16
+ exit(1)
19
17
  end
20
18
 
21
- if ARGV.empty?
22
- puts "Provide source and destination"
23
- puts "Either can be a sequel db string or a directory"
19
+ module FsPath
20
+ def fs_path?
21
+ scheme == 'file' || scheme.nil?
22
+ end
24
23
  end
25
24
 
26
- src, dst = ARGV.map{|arg| URI.parse arg}
25
+ src, dst = ARGV.map{|arg| URI.parse(arg).extend(FsPath)}
26
+
27
+ require 'wyrm/cli'
28
+ Wyrm.sanity_check_pbzip2
29
+
30
+ include Wyrm
31
+
32
+ case
33
+ when src.fs_path? && dst.fs_path?
34
+ puts "No point copying one directory to another. Just use filesystem tools. It's faster."
35
+ exit(1)
36
+
37
+ when !src.fs_path? && dst.fs_path?
38
+ # src is a url, and dst is a path, so dump to file system
39
+ require 'wyrm/dump'
40
+ Dump.new( src.to_s, dst.path ).call
41
+
42
+ when src.fs_path? && !dst.fs_path?
43
+ # src is a path and dst is a url, so restore to db
44
+ require 'wyrm/restore'
45
+ Restore.new(src.path, dst.to_s, drop_tables: true).call
27
46
 
28
- if src.scheme && Pathname(dst.to_s).exist?
29
- # src is a db path, so dump from it
30
- dump( src.to_s, dst.to_s )
31
- elsif dst.scheme && Pathname(src.to_s).exist?
32
- # dst is a path and src is a url, so restore
33
- restore( dst.to_s, src.to_s )
34
47
  else
35
- puts "Don't know how to handle #{src} -> #{dst}"
48
+ # both db urls, so transfer
49
+ require 'wyrm/hole.rb'
50
+ Hole.new( src.to_s, dst.to_s ).call
51
+
36
52
  end
data/bin/wyrm-view ADDED
@@ -0,0 +1,34 @@
#! /usr/bin/env ruby

# wyrm-view: print every record stored in one or more wyrm dump (.dbp) files
# as YAML. Files whose name ends in .bz2 are decompressed through pbzip2 first.

# Show usage when called without arguments or with -?, -h, --help and friends.
if ARGV.empty? || ARGV.first =~ /^-+(\?|h(elp)?)$/
  puts <<EOF
Usage: #{$0} dbp_file.dbp[.bz2]

Display contents of dbp file, optionally compressed with bz2.
EOF
  exit(0)
end

require 'wyrm/cli'
Wyrm.sanity_check_pbzip2

require 'pathname'
require 'yaml'

# Reads consecutive marshalled records from io until eof, printing each one
# as YAML preceded by a "# record N" header (N starts at 1 for every source).
#
# SECURITY NOTE: Marshal.load can instantiate arbitrary objects. Only point
# this tool at dump files that come from a trusted source.
out_block = lambda do |io|
  record_count = 1
  until io.eof?
    puts "# record #{record_count}"
    puts Marshal.load(io).to_yaml
    record_count += 1
  end
end

ARGV.map { |arg| Pathname(arg) }.each do |path|
  puts "# source: #{path}"
  if path.extname == '.bz2'
    # Array form of popen bypasses the shell, so paths containing spaces or
    # shell metacharacters are passed to pbzip2 verbatim.
    IO.popen(['pbzip2', '-d', '-c', path.to_s], 'rb', &out_block)
  else
    # Marshal data is binary, so read it in binary mode.
    path.open('rb', &out_block)
  end
end
data/lib/wyrm.rb CHANGED
@@ -1,8 +1,6 @@
1
+ require 'wyrm/module'
2
+
1
3
  require "wyrm/version"
2
- require "wyrm/db_pump.rb"
4
+ require "wyrm/pump.rb"
3
5
  require "wyrm/restore_schema.rb"
4
6
  require "wyrm/dump_schema.rb"
5
-
6
- module Wyrm
7
- # Your code goes here...
8
- end
data/lib/wyrm/cli.rb ADDED
@@ -0,0 +1,9 @@
module Wyrm
  # Verify that the pbzip2 executable can be located via `which`.
  #
  # Prints a message and terminates the process with exit status 1 when
  # pbzip2 is not found. The same happens when `which` itself cannot be run,
  # because Kernel#system returns nil in that case instead of raising.
  #
  # Returns nil when pbzip2 is available.
  def self.sanity_check_pbzip2
    # stdout of `which` is discarded; only its exit status matters.
    return if system('which', 'pbzip2', out: File::NULL)

    puts "\npbzip2 not installed or not in PATH"
    exit(1)
  end
end
@@ -0,0 +1,10 @@
# would be nice to use refinements here, but that breaks 2.0 compatibility
class Method
  # Collect the current values of this method's keyword arguments.
  #
  # Intended to be called from inside the method itself, e.g.
  #   method(__method__).kwargs_as_hash(binding)
  #
  # invocation_binding - the Binding of the running invocation; each keyword
  #                      parameter name is evaluated in it to fetch its value.
  #
  # Returns a Hash of parameter name (Symbol) => current value. It covers both
  # optional (:key) and required (:keyreq) keyword parameters. Positional,
  # splat, double-splat and block parameters are not included.
  def kwargs_as_hash( invocation_binding )
    parameters.each_with_object({}) do |(type, name), kwargs|
      next unless type == :key || type == :keyreq

      # name comes from the method's own signature, so eval-ing it is safe.
      kwargs[name] = invocation_binding.eval(name.to_s)
    end
  end
end
@@ -1,18 +1,25 @@
1
- require 'logger'
1
+ require 'pathname'
2
+
3
+ require 'wyrm/module'
2
4
  require 'wyrm/pump_maker'
5
+ require 'wyrm/schema_tools'
6
+ require 'wyrm/logger'
3
7
 
4
8
  # Dump a schema and compressed data from a db to a set of files
5
9
  # src_db = Sequel.connect "postgres://localhost:5454/lots"
6
10
  # ds = DumpSchema.new src_db, Pathname('/var/data/lots')
7
- # ds.dump_schema
8
- # ds.dump_tables
11
+ # ds.call
9
12
  # TODO possibly use Gem::Package::TarWriter to write tar files
10
- class DumpSchema
13
+ class Wyrm::Dump
11
14
  include PumpMaker
15
+ include SchemaTools
16
+ include Wyrm::Logger
12
17
 
13
18
  def initialize( src_db, container = nil, pump: nil )
19
+ @container = Pathname.new container || '.'
20
+ raise "#{@container} does not exist" unless @container.exist?
21
+
14
22
  @src_db = maybe_deebe src_db
15
- @container = Pathname.new container
16
23
  @pump = make_pump( @src_db, pump )
17
24
 
18
25
  @src_db.extension :schema_dumper
@@ -20,33 +27,21 @@ class DumpSchema
20
27
 
21
28
  attr_reader :src_db, :container, :pump
22
29
 
23
- def schema_migration
24
- @schema_migration ||= src_db.dump_schema_migration(:indexes=>false, :same_db => same_db)
25
- end
26
-
27
- def index_migration
28
- @index_migration ||= src_db.dump_indexes_migration(:same_db => same_db)
29
- end
30
-
31
- def fk_migration
32
- @fk_migration ||= src_db.dump_foreign_key_migration(:same_db => same_db)
33
- end
34
-
35
30
  def same_db
36
31
  false
37
32
  end
38
33
 
39
- def logger
40
- @logger ||= Logger.new STDERR
34
+ def numbering
35
+ @numbering ||= '000'
41
36
  end
42
37
 
43
38
  def dump_schema
44
- numbering = '000'
45
-
46
39
  (container + "#{numbering.next!}_schema.rb").open('w') do |io|
47
40
  io.write schema_migration
48
41
  end
42
+ end
49
43
 
44
+ def dump_indexes
50
45
  (container + "#{numbering.next!}_indexes.rb").open('w') do |io|
51
46
  io.write index_migration
52
47
  end
@@ -107,4 +102,10 @@ class DumpSchema
107
102
  dump_table table_name
108
103
  end
109
104
  end
105
+
106
+ def call
107
+ dump_schema
108
+ dump_tables
109
+ dump_indexes
110
+ end
110
111
  end
data/lib/wyrm/hole.rb ADDED
@@ -0,0 +1,164 @@
1
+ require 'thread'
2
+ require 'wyrm/logger'
3
+
4
+ require 'wyrm/module'
5
+ require 'wyrm/pump'
6
+ require 'wyrm/pump_maker'
7
+ require 'wyrm/schema_tools'
8
+ require 'wyrm/core_extensions'
9
+
module Wyrm
  # This bypasses the need to marshal objects between two pumps.
  # It uses a queue of the record arrays instead.
  #
  # Typical use, as done by the wyrm executable:
  #   Hole.new( src_db_url, dst_db_url ).call
  class Hole
    include PumpMaker
    include Logger
    include SchemaTools

    # This is the codec. Named for the mouth of a wormhole. Cos finding a good name for this is hard.
    #
    # Connects the two pumps together. Implements Codec, Quacks like IO.
    class Mouth
      include Logger

      # queue_size - maximum number of records buffered between the two
      #              pumps (only honoured when a SizedQueue is used, see #queue).
      def initialize( queue_size: 5000 )
        @queue_size = queue_size
        @flushed = false
      end

      # Codec interface: push obj onto the queue.
      # This is a bit weird because io_queue will usually == self
      def encode( obj, io_queue )
        io_queue.enq obj
      end

      # Codec interface: pop the next object, yield it if a block was given,
      # and return it.
      # This is a bit weird because io_queue will usually == self
      def decode( io_queue, &block )
        obj = io_queue.deq
        yield obj if block_given?
        obj
      end

      # Prepare for the next transfer: clear the flushed flag and empty the queue.
      def reset
        # do this first, so any (hopefully not) remaining waiters don't
        # go into the blocking deq again.
        @flushed = false

        # clear any poisons, and release any (hopefully not) remaining waiters
        queue.clear
      end

      # queue could be empty while producer is generating something,
      # so only eof after flush has been called.
      def eof?
        # queue is not empty if it's been poisoned.
        @flushed && queue.empty?
      end

      # use a SizedQueue so we don't run out of memory during a big transfer
      def queue
        @queue ||=
          if RUBY_VERSION == '2.1.0'
            logger.warn "SizedQueue broken in 2.1.0 (https://bugs.ruby-lang.org/issues/9302). Falling back to Queue, which may run out of memory."
            Queue.new
          else
            SizedQueue.new @queue_size
          end
      end

      def enq( value )
        queue.enq value
      end

      # Pop the next value. A :poison marker is never returned to the caller:
      # it is passed on to other waiters (if any) and StopIteration is raised.
      def deq( *args )
        rv = queue.deq( *args )
        if rv == :poison
          poison_queue
          raise StopIteration
        end
        rv
      end

      def poison_queue
        # poison the queue. waiters will have to re-queue this.
        queue << :poison if queue.empty? && queue.num_waiting > 0
      end

      # this gets called after dump is finished, by pump
      def flush
        # do this first, so any non-poisoned waiters will eof
        # NOTE(review): the original comment trailed off here
        # ("synchronisation is not really important because"); the rationale
        # was never recorded, so confirm before relying on unsynchronised
        # access to @flushed.
        @flushed = true
        poison_queue
      end
    end

    # src_db, dst_db - whatever maybe_deebe (from PumpMaker) accepts; the
    #                  wyrm executable passes db url strings.
    # drop_tables    - when true, #call drops tables before transferring.
    # queue_size     - capacity of the record queue between the two pumps.
    def initialize( src_db, dst_db, drop_tables: true, queue_size: 5000 )
      # called only once per run, so not really a performance issue
      @options = method(__method__).kwargs_as_hash( binding )

      @src_db = maybe_deebe src_db
      @dst_db = maybe_deebe dst_db

      @src_db.extension :schema_dumper
    end

    attr_reader :src_db, :dst_db, :options

    # The shared queue/codec object both pumps talk to. Sized from the
    # queue_size option, which was previously accepted but ignored.
    def mouth
      @mouth ||= Mouth.new( queue_size: options[:queue_size] )
    end

    def src_pump
      @src_pump ||= build_pump( src_db )
    end

    def dst_pump
      @dst_pump ||= build_pump( dst_db )
    end

    # Copy all rows of table_name from src_db to dst_db through the mouth.
    # Returns early (after logging) when the source table has no records.
    def transfer_table( table_name )
      mouth.reset
      src_pump.table_name = dst_pump.table_name = table_name

      if src_pump.table_dataset.empty?
        logger.info "No records in #{table_name}"
        return
      end

      # Use threads so the db read/writes aren't waiting for one another.
      recv_thread = Thread.new{ dst_pump.restore }
      send_thread = Thread.new{ src_pump.dump }

      send_thread.join
      recv_thread.join
    end

    # Create tables, yield self so the caller can move the data, then create
    # indexes. create_tables / create_indexes come from the included modules.
    def transfer_schema( &transfer_table_block )
      create_tables

      # transfer tables here
      yield self if block_given?

      create_indexes
    end

    def transfer_tables
      logger.info "transferring tables"
      src_db.tables.each do |table_name|
        transfer_table table_name
      end
    end

    # Run the whole transfer: optionally drop tables, then schema, data, indexes.
    def call
      if options[:drop_tables]
        logger.info "dropping tables"
        # NOTE(review): drop_tables is defined outside this file; presumably
        # it drops the tables with the source's names in dst_db - confirm.
        drop_tables src_db.tables
      end

      transfer_schema do
        transfer_tables
      end
    end

    private

    # Build a Pump for db that reads from / writes to the shared mouth.
    def build_pump( db )
      Pump.new( {db: db, io: mouth, codec: mouth, logger: logger}.merge( options[:pump] ||{} ) )
    end
  end
end