wyrm 0.1.4 → 0.2.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: d1446fe2b49cd863188938f3aebbccda3bb41f47
- data.tar.gz: 265fcd26522e424398b5c9584e8a15687d595431
+ metadata.gz: c432ee798bf7c5208a16696daa5741d64351721d
+ data.tar.gz: 51e22092611ef48f16d4757eb8c327f96e6847f5
  SHA512:
- metadata.gz: 8014ba8205ad7e2d1af85539291fc9e95d0af94bd1561a59368fdd8264246c7566fd5cd7dd45271fad0f41fc001d3dad811840922e03a5965e8cd9e2997514a7
- data.tar.gz: 1c450d1b9efd49717e81b67c03109b6670a3e6215cf60f3b0df38f8fb05e89dbc71e6d83527e87e8d3a038e92d93ca0a5b2ad2c7c66473a5434de072e88b4013
+ metadata.gz: b907a9adbd5b47ac9847a0aeaa95e5318eff307735947f8e4dfd4aa35e819f8ff873cbf53053202a4a944f2961fe6a3254f33bd4816753386580e2049c13e186
+ data.tar.gz: 65e512436c2991f2b9786c8e9e2587854b227e95bac2dcc1ec2f87b3d0840e786ac4e13a454197cdeb38a43eae948c9fbaa743b3c1f3b21d02714b398fd56894
data/Gemfile CHANGED
@@ -1,10 +1,8 @@
  source 'https://rubygems.org'
  # source 'file:///var/cache/rubygems'

- gem 'sequel'
+ gem 'sequel', '~> 4.0.0'
  gem 'fastandand'
- gem 'pry'
- gem 'pry-debundle'

  # Specify your gem's dependencies in wyrm.gemspec
  gemspec
data/README.md CHANGED
@@ -12,10 +12,17 @@ handle compound primary keys and tables without primary keys.
  Wyrm because:

  - I like dragons
- - I can have a Wyrm::Hole to transfer data through :-D
+ - I can (eventually) have a Wyrm::Hole to transfer data through :-D
+
+ ## Dependencies
+
+ You must have a working
+ [pbzip2](http://compression.ca/pbzip2/ "Will use all your cores")
+ on your path.

  ## Installation

+
  Add this line to your application's Gemfile:

  gem 'wyrm'
@@ -30,38 +37,52 @@ Or install it yourself as:

  Make sure you install the db gems, typically

- $ gem install pg mysql2
+ $ gem install pg sequel_pg mysql2

  ## Usage

- This is mostly a toolkit right now. To transfer from mysql to postgres do:
- ```ruby
- require 'sequel'
- require 'pathname'
+ ### CLI
+
+ Very basic cli at this point.
+
+ From the source db to the file system
+
+ $ wyrm mysql2://localhost/beeg_data_bays /tmp/lots_fs_space

- # on the source host
- # dump tables from mysql
- require 'wyrm/dump_schema'
- src_db = Sequel.connect "mysql2://localhost/lots"
- ds = DumpSchema.new src_db, Pathname('/tmp/lots')
- ds.dump_schema
+ Optionally transfer data. Already compressed, so no -z

- # this might take a while ;-)
- ds.dump_tables
+ $ rsync -var /tmp/lots_fs_space user@host:/tmp/lots_fs_space

- # transfer data. Already compressed, so no -z
- # rsync -var /tmp/lots user@host:/var/data/
+ On the destination host

- # on the destination host
- # restore tables to postgres
+ $ wyrm /tmp/lots_fs_space postgres://localhost/betta_dee_bee
+
+ ### irb / pry
+
+ For restoring. dump will be similar.
+
+ ``` ruby
  require 'wyrm/restore_schema'
- dst_db = Sequel.connect "postgres://localhost/lots"
- rs = RestoreSchema.new dst_db, Pathname('/var/data/lots')
+ rs = RestoreSchema.new 'postgres://postgres@localhost/your_db', '/mnt/disk/wyrm'
  rs.create
  rs.restore_tables
  rs.index
  ```

+ Or for the lower-level stuff
+
+ ``` ruby
+ require 'sequel'
+ require 'wyrm/db_pump'
+
+ db = Sequel.connect 'postgres://postgres@localhost/other_db'
+ dbp = DbPump.new db, :things
+ dbp.open_bz2 '/mnt/disk/wyrm/things.dbp.bz2'
+ dbp.each_row do |row|
+ puts row.inspect
+ end
+ ```
+
  ## Contributing

  1. Fork it
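The irb / pry example in the Usage section above only covers the restore side and notes that dumping "will be similar". For reference, a minimal sketch of the dump side, assuming the string-accepting DumpSchema constructor introduced in lib/wyrm/dump_schema.rb below; the connection URL and output directory are placeholders:

``` ruby
# Sketch only: dump-side counterpart of the restore example above.
# The db url and output directory are placeholders.
require 'wyrm/dump_schema'

ds = DumpSchema.new 'mysql2://localhost/beeg_data_bays', '/tmp/lots_fs_space'
ds.dump_schema   # writes the schema migration files
ds.dump_tables   # writes one <table>.dbp.bz2 per table, via pbzip2
```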
data/bin/wyrm ADDED
@@ -0,0 +1,36 @@
+ #! /usr/bin/env ruby
+
+ require 'pathname'
+ require 'uri'
+
+ def restore( db, directory )
+   require 'wyrm/restore_schema'
+   rs = RestoreSchema.new db, directory
+   rs.create
+   rs.restore_tables
+   rs.index
+ end
+
+ def dump( db, directory )
+   require 'wyrm/dump_schema'
+   ds = DumpSchema.new db, directory
+   ds.dump_schema
+   ds.dump_tables
+ end
+
+ if ARGV.empty?
+   puts "Provide source and destination"
+   puts "Either can be a sequel db string or a directory"
+ end
+
+ src, dst = ARGV.map{|arg| URI.parse arg}
+
+ if src.scheme && Pathname(dst.to_s).exist?
+   # src is a db path, so dump from it
+   dump( src.to_s, dst.to_s )
+ elsif dst.scheme && Pathname(src.to_s).exist?
+   # dst is a path and src is a url, so restore
+   restore( dst.to_s, src.to_s )
+ else
+   puts "Don't know how to handle #{src} -> #{dst}"
+ end
data/lib/wyrm/db_pump.rb CHANGED
@@ -10,12 +10,6 @@ Sequel.extension :migration
  # TODO when restoring, could use a SizeQueue to make sure the db is kept busy

  # TODO need to version the dumps, or something like that.
- # So the slowest-changing variables are the db, the io stream
- # and the page size.
- # table will change every call. Will IO stream change between
- # table changes? No. So a currying type approach will work.
- # Somebody must have done this before.
- # But table and io are often related (ie table going to one file)
  # TODO This really should be Wyrm::Hole. Or maybe Wyrm::Hole should
  # be the codec that connects two DbPumps, for direct transfer?
  class DbPump
@@ -31,32 +25,36 @@ class DbPump
  end

  attr_accessor :io, :page_size, :dry_run
+ def dry_run?; dry_run; end

  # These affect cached values
  attr_reader :db, :table_name

- def table_name=( name_sym )
+ def invalidate_cached_members
  @primary_keys = nil
  @table_dataset = nil
+ end
+
+ def table_name=( name_sym )
+ invalidate_cached_members
  @table_name = name_sym
  end

  def db=( other_db )
- @primary_keys = nil
- @table_dataset = nil
+ invalidate_cached_members
  @db = other_db
  @db.extension :pagination
  end

- def dry_run?; dry_run; end
-
- class RespondsTo
- def initialize( *methods )
- @methods = methods
- end
-
- def ===( instance )
- @methods.all?{|m| instance.respond_to? m}
+ # return an object that responds to ===
+ # which returns true if ==='s parameter
+ # responds to all the methods
+ def quacks_like( *methods )
+ @quacks_like ||= {}
+ @quacks_like[methods] ||= Object.new.tap do |obj|
+ obj.define_singleton_method(:===) do |instance|
+ methods.all?{|m| instance.respond_to? m}
+ end
  end
  end

@@ -66,18 +64,16 @@ class DbPump
  when :yaml; YamlCodec.new
  when :marshal; MarshalCodec.new
  when Class
- codec.new
- when RespondsTo.new( :encode, :decode )
- codec
+ codec_thing.new
+ when quacks_like( :encode, :decode )
+ codec_thing
  else
- raise "unknown codec #{codec}"
+ raise "unknown codec #{codec_thing}"
  end
  end

  attr_reader :codec

- # TODO could use msgpack as serialization here, but its API is unpleasant.
-
  class MarshalCodec
  def encode( obj, io )
  Marshal.dump obj, io
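The two hunks above replace the RespondsTo helper class with quacks_like, a memoized duck-type matcher: it returns an object whose === checks that its argument responds to the given methods, so it can sit directly in the case/when of codec=. A rough standalone illustration of that pattern, assuming a made-up MyCodec class that is not part of the gem:

``` ruby
# Illustration only: the duck-type === matcher used by codec=.
# MyCodec is a hypothetical class, not part of wyrm.
class MyCodec
  def encode( obj, io ); end
  def decode( io ); end
end

codec_matcher = Object.new.tap do |obj|
  obj.define_singleton_method(:===) do |instance|
    [:encode, :decode].all?{|m| instance.respond_to? m}
  end
end

case MyCodec.new
when codec_matcher then puts "quacks like a codec"
else puts "unknown codec"
end
# => quacks like a codec
```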
@@ -90,18 +86,6 @@ class DbPump
  end
  end

- class MsgPackCodec
- def encode( obj, io )
- Marshal.dump obj, io
- end
-
- def decode( io, &block )
- obj = Marshal.load(io)
- yield obj if block_given?
- obj
- end
- end
-
  class YamlCodec
  def encode( obj, io )
  YAML.dump obj, io
@@ -203,51 +187,70 @@ class DbPump
  end
  end

- # TODO possible memory issues here if the rows are big. May need to fork this.
- # TODO lazy evaluation
- def restore( start_row: 0 )
- logger.info "restoring #{table_name}"
- # destination db should be same structure as incoming data
- column_names = db.schema(table_name.to_sym).map( &:first )
- first = ->(row){raise "schema mismatch" if row.size != column_names.size}
+ # TODO lazy evaluation / streaming
+ # start_row is zero-based
+ def restore( start_row: 0, filename: 'io' )
+ columns = table_dataset.columns
+ logger.info{ "inserting to #{table_name} #{columns.inspect}" }
+
+ # get the Enumerator
+ row_enum = each_row
+
+ # check that columns match
+ raise "schema mismatch" if row_enum.peek.size != columns.size

  rows_restored = 0

- # skip this many rows
- start_row.times do
- codec.decode( io ) {|row|}
+ if start_row != 0
+ logger.info{ "skipping #{start_row} rows from #{filename}" }
+ start_row.times do |i|
+ row_enum.next
+ logger.info{ "skipped #{i} from #{filename}" } if i % page_size == 0
+ end
+ logger.info{ "skipped #{start_row} from #{filename}" }
+ rows_restored += start_row
  end

- # copy rows into db
- while !io.eof?
- # fetch a page of rows
- rows_ary = []
- begin
- page_size.times do |i|
- codec.decode( io ) do |row|
- rows_ary << row
- end
- rows_restored += 1
- end
- rescue EOFError => e
- # ran out of rows, so just use the ones we have so far
- end
+ logger.info{ "inserting to #{table_name} from #{rows_restored}" }

- # insert to db. Hopeful db support bulk insert, which Sequel will figure out
+ loop do
  db.transaction do
- table_dataset.import column_names, rows_ary
- yield rows_restored if block_given?
- logger.info "restored #{rows_restored}"
+ begin
+ page_size.times do
+ # This skips all the checks in the Sequel code
+ sql = table_dataset.clone( columns: columns, values: row_enum.next ).send( :clause_sql, :insert )
+ db.execute sql unless dry_run?
+ rows_restored += 1
+ end
+ rescue StopIteration
+ # reached the end of the input stream.
+ # So commit this transaction, and then re-raise
+ # StopIteration to get out of the loop{} statement
+ db.after_commit{ raise StopIteration }
+ end
+ logger.info{ "#{table_name} inserted #{rows_restored}" }
  end
  end
-
+ logger.info{ "#{table_name} done. Inserted #{rows_restored}." }
  rows_restored
  end

- def self.from_bz2( filename, db, table_name, options = {} )
- IO.popen( "pbzip2 -d -c #{filename}" ) do |io|
- dbpump = DbPump.new db, table_name, io: io
- dbpump.restore
+ # this doesn't really belong here, but it will do for now.
+ def open_bz2( filename )
+ io.andand.close if io != STDOUT && !io.andand.closed?
+ self.io = IO.popen( "pbzip2 -d -c #{filename}" )
+ end
+
+ # enumerate through the given io at its current position
+ def each_row
+ return enum_for(__method__) unless block_given?
+ yield codec.decode( io ) until io.eof?
+ end
+
+ def insert_sql_each
+ return enum_for(__method__) unless block_given?
+ each_row do |row|
+ yield table_dataset.insert_sql( row )
  end
  end
  end
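restore now pulls rows through the each_row Enumerator, inserts them one page per transaction, and uses after_commit plus StopIteration to commit the final partial page cleanly. One practical consequence is that a partially restored table can be resumed by skipping rows. A minimal sketch of that, assuming the connection URL, path, table name and row count are all placeholders:

``` ruby
# Sketch only: resuming a partially restored table.
# Connection url, path, table name and row count are placeholders.
require 'sequel'
require 'wyrm/db_pump'

db = Sequel.connect 'postgres://postgres@localhost/other_db'
pump = DbPump.new db, :things
pump.open_bz2 '/mnt/disk/wyrm/things.dbp.bz2'

# skip the rows that were already inserted, then carry on
pump.restore start_row: 100_000, filename: 'things.dbp.bz2'
```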
data/lib/wyrm/dump_schema.rb CHANGED
@@ -1,15 +1,5 @@
  require 'logger'
- require 'wyrm/db_pump'
-
- class Object
- def call_or_self( maybe_callable )
- if maybe_callable.respond_to? :call
- maybe_callable.call( self )
- else
- maybe_callable
- end
- end
- end
+ require 'wyrm/pump_maker'

  # Dump a schema and compressed data from a db to a set of files
  # src_db = Sequel.connect "postgres://localhost:5454/lots"
@@ -17,19 +7,18 @@ end
  # ds.dump_schema
  # ds.dump_tables
  class DumpSchema
+ include PumpMaker
+
  def initialize( src_db, container = nil, pump: nil )
- src_db.extension :schema_dumper
- @src_db = src_db
- @container = Pathname(container)
- @pump = make_pump( pump )
+ @src_db = maybe_deebe src_db
+ @container = Pathname.new container
+ @pump = make_pump( @src_db, pump )
+
+ @src_db.extension :schema_dumper
  end

  attr_reader :src_db, :container, :pump

- def make_pump( pump_thing )
- call_or_self(pump_thing) || DbPump.new( src_db, nil )
- end
-
  def schema_migration
  @schema_migration ||= src_db.dump_schema_migration(:indexes=>false, :same_db => same_db)
  end
@@ -116,11 +105,17 @@ class DumpSchema
  end

  def dump_table( table_name )
+ pump.table_name = table_name
+ if pump.table_dataset.empty?
+ logger.info "No records in #{table_name}"
+ return
+ end
+
  filename = container + "#{table_name}.dbp.bz2"
  logger.info "dumping #{table_name} to #{filename}"
+
  open_bz2 filename do |zio|
  # generate the dump
- pump.table_name = table_name
  pump.io = zio
  pump.dump
  end
data/lib/wyrm/pump_maker.rb ADDED
@@ -0,0 +1,28 @@
+ require 'wyrm/db_pump'
+
+ class Object
+   def call_or_self( maybe_callable )
+     if maybe_callable.respond_to? :call
+       maybe_callable.call( self )
+     else
+       maybe_callable
+     end
+   end
+ end
+
+ module PumpMaker
+   def make_pump( db, pump_thing )
+     call_or_self(pump_thing) || DbPump.new( db, nil )
+   end
+
+   def maybe_deebe( db_or_string )
+     case db_or_string
+     when String
+       Sequel.connect db_or_string
+     when Sequel::Database
+       db_or_string
+     else
+       raise "Don't know how to db-ify #{db_or_string.inspect}"
+     end
+   end
+ end
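PumpMaker is the new home for the two conversions DumpSchema and RestoreSchema share: maybe_deebe accepts either a Sequel connection string or an already-connected Sequel::Database, and make_pump accepts nil (build a default DbPump) or a ready-made pump. A rough sketch of the accepted inputs; the including class and connection string are placeholders:

``` ruby
# Sketch only: inputs accepted by PumpMaker. Example class is hypothetical.
require 'sequel'
require 'wyrm/pump_maker'

class Example
  include PumpMaker
end

ex = Example.new

db = ex.maybe_deebe 'postgres://postgres@localhost/your_db' # String -> Sequel::Database
db = ex.maybe_deebe db                                      # a Database passes straight through

ex.make_pump( db, nil )                     # falls back to DbPump.new( db, nil )
ex.make_pump( db, DbPump.new( db, nil ) )   # a ready-made pump is used as-is
```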
data/lib/wyrm/restore_schema.rb CHANGED
@@ -1,5 +1,5 @@
  require 'logger'
- require 'wyrm/db_pump'
+ require 'wyrm/pump_maker'

  # Load a schema from a set of dump files (from DumpSchema)
  # and restore the table data
@@ -8,15 +8,18 @@ require 'wyrm/db_pump'
  # rs.create
  # rs.restore_tables
  class RestoreSchema
- def initialize( dst_db, container )
- @container = container
- @dst_db = dst_db
- @options = {:codec => :marshal}
- load_migrations @container
+ include PumpMaker
+
+ def initialize( dst_db, container, pump: nil )
+ @container = Pathname.new container
+ @dst_db = maybe_deebe dst_db
+ @pump = make_pump( @dst_db, pump )
+
+ load_migrations
  end

+ attr_reader :pump
  attr_reader :dst_db
- attr_reader :options
  attr_reader :container
  attr_reader :schema_migration, :index_migration, :fk_migration

@@ -24,7 +27,7 @@ class RestoreSchema
  @logger ||= Logger.new STDERR
  end

- def load_migrations( container )
+ def load_migrations
  @schema_migration = (container + '001_schema.rb').read
  @index_migration = (container + '003_indexes.rb').read
  @fk_migration = (container + '004_foreign_keys.rb').read
@@ -46,19 +49,24 @@ class RestoreSchema

  # create the destination schema
  def create
+ logger.info "creating tables"
  eval( schema_migration ).apply dst_db, :up
  end

- def restore_one_table( table_file )
+ # assume the table name is the base name of table_file
+ def restore_table( table_file )
  logger.info "restoring from #{table_file}"
- table_name = table_file.basename.sub_ext('').sub_ext('').to_s.to_sym
- # check if table has been restored already, and has the correct rows,
+ pump.table_name = table_file.basename.sub_ext('').sub_ext('').to_s.to_sym
+ # TODO check if table has been restored already, and has the correct rows,
  # otherwise pass in a start row.
- DbPump.from_bz2 table_file, dst_db, table_name
+ IO.popen( "pbzip2 -d -c #{table_file}" ) do |io|
+ pump.io = io
+ pump.restore
+ end
  end

  def restore_tables
  table_files = Pathname.glob Pathname(container) + '*dbp.bz2'
- table_files.sort_by{|tf| tf.stat.size}.each{|table_file| restore_one_table table_file}
+ table_files.sort_by{|tf| tf.stat.size}.each{|table_file| restore_table table_file}
  end
  end
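RestoreSchema now accepts a connection string or a Sequel::Database, builds its pump through PumpMaker, and exposes a pump: keyword, so a pre-configured DbPump can be injected instead of the default. A minimal sketch, with placeholder connection URL and dump directory:

``` ruby
# Sketch only: injecting a pre-built pump into RestoreSchema.
# Connection url and dump directory are placeholders.
require 'sequel'
require 'wyrm/restore_schema'

db = Sequel.connect 'postgres://postgres@localhost/your_db'
rs = RestoreSchema.new db, '/mnt/disk/wyrm', pump: DbPump.new( db, nil )
rs.create
rs.restore_tables
rs.index
```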
data/lib/wyrm/version.rb CHANGED
@@ -1,3 +1,3 @@
  module Wyrm
- VERSION = "0.1.4"
+ VERSION = "0.2.0"
  end
data/wyrm.gemspec CHANGED
@@ -18,7 +18,7 @@ Gem::Specification.new do |spec|
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

- spec.add_runtime_dependency "sequel"
+ spec.add_runtime_dependency 'sequel', '~> 4.0.0'
  spec.add_runtime_dependency "fastandand"

  spec.add_development_dependency "bundler", "~> 1.3"
metadata CHANGED
@@ -1,29 +1,29 @@
  --- !ruby/object:Gem::Specification
  name: wyrm
  version: !ruby/object:Gem::Version
- version: 0.1.4
+ version: 0.2.0
  platform: ruby
  authors:
  - John Anderson
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2013-08-02 00:00:00.000000000 Z
+ date: 2013-08-03 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: sequel
  requirement: !ruby/object:Gem::Requirement
  requirements:
- - - '>='
+ - - ~>
  - !ruby/object:Gem::Version
- version: '0'
+ version: 4.0.0
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
- - - '>='
+ - - ~>
  - !ruby/object:Gem::Version
- version: '0'
+ version: 4.0.0
  - !ruby/object:Gem::Dependency
  name: fastandand
  requirement: !ruby/object:Gem::Requirement
@@ -69,7 +69,8 @@ dependencies:
  description: Transfer from one SQL database to another
  email:
  - panic@semiosix.com
- executables: []
+ executables:
+ - wyrm
  extensions: []
  extra_rdoc_files: []
  files:
@@ -79,10 +80,12 @@ files:
  - LICENSE.txt
  - README.md
  - Rakefile
+ - bin/wyrm
  - lib/wyrm.rb
  - lib/wyrm/db_pump.rb
  - lib/wyrm/dump_schema.rb
  - lib/wyrm/other_schema.rb
+ - lib/wyrm/pump_maker.rb
  - lib/wyrm/restore_schema.rb
  - lib/wyrm/transferer.rb
  - lib/wyrm/version.rb