wyrm 0.1.4 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d1446fe2b49cd863188938f3aebbccda3bb41f47
4
- data.tar.gz: 265fcd26522e424398b5c9584e8a15687d595431
3
+ metadata.gz: c432ee798bf7c5208a16696daa5741d64351721d
4
+ data.tar.gz: 51e22092611ef48f16d4757eb8c327f96e6847f5
5
5
  SHA512:
6
- metadata.gz: 8014ba8205ad7e2d1af85539291fc9e95d0af94bd1561a59368fdd8264246c7566fd5cd7dd45271fad0f41fc001d3dad811840922e03a5965e8cd9e2997514a7
7
- data.tar.gz: 1c450d1b9efd49717e81b67c03109b6670a3e6215cf60f3b0df38f8fb05e89dbc71e6d83527e87e8d3a038e92d93ca0a5b2ad2c7c66473a5434de072e88b4013
6
+ metadata.gz: b907a9adbd5b47ac9847a0aeaa95e5318eff307735947f8e4dfd4aa35e819f8ff873cbf53053202a4a944f2961fe6a3254f33bd4816753386580e2049c13e186
7
+ data.tar.gz: 65e512436c2991f2b9786c8e9e2587854b227e95bac2dcc1ec2f87b3d0840e786ac4e13a454197cdeb38a43eae948c9fbaa743b3c1f3b21d02714b398fd56894
data/Gemfile CHANGED
@@ -1,10 +1,8 @@
1
1
  source 'https://rubygems.org'
2
2
  # source 'file:///var/cache/rubygems'
3
3
 
4
- gem 'sequel'
4
# Pessimistic constraint; the operator goes inside the version string
# (same requirement as declared in wyrm.gemspec).
gem 'sequel', '~> 4.0.0'
5
5
  gem 'fastandand'
6
- gem 'pry'
7
- gem 'pry-debundle'
8
6
 
9
7
  # Specify your gem's dependencies in wyrm.gemspec
10
8
  gemspec
data/README.md CHANGED
@@ -12,10 +12,17 @@ handle compound primary keys and tables without primary keys.
12
12
  Wyrm because:
13
13
 
14
14
  - I like dragons
15
- - I can have a Wyrm::Hole to transfer data through :-D
15
+ - I can (eventually) have a Wyrm::Hole to transfer data through :-D
16
+
17
+ ## Dependencies
18
+
19
+ You must have a working
20
+ [pbzip2](http://compression.ca/pbzip2/ "Will use all your cores")
21
+ on your path.
16
22
 
17
23
  ## Installation
18
24
 
25
+
19
26
  Add this line to your application's Gemfile:
20
27
 
21
28
  gem 'wyrm'
@@ -30,38 +37,52 @@ Or install it yourself as:
30
37
 
31
38
  Make sure you install the db gems, typically
32
39
 
33
- $ gem install pg mysql2
40
+ $ gem install pg sequel_pg mysql2
34
41
 
35
42
  ## Usage
36
43
 
37
- This is mostly a toolkit right now. To transfer from mysql to postgres do:
38
- ```ruby
39
- require 'sequel'
40
- require 'pathname'
44
+ ### CLI
45
+
46
+ The CLI is very basic at this point.
47
+
48
+ From the source db to the file system
49
+
50
+ $ wyrm mysql2://localhost/beeg_data_bays /tmp/lots_fs_space
41
51
 
42
- # on the source host
43
- # dump tables from mysql
44
- require 'wyrm/dump_schema'
45
- src_db = Sequel.connect "mysql2://localhost/lots"
46
- ds = DumpSchema.new src_db, Pathname('/tmp/lots')
47
- ds.dump_schema
52
+ Optionally transfer the data. The dump files are already compressed, so rsync's -z is not needed.
48
53
 
49
- # this might take a while ;-)
50
- ds.dump_tables
54
+ $ rsync -var /tmp/lots_fs_space user@host:/tmp/lots_fs_space
51
55
 
52
- # transfer data. Already compressed, so no -z
53
- # rsync -var /tmp/lots user@host:/var/data/
56
+ On the destination host
54
57
 
55
- # on the destination host
56
- # restore tables to postgres
58
+ $ wyrm /tmp/lots_fs_space postgres://localhost/betta_dee_bee
59
+
60
+ ### irb / pry
61
+
62
+ For restoring (dumping is similar):
63
+
64
+ ``` ruby
57
65
  require 'wyrm/restore_schema'
58
- dst_db = Sequel.connect "postgres://localhost/lots"
59
- rs = RestoreSchema.new dst_db, Pathname('/var/data/lots')
66
+ rs = RestoreSchema.new 'postgres://postgres@localhost/your_db', '/mnt/disk/wyrm'
60
67
  rs.create
61
68
  rs.restore_tables
62
69
  rs.index
63
70
  ```
64
71
 
72
+ Or for the lower-level stuff
73
+
74
+ ``` ruby
75
+ require 'sequel'
76
+ require 'wyrm/db_pump'
77
+
78
+ db = Sequel.connect 'postgres://postgres@localhost/other_db'
79
+ dbp = DbPump.new db, :things
80
+ dbp.open_bz2 '/mnt/disk/wyrm/things.dbp.bz2'
81
+ dbp.each_row do |row|
82
+ puts row.inspect
83
+ end
84
+ ```
85
+
65
86
  ## Contributing
66
87
 
67
88
  1. Fork it
@@ -0,0 +1,36 @@
#! /usr/bin/env ruby

# Command-line front end for wyrm.
#
#   wyrm SOURCE DESTINATION
#
# One of the two arguments is a sequel db string (anything with a URI
# scheme) and the other is an existing directory. A db string followed by
# a directory dumps the db into the directory; a directory followed by a
# db string restores the directory's contents into the db.

require 'pathname'
require 'uri'

# Restore the dump found in directory into db by running
# RestoreSchema#create, #restore_tables and #index, in that order.
# The wyrm code is only loaded when a restore is actually requested.
def restore( db, directory )
  require 'wyrm/restore_schema'
  rs = RestoreSchema.new db, directory
  rs.create
  rs.restore_tables
  rs.index
end

# Dump db into directory by running DumpSchema#dump_schema and then
# DumpSchema#dump_tables.
# The wyrm code is only loaded when a dump is actually requested.
def dump( db, directory )
  require 'wyrm/dump_schema'
  ds = DumpSchema.new db, directory
  ds.dump_schema
  ds.dump_tables
end

# Return the URI scheme of arg, or nil when arg has no scheme or is not a
# valid URI at all (a path containing spaces, for instance).
def db_scheme( arg )
  URI.parse( arg ).scheme
rescue URI::InvalidURIError
  nil
end

# Work out what to do with the two command-line arguments.
# Returns :dump when src has a scheme and dst is an existing path,
# :restore when dst has a scheme and src is an existing path,
# and nil otherwise.
def transfer_direction( src, dst )
  if db_scheme( src ) && Pathname( dst ).exist?
    :dump
  elsif db_scheme( dst ) && Pathname( src ).exist?
    :restore
  end
end

if ARGV.empty?
  puts "Provide source and destination"
  puts "Either can be a sequel db string or a directory"
elsif ARGV.size != 2
  # Without exactly two arguments there is no source/destination pair to
  # inspect, so stop here instead of failing later on a nil.
  abort "Expected a source and a destination, got #{ARGV.size} argument(s)"
else
  src, dst = ARGV

  case transfer_direction( src, dst )
  when :dump
    # src is a db string and dst is a path, so dump
    dump( src, dst )
  when :restore
    # src is a path and dst is a db string, so restore
    restore( dst, src )
  else
    puts "Don't know how to handle #{src} -> #{dst}"
  end
end
@@ -10,12 +10,6 @@ Sequel.extension :migration
10
10
  # TODO when restoring, could use a SizeQueue to make sure the db is kept busy
11
11
 
12
12
  # TODO need to version the dumps, or something like that.
13
- # So the slowest-changing variables are the db, the io stream
14
- # and the page size.
15
- # table will change every call. Will IO stream change between
16
- # table changes? No. So a currying type approach will work.
17
- # Somebody must have done this before.
18
- # But table and io are often related (ie table going to one file)
19
13
  # TODO This really should be Wyrm::Hole. Or maybe Wyrm::Hole should
20
14
  # be the codec that connects two DbPumps, for direct transfer?
21
15
  class DbPump
@@ -31,32 +25,36 @@ class DbPump
31
25
  end
32
26
 
33
27
  attr_accessor :io, :page_size, :dry_run
# Predicate form of the dry_run accessor: returns whatever dry_run returns.
def dry_run?
  dry_run
end
34
29
 
35
30
  # These affect cached values
36
31
  attr_reader :db, :table_name
37
32
 
38
- def table_name=( name_sym )
33
+ def invalidate_cached_members
39
34
  @primary_keys = nil
40
35
  @table_dataset = nil
36
+ end
37
+
38
+ def table_name=( name_sym )
39
+ invalidate_cached_members
41
40
  @table_name = name_sym
42
41
  end
43
42
 
44
43
  def db=( other_db )
45
- @primary_keys = nil
46
- @table_dataset = nil
44
+ invalidate_cached_members
47
45
  @db = other_db
48
46
  @db.extension :pagination
49
47
  end
50
48
 
51
- def dry_run?; dry_run; end
52
-
53
- class RespondsTo
54
- def initialize( *methods )
55
- @methods = methods
56
- end
57
-
58
- def ===( instance )
59
- @methods.all?{|m| instance.respond_to? m}
49
+ # return an object that responds to ===
50
+ # which returns true if ==='s parameter
51
+ # responds to all the methods
52
+ def quacks_like( *methods )
53
+ @quacks_like ||= {}
54
+ @quacks_like[methods] ||= Object.new.tap do |obj|
55
+ obj.define_singleton_method(:===) do |instance|
56
+ methods.all?{|m| instance.respond_to? m}
57
+ end
60
58
  end
61
59
  end
62
60
 
@@ -66,18 +64,16 @@ class DbPump
66
64
  when :yaml; YamlCodec.new
67
65
  when :marshal; MarshalCodec.new
68
66
  when Class
69
- codec.new
70
- when RespondsTo.new( :encode, :decode )
71
- codec
67
+ codec_thing.new
68
+ when quacks_like( :encode, :decode )
69
+ codec_thing
72
70
  else
73
- raise "unknown codec #{codec}"
71
+ raise "unknown codec #{codec_thing}"
74
72
  end
75
73
  end
76
74
 
77
75
  attr_reader :codec
78
76
 
79
- # TODO could use msgpack as serialization here, but its API is unpleasant.
80
-
81
77
  class MarshalCodec
82
78
  def encode( obj, io )
83
79
  Marshal.dump obj, io
@@ -90,18 +86,6 @@ class DbPump
90
86
  end
91
87
  end
92
88
 
93
- class MsgPackCodec
94
- def encode( obj, io )
95
- Marshal.dump obj, io
96
- end
97
-
98
- def decode( io, &block )
99
- obj = Marshal.load(io)
100
- yield obj if block_given?
101
- obj
102
- end
103
- end
104
-
105
89
  class YamlCodec
106
90
  def encode( obj, io )
107
91
  YAML.dump obj, io
@@ -203,51 +187,70 @@ class DbPump
203
187
  end
204
188
  end
205
189
 
206
- # TODO possible memory issues here if the rows are big. May need to fork this.
207
- # TODO lazy evaluation
208
- def restore( start_row: 0 )
209
- logger.info "restoring #{table_name}"
210
- # destination db should be same structure as incoming data
211
- column_names = db.schema(table_name.to_sym).map( &:first )
212
- first = ->(row){raise "schema mismatch" if row.size != column_names.size}
190
+ # TODO lazy evaluation / streaming
191
+ # start_row is zero-based
192
+ def restore( start_row: 0, filename: 'io' )
193
+ columns = table_dataset.columns
194
+ logger.info{ "inserting to #{table_name} #{columns.inspect}" }
195
+
196
+ # get the Enumerator
197
+ row_enum = each_row
198
+
199
+ # check that columns match
200
+ raise "schema mismatch" if row_enum.peek.size != columns.size
213
201
 
214
202
  rows_restored = 0
215
203
 
216
- # skip this many rows
217
- start_row.times do
218
- codec.decode( io ) {|row|}
204
+ if start_row != 0
205
+ logger.info{ "skipping #{start_row} rows from #{filename}" }
206
+ start_row.times do |i|
207
+ row_enum.next
208
+ logger.info{ "skipped #{i} from #{filename}" } if i % page_size == 0
209
+ end
210
+ logger.info{ "skipped #{start_row} from #{filename}" }
211
+ rows_restored += start_row
219
212
  end
220
213
 
221
- # copy rows into db
222
- while !io.eof?
223
- # fetch a page of rows
224
- rows_ary = []
225
- begin
226
- page_size.times do |i|
227
- codec.decode( io ) do |row|
228
- rows_ary << row
229
- end
230
- rows_restored += 1
231
- end
232
- rescue EOFError => e
233
- # ran out of rows, so just use the ones we have so far
234
- end
214
+ logger.info{ "inserting to #{table_name} from #{rows_restored}" }
235
215
 
236
- # insert to db. Hopeful db support bulk insert, which Sequel will figure out
216
+ loop do
237
217
  db.transaction do
238
- table_dataset.import column_names, rows_ary
239
- yield rows_restored if block_given?
240
- logger.info "restored #{rows_restored}"
218
+ begin
219
+ page_size.times do
220
+ # This skips all the checks in the Sequel code
221
+ sql = table_dataset.clone( columns: columns, values: row_enum.next ).send( :clause_sql, :insert )
222
+ db.execute sql unless dry_run?
223
+ rows_restored += 1
224
+ end
225
+ rescue StopIteration
226
+ # we reached the end of the input stream.
227
+ # So commit this transaction, and then re-raise
228
+ # StopIteration to get out of the loop{} statement
229
+ db.after_commit{ raise StopIteration }
230
+ end
231
+ logger.info{ "#{table_name} inserted #{rows_restored}" }
241
232
  end
242
233
  end
243
-
234
+ logger.info{ "#{table_name} done. Inserted #{rows_restored}." }
244
235
  rows_restored
245
236
  end
246
237
 
247
- def self.from_bz2( filename, db, table_name, options = {} )
248
- IO.popen( "pbzip2 -d -c #{filename}" ) do |io|
249
- dbpump = DbPump.new db, table_name, io: io
250
- dbpump.restore
# Point this pump at the decompressed contents of filename.
#
# Any io the pump currently holds is closed first, unless it is STDOUT or
# is already closed. io is then set to a pipe reading the output of
# `pbzip2 -d -c filename`. The command is given to IO.popen as an argument
# list, so the filename is handed to pbzip2 as-is, without going through a
# shell (spaces and shell metacharacters in the name are harmless).
#
# Returns the new io.
#
# this doesn't really belong here, but it will do for now.
def open_bz2( filename )
  io.close if io && io != STDOUT && !io.closed?
  self.io = IO.popen( ['pbzip2', '-d', '-c', filename.to_s] )
end
243
+
# Decode rows from io, beginning wherever io is currently positioned, and
# yield each one until io reports eof.
# Called without a block, returns an Enumerator over the same rows.
def each_row
  if block_given?
    until io.eof?
      yield codec.decode( io )
    end
  else
    enum_for( :each_row )
  end
end
249
+
250
+ def insert_sql_each
251
+ return enum_for(__method__) unless block_given?
252
+ each_row do |row|
253
+ yield table_dataset.insert_sql( row )
251
254
  end
252
255
  end
253
256
  end
@@ -1,15 +1,5 @@
1
1
  require 'logger'
2
- require 'wyrm/db_pump'
3
-
4
- class Object
5
- def call_or_self( maybe_callable )
6
- if maybe_callable.respond_to? :call
7
- maybe_callable.call( self )
8
- else
9
- maybe_callable
10
- end
11
- end
12
- end
2
+ require 'wyrm/pump_maker'
13
3
 
14
4
  # Dump a schema and compressed data from a db to a set of files
15
5
  # src_db = Sequel.connect "postgres://localhost:5454/lots"
@@ -17,19 +7,18 @@ end
17
7
  # ds.dump_schema
18
8
  # ds.dump_tables
19
9
  class DumpSchema
10
+ include PumpMaker
11
+
20
12
  def initialize( src_db, container = nil, pump: nil )
21
- src_db.extension :schema_dumper
22
- @src_db = src_db
23
- @container = Pathname(container)
24
- @pump = make_pump( pump )
13
+ @src_db = maybe_deebe src_db
14
+ @container = Pathname.new container
15
+ @pump = make_pump( @src_db, pump )
16
+
17
+ @src_db.extension :schema_dumper
25
18
  end
26
19
 
27
20
  attr_reader :src_db, :container, :pump
28
21
 
29
- def make_pump( pump_thing )
30
- call_or_self(pump_thing) || DbPump.new( src_db, nil )
31
- end
32
-
33
22
  def schema_migration
34
23
  @schema_migration ||= src_db.dump_schema_migration(:indexes=>false, :same_db => same_db)
35
24
  end
@@ -116,11 +105,17 @@ class DumpSchema
116
105
  end
117
106
 
118
107
  def dump_table( table_name )
108
+ pump.table_name = table_name
109
+ if pump.table_dataset.empty?
110
+ logger.info "No records in #{table_name}"
111
+ return
112
+ end
113
+
119
114
  filename = container + "#{table_name}.dbp.bz2"
120
115
  logger.info "dumping #{table_name} to #{filename}"
116
+
121
117
  open_bz2 filename do |zio|
122
118
  # generate the dump
123
- pump.table_name = table_name
124
119
  pump.io = zio
125
120
  pump.dump
126
121
  end
@@ -0,0 +1,28 @@
1
+ require 'wyrm/db_pump'
2
+
# Reopens Object so that any object can resolve an argument that is either
# a plain value or something callable.
class Object
  # When maybe_callable responds to :call, call it with the receiver (self)
  # as its only argument and return the result; otherwise return
  # maybe_callable itself.
  def call_or_self( maybe_callable )
    return maybe_callable unless maybe_callable.respond_to?( :call )

    maybe_callable.call( self )
  end
end
12
+
# Mixin providing helpers for obtaining a pump and a database handle.
module PumpMaker
  # Resolve pump_thing through call_or_self and return the result. When
  # that result is nil or false, return a new DbPump built from db with a
  # nil table name instead.
  def make_pump( db, pump_thing )
    pump = call_or_self( pump_thing )
    pump || DbPump.new( db, nil )
  end

  # Turn db_or_string into a database handle.
  # A String is passed to Sequel.connect; a Sequel::Database is returned
  # unchanged; anything else raises a RuntimeError.
  def maybe_deebe( db_or_string )
    return Sequel.connect( db_or_string ) if db_or_string.is_a?( String )
    return db_or_string if db_or_string.is_a?( Sequel::Database )

    raise "Don't know how to db-ify #{db_or_string.inspect}"
  end
end
@@ -1,5 +1,5 @@
1
1
  require 'logger'
2
- require 'wyrm/db_pump'
2
+ require 'wyrm/pump_maker'
3
3
 
4
4
  # Load a schema from a set of dump files (from DumpSchema)
5
5
  # and restore the table data
@@ -8,15 +8,18 @@ require 'wyrm/db_pump'
8
8
  # rs.create
9
9
  # rs.restore_tables
10
10
  class RestoreSchema
11
- def initialize( dst_db, container )
12
- @container = container
13
- @dst_db = dst_db
14
- @options = {:codec => :marshal}
15
- load_migrations @container
11
+ include PumpMaker
12
+
13
+ def initialize( dst_db, container, pump: nil )
14
+ @container = Pathname.new container
15
+ @dst_db = maybe_deebe dst_db
16
+ @pump = make_pump( @dst_db, pump )
17
+
18
+ load_migrations
16
19
  end
17
20
 
21
+ attr_reader :pump
18
22
  attr_reader :dst_db
19
- attr_reader :options
20
23
  attr_reader :container
21
24
  attr_reader :schema_migration, :index_migration, :fk_migration
22
25
 
@@ -24,7 +27,7 @@ class RestoreSchema
24
27
  @logger ||= Logger.new STDERR
25
28
  end
26
29
 
27
- def load_migrations( container )
30
+ def load_migrations
28
31
  @schema_migration = (container + '001_schema.rb').read
29
32
  @index_migration = (container + '003_indexes.rb').read
30
33
  @fk_migration = (container + '004_foreign_keys.rb').read
@@ -46,19 +49,24 @@ class RestoreSchema
46
49
 
47
50
  # create the destination schema
48
51
  def create
52
+ logger.info "creating tables"
49
53
  eval( schema_migration ).apply dst_db, :up
50
54
  end
51
55
 
52
- def restore_one_table( table_file )
56
+ # assume the table name is the base name of table_file
57
+ def restore_table( table_file )
53
58
  logger.info "restoring from #{table_file}"
54
- table_name = table_file.basename.sub_ext('').sub_ext('').to_s.to_sym
55
- # check if table has been restored already, and has the correct rows,
59
+ pump.table_name = table_file.basename.sub_ext('').sub_ext('').to_s.to_sym
60
+ # TODO check if table has been restored already, and has the correct rows,
56
61
  # otherwise pass in a start row.
57
- DbPump.from_bz2 table_file, dst_db, table_name
62
+ IO.popen( "pbzip2 -d -c #{table_file}" ) do |io|
63
+ pump.io = io
64
+ pump.restore
65
+ end
58
66
  end
59
67
 
60
68
  def restore_tables
61
69
  table_files = Pathname.glob Pathname(container) + '*dbp.bz2'
62
- table_files.sort_by{|tf| tf.stat.size}.each{|table_file| restore_one_table table_file}
70
+ table_files.sort_by{|tf| tf.stat.size}.each{|table_file| restore_table table_file}
63
71
  end
64
72
  end
@@ -1,3 +1,3 @@
1
1
  module Wyrm
2
- VERSION = "0.1.4"
2
+ VERSION = "0.2.0"
3
3
  end
@@ -18,7 +18,7 @@ Gem::Specification.new do |spec|
18
18
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
19
  spec.require_paths = ["lib"]
20
20
 
21
- spec.add_runtime_dependency "sequel"
21
+ spec.add_runtime_dependency 'sequel', '~> 4.0.0'
22
22
  spec.add_runtime_dependency "fastandand"
23
23
 
24
24
  spec.add_development_dependency "bundler", "~> 1.3"
metadata CHANGED
@@ -1,29 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wyrm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - John Anderson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-08-02 00:00:00.000000000 Z
11
+ date: 2013-08-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: sequel
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - '>='
17
+ - - ~>
18
18
  - !ruby/object:Gem::Version
19
- version: '0'
19
+ version: 4.0.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - '>='
24
+ - - ~>
25
25
  - !ruby/object:Gem::Version
26
- version: '0'
26
+ version: 4.0.0
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: fastandand
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -69,7 +69,8 @@ dependencies:
69
69
  description: Transfer from one SQL database to another
70
70
  email:
71
71
  - panic@semiosix.com
72
- executables: []
72
+ executables:
73
+ - wyrm
73
74
  extensions: []
74
75
  extra_rdoc_files: []
75
76
  files:
@@ -79,10 +80,12 @@ files:
79
80
  - LICENSE.txt
80
81
  - README.md
81
82
  - Rakefile
83
+ - bin/wyrm
82
84
  - lib/wyrm.rb
83
85
  - lib/wyrm/db_pump.rb
84
86
  - lib/wyrm/dump_schema.rb
85
87
  - lib/wyrm/other_schema.rb
88
+ - lib/wyrm/pump_maker.rb
86
89
  - lib/wyrm/restore_schema.rb
87
90
  - lib/wyrm/transferer.rb
88
91
  - lib/wyrm/version.rb