wyrm 0.4.1 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: da207f92151b080d31039a364c1a2c50022f01ff
- data.tar.gz: ddf38f48b42597ed08671cb67fadd8457e425e07
+ metadata.gz: c40184e0e1f6175ad0447494ff5bf367c39292db
+ data.tar.gz: c7b927a63887f83ba35b6c3be3c11fb412a2212a
  SHA512:
- metadata.gz: 03e699a00d14fa7baacc286b886cf35074766b7b6b3b8e6e10fde08779ded7fda4930f9666bad95274bf773f8fe33f1916f3836414e98c188c81963b3a01459c
- data.tar.gz: 0052a0b096e62662223f9e4a9da2cfd79e2908f033f83a7a9201463aac3ec9f56407299b68e461b9b850f39ea8c3da5e53c48ad7510561d680b601a38ce739ca
+ metadata.gz: cd762e971e8fb35f4147b4657b5fbb67fb1de1ef26ec4d8ef7af2dac2a9f6532cf8bce4e02587021e261e302e133d6312caad46cf6e06924d3701a25dc8bb2a1
+ data.tar.gz: 7c38e0d0f186e78e58639220b21755b219e85ef15b3acbe8c920e145c70f1715702b4c4fd060abebad767732469296dd855af198349b27be97d70fd419060e47
@@ -1,4 +1,6 @@
  language: ruby
- rvm: []
- # - 2.3 not supported as of 16-Mar-2016. srsly, 3 months after release
+ rvm:
+ - 2.3.0
+ - 2.3.1
+ # - jruby-9.1.0.0 fails because of db drivers
  script: bundle exec rspec spec
data/Gemfile CHANGED
@@ -1,32 +1,24 @@
- raise "You need >= ruby-2.3 (or maybe a Queue with close would work)" unless RUBY_VERSION >= '2.3.0'
+ source 'https://rubygems.org'

- # TODO this is for older versions of bundler
- def from_gemrc
- # auto-load from ~/.gemrc
- home_gemrc = Pathname('~/.gemrc').expand_path
-
- if home_gemrc.exist?
- require 'yaml'
- # use all the sources specified in .gemrc
- YAML.load_file(home_gemrc)[:sources]
- end
- end
-
- # Use the gemrc source if defined, unless CANON is set,
- # otherwise just use the default.
- def preferred_sources
- rv = from_gemrc unless eval(ENV['CANON']||'')
- rv ||= []
- rv << 'http://rubygems.org' if rv.empty?
- rv
- end
-
- preferred_sources.each{|src| source src}
+ raise "You need >= ruby-2.3 for wyrm" unless RUBY_VERSION >= '2.3.0'

  # Specify your gem's dependencies in wyrm.gemspec
  gemspec

- if Pathname('/usr/include/mysql').exist?
- # version is for mysql streaming result sets
- gem "mysql2", '>= 0.3.12'
+ platforms :ruby do
+ gem 'pg'
+ gem 'sequel_pg'
+ gem 'sqlite3'
+ gem 'pry-byebug'
+
+ if Pathname('/usr/include/mysql').exist?
+ # version is for mysql streaming result sets
+ gem "mysql2", '>= 0.3.12'
+ end
+ end
+
+ platforms :jruby do
+ # gem "pg"
+ gem 'jdbc-sqlite3'
+ gem 'jdbc-postgres'
  end
@@ -1,3 +1,7 @@
+ == 0.4.2
+ * special case for jruby closing popen stream
+ * use modules better
+
  == 0.4.1
  * Improve docs and examples
  * make pbzip2 somewhat configurable.
data/README.md CHANGED
@@ -30,6 +30,8 @@ Wyrm because:

  ## Dependencies

+ Ruby >= 2.3.0, for Queue#close
+
  You must have a working
  [pbzip2](http://compression.ca/pbzip2/ "Will use all your cores")
  on your path. If you really have to use something else,
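
For context on the new Ruby >= 2.3.0 requirement above: `Queue#close` lets a producer signal end-of-stream to a consumer thread without pushing a sentinel object. A minimal sketch of the pattern (illustrative only, not taken from the wyrm source):

```ruby
queue = Queue.new

consumer = Thread.new do
  # Once the queue is closed and drained, pop returns nil and the loop ends.
  while (row = queue.pop)
    # ... write row somewhere ...
  end
end

10.times { |i| queue.push(i) }
queue.close    # no more rows coming
consumer.join
```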
@@ -6,104 +6,129 @@ require 'wyrm/schema_tools'
  require 'wyrm/logger'

  # Dump a schema and compressed data from a db to a set of files
- # src_db = Sequel.connect "postgres://localhost:5454/lots"
- # ds = DumpSchema.new src_db, Pathname('/var/data/lots')
- # ds.call
+ #
+ # Dump["postgres://localhost:5454/lots", '/var/data/lots']
+ #
  # TODO possibly use Gem::Package::TarWriter to write tar files
- class Wyrm::Dump
- include Wyrm::PumpMaker
- include Wyrm::SchemaTools
- include Wyrm::Logger
-
- def initialize( src_db, container = nil, pump: nil )
- @container = Pathname.new container || '.'
- raise "#{@container} does not exist" unless @container.exist?
+ module Wyrm
+ class Dump
+ include Wyrm::PumpMaker
+ include Wyrm::SchemaTools
+ include Wyrm::Logger
+
+ def self.[]( *args )
+ new(*args).call
+ end

- @src_db = maybe_deebe src_db
- @pump = make_pump( @src_db, pump )
+ def call
+ dump_schema
+ dump_tables
+ dump_indexes
+ end

- @src_db.extension :schema_dumper
- end
+ def initialize( src_db, container = nil, pump: nil )
+ @container = Pathname.new container || '.'
+ raise "#{@container} does not exist" unless @container.exist?

- attr_reader :src_db, :container, :pump
+ @src_db = maybe_deebe src_db
+ @pump = make_pump( @src_db, pump )

- def same_db; false end
+ @src_db.extension :schema_dumper
+ end

- def numbering
- @numbering ||= '000'
- end
+ attr_reader :src_db, :container, :pump

- def dump_schema
- (container + "#{numbering.next!}_schema.rb").open('w') do |io|
- io.write schema_migration
- end
- end
+ def same_db; false end

- def dump_indexes
- (container + "#{numbering.next!}_indexes.rb").open('w') do |io|
- io.write index_migration
+ def numbering
+ @numbering ||= '000'
  end

- (container + "#{numbering.next!}_foreign_keys.rb").open('w') do |io|
- io.write fk_migration
+ def dump_table_schemas( *tables )
+ (container + "#{numbering.next!}_schema.rb").open('w') do |io|
+ tables.each do |table|
+ logger.debug "schema for #{table}"
+ io.puts table_migration table
+ end
+ end
  end
- end

- def write_through_bz2( pathname )
- fio = pathname.open('w')
- # open subprocess in read-write mode
- zio = IO.popen( STREAM_COMP, 'r+' )
- copier = Thread.new do
- begin
- IO.copy_stream zio, fio
- logger.debug "finished stream copy"
- ensure
- fio.close
+ def dump_schema
+ (container + "#{numbering.next!}_schema.rb").open('w') do |io|
+ io.write schema_migration
  end
  end

- yield zio
+ def dump_indexes
+ (container + "#{numbering.next!}_indexes.rb").open('w') do |io|
+ io.write index_migration
+ end

- # signal the copier thread to stop
- zio.close_write
- logger.debug 'finished dumping'
+ (container + "#{numbering.next!}_foreign_keys.rb").open('w') do |io|
+ io.write fk_migration
+ end
+ end

- # wait for copier thread to finish
- copier.join
- logger.debug 'stream copy thread finished'
- ensure
- zio.close unless zio.closed?
- fio.close unless fio.closed?
- end
+ def write_through_bz2( pathname )
+ fio = pathname.open('w')
+ # open subprocess in read-write mode
+ zio = IO.popen( STREAM_COMP, 'r+' )
+ copier = Thread.new do
+ begin
+ IO.copy_stream zio, fio
+ logger.debug "finished stream copy"
+ ensure
+ fio.close
+ end
+ end

- def dump_table( table_name, &io_block )
- pump.table_name = table_name
- if pump.table_dataset.empty?
- logger.info "No records in #{table_name}"
- return
+ # block receiving zio will write to it.
+ yield zio
+
+ # signal the copier thread to stop
+ logger.debug 'flushing'
+ if RUBY_ENGINE == 'jruby'
+ # seems to be required for jruby, at least 9.1.2.0
+ logger.debug 'jruby flushing'
+ zio.flush
+ logger.debug 'jruby close'
+ zio.close
+ else
+ zio.close_write
+ end
+ logger.debug 'finished dumping'
+
+ # wait for copier thread to finish
+ copier.join
+ logger.debug 'stream copy thread finished'
+ ensure
+ zio.close if zio && !zio.closed?
+ fio.close if fio && !fio.closed?
  end

- filename = container + "#{table_name}.dbp.bz2"
- logger.info "dumping #{table_name} to #{filename}"
+ def dump_table( table_name, &io_block )
+ pump.table_name = table_name
+ if pump.table_dataset.empty?
+ logger.info "No records in #{table_name}"
+ return
+ end

- write_through_bz2 filename do |zio|
- # generate the dump
- pump.io = zio
- pump.dump
- end
- rescue
- logger.error "failed dumping #{table_name}: #{$!.message}"
- end
+ filename = container + "#{table_name}.dbp.bz2"
+ logger.info "dumping #{table_name} to #{filename}"

- def dump_tables
- src_db.tables.each do |table_name|
- dump_table table_name
+ write_through_bz2 filename do |zio|
+ # generate the dump
+ pump.io = zio
+ pump.dump
+ end
+ rescue
+ logger.error "failed dumping #{table_name}: #{$!.message}"
  end
- end

- def call
- dump_schema
- dump_tables
- dump_indexes
+ def dump_tables
+ src_db.tables.each do |table_name|
+ dump_table table_name
+ end
+ end
  end
  end
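
For reference, the `Dump.[]` convenience method added above wraps `new(*args).call`, so a whole dump (schema, compressed table data, then indexes and foreign keys) can be kicked off in one line. A usage sketch based on the doc comment in the diff (the require path is an assumption):

```ruby
# require path assumed; adjust to however the gem is loaded in your setup
require 'wyrm/dump'

# Equivalent to Wyrm::Dump.new(src, dest).call, which runs
# dump_schema, dump_tables and dump_indexes in order.
Wyrm::Dump['postgres://localhost:5454/lots', '/var/data/lots']
```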
@@ -4,7 +4,7 @@ module Wyrm
  module Logger
  def logger
  @logger ||= ::Logger.new( STDERR ).tap do |lgr|
- lgr.level = ::Logger::INFO
+ lgr.level = ::Logger::DEBUG
  end
  end
  end
@@ -8,304 +8,306 @@ require 'wyrm/module'
  # TODO need to version the dumps, or something like that.
  # TODO looks like io should belong to codec. Hmm. Not sure.
  # TODO table_name table_dataset need some thinking about. Dataset would encapsulate both. But couldn't change db then, and primary_keys would be hard.
- class Wyrm::Pump
- def initialize( db: nil, table_name: nil, io: STDOUT, codec: :marshal, page_size: 10000, dry_run: false, logger: nil )
- self.codec = codec
- self.db = db
- self.table_name = table_name
- self.io = io
- self.page_size = page_size
- self.dry_run = dry_run
- self.logger = logger
- yield self if block_given?
- end
+ module Wyrm
+ class Pump
+ def initialize( db: nil, table_name: nil, io: STDOUT, codec: :marshal, page_size: 10000, dry_run: false, logger: nil )
+ self.codec = codec
+ self.db = db
+ self.table_name = table_name
+ self.io = io
+ self.page_size = page_size
+ self.dry_run = dry_run
+ self.logger = logger
+ yield self if block_given?
+ end

- include Wyrm::Logger
- attr_writer :logger
+ include Wyrm::Logger
+ attr_writer :logger

- attr_accessor :io, :page_size, :dry_run
- def dry_run?; dry_run; end
+ attr_accessor :io, :page_size, :dry_run
+ def dry_run?; dry_run; end

- # These are affected by cached values
- attr_reader :db, :table_name
+ # These are affected by cached values
+ attr_reader :db, :table_name

- def invalidate_cached_members
- @primary_keys = nil
- @table_dataset = nil
- end
+ def invalidate_cached_members
+ @primary_keys = nil
+ @table_dataset = nil
+ end

- def table_name=( name_sym )
- invalidate_cached_members
- @table_name = name_sym
- end
+ def table_name=( name_sym )
+ invalidate_cached_members
+ @table_name = name_sym
+ end

- def db=( other_db )
- invalidate_cached_members
+ def db=( other_db )
+ invalidate_cached_members

- @db = other_db
- return unless other_db
+ @db = other_db
+ return unless other_db

- # add extensions
- @db.extension :pagination
+ # add extensions
+ @db.extension :pagination

- # turn on postgres streaming if available
- # also gets called for non-postgres dbs, but that seems to be fine.
- if defined?( Sequel::Postgres ) && @db.is_a?(Sequel::Postgres::Database) && defined?(Sequel::Postgres.supports_streaming?) && Sequel::Postgres.supports_streaming?
- @db.extension :pg_streaming
- logger.info "Streaming for #{@db.uri}"
- else
- logger.info "No streaming for #{@db.uri}"
+ # turn on postgres streaming if available
+ # also gets called for non-postgres dbs, but that seems to be fine.
+ if defined?( Sequel::Postgres::Database ) && @db.is_a?(Sequel::Postgres::Database) && defined?(Sequel::Postgres.supports_streaming?) && Sequel::Postgres.supports_streaming?
+ @db.extension :pg_streaming
+ logger.info "Streaming for #{@db.uri}"
+ else
+ logger.info "No streaming for #{@db.uri}"
+ end
  end
- end

- # return an object that responds to ===
- # which returns true if ==='s parameter
- # responds to all the methods
- def self.quacks_like( *methods )
- @quacks_like ||= {}
- @quacks_like[methods] ||= lambda do |inst|
- methods.all?{|m| inst.respond_to? m}
+ # return an object that responds to ===
+ # which returns true if ==='s parameter
+ # responds to all the methods
+ def self.quacks_like( *methods )
+ @quacks_like ||= {}
+ @quacks_like[methods] ||= lambda do |inst|
+ methods.all?{|m| inst.respond_to? m}
+ end
  end
- end

- def quacks_like( *methods )
- self.class.quacks_like( *methods )
- end
+ def quacks_like( *methods )
+ self.class.quacks_like( *methods )
+ end

- def codec=( codec_thing )
- @codec =
- case codec_thing
- when :yaml; YamlCodec.new
- when :marshal; MarshalCodec.new
- when Class
- codec_thing.new
- when quacks_like(:encode,:decode)
- codec_thing
- else
- raise "unknown codec #{codec_thing.inspect}"
+ def codec=( codec_thing )
+ @codec =
+ case codec_thing
+ when :yaml; YamlCodec.new
+ when :marshal; MarshalCodec.new
+ when Class
+ codec_thing.new
+ when quacks_like(:encode,:decode)
+ codec_thing
+ else
+ raise "unknown codec #{codec_thing.inspect}"
+ end
  end
- end

- attr_reader :codec
+ attr_reader :codec

- class MarshalCodec
- def encode( obj, io )
- Marshal.dump obj, io
- end
+ class MarshalCodec
+ def encode( obj, io )
+ Marshal.dump obj, io
+ end

- def decode( io, &block )
- obj = Marshal.load(io)
- yield obj if block_given?
- obj
+ def decode( io, &block )
+ obj = Marshal.load(io)
+ yield obj if block_given?
+ obj
+ end
  end
- end

- class YamlCodec
- def encode( obj, io )
- YAML.dump obj, io
- end
+ class YamlCodec
+ def encode( obj, io )
+ YAML.dump obj, io
+ end

- def decode( io, &block )
- obj = YAML.load(io)
- yield obj if block_given?
- obj
+ def decode( io, &block )
+ obj = YAML.load(io)
+ yield obj if block_given?
+ obj
+ end
  end
- end

- def primary_keys
- # each_with_object([]){...} is only faster for < 3 items in 100000
- @primary_keys ||= db.schema(table_name).map{|name,column_info| name if column_info[:primary_key]}.compact
- end
+ def primary_keys
+ # each_with_object([]){...} is only faster for < 3 items in 100000
+ @primary_keys ||= db.schema(table_name).map{|name,column_info| name if column_info[:primary_key]}.compact
+ end

- def table_dataset
- @table_dataset ||= db[table_name.to_sym]
- end
+ def table_dataset
+ @table_dataset ||= db[table_name.to_sym]
+ end

- # Use limit / offset. Last fallback if there are no keys (or a compound primary key?).
- def paginated_dump( &encode_block )
- records_count = 0
- table_dataset.order(*primary_keys).each_page(page_size) do |page|
- logger.info "#{__method__} #{table_name} #{records_count}"
- logger.debug page.sql
- page.each &encode_block
- records_count += page_size
+ # Use limit / offset. Last fallback if there are no keys (or a compound primary key?).
+ def paginated_dump( &encode_block )
+ records_count = 0
+ table_dataset.order(*primary_keys).each_page(page_size) do |page|
+ logger.info "#{__method__} #{table_name} #{records_count}"
+ logger.debug page.sql
+ page.each &encode_block
+ records_count += page_size
+ end
  end
- end

- # Use limit / offset, but not for all fields.
- # The idea is that large offsets are expensive in the db because the db server has to read
- # through the data set to reach the required offset. So make that only ids need to be read,
- # and then do the main select from the limited id list.
- # select * from massive as full
- # inner join (select id from massive order by whatever limit m, n) limit
- # on full.id = limit.id
- # order by full.whatever
- # http://www.numerati.com/2012/06/26/reading-large-result-sets-with-hibernate-and-mysql/
- def inner_dump( &encode_block )
- # could possibly overrride Dataset#paginate(page_no, page_size, record_count=nil)
- on_conditions = primary_keys.map{|f| [f,f]}.to_h
- (0..table_dataset.count).step(page_size).each do |offset|
- limit_dataset = table_dataset.select( *primary_keys ).limit( page_size, offset ).order( *primary_keys )
- page = table_dataset.join( limit_dataset, on_conditions ).order( *primary_keys ).qualify(table_name)
- logger.info "#{__method__} #{table_name} #{offset}"
- logger.debug page.sql
- page.each &encode_block
+ # Use limit / offset, but not for all fields.
+ # The idea is that large offsets are expensive in the db because the db server has to read
+ # through the data set to reach the required offset. So make that only ids need to be read,
+ # and then do the main select from the limited id list.
+ # select * from massive as full
+ # inner join (select id from massive order by whatever limit m, n) limit
+ # on full.id = limit.id
+ # order by full.whatever
+ # http://www.numerati.com/2012/06/26/reading-large-result-sets-with-hibernate-and-mysql/
+ def inner_dump( &encode_block )
+ # could possibly overrride Dataset#paginate(page_no, page_size, record_count=nil)
+ on_conditions = primary_keys.map{|f| [f,f]}.to_h
+ (0..table_dataset.count).step(page_size).each do |offset|
+ limit_dataset = table_dataset.select( *primary_keys ).limit( page_size, offset ).order( *primary_keys )
+ page = table_dataset.join( limit_dataset, on_conditions ).order( *primary_keys ).qualify(table_name)
+ logger.info "#{__method__} #{table_name} #{offset}"
+ logger.debug page.sql
+ page.each &encode_block
+ end
  end
- end

- # Selects pages by a range of ids, using >= and <.
- # Use this for integer pks
- def min_max_dump( &encode_block )
- # select max(id), min(id) from table
- # and then split that up into 10000 size chunks.
- # Not really important if there aren't exactly 10000
- min, max = table_dataset.select{[min(id), max(id)]}.first.values
- return unless min && max
-
- # will always include the last item because page_size will be
- # bigger than max for the last page
- (min..max).step(page_size).each do |offset|
- page = table_dataset.where( id: offset...(offset + page_size) )
- logger.info "#{__method__} #{table_name} #{offset}"
- logger.debug page.sql
- page.each &encode_block
+ # Selects pages by a range of ids, using >= and <.
+ # Use this for integer pks
+ def min_max_dump( &encode_block )
+ # select max(id), min(id) from table
+ # and then split that up into 10000 size chunks.
+ # Not really important if there aren't exactly 10000
+ min, max = table_dataset.select{[min(id), max(id)]}.first.values
+ return unless min && max
+
+ # will always include the last item because page_size will be
+ # bigger than max for the last page
+ (min..max).step(page_size).each do |offset|
+ page = table_dataset.where( id: offset...(offset + page_size) )
+ logger.info "#{__method__} #{table_name} #{offset}"
+ logger.debug page.sql
+ page.each &encode_block
+ end
  end
- end

- def stream_dump( &encode_block )
- logger.info "using result set streaming"
-
- # I want to output progress every page_size records,
- # without doing a records_count % page_size every iteration.
- # So define an external enumerator
- # TODO should really performance test the options here.
- records_count = 0
- enum = table_dataset.stream.enum_for
- loop do
- begin
- page_size.times do
- encode_block.call enum.next
- records_count += 1
+ def stream_dump( &encode_block )
+ logger.info "using result set streaming"
+
+ # I want to output progress every page_size records,
+ # without doing a records_count % page_size every iteration.
+ # So define an external enumerator
+ # TODO should really performance test the options here.
+ records_count = 0
+ enum = table_dataset.stream.enum_for
+ loop do
+ begin
+ page_size.times do
+ encode_block.call enum.next
+ records_count += 1
+ end
+ ensure
+ logger.info "#{__method__} #{table_name} #{records_count}" if records_count < page_size
+ logger.debug " #{records_count} from #{table_dataset.sql}"
  end
- ensure
- logger.info "#{__method__} #{table_name} #{records_count}" if records_count < page_size
- logger.debug " #{records_count} from #{table_dataset.sql}"
  end
  end
- end

- # Dump the serialization of the table to the specified io.
- #
- # TODO need to also dump a first row containing useful stuff:
- # - source table name
- # - number of rows
- # - source db url
- # - permissions?
- # These should all be in one object that can be Marshall.load-ed easily.
- #
- # TODO could speed this up by have a query thread which runs the next page-query while
- # the current one is being written/compressed.
- def dump
- _dump do |row|
- codec.encode( row.values, io ) unless dry_run?
+ # Dump the serialization of the table to the specified io.
+ #
+ # TODO need to also dump a first row containing useful stuff:
+ # - source table name
+ # - number of rows
+ # - source db url
+ # - permissions?
+ # These should all be in one object that can be Marshall.load-ed easily.
+ #
+ # TODO could speed this up by have a query thread which runs the next page-query while
+ # the current one is being written/compressed.
+ def dump
+ _dump do |row|
+ codec.encode( row.values, io ) unless dry_run?
+ end
+ ensure
+ io.flush
  end
- ensure
- io.flush
- end

- # decide which kind of paged iteration will be best for this table.
- # Return an iterator, or yield row hashes to the block
- def _dump( &encode_block )
- return enum_for(__method__) unless block_given?
- case
- when table_dataset.respond_to?( :stream )
- stream_dump &encode_block
+ # decide which kind of paged iteration will be best for this table.
+ # Return an iterator, or yield row hashes to the block
+ def _dump( &encode_block )
+ return enum_for(__method__) unless block_given?
+ case
+ when table_dataset.respond_to?( :stream )
+ stream_dump &encode_block

- when primary_keys.empty?
- paginated_dump &encode_block
+ when primary_keys.empty?
+ paginated_dump &encode_block

- when primary_keys.all?{|i| i == :id }
- min_max_dump &encode_block
+ when primary_keys.all?{|i| i == :id }
+ min_max_dump &encode_block

- else
- inner_dump &encode_block
+ else
+ inner_dump &encode_block
+ end
  end
- end

- def dump_matches_columns?( row_enum, columns )
- raise "schema mismatch" unless row_enum.peek.size == columns.size
- true
- rescue StopIteration
- # peek threw a StopIteration, so there's no data
- false
- end
+ def dump_matches_columns?( row_enum, columns )
+ raise "schema mismatch" unless row_enum.peek.size == columns.size
+ true
+ rescue StopIteration
+ # peek threw a StopIteration, so there's no data
+ false
+ end

- # start_row is zero-based
- #
- # TODO don't generate the full insert, ie leave out the fields
- # because we've already checked that the columns and the table
- # match.
- # TODO generate column names in insert, they might still work
- # if columns have been added to the db, but not the dump.
- def restore( start_row: 0, filename: 'io' )
- columns = table_dataset.columns
- row_enum = each_row
-
- return unless dump_matches_columns?( row_enum, columns )
-
- logger.info "#{__method__} inserting to #{table_name} from #{start_row}"
- logger.debug " #{columns.inspect}"
- rows_restored = 0
-
- if start_row != 0
- logger.debug{ "skipping #{start_row} rows from #{filename}" }
- start_row.times do |i|
- row_enum.next
- logger.debug{ "skipped #{i} from #{filename}" } if i % page_size == 0
+ # start_row is zero-based
+ #
+ # TODO don't generate the full insert, ie leave out the fields
+ # because we've already checked that the columns and the table
+ # match.
+ # TODO generate column names in insert, they might still work
+ # if columns have been added to the db, but not the dump.
+ def restore( start_row: 0, filename: 'io' )
+ columns = table_dataset.columns
+ row_enum = each_row
+
+ return unless dump_matches_columns?( row_enum, columns )
+
+ logger.info "#{__method__} inserting to #{table_name} from #{start_row}"
+ logger.debug " #{columns.inspect}"
+ rows_restored = 0
+
+ if start_row != 0
+ logger.debug{ "skipping #{start_row} rows from #{filename}" }
+ start_row.times do |i|
+ row_enum.next
+ logger.debug{ "skipped #{i} from #{filename}" } if i % page_size == 0
+ end
+ logger.debug{ "skipped #{start_row} from #{filename}" }
+ rows_restored += start_row
  end
- logger.debug{ "skipped #{start_row} from #{filename}" }
- rows_restored += start_row
- end

- loop do
- db.transaction do
- begin
- page_size.times do
- # This skips all the checks in the Sequel code. Basically we want
- # to generate the
- # insert into (field1,field2) values (value1,value2)
- # statement as quickly as possible.
- #
- # Uses a private method so it will need to be updated repeatedly.
- sql = table_dataset.clone( columns: columns, values: row_enum.next ).send(:_insert_sql)
- db.execute sql unless dry_run?
- rows_restored += 1
+ loop do
+ db.transaction do
+ begin
+ page_size.times do
+ # This skips all the checks in the Sequel code. Basically we want
+ # to generate the
+ # insert into (field1,field2) values (value1,value2)
+ # statement as quickly as possible.
+ #
+ # Uses a private method so it will need to be updated repeatedly.
+ sql = table_dataset.clone( columns: columns, values: row_enum.next ).send(:_insert_sql)
+ db.execute sql unless dry_run?
+ rows_restored += 1
+ end
+ rescue StopIteration
+ # reached the end of the inout stream.
+ # So commit this transaction, and then re-raise
+ # StopIteration to get out of the loop{} statement
+ db.after_commit{ raise StopIteration }
  end
- rescue StopIteration
- # reached the end of the inout stream.
- # So commit this transaction, and then re-raise
- # StopIteration to get out of the loop{} statement
- db.after_commit{ raise StopIteration }
  end
  end
+ logger.info "#{__method__} #{table_name} done. Inserted #{rows_restored}."
+ rows_restored
  end
- logger.info "#{__method__} #{table_name} done. Inserted #{rows_restored}."
- rows_restored
- end

- # Enumerate through the given io at its current position.
- # Can raise StopIteration (ie when eof is not detected)
- # MAYBE don't check for io.eof here, leave that to the codec
- def each_row
- return enum_for(__method__) unless block_given?
- yield codec.decode( io ) until io.eof?
- end
+ # Enumerate through the given io at its current position.
+ # Can raise StopIteration (ie when eof is not detected)
+ # MAYBE don't check for io.eof here, leave that to the codec
+ def each_row
+ return enum_for(__method__) unless block_given?
+ yield codec.decode( io ) until io.eof?
+ end

- # Enumerate sql insert statements from the dump
- def insert_sql_each
- return enum_for(__method__) unless block_given?
- each_row do |row|
- yield table_dataset.insert_sql( row )
+ # Enumerate sql insert statements from the dump
+ def insert_sql_each
+ return enum_for(__method__) unless block_given?
+ each_row do |row|
+ yield table_dataset.insert_sql( row )
+ end
  end
  end
  end
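
As the `codec=` setter above shows, `Wyrm::Pump` accepts `:marshal`, `:yaml`, a codec class, or any object that `quacks_like(:encode, :decode)`. A hypothetical custom codec to illustrate the duck-typed interface (`JsonCodec` is not part of wyrm; `src_db` stands for a Sequel connection):

```ruby
require 'json'

# Hypothetical codec: anything responding to encode(obj, io) and
# decode(io) satisfies the quacks_like(:encode, :decode) check.
class JsonCodec
  def encode( obj, io )
    io.puts JSON.generate(obj)
  end

  def decode( io, &block )
    obj = JSON.parse(io.readline)
    yield obj if block_given?
    obj
  end
end

# Passing the Class hits the `when Class` branch; passing an instance
# would match the quacks_like(:encode, :decode) branch instead.
pump = Wyrm::Pump.new( db: src_db, table_name: :users, codec: JsonCodec )
```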