wyrm 0.4.1 → 0.4.2
- checksums.yaml +4 -4
- data/.travis.yml +4 -2
- data/Gemfile +18 -26
- data/History.txt +4 -0
- data/README.md +2 -0
- data/lib/wyrm/dump.rb +100 -75
- data/lib/wyrm/logger.rb +1 -1
- data/lib/wyrm/pump.rb +249 -247
- data/lib/wyrm/pump_maker.rb +23 -22
- data/lib/wyrm/restore.rb +90 -76
- data/lib/wyrm/schema_tools.rb +69 -62
- data/lib/wyrm/version.rb +1 -1
- data/spec/pump_spec.rb +7 -4
- data/spec/rspec_syntax.rb +22 -0
- data/spec/schema_tools_spec.rb +7 -6
- data/wyrm.gemspec +0 -4
- metadata +2 -58
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: c40184e0e1f6175ad0447494ff5bf367c39292db
+  data.tar.gz: c7b927a63887f83ba35b6c3be3c11fb412a2212a
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: cd762e971e8fb35f4147b4657b5fbb67fb1de1ef26ec4d8ef7af2dac2a9f6532cf8bce4e02587021e261e302e133d6312caad46cf6e06924d3701a25dc8bb2a1
+  data.tar.gz: 7c38e0d0f186e78e58639220b21755b219e85ef15b3acbe8c920e145c70f1715702b4c4fd060abebad767732469296dd855af198349b27be97d70fd419060e47
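A quick way to check the archive members against the digests above is Ruby's standard Digest library; a generic sketch, with local file paths as placeholders for the metadata.gz and data.tar.gz entries unpacked from the .gem:

require 'digest'

# print digests in the same form as checksums.yaml
puts Digest::SHA1.file('metadata.gz').hexdigest
puts Digest::SHA512.file('data.tar.gz').hexdigest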
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
@@ -1,32 +1,24 @@
-
+source 'https://rubygems.org'
 
-
-def from_gemrc
-  # auto-load from ~/.gemrc
-  home_gemrc = Pathname('~/.gemrc').expand_path
-
-  if home_gemrc.exist?
-    require 'yaml'
-    # use all the sources specified in .gemrc
-    YAML.load_file(home_gemrc)[:sources]
-  end
-end
-
-# Use the gemrc source if defined, unless CANON is set,
-# otherwise just use the default.
-def preferred_sources
-  rv = from_gemrc unless eval(ENV['CANON']||'')
-  rv ||= []
-  rv << 'http://rubygems.org' if rv.empty?
-  rv
-end
-
-preferred_sources.each{|src| source src}
+raise "You need >= ruby-2.3 for wyrm" unless RUBY_VERSION >= '2.3.0'
 
 # Specify your gem's dependencies in wyrm.gemspec
 gemspec
 
-
-
-gem
+platforms :ruby do
+  gem 'pg'
+  gem 'sequel_pg'
+  gem 'sqlite3'
+  gem 'pry-byebug'
+
+  if Pathname('/usr/include/mysql').exist?
+    # version is for mysql streaming result sets
+    gem "mysql2", '>= 0.3.12'
+  end
+end
+
+platforms :jruby do
+  # gem "pg"
+  gem 'jdbc-sqlite3'
+  gem 'jdbc-postgres'
 end
data/History.txt
CHANGED
data/README.md
CHANGED
data/lib/wyrm/dump.rb
CHANGED
@@ -6,104 +6,129 @@ require 'wyrm/schema_tools'
 require 'wyrm/logger'
 
 # Dump a schema and compressed data from a db to a set of files
-#
-#
-#
+#
+# Dump["postgres://localhost:5454/lots", '/var/data/lots']
+#
 # TODO possibly use Gem::Package::TarWriter to write tar files
-
-
-
-
-
-
-
-
+module Wyrm
+  class Dump
+    include Wyrm::PumpMaker
+    include Wyrm::SchemaTools
+    include Wyrm::Logger
+
+    def self.[]( *args )
+      new(*args).call
+    end
 
-
-
+    def call
+      dump_schema
+      dump_tables
+      dump_indexes
+    end
 
-
-
+    def initialize( src_db, container = nil, pump: nil )
+      @container = Pathname.new container || '.'
+      raise "#{@container} does not exist" unless @container.exist?
 
-
+      @src_db = maybe_deebe src_db
+      @pump = make_pump( @src_db, pump )
 
-
+      @src_db.extension :schema_dumper
+    end
 
-
-      @numbering ||= '000'
-    end
+    attr_reader :src_db, :container, :pump
 
-
-    (container + "#{numbering.next!}_schema.rb").open('w') do |io|
-      io.write schema_migration
-    end
-    end
+    def same_db; false end
 
-
-
-      io.write index_migration
+    def numbering
+      @numbering ||= '000'
     end
 
-    (
-
+    def dump_table_schemas( *tables )
+      (container + "#{numbering.next!}_schema.rb").open('w') do |io|
+        tables.each do |table|
+          logger.debug "schema for #{table}"
+          io.puts table_migration table
+        end
+      end
     end
-    end
 
-
-
-
-      zio = IO.popen( STREAM_COMP, 'r+' )
-      copier = Thread.new do
-        begin
-          IO.copy_stream zio, fio
-          logger.debug "finished stream copy"
-        ensure
-          fio.close
+    def dump_schema
+      (container + "#{numbering.next!}_schema.rb").open('w') do |io|
+        io.write schema_migration
       end
     end
 
-
+    def dump_indexes
+      (container + "#{numbering.next!}_indexes.rb").open('w') do |io|
+        io.write index_migration
+      end
 
-
-
-
+      (container + "#{numbering.next!}_foreign_keys.rb").open('w') do |io|
+        io.write fk_migration
+      end
+    end
 
-
-
-
-
-
-
-
+    def write_through_bz2( pathname )
+      fio = pathname.open('w')
+      # open subprocess in read-write mode
+      zio = IO.popen( STREAM_COMP, 'r+' )
+      copier = Thread.new do
+        begin
+          IO.copy_stream zio, fio
+          logger.debug "finished stream copy"
+        ensure
+          fio.close
+        end
+      end
 
-
-
-
-
-
+      # block receiving zio will write to it.
+      yield zio
+
+      # signal the copier thread to stop
+      logger.debug 'flushing'
+      if RUBY_ENGINE == 'jruby'
+        # seems to be required for jruby, at least 9.1.2.0
+        logger.debug 'jruby flushing'
+        zio.flush
+        logger.debug 'jruby close'
+        zio.close
+      else
+        zio.close_write
+      end
+      logger.debug 'finished dumping'
+
+      # wait for copier thread to finish
+      copier.join
+      logger.debug 'stream copy thread finished'
+    ensure
+      zio.close if zio && !zio.closed?
+      fio.close if fio && !fio.closed?
     end
 
-
-
+    def dump_table( table_name, &io_block )
+      pump.table_name = table_name
+      if pump.table_dataset.empty?
+        logger.info "No records in #{table_name}"
+        return
+      end
 
-
-      #
-      pump.io = zio
-      pump.dump
-    end
-    rescue
-      logger.error "failed dumping #{table_name}: #{$!.message}"
-    end
+      filename = container + "#{table_name}.dbp.bz2"
+      logger.info "dumping #{table_name} to #{filename}"
 
-
-
-
+      write_through_bz2 filename do |zio|
+        # generate the dump
+        pump.io = zio
+        pump.dump
+      end
+    rescue
+      logger.error "failed dumping #{table_name}: #{$!.message}"
     end
-    end
 
-
-
-
-
+    def dump_tables
+      src_db.tables.each do |table_name|
+        dump_table table_name
+      end
+    end
   end
 end
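The reworked Dump API is driven exactly as the added comment shows; a minimal sketch, assuming the gem is installed and treating the URL and output directory as placeholders:

require 'wyrm/dump'

# Dump[...] is shorthand for Dump.new(...).call, which runs dump_schema,
# dump_tables and dump_indexes in order: numbered schema, index and
# foreign-key migration files, plus one .dbp.bz2 file per non-empty table.
Wyrm::Dump['postgres://localhost:5454/lots', '/var/data/lots']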
data/lib/wyrm/logger.rb
CHANGED
data/lib/wyrm/pump.rb
CHANGED
@@ -8,304 +8,306 @@ require 'wyrm/module'
 # TODO need to version the dumps, or something like that.
 # TODO looks like io should belong to codec. Hmm. Not sure.
 # TODO table_name table_dataset need some thinking about. Dataset would encapsulate both. But couldn't change db then, and primary_keys would be hard.
-
-
-
-
-
-
-
-
-
-
-
+module Wyrm
+  class Pump
+    def initialize( db: nil, table_name: nil, io: STDOUT, codec: :marshal, page_size: 10000, dry_run: false, logger: nil )
+      self.codec = codec
+      self.db = db
+      self.table_name = table_name
+      self.io = io
+      self.page_size = page_size
+      self.dry_run = dry_run
+      self.logger = logger
+      yield self if block_given?
+    end
 
-
-
+    include Wyrm::Logger
+    attr_writer :logger
 
-
-
+    attr_accessor :io, :page_size, :dry_run
+    def dry_run?; dry_run; end
 
-
-
+    # These are affected by cached values
+    attr_reader :db, :table_name
 
-
-
-
-
+    def invalidate_cached_members
+      @primary_keys = nil
+      @table_dataset = nil
+    end
 
-
-
-
-
+    def table_name=( name_sym )
+      invalidate_cached_members
+      @table_name = name_sym
+    end
 
-
-
+    def db=( other_db )
+      invalidate_cached_members
 
-
-
+      @db = other_db
+      return unless other_db
 
-
-
+      # add extensions
+      @db.extension :pagination
 
-
-
-
-
-
-
-
+      # turn on postgres streaming if available
+      # also gets called for non-postgres dbs, but that seems to be fine.
+      if defined?( Sequel::Postgres::Database ) && @db.is_a?(Sequel::Postgres::Database) && defined?(Sequel::Postgres.supports_streaming?) && Sequel::Postgres.supports_streaming?
+        @db.extension :pg_streaming
+        logger.info "Streaming for #{@db.uri}"
+      else
+        logger.info "No streaming for #{@db.uri}"
+      end
     end
-    end
 
-
-
-
-
-
-
-
+    # return an object that responds to ===
+    # which returns true if ==='s parameter
+    # responds to all the methods
+    def self.quacks_like( *methods )
+      @quacks_like ||= {}
+      @quacks_like[methods] ||= lambda do |inst|
+        methods.all?{|m| inst.respond_to? m}
+      end
     end
-    end
 
-
-
-
+    def quacks_like( *methods )
+      self.class.quacks_like( *methods )
+    end
 
-
-
-
-
-
-
-
-
-
-
-
+    def codec=( codec_thing )
+      @codec =
+      case codec_thing
+      when :yaml; YamlCodec.new
+      when :marshal; MarshalCodec.new
+      when Class
+        codec_thing.new
+      when quacks_like(:encode,:decode)
+        codec_thing
+      else
+        raise "unknown codec #{codec_thing.inspect}"
+      end
    end
-    end
 
-
+    attr_reader :codec
 
-
-
-
-
+    class MarshalCodec
+      def encode( obj, io )
+        Marshal.dump obj, io
+      end
 
-
-
-
-
+      def decode( io, &block )
+        obj = Marshal.load(io)
+        yield obj if block_given?
+        obj
+      end
     end
-    end
 
-
-
-
-
+    class YamlCodec
+      def encode( obj, io )
+        YAML.dump obj, io
+      end
 
-
-
-
-
+      def decode( io, &block )
+        obj = YAML.load(io)
+        yield obj if block_given?
+        obj
+      end
     end
-    end
 
-
-
-
-
+    def primary_keys
+      # each_with_object([]){...} is only faster for < 3 items in 100000
+      @primary_keys ||= db.schema(table_name).map{|name,column_info| name if column_info[:primary_key]}.compact
+    end
 
-
-
-
+    def table_dataset
+      @table_dataset ||= db[table_name.to_sym]
+    end
 
-
-
-
-
-
-
-
-
+    # Use limit / offset. Last fallback if there are no keys (or a compound primary key?).
+    def paginated_dump( &encode_block )
+      records_count = 0
+      table_dataset.order(*primary_keys).each_page(page_size) do |page|
+        logger.info "#{__method__} #{table_name} #{records_count}"
+        logger.debug page.sql
+        page.each &encode_block
+        records_count += page_size
+      end
     end
-    end
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    # Use limit / offset, but not for all fields.
+    # The idea is that large offsets are expensive in the db because the db server has to read
+    # through the data set to reach the required offset. So make that only ids need to be read,
+    # and then do the main select from the limited id list.
+    # select * from massive as full
+    # inner join (select id from massive order by whatever limit m, n) limit
+    # on full.id = limit.id
+    # order by full.whatever
+    # http://www.numerati.com/2012/06/26/reading-large-result-sets-with-hibernate-and-mysql/
+    def inner_dump( &encode_block )
+      # could possibly overrride Dataset#paginate(page_no, page_size, record_count=nil)
+      on_conditions = primary_keys.map{|f| [f,f]}.to_h
+      (0..table_dataset.count).step(page_size).each do |offset|
+        limit_dataset = table_dataset.select( *primary_keys ).limit( page_size, offset ).order( *primary_keys )
+        page = table_dataset.join( limit_dataset, on_conditions ).order( *primary_keys ).qualify(table_name)
+        logger.info "#{__method__} #{table_name} #{offset}"
+        logger.debug page.sql
+        page.each &encode_block
+      end
     end
-    end
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    # Selects pages by a range of ids, using >= and <.
+    # Use this for integer pks
+    def min_max_dump( &encode_block )
+      # select max(id), min(id) from table
+      # and then split that up into 10000 size chunks.
+      # Not really important if there aren't exactly 10000
+      min, max = table_dataset.select{[min(id), max(id)]}.first.values
+      return unless min && max
+
+      # will always include the last item because page_size will be
+      # bigger than max for the last page
+      (min..max).step(page_size).each do |offset|
+        page = table_dataset.where( id: offset...(offset + page_size) )
+        logger.info "#{__method__} #{table_name} #{offset}"
+        logger.debug page.sql
+        page.each &encode_block
+      end
     end
-    end
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    def stream_dump( &encode_block )
+      logger.info "using result set streaming"
+
+      # I want to output progress every page_size records,
+      # without doing a records_count % page_size every iteration.
+      # So define an external enumerator
+      # TODO should really performance test the options here.
+      records_count = 0
+      enum = table_dataset.stream.enum_for
+      loop do
+        begin
+          page_size.times do
+            encode_block.call enum.next
+            records_count += 1
+          end
+        ensure
+          logger.info "#{__method__} #{table_name} #{records_count}" if records_count < page_size
+          logger.debug " #{records_count} from #{table_dataset.sql}"
         end
-      ensure
-        logger.info "#{__method__} #{table_name} #{records_count}" if records_count < page_size
-        logger.debug " #{records_count} from #{table_dataset.sql}"
       end
     end
-    end
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    # Dump the serialization of the table to the specified io.
+    #
+    # TODO need to also dump a first row containing useful stuff:
+    # - source table name
+    # - number of rows
+    # - source db url
+    # - permissions?
+    # These should all be in one object that can be Marshall.load-ed easily.
+    #
+    # TODO could speed this up by have a query thread which runs the next page-query while
+    # the current one is being written/compressed.
+    def dump
+      _dump do |row|
+        codec.encode( row.values, io ) unless dry_run?
+      end
+    ensure
+      io.flush
     end
-    ensure
-      io.flush
-    end
 
-
-
-
-
-
-
-
+    # decide which kind of paged iteration will be best for this table.
+    # Return an iterator, or yield row hashes to the block
+    def _dump( &encode_block )
+      return enum_for(__method__) unless block_given?
+      case
+      when table_dataset.respond_to?( :stream )
+        stream_dump &encode_block
 
-
-
+      when primary_keys.empty?
+        paginated_dump &encode_block
 
-
-
+      when primary_keys.all?{|i| i == :id }
+        min_max_dump &encode_block
 
-
-
+      else
+        inner_dump &encode_block
+      end
     end
-    end
 
-
-
-
-
-
-
-
+    def dump_matches_columns?( row_enum, columns )
+      raise "schema mismatch" unless row_enum.peek.size == columns.size
+      true
+    rescue StopIteration
+      # peek threw a StopIteration, so there's no data
+      false
+    end
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    # start_row is zero-based
+    #
+    # TODO don't generate the full insert, ie leave out the fields
+    # because we've already checked that the columns and the table
+    # match.
+    # TODO generate column names in insert, they might still work
+    # if columns have been added to the db, but not the dump.
+    def restore( start_row: 0, filename: 'io' )
+      columns = table_dataset.columns
+      row_enum = each_row
+
+      return unless dump_matches_columns?( row_enum, columns )
+
+      logger.info "#{__method__} inserting to #{table_name} from #{start_row}"
+      logger.debug " #{columns.inspect}"
+      rows_restored = 0
+
+      if start_row != 0
+        logger.debug{ "skipping #{start_row} rows from #{filename}" }
+        start_row.times do |i|
+          row_enum.next
+          logger.debug{ "skipped #{i} from #{filename}" } if i % page_size == 0
+        end
+        logger.debug{ "skipped #{start_row} from #{filename}" }
+        rows_restored += start_row
       end
-        logger.debug{ "skipped #{start_row} from #{filename}" }
-        rows_restored += start_row
-      end
 
-
-
-
-
-
-
-
-
-
-
-
-
-
+      loop do
+        db.transaction do
+          begin
+            page_size.times do
+              # This skips all the checks in the Sequel code. Basically we want
+              # to generate the
+              # insert into (field1,field2) values (value1,value2)
+              # statement as quickly as possible.
+              #
+              # Uses a private method so it will need to be updated repeatedly.
+              sql = table_dataset.clone( columns: columns, values: row_enum.next ).send(:_insert_sql)
+              db.execute sql unless dry_run?
+              rows_restored += 1
+            end
+          rescue StopIteration
+            # reached the end of the inout stream.
+            # So commit this transaction, and then re-raise
+            # StopIteration to get out of the loop{} statement
+            db.after_commit{ raise StopIteration }
          end
-        rescue StopIteration
-          # reached the end of the inout stream.
-          # So commit this transaction, and then re-raise
-          # StopIteration to get out of the loop{} statement
-          db.after_commit{ raise StopIteration }
        end
      end
+      logger.info "#{__method__} #{table_name} done. Inserted #{rows_restored}."
+      rows_restored
    end
-      logger.info "#{__method__} #{table_name} done. Inserted #{rows_restored}."
-      rows_restored
-    end
 
-
-
-
-
-
-
-
+    # Enumerate through the given io at its current position.
+    # Can raise StopIteration (ie when eof is not detected)
+    # MAYBE don't check for io.eof here, leave that to the codec
+    def each_row
+      return enum_for(__method__) unless block_given?
+      yield codec.decode( io ) until io.eof?
+    end
 
-
-
-
-
-
+    # Enumerate sql insert statements from the dump
+    def insert_sql_each
+      return enum_for(__method__) unless block_given?
+      each_row do |row|
+        yield table_dataset.insert_sql( row )
+      end
     end
   end
 end
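A consequence of the codec= / quacks_like pairing above: Pump accepts not just :marshal, :yaml or a codec class, but any object responding to encode and decode. A hypothetical sketch (JsonCodec is not part of wyrm):

require 'json'
require 'wyrm/pump'

# duck-typed codec; satisfies quacks_like(:encode, :decode)
class JsonCodec
  # serialise one row to the output stream, one JSON document per line
  def encode( obj, io )
    io.puts JSON.generate(obj)
  end

  # read one row back; yields it when a block is given, like the built-ins
  def decode( io, &block )
    obj = JSON.parse(io.gets)
    yield obj if block_given?
    obj
  end
end

pump = Wyrm::Pump.new codec: JsonCodec.new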