wyrm 0.4.1 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +4 -2
- data/Gemfile +18 -26
- data/History.txt +4 -0
- data/README.md +2 -0
- data/lib/wyrm/dump.rb +100 -75
- data/lib/wyrm/logger.rb +1 -1
- data/lib/wyrm/pump.rb +249 -247
- data/lib/wyrm/pump_maker.rb +23 -22
- data/lib/wyrm/restore.rb +90 -76
- data/lib/wyrm/schema_tools.rb +69 -62
- data/lib/wyrm/version.rb +1 -1
- data/spec/pump_spec.rb +7 -4
- data/spec/rspec_syntax.rb +22 -0
- data/spec/schema_tools_spec.rb +7 -6
- data/wyrm.gemspec +0 -4
- metadata +2 -58
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: c40184e0e1f6175ad0447494ff5bf367c39292db
+  data.tar.gz: c7b927a63887f83ba35b6c3be3c11fb412a2212a
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: cd762e971e8fb35f4147b4657b5fbb67fb1de1ef26ec4d8ef7af2dac2a9f6532cf8bce4e02587021e261e302e133d6312caad46cf6e06924d3701a25dc8bb2a1
+  data.tar.gz: 7c38e0d0f186e78e58639220b21755b219e85ef15b3acbe8c920e145c70f1715702b4c4fd060abebad767732469296dd855af198349b27be97d70fd419060e47
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
@@ -1,32 +1,24 @@
-
+source 'https://rubygems.org'
 
-
-def from_gemrc
-  # auto-load from ~/.gemrc
-  home_gemrc = Pathname('~/.gemrc').expand_path
-
-  if home_gemrc.exist?
-    require 'yaml'
-    # use all the sources specified in .gemrc
-    YAML.load_file(home_gemrc)[:sources]
-  end
-end
-
-# Use the gemrc source if defined, unless CANON is set,
-# otherwise just use the default.
-def preferred_sources
-  rv = from_gemrc unless eval(ENV['CANON']||'')
-  rv ||= []
-  rv << 'http://rubygems.org' if rv.empty?
-  rv
-end
-
-preferred_sources.each{|src| source src}
+raise "You need >= ruby-2.3 for wyrm" unless RUBY_VERSION >= '2.3.0'
 
 # Specify your gem's dependencies in wyrm.gemspec
 gemspec
 
-
-
-gem
+platforms :ruby do
+  gem 'pg'
+  gem 'sequel_pg'
+  gem 'sqlite3'
+  gem 'pry-byebug'
+
+  if Pathname('/usr/include/mysql').exist?
+    # version is for mysql streaming result sets
+    gem "mysql2", '>= 0.3.12'
+  end
+end
+
+platforms :jruby do
+  # gem "pg"
+  gem 'jdbc-sqlite3'
+  gem 'jdbc-postgres'
 end
data/History.txt
CHANGED
data/README.md
CHANGED
data/lib/wyrm/dump.rb
CHANGED
@@ -6,104 +6,129 @@ require 'wyrm/schema_tools'
 require 'wyrm/logger'
 
 # Dump a schema and compressed data from a db to a set of files
-#
-#
-#
+#
+# Dump["postgres://localhost:5454/lots", '/var/data/lots']
+#
 # TODO possibly use Gem::Package::TarWriter to write tar files
-
-
-
-
-
-
-
-
+module Wyrm
+  class Dump
+    include Wyrm::PumpMaker
+    include Wyrm::SchemaTools
+    include Wyrm::Logger
+
+    def self.[]( *args )
+      new(*args).call
+    end
 
-
-
+    def call
+      dump_schema
+      dump_tables
+      dump_indexes
+    end
 
-
-
+    def initialize( src_db, container = nil, pump: nil )
+      @container = Pathname.new container || '.'
+      raise "#{@container} does not exist" unless @container.exist?
 
-
+      @src_db = maybe_deebe src_db
+      @pump = make_pump( @src_db, pump )
 
-
+      @src_db.extension :schema_dumper
+    end
 
-
-      @numbering ||= '000'
-    end
+    attr_reader :src_db, :container, :pump
 
-
-      (container + "#{numbering.next!}_schema.rb").open('w') do |io|
-        io.write schema_migration
-      end
-    end
+    def same_db; false end
 
-
-
-        io.write index_migration
+    def numbering
+      @numbering ||= '000'
     end
 
-      (
-
+    def dump_table_schemas( *tables )
+      (container + "#{numbering.next!}_schema.rb").open('w') do |io|
+        tables.each do |table|
+          logger.debug "schema for #{table}"
+          io.puts table_migration table
+        end
+      end
     end
-    end
 
-
-
-
-      zio = IO.popen( STREAM_COMP, 'r+' )
-      copier = Thread.new do
-        begin
-          IO.copy_stream zio, fio
-          logger.debug "finished stream copy"
-        ensure
-          fio.close
+    def dump_schema
+      (container + "#{numbering.next!}_schema.rb").open('w') do |io|
+        io.write schema_migration
       end
     end
 
-
+    def dump_indexes
+      (container + "#{numbering.next!}_indexes.rb").open('w') do |io|
+        io.write index_migration
+      end
 
-
-
-
+      (container + "#{numbering.next!}_foreign_keys.rb").open('w') do |io|
+        io.write fk_migration
+      end
+    end
 
-
-
-
-
-
-
-
+    def write_through_bz2( pathname )
+      fio = pathname.open('w')
+      # open subprocess in read-write mode
+      zio = IO.popen( STREAM_COMP, 'r+' )
+      copier = Thread.new do
+        begin
+          IO.copy_stream zio, fio
+          logger.debug "finished stream copy"
+        ensure
+          fio.close
+        end
+      end
 
-
-
-
-
-
+      # block receiving zio will write to it.
+      yield zio
+
+      # signal the copier thread to stop
+      logger.debug 'flushing'
+      if RUBY_ENGINE == 'jruby'
+        # seems to be required for jruby, at least 9.1.2.0
+        logger.debug 'jruby flushing'
+        zio.flush
+        logger.debug 'jruby close'
+        zio.close
+      else
+        zio.close_write
+      end
+      logger.debug 'finished dumping'
+
+      # wait for copier thread to finish
+      copier.join
+      logger.debug 'stream copy thread finished'
+    ensure
+      zio.close if zio && !zio.closed?
+      fio.close if fio && !fio.closed?
     end
 
-
-
+    def dump_table( table_name, &io_block )
+      pump.table_name = table_name
+      if pump.table_dataset.empty?
+        logger.info "No records in #{table_name}"
+        return
+      end
 
-
-        #
-        pump.io = zio
-        pump.dump
-      end
-    rescue
-      logger.error "failed dumping #{table_name}: #{$!.message}"
-    end
+      filename = container + "#{table_name}.dbp.bz2"
+      logger.info "dumping #{table_name} to #{filename}"
 
-
-
-
+      write_through_bz2 filename do |zio|
+        # generate the dump
+        pump.io = zio
+        pump.dump
+      end
+    rescue
+      logger.error "failed dumping #{table_name}: #{$!.message}"
     end
-    end
 
-
-
-
-
+    def dump_tables
+      src_db.tables.each do |table_name|
+        dump_table table_name
+      end
+    end
   end
 end
data/lib/wyrm/logger.rb
CHANGED
data/lib/wyrm/pump.rb
CHANGED
@@ -8,304 +8,306 @@ require 'wyrm/module'
 # TODO need to version the dumps, or something like that.
 # TODO looks like io should belong to codec. Hmm. Not sure.
 # TODO table_name table_dataset need some thinking about. Dataset would encapsulate both. But couldn't change db then, and primary_keys would be hard.
-
-
-
-
-
-
-
-
-
-
-
+module Wyrm
+  class Pump
+    def initialize( db: nil, table_name: nil, io: STDOUT, codec: :marshal, page_size: 10000, dry_run: false, logger: nil )
+      self.codec = codec
+      self.db = db
+      self.table_name = table_name
+      self.io = io
+      self.page_size = page_size
+      self.dry_run = dry_run
+      self.logger = logger
+      yield self if block_given?
+    end
 
-
-
+    include Wyrm::Logger
+    attr_writer :logger
 
-
-
+    attr_accessor :io, :page_size, :dry_run
+    def dry_run?; dry_run; end
 
-
-
+    # These are affected by cached values
+    attr_reader :db, :table_name
 
-
-
-
-
+    def invalidate_cached_members
+      @primary_keys = nil
+      @table_dataset = nil
+    end
 
-
-
-
-
+    def table_name=( name_sym )
+      invalidate_cached_members
+      @table_name = name_sym
+    end
 
-
-
+    def db=( other_db )
+      invalidate_cached_members
 
-
-
+      @db = other_db
+      return unless other_db
 
-
-
+      # add extensions
+      @db.extension :pagination
 
-
-
-
-
-
-
-
+      # turn on postgres streaming if available
+      # also gets called for non-postgres dbs, but that seems to be fine.
+      if defined?( Sequel::Postgres::Database ) && @db.is_a?(Sequel::Postgres::Database) && defined?(Sequel::Postgres.supports_streaming?) && Sequel::Postgres.supports_streaming?
+        @db.extension :pg_streaming
+        logger.info "Streaming for #{@db.uri}"
+      else
+        logger.info "No streaming for #{@db.uri}"
+      end
     end
-    end
 
-
-
-
-
-
-
-
+    # return an object that responds to ===
+    # which returns true if ==='s parameter
+    # responds to all the methods
+    def self.quacks_like( *methods )
+      @quacks_like ||= {}
+      @quacks_like[methods] ||= lambda do |inst|
+        methods.all?{|m| inst.respond_to? m}
+      end
     end
-    end
 
-
-
-
+    def quacks_like( *methods )
+      self.class.quacks_like( *methods )
+    end
 
-
-
-
-
-
-
-
-
-
-
-
+    def codec=( codec_thing )
+      @codec =
+      case codec_thing
+      when :yaml; YamlCodec.new
+      when :marshal; MarshalCodec.new
+      when Class
+        codec_thing.new
+      when quacks_like(:encode,:decode)
+        codec_thing
+      else
+        raise "unknown codec #{codec_thing.inspect}"
+      end
     end
-    end
 
-
+    attr_reader :codec
 
-
-
-
-
+    class MarshalCodec
+      def encode( obj, io )
+        Marshal.dump obj, io
+      end
 
-
-
-
-
+      def decode( io, &block )
+        obj = Marshal.load(io)
+        yield obj if block_given?
+        obj
+      end
     end
-    end
 
-
-
-
-
+    class YamlCodec
+      def encode( obj, io )
+        YAML.dump obj, io
+      end
 
-
-
-
-
+      def decode( io, &block )
+        obj = YAML.load(io)
+        yield obj if block_given?
+        obj
+      end
     end
-    end
 
-
-
-
-
+    def primary_keys
+      # each_with_object([]){...} is only faster for < 3 items in 100000
+      @primary_keys ||= db.schema(table_name).map{|name,column_info| name if column_info[:primary_key]}.compact
+    end
 
-
-
-
+    def table_dataset
+      @table_dataset ||= db[table_name.to_sym]
+    end
 
-
-
-
-
-
-
-
-
+    # Use limit / offset. Last fallback if there are no keys (or a compound primary key?).
+    def paginated_dump( &encode_block )
+      records_count = 0
+      table_dataset.order(*primary_keys).each_page(page_size) do |page|
+        logger.info "#{__method__} #{table_name} #{records_count}"
+        logger.debug page.sql
+        page.each &encode_block
+        records_count += page_size
+      end
     end
-    end
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    # Use limit / offset, but not for all fields.
+    # The idea is that large offsets are expensive in the db because the db server has to read
+    # through the data set to reach the required offset. So make that only ids need to be read,
+    # and then do the main select from the limited id list.
+    # select * from massive as full
+    # inner join (select id from massive order by whatever limit m, n) limit
+    # on full.id = limit.id
+    # order by full.whatever
+    # http://www.numerati.com/2012/06/26/reading-large-result-sets-with-hibernate-and-mysql/
+    def inner_dump( &encode_block )
+      # could possibly overrride Dataset#paginate(page_no, page_size, record_count=nil)
+      on_conditions = primary_keys.map{|f| [f,f]}.to_h
+      (0..table_dataset.count).step(page_size).each do |offset|
+        limit_dataset = table_dataset.select( *primary_keys ).limit( page_size, offset ).order( *primary_keys )
+        page = table_dataset.join( limit_dataset, on_conditions ).order( *primary_keys ).qualify(table_name)
+        logger.info "#{__method__} #{table_name} #{offset}"
+        logger.debug page.sql
+        page.each &encode_block
+      end
     end
-    end
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    # Selects pages by a range of ids, using >= and <.
+    # Use this for integer pks
+    def min_max_dump( &encode_block )
+      # select max(id), min(id) from table
+      # and then split that up into 10000 size chunks.
+      # Not really important if there aren't exactly 10000
+      min, max = table_dataset.select{[min(id), max(id)]}.first.values
+      return unless min && max
+
+      # will always include the last item because page_size will be
+      # bigger than max for the last page
+      (min..max).step(page_size).each do |offset|
+        page = table_dataset.where( id: offset...(offset + page_size) )
+        logger.info "#{__method__} #{table_name} #{offset}"
+        logger.debug page.sql
+        page.each &encode_block
+      end
     end
-    end
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    def stream_dump( &encode_block )
+      logger.info "using result set streaming"
+
+      # I want to output progress every page_size records,
+      # without doing a records_count % page_size every iteration.
+      # So define an external enumerator
+      # TODO should really performance test the options here.
+      records_count = 0
+      enum = table_dataset.stream.enum_for
+      loop do
+        begin
+          page_size.times do
+            encode_block.call enum.next
+            records_count += 1
+          end
+        ensure
+          logger.info "#{__method__} #{table_name} #{records_count}" if records_count < page_size
+          logger.debug " #{records_count} from #{table_dataset.sql}"
         end
-        ensure
-          logger.info "#{__method__} #{table_name} #{records_count}" if records_count < page_size
-          logger.debug " #{records_count} from #{table_dataset.sql}"
       end
     end
-    end
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    # Dump the serialization of the table to the specified io.
+    #
+    # TODO need to also dump a first row containing useful stuff:
+    # - source table name
+    # - number of rows
+    # - source db url
+    # - permissions?
+    # These should all be in one object that can be Marshall.load-ed easily.
+    #
+    # TODO could speed this up by have a query thread which runs the next page-query while
+    # the current one is being written/compressed.
+    def dump
+      _dump do |row|
+        codec.encode( row.values, io ) unless dry_run?
+      end
+    ensure
+      io.flush
     end
-    ensure
-      io.flush
-    end
 
-
-
-
-
-
-
-
+    # decide which kind of paged iteration will be best for this table.
+    # Return an iterator, or yield row hashes to the block
+    def _dump( &encode_block )
+      return enum_for(__method__) unless block_given?
+      case
+      when table_dataset.respond_to?( :stream )
+        stream_dump &encode_block
 
-
-
+      when primary_keys.empty?
+        paginated_dump &encode_block
 
-
-
+      when primary_keys.all?{|i| i == :id }
+        min_max_dump &encode_block
 
-
-
+      else
+        inner_dump &encode_block
+      end
     end
-    end
 
-
-
-
-
-
-
-
+    def dump_matches_columns?( row_enum, columns )
+      raise "schema mismatch" unless row_enum.peek.size == columns.size
+      true
+    rescue StopIteration
+      # peek threw a StopIteration, so there's no data
+      false
+    end
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    # start_row is zero-based
+    #
+    # TODO don't generate the full insert, ie leave out the fields
+    # because we've already checked that the columns and the table
+    # match.
+    # TODO generate column names in insert, they might still work
+    # if columns have been added to the db, but not the dump.
+    def restore( start_row: 0, filename: 'io' )
+      columns = table_dataset.columns
+      row_enum = each_row
+
+      return unless dump_matches_columns?( row_enum, columns )
+
+      logger.info "#{__method__} inserting to #{table_name} from #{start_row}"
+      logger.debug " #{columns.inspect}"
+      rows_restored = 0
+
+      if start_row != 0
+        logger.debug{ "skipping #{start_row} rows from #{filename}" }
+        start_row.times do |i|
+          row_enum.next
+          logger.debug{ "skipped #{i} from #{filename}" } if i % page_size == 0
+        end
+        logger.debug{ "skipped #{start_row} from #{filename}" }
+        rows_restored += start_row
       end
-        logger.debug{ "skipped #{start_row} from #{filename}" }
-        rows_restored += start_row
-      end
 
-
-
-
-
-
-
-
-
-
-
-
-
-
+      loop do
+        db.transaction do
+          begin
+            page_size.times do
+              # This skips all the checks in the Sequel code. Basically we want
+              # to generate the
+              # insert into (field1,field2) values (value1,value2)
+              # statement as quickly as possible.
+              #
+              # Uses a private method so it will need to be updated repeatedly.
+              sql = table_dataset.clone( columns: columns, values: row_enum.next ).send(:_insert_sql)
+              db.execute sql unless dry_run?
+              rows_restored += 1
+            end
+          rescue StopIteration
+            # reached the end of the inout stream.
+            # So commit this transaction, and then re-raise
+            # StopIteration to get out of the loop{} statement
+            db.after_commit{ raise StopIteration }
          end
-          rescue StopIteration
-            # reached the end of the inout stream.
-            # So commit this transaction, and then re-raise
-            # StopIteration to get out of the loop{} statement
-            db.after_commit{ raise StopIteration }
        end
      end
+      logger.info "#{__method__} #{table_name} done. Inserted #{rows_restored}."
+      rows_restored
    end
-      logger.info "#{__method__} #{table_name} done. Inserted #{rows_restored}."
-      rows_restored
-    end
 
-
-
-
-
-
-
-
+    # Enumerate through the given io at its current position.
+    # Can raise StopIteration (ie when eof is not detected)
+    # MAYBE don't check for io.eof here, leave that to the codec
+    def each_row
+      return enum_for(__method__) unless block_given?
+      yield codec.decode( io ) until io.eof?
+    end
 
-
-
-
-
-
+    # Enumerate sql insert statements from the dump
+    def insert_sql_each
+      return enum_for(__method__) unless block_given?
+      each_row do |row|
+        yield table_dataset.insert_sql( row )
+      end
    end
  end
 end
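Wyrm::Pump above serializes one table's rows to an IO through a codec, with _dump choosing a paging strategy: result-set streaming when the dataset supports it, plain limit/offset when there is no primary key, id-range paging for a single integer id key, and the join-based inner_dump otherwise. A rough round-trip sketch (Sequel URLs, table and file names are illustrative only):

    require 'sequel'
    require 'wyrm/pump'

    src = Sequel.connect 'sqlite://source.sqlite3'
    dst = Sequel.connect 'sqlite://target.sqlite3'

    # dump: encode each row of :users (Marshal codec by default) to a plain file
    File.open('users.dbp', 'wb') do |io|
      Wyrm::Pump.new(db: src, table_name: :users, io: io).dump
    end

    # restore: insert the rows into another db with the same schema;
    # dump_matches_columns? raises "schema mismatch" if the column counts differ.
    File.open('users.dbp', 'rb') do |io|
      Wyrm::Pump.new(db: dst, table_name: :users, io: io).restore filename: 'users.dbp'
    end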