wyrm 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.rvmrc +1 -1
- data/Gemfile +0 -3
- data/README.md +18 -11
- data/bin/wyrm +40 -24
- data/bin/wyrm-view +34 -0
- data/lib/wyrm.rb +3 -5
- data/lib/wyrm/cli.rb +9 -0
- data/lib/wyrm/core_extensions.rb +10 -0
- data/lib/wyrm/{dump_schema.rb → dump.rb} +22 -21
- data/lib/wyrm/hole.rb +164 -0
- data/lib/wyrm/logger.rb +11 -0
- data/lib/wyrm/module.rb +1 -0
- data/lib/wyrm/{db_pump.rb → pump.rb} +40 -34
- data/lib/wyrm/pump_maker.rb +10 -4
- data/lib/wyrm/{restore_schema.rb → restore.rb} +40 -33
- data/lib/wyrm/schema_tools.rb +91 -0
- data/lib/wyrm/version.rb +1 -1
- data/snippets/console.rb +5 -3
- data/spec/core_extensions_spec.rb +50 -0
- data/spec/hole_mouth_spec.rb +176 -0
- data/spec/pump_spec.rb +62 -0
- data/spec/schema_tools_spec.rb +201 -0
- data/wyrm.gemspec +12 -3
- metadata +135 -23
- data/lib/wyrm/other_schema.rb +0 -6
- data/lib/wyrm/transferer.rb +0 -32
data/lib/wyrm/logger.rb
ADDED
data/lib/wyrm/module.rb
ADDED
@@ -0,0 +1 @@
+module Wyrm end
data/lib/wyrm/{db_pump.rb → pump.rb}
CHANGED
@@ -1,25 +1,28 @@
 require 'sequel'
 require 'yaml'
-require 'logger'

-
+require 'wyrm/logger'
+require 'wyrm/module'

 # TODO when restoring, could use a SizeQueue to make sure the db is kept busy
 # TODO need to version the dumps, or something like that.
 # TODO looks like io should belong to codec. Hmm. Not sure.
 # TODO table_name table_dataset need some thinking about. Dataset would encapsulate both. But couldn't change db then, and primary_keys would be hard.
-class
-
-def initialize( db: nil, table_name: nil, io: STDOUT, codec: :marshal, page_size: 10000, dry_run: false )
+class Wyrm::Pump
+def initialize( db: nil, table_name: nil, io: STDOUT, codec: :marshal, page_size: 10000, dry_run: false, logger: nil )
 self.codec = codec
 self.db = db
 self.table_name = table_name
 self.io = io
 self.page_size = page_size
 self.dry_run = dry_run
+self.logger = logger
 yield self if block_given?
 end

+include Wyrm::Logger
+attr_writer :logger
+
 attr_accessor :io, :page_size, :dry_run
 def dry_run?; dry_run; end

@@ -46,9 +49,11 @@ class DbPump
 @db.extension :pagination

 # turn on postgres streaming if available
-if defined?( Sequel::Postgres ) && Sequel::Postgres.supports_streaming?
-logger.
+if defined?( Sequel::Postgres ) && defined?(Sequel::Postgres.supports_streaming?) && Sequel::Postgres.supports_streaming?
+logger.debug "Streaming for postgres"
 @db.extension :pg_streaming
+else
+logger.info "No streaming for postgres"
 end
 end

@@ -57,10 +62,8 @@ class DbPump
 # responds to all the methods
 def self.quacks_like( *methods )
 @quacks_like ||= {}
-@quacks_like[methods] ||=
-
-methods.all?{|m| instance.respond_to? m}
-end
+@quacks_like[methods] ||= lambda do |inst|
+methods.all?{|m| inst.respond_to? m}
 end
 end

@@ -75,7 +78,7 @@ class DbPump
 when :marshal; MarshalCodec.new
 when Class
 codec_thing.new
-when quacks_like(
+when quacks_like(:encode,:decode)
 codec_thing
 else
 raise "unknown codec #{codec_thing.inspect}"
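The new form works because case/when matches with #===, and Proc#=== calls the proc with the value being tested, so the memoized lambda acts as a duck-typing matcher. A minimal standalone sketch of the idea (illustrative names, not wyrm code):

```ruby
# Sketch only: a lambda used as a duck-type matcher in a case expression.
# `when` compares with #===, and Proc#=== calls the lambda with the tested
# object, so the branch matches when it responds to the required methods.
quacks_like = lambda do |*methods|
  lambda { |obj| methods.all? { |m| obj.respond_to?(m) } }
end

codec_thing = Object.new
def codec_thing.encode(row, io); io << Marshal.dump(row); end
def codec_thing.decode(io); Marshal.load(io); end

codec =
  case codec_thing
  when quacks_like.call(:encode, :decode) then codec_thing
  else raise "unknown codec #{codec_thing.inspect}"
  end
```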
@@ -108,10 +111,6 @@ class DbPump
 end
 end

-def logger
-@logger ||= Logger.new STDERR
-end
-
 def primary_keys
 @primary_keys ||= db.schema(table_name).select{|df| df.last[:primary_key]}.map{|df| df.first}
 end
@@ -122,9 +121,12 @@ class DbPump

 # Use limit / offset. Last fallback if there are no keys (or a compound primary key?).
 def paginated_dump( &encode_block )
+records_count = 0
 table_dataset.order(*primary_keys).each_page(page_size) do |page|
-logger.info
+logger.info{ "#{__method__} #{table_name} #{records_count}" }
+logger.debug{ page.sql }
 page.each &encode_block
+records_count += page_size
 end
 end

@@ -132,8 +134,6 @@ class DbPump
 # The idea is that large offsets are expensive in the db because the db server has to read
 # through the data set to reach the required offset. So make that only ids need to be read,
 # and then do the main select from the limited id list.
-# TODO could speed this up by have a query thread which runs the next page-query while
-# the current one is being written/compressed.
 # select * from massive as full
 # inner join (select id from massive order by whatever limit m, n) limit
 # on full.id = limit.id
@@ -144,7 +144,8 @@ class DbPump
 0.step(table_dataset.count, page_size).each do |offset|
 limit_dataset = table_dataset.select( *primary_keys ).limit( page_size, offset ).order( *primary_keys )
 page = table_dataset.join( limit_dataset, Hash[ primary_keys.map{|f| [f,f]} ] ).order( *primary_keys ).qualify(table_name)
-logger.info
+logger.info{ "#{__method__} #{table_name} #{offset}" }
+logger.debug{ page.sql }
 page.each &encode_block
 end
 end
@@ -162,13 +163,14 @@ class DbPump
 # bigger than max for the last page
 (min..max).step(page_size).each do |offset|
 page = table_dataset.where( id: offset...(offset + page_size) )
-logger.info
+logger.info{ "#{__method__} #{table_name} #{offset}" }
+logger.debug{ page.sql }
 page.each &encode_block
 end
 end

 def stream_dump( &encode_block )
-logger.
+logger.debug{ "using result set streaming" }

 # I want to output progress every page_size records,
 # without doing a records_count % page_size every iteration.
@@ -183,18 +185,23 @@ class DbPump
 records_count += 1
 end
 ensure
-logger.info "#{
+logger.info{ "#{__method__} #{table_name} #{records_count}" if records_count < page_size }
+logger.debug{ " from #{table_dataset.sql}" }
 end
 end
 end

 # Dump the serialization of the table to the specified io.
+#
 # TODO need to also dump a first row containing useful stuff:
 # - source table name
 # - number of rows
 # - source db url
 # - permissions?
 # These should all be in one object that can be Marshall.load-ed easily.
+#
+# TODO could speed this up by have a query thread which runs the next page-query while
+# the current one is being written/compressed.
 def dump
 _dump do |row|
 codec.encode( row.values, io ) unless dry_run?
@@ -239,21 +246,20 @@ class DbPump

 return unless dump_matches_columns?( row_enum, columns )

-logger.info{ "inserting to #{table_name} #{
+logger.info{ "#{__method__} inserting to #{table_name} from #{start_row}" }
+logger.debug{ " #{columns.inspect}" }
 rows_restored = 0

 if start_row != 0
-logger.
+logger.debug{ "skipping #{start_row} rows from #{filename}" }
 start_row.times do |i|
 row_enum.next
-logger.
+logger.debug{ "skipped #{i} from #{filename}" } if i % page_size == 0
 end
-logger.
+logger.debug{ "skipped #{start_row} from #{filename}" }
 rows_restored += start_row
 end

-logger.info{ "inserting to #{table_name} from #{rows_restored}" }
-
 loop do
 db.transaction do
 begin
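The start_row skipping above, and the rescue StopIteration in the next hunk, lean on each_row returning an external Enumerator whose next raises StopIteration at the end of the stream. A minimal sketch of that idiom, assuming a Marshal-based codec (illustrative, not wyrm code):

```ruby
require 'stringio'

# Without a block, each_row returns an Enumerator; calling #next on it raises
# StopIteration once the io is exhausted, which Kernel#loop (and the rescue in
# the restore code) treat as the normal termination signal.
def each_row(io)
  return enum_for(__method__, io) unless block_given?
  yield Marshal.load(io) until io.eof?
end

io = StringIO.new(Marshal.dump([1, 'first']) + Marshal.dump([2, 'second']))
row_enum = each_row(io)
loop do
  p row_enum.next   # prints both rows, then StopIteration ends the loop
end
```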
@@ -267,20 +273,20 @@ class DbPump
 rows_restored += 1
 end
 rescue StopIteration
-#
+# reached the end of the inout stream.
 # So commit this transaction, and then re-raise
 # StopIteration to get out of the loop{} statement
 db.after_commit{ raise StopIteration }
 end
-logger.info{ "#{table_name} inserted #{rows_restored}" }
 end
 end
-logger.info{ "#{table_name} done. Inserted #{rows_restored}." }
+logger.info{ "#{__method__} #{table_name} done. Inserted #{rows_restored}." }
 rows_restored
 end

-# Enumerate through the given io at its current position
-#
+# Enumerate through the given io at its current position.
+# Can raise StopIteration (ie when eof is not detected)
+# MAYBE don't check for io.eof here, leave that to the codec
 def each_row
 return enum_for(__method__) unless block_given?
 yield codec.decode( io ) until io.eof?
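Taken together, a hypothetical 0.3.0 caller of the renamed class might look like the sketch below; the connection URL, table name and file name are placeholders, and the logger: keyword is optional (a nil logger presumably falls back to the Wyrm::Logger default):

```ruby
require 'logger'
require 'sequel'
require 'wyrm/pump'

db = Sequel.connect 'sqlite://example.db'   # placeholder URL

File.open('things.dbp', 'wb') do |io|
  pump = Wyrm::Pump.new(
    db: db,
    table_name: :things,          # placeholder table
    io: io,
    codec: :marshal,
    page_size: 10_000,
    logger: Logger.new(STDERR)    # optional; nil is the default
  )
  pump.dump
end
```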
data/lib/wyrm/pump_maker.rb
CHANGED
@@ -1,6 +1,7 @@
-require 'wyrm/
+require 'wyrm/pump'
+require 'wyrm/module'

-module PumpMaker
+module Wyrm::PumpMaker
 def call_or_self( maybe_callable )
 if maybe_callable.respond_to? :call
 maybe_callable.call( self )
@@ -10,13 +11,18 @@ module PumpMaker
 end

 def make_pump( db, pump_thing )
-call_or_self(pump_thing) ||
+call_or_self(pump_thing) || Pump.new( db: db )
 end

 def maybe_deebe( db_or_string )
 case db_or_string
 when String
-
+begin
+Sequel.connect db_or_string
+rescue Sequel::AdapterNotFound
+puts "\nCan't find db driver for #{db_or_string}. It might work to do\n\n gem install #{db_or_string.split(?:).first}\n\n"
+exit(1)
+end
 when Sequel::Database
 db_or_string
 else
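For context, maybe_deebe accepts either a Sequel connection URL or an existing Sequel::Database, and make_pump accepts nil (default pump), a ready-made pump, or a callable that builds one. A hedged sketch with an illustrative includer (class name and URL are placeholders):

```ruby
require 'sequel'
require 'wyrm/pump_maker'

# Illustrative includer; Dump and Restore are the real users of this module.
class PumpHolder
  include Wyrm::PumpMaker
end

holder = PumpHolder.new
db = holder.maybe_deebe 'sqlite://example.db'   # or pass a Sequel::Database
default_pump = holder.make_pump db, nil         # nil falls back to Pump.new( db: db )
custom_pump  = holder.make_pump db, ->(owner){ Wyrm::Pump.new db: db, page_size: 5000 }
```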
data/lib/wyrm/{restore_schema.rb → restore.rb}
CHANGED
@@ -1,28 +1,44 @@
-require '
+require 'ostruct'
+require 'pathname'
+
+require 'wyrm/logger'
+require 'wyrm/module'
 require 'wyrm/pump_maker'
+require 'wyrm/schema_tools'

 # Load a schema from a set of dump files (from DumpSchema)
 # and restore the table data.
 # dst_db = Sequel.connect "postgres://localhost:5454/lots"
 # rs = RestoreSchema.new dst_db, '/var/data/lots'
-# rs.
-#
-
+# rs.call
+# TODO the problem with lazy loading the schema files is that
+# errors in indexes and foreign keys will only be picked up at the
+# end of they probably lengthy table restore process.
+# TODO check if table has been restored already, and has the correct rows,
+class Wyrm::Restore
 include PumpMaker
+include SchemaTools
+include Wyrm::Logger

-def initialize(
+def initialize( container, dst_db, pump: nil, drop_tables: false )
 @container = Pathname.new container
 @dst_db = maybe_deebe dst_db
 @pump = make_pump( @dst_db, pump )
+
+options.drop_tables = drop_tables
 end

 attr_reader :pump
 attr_reader :dst_db
 attr_reader :container

+def options
+@options ||= OpenStruct.new
+end
+
 # sequel wants migrations numbered, but it's a bit of an annoyance for this.
 def find_single( glob )
-candidates =Pathname.glob container + glob
+candidates = Pathname.glob container + glob
 raise "too many #{candidates.inspect} for #{glob}" unless candidates.size == 1
 candidates.first
 end
@@ -45,38 +61,13 @@ class RestoreSchema
 @schema_migration = nil
 end

-def logger
-@logger ||= Logger.new STDERR
-end
-
-# create indexes and foreign keys, and reset sequences
-def index
-logger.info "creating indexes"
-eval( index_migration ).apply dst_db, :up
-logger.info "creating foreign keys"
-eval( fk_migration ).apply dst_db, :up
-
-if dst_db.database_type == :postgres
-logger.info "reset primary key sequences"
-dst_db.tables.each{|t| dst_db.reset_primary_key_sequence(t)}
-logger.info "Primary key sequences reset successfully"
-end
-end
-
-# create the destination schema
-def create
-logger.info "creating tables"
-eval( schema_migration ).apply dst_db, :up
-end
-
 # assume the table name is the base name of table_file pathname
 def restore_table( table_file )
 logger.info "restoring from #{table_file}"
 pump.table_name = table_file.basename.sub_ext('').sub_ext('').to_s.to_sym
-# TODO check if table has been restored already, and has the correct rows,
 open_bz2 table_file do |io|
 pump.io = io
-pump.restore
+pump.restore filename: table_file
 end
 end

@@ -95,8 +86,24 @@ class RestoreSchema
 IO.popen "pbzip2 -d -c #{table_file}", &block
 end

+def table_files
+Pathname.glob container + '*.dbp.bz2'
+end
+
 def restore_tables
-table_files = Pathname.glob container + '*.dbp.bz2'
 table_files.sort_by{|tf| tf.stat.size}.each{|table_file| restore_table table_file}
 end
+
+def table_names
+table_files.map do |path|
+path.basename.to_s.split(?.)[0...-2].last.to_sym
+end
+end
+
+def call
+drop_tables(table_names) if options.drop_tables
+create_tables
+restore_tables
+create_indexes
+end
 end
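With the new call method the whole restore can be driven in a couple of lines. A hedged usage sketch (path and URL are placeholders):

```ruby
require 'wyrm/restore'

# container directory of *.dbp.bz2 files, then destination db URL or Sequel::Database
restore = Wyrm::Restore.new '/var/data/lots', 'postgres://localhost/lots', drop_tables: true
restore.call   # drop_tables (optional), create_tables, restore_tables, create_indexes
```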
data/lib/wyrm/schema_tools.rb
ADDED
@@ -0,0 +1,91 @@
+require 'fastandand'
+Sequel.extension :migration
+require 'wyrm/module'
+
+# needs dst_db for mutate operations
+# and src_db for fetch operations
+# src_db must have extension(:schema_dumper)
+module Wyrm::SchemaTools
+# some includers will need to provide a different implementation for this.
+def same_db
+respond_to?( :dst_db ) && respond_to?( :src_db ) && dst_db.andand.database_type == src_db.andand.database_type
+end
+
+def schema_migration
+@schema_migration ||= src_db.dump_schema_migration(:indexes=>false, :same_db => same_db)
+end
+
+def index_migration
+@index_migration ||= src_db.dump_indexes_migration(:same_db => same_db)
+end
+
+def fk_migration
+@fk_migration ||= src_db.dump_foreign_key_migration(:same_db => same_db)
+end
+
+def drop_table_options
+@drop_table_options ||=
+begin
+if dst_db.database_type == :postgres
+{cascade: true}
+else
+{}
+end
+end
+end
+
+# Delete given tables.
+# Recurse if there are foreign keys preventing table deletion.
+# This implementation will fail for tables with mutual foreign keys.
+# TODO maybe this should use the schema down migration?
+def drop_tables( tables )
+foreign_keyed_tables = []
+tables.each do |table_name|
+begin
+logger.debug "dropping #{table_name}"
+dst_db.drop_table? table_name, drop_table_options
+
+rescue Sequel::ForeignKeyConstraintViolation => ex
+foreign_keyed_tables << table_name
+
+rescue Sequel::DatabaseError => ex
+# Mysql2::Error: Cannot delete or update a parent row: a foreign key constraint fails
+# SQLite3::ConstraintException: FOREIGN KEY constraint failed==
+if ex.message =~ /foreign key constraint fail/i
+foreign_keyed_tables << table_name
+else
+raise
+end
+
+end
+end
+
+# this should be temporary
+if tables.sort == foreign_keyed_tables.sort
+raise "can't remove #{tables.inspect} because they have mutual foreign keys"
+end
+
+# recursively delete tables
+drop_tables foreign_keyed_tables.shuffle unless foreign_keyed_tables.empty?
+end
+
+def create_tables
+logger.info "creating tables"
+eval( schema_migration ).apply dst_db, :up
+end
+
+def create_indexes
+# create indexes and foreign keys, and reset sequences
+logger.info "creating indexes"
+eval( index_migration ).apply dst_db, :up
+
+logger.info "creating foreign keys"
+eval( fk_migration ).apply dst_db, :up
+
+if dst_db.database_type == :postgres
+logger.info "reset primary key sequences"
+dst_db.tables.each{|t| dst_db.reset_primary_key_sequence(t)}
+logger.info "Primary key sequences reset successfully"
+end
+end
+end
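Wyrm::SchemaTools is a mixin; judging from the methods it calls, an includer must supply src_db, dst_db and logger, and load the schema_dumper extension on the source db. A minimal sketch under those assumptions (class name and wiring are illustrative):

```ruby
require 'logger'
require 'sequel'
require 'wyrm/schema_tools'

class SchemaCopier
  include Wyrm::SchemaTools

  def initialize( src_db, dst_db )
    @src_db, @dst_db = src_db, dst_db
    @src_db.extension :schema_dumper   # needed by the dump_*_migration calls
  end

  attr_reader :src_db, :dst_db

  def logger
    @logger ||= Logger.new(STDERR)
  end

  def copy_schema
    drop_tables dst_db.tables
    create_tables
    create_indexes
  end
end
```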