wyrm 0.1.1 → 0.1.2

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: a446734aac434cad29e28523bbf7ed431b796b29
-  data.tar.gz: ab49b40fcb5d172c2d588e53ab8eafb19eb5d53b
+  metadata.gz: 25ce5387e1498b4e6e76915889bbe11f5cb4e008
+  data.tar.gz: dc86f010e4fbb7da91f58ea7a0bb99b2faf80591
 SHA512:
-  metadata.gz: 3a574b2ceda6bb849b9dc0a61ba6ce7b2d9f22b6b1ab17aa2ca421c30a2158a72ee65ddb5a5dc50f6a22d50b9b20a32c090b58d6e78cf62acca415f439b685a8
-  data.tar.gz: 1bcfc34aa0177d4b4e1831a79323840ca8f3a2306407bfebfaa45f5a9b300984e71637dab21790801933be1dcfa4f43dfa0873173a4107d91a9d595ce493ec3a
+  metadata.gz: c533c2238d722afdcb4c43c1e395df198e9ec37ce47f680b12f6b1d208a6fef7d2186919f356082371f28a716407a58c008f8482c50d0d33529828e7a27d4790
+  data.tar.gz: d8161add55bb09b8a6a44052640342beba85f74fb3c6aee124a48816628247aa7df3bb672d9061579d95d82a0e92b13cd3d2a25e22bff744802214af79d95661
data/Gemfile CHANGED
@@ -3,6 +3,7 @@ source 'file:///var/cache/rubygems'
 
 gem 'sequel'
 gem 'fastandand'
+gem 'pry'
 
 # Specify your gem's dependencies in wyrm.gemspec
 gemspec
data/lib/wyrm/db_pump.rb CHANGED
@@ -6,12 +6,49 @@ require 'fastandand'
 
 Sequel.extension :migration, :schema_dumper, :pagination
 
-
 # TODO possibly use Gem::Package::TarWriter to write tar files
 # TODO when restoring, could use a SizeQueue to make sure the db is kept busy
 
 # TODO need to version the dumps, or something like that.
+# So the slowest-changing variables are the db, the io stream
+# and the page size.
+# table will change every call. Will IO stream change between
+# table changes? No. So a currying type approach will work.
+# Somebody must have done this before.
+# But table and io are often related (ie table going to one file)
+# TODO This really should be Wyrm::Hole. Or maybe Wyrm::Hole should
+# be the codec that connects two DbPumps, for direct transfer?
 class DbPump
+  # some codecs might ignore io, eg if a dbpump is talking to another dbpump
+  def initialize( db, table_name, io: STDOUT, codec: :marshal, page_size: 10000, dry_run: false )
+    self.codec = codec
+    self.db = db
+    self.table_name = table_name
+    self.io = io
+    self.page_size = page_size
+    self.dry_run = dry_run
+    yield self if block_given?
+  end
+
+  attr_accessor :io, :page_size, :dry_run
+
+  # These affect cached values
+  attr_reader :db, :table_name
+
+  def table_name=( name_sym )
+    @primary_keys = nil
+    @table_dataset = nil
+    @table_name = name_sym
+  end
+
+  def db=( other_db )
+    @primary_keys = nil
+    @table_dataset = nil
+    @db = other_db
+  end
+
+  def dry_run?; dry_run; end
+
   class RespondsTo
     def initialize( *methods )
       @methods = methods
@@ -22,9 +59,9 @@ class DbPump
     end
   end
 
-  def initialize( codec = :marshal )
+  def codec=( codec_thing )
     @codec =
-    case codec
+    case codec_thing
     when :yaml; YamlCodec.new
     when :marshal; MarshalCodec.new
     when Class
@@ -80,25 +117,26 @@ class DbPump
     @logger ||= Logger.new STDERR
   end
 
-  def primary_keys( db, table_name )
-    db.schema(table_name).select{|df| df.last[:primary_key]}.map{|df| df.first}
+  def primary_keys
+    @primary_keys ||= db.schema(table_name).select{|df| df.last[:primary_key]}.map{|df| df.first}
+  end
+
+  def table_dataset
+    @table_dataset ||= db[table_name.to_sym]
   end
 
   # TODO possibly use select from outer / inner join to
   # http://www.numerati.com/2012/06/26/reading-large-result-sets-with-hibernate-and-mysql/
   # because mysql is useless
-  def paginated_dump( table_name, options = {} )
-    options = OpenStruct.new( {io: STDOUT, page_size: 10000, dry_run: false}.merge( options.to_h ) )
-    pk = primary_keys options.db, table_name
-    options.db[table_name].order(*pk).each_page(options[:page_size]) do |page|
+  def paginated_dump
+    table_dataset.order(*primary_keys).each_page(page_size) do |page|
      logger.info page.sql
      page.each do |row|
-        unless options[:dry_run]
-          codec.encode row.values, options.io
+        unless dry_run?
+          codec.encode row.values, io
        end
      end
    end
-    options.io.flush
  end
 
   # have to use this for non-integer pks
@@ -111,23 +149,18 @@ class DbPump
   # inner join (select id from massive order by whatever limit m, n) limit
   # on full.id = limit.id
   # order by full.whatever
-  def inner_dump( table_name, options = {} )
-    options = OpenStruct.new( {io: STDOUT, page_size: 10000, dry_run: false}.merge( options.to_h ) )
-    pk = primary_keys options.db, table_name
-
-    table_dataset = options.db[table_name]
+  def inner_dump
     # could possibly overrride Dataset#paginate(page_no, page_size, record_count=nil)
-    0.step(table_dataset.count, options.page_size).each do |offset|
-      limit_dataset = table_dataset.select( *pk ).limit( options.page_size, offset ).order( *pk )
-      page = table_dataset.join( limit_dataset, Hash[ pk.map{|f| [f,f]} ] ).order( *pk ).qualify_to(table_name)
+    0.step(table_dataset.count, page_size).each do |offset|
+      limit_dataset = table_dataset.select( *primary_keys ).limit( page_size, offset ).order( *primary_keys )
+      page = table_dataset.join( limit_dataset, Hash[ primary_keys.map{|f| [f,f]} ] ).order( *primary_keys ).qualify_to(table_name)
      logger.info page.sql
      page.each do |row|
-        unless options[:dry_run]
-          codec.encode row.values, options.io
+        unless dry_run?
+          codec.encode row.values, io
        end
      end
    end
-    options.io.flush
  end
 
   # TODO need to also dump a first row containing useful stuff:
@@ -136,68 +169,61 @@ class DbPump
   # - source db url
   # - permissions?
   # These should all be in one object that can be Marshall.load-ed easily.
-  def dump( table_name, options = {} )
-    pk = primary_keys options[:db], table_name
+  def dump
     case
-    when pk.empty?
-      paginated_dump( table_name, options )
-    when pk.all?{|i| i == :id }
-      min_max_dump( table_name, options )
+    when primary_keys.empty?
+      paginated_dump
+    when primary_keys.all?{|i| i == :id }
+      min_max_dump
     else
-      inner_dump( table_name, options )
+      inner_dump
     end
+    io.flush
   end
 
   # could use this for integer pks
-  def min_max_dump( table_name, options = {} )
+  def min_max_dump
     # select max(id), min(id) from patents
     # and then split that up into 10000 size chunks. Not really important if there aren't exactly 10000
-    options = OpenStruct.new( {io: STDOUT, page_size: 10000, dry_run: false}.merge( options.to_h ) )
-    pk = primary_keys options.db, table_name
-
-    table_dataset = options.db[table_name]
     min, max = table_dataset.select{[min(id), max(id)]}.first.values
     return unless min && max
     # could possibly overrride Dataset#paginate(page_no, page_size, record_count=nil)
     # TODO definitely need to refactor this
 
     # will always include the last item because
-    (min..max).step(options.page_size).each do |offset|
-      page = table_dataset.where( id: offset...(offset+options.page_size) )
+    (min..max).step(page_size).each do |offset|
+      page = table_dataset.where( id: offset...(offset + page_size) )
      logger.info page.sql
      page.each do |row|
-        unless options[:dry_run]
-          codec.encode row.values, options.io
+        unless dry_run?
+          codec.encode row.values, io
        end
      end
    end
-    options.io.flush
  end
 
   # TODO possible memory issues here if the rows are big. May need to fork this.
   # TODO lazy evaluation
-  def restore( table_name, options = {} )
+  def restore( start_row: 0 )
     logger.info "restoring #{table_name}"
-    options = OpenStruct.new( {io: STDIN, page_size: 10000, start_row: 0, dry_run: false}.merge( options ) )
-    dataset = options.db[table_name.to_sym]
     # destination db should be same structure as incoming data
-    column_names = options.db.schema(table_name.to_sym).map( &:first )
+    column_names = db.schema(table_name.to_sym).map( &:first )
     first = ->(row){raise "schema mismatch" if row.size != column_names.size}
 
     rows_restored = 0
 
     # skip this many rows
-    options.start_row.times do
-      codec.decode( options.io ) {|row|}
+    start_row.times do
+      codec.decode( io ) {|row|}
     end
 
     # copy rows into db
-    while !options.io.eof?
+    while !io.eof?
      # fetch a page of rows
      rows_ary = []
      begin
-        options.page_size.times do |i|
-          codec.decode( options.io ) do |row|
+        page_size.times do |i|
+          codec.decode( io ) do |row|
            rows_ary << row
          end
          rows_restored += 1
@@ -207,8 +233,8 @@ class DbPump
      end
 
      # insert to db. Hopeful db support bulk insert, which Sequel will figure out
-      options.db.transaction do
-        dataset.import column_names, rows_ary
+      db.transaction do
+        table_dataset.import column_names, rows_ary
        yield rows_restored if block_given?
        logger.info "restored #{rows_restored}"
      end
@@ -217,194 +243,10 @@ class DbPump
     rows_restored
   end
 
-  def from_bz2( filename, db, table_name, options = {} )
+  def self.from_bz2( filename, db, table_name, options = {} )
     IO.popen( "pbzip2 -d -c #{filename}" ) do |io|
-      restore table_name, options.merge( io: io, db: db )
+      dbpump = DbPump.new db, table_name, io: io
+      dbpump.restore
     end
   end
 end
-
-# There are actually 2 sources for this:
-# one is the src db, the other is the dumped files
-# And the one that transfers live is another version
-class Schema
-  def initialize( src_db, dst_db = nil )
-    @src_db = src_db
-    @dst_db = dst_db
-  end
-
-  def schema_migration
-    @schema_migration ||= src_db.dump_schema_migration(:indexes=>false, :same_db => same_db)
-  end
-
-  def index_migration
-    @index_migration ||= src_db.dump_indexes_migration(:same_db => same_db)
-  end
-
-  def fk_migration
-    @fk_migration ||= src_db.dump_foreign_key_migration(:same_db => same_db)
-  end
-
-  def restore_migration
-    <<-EOF
-      require 'restore_migration'
-      Sequel.migration do
-        def db_pump
-        end
-
-        up do
-          restore_tables
-        end
-
-        down do
-          # from each table clear table
-          each_table do |table_name|
-            db_pump.restore table_name, io: io, db: db
-          end
-        end
-      end
-    EOF
-  end
-
-  attr_accessor :dst_db
-  attr_reader :src_db
-
-  def same_db
-    @dst_db.andand.database_type == @src_db.andand.database_type
-  end
-
-  def logger
-    @logger ||= Logger.new STDERR
-  end
-
-  # create the destination schema
-  def create
-    eval( @schema_migration ).apply dst_db, :up
-  end
-
-  # create indexes and foreign keys, and reset sequences
-  def index
-    logger.info "creating indexes"
-    eval(@index_migration).apply dst, :up
-    logger.info "creating foreign keys"
-    eval(@fk_migration).apply dst, :up
-
-    if dst.database_type == :postgres
-      logger.info "reset primary key sequences"
-      dst.tables.each{|t| dst.reset_primary_key_sequence(t)}
-      logger.info "Primary key sequences reset successfully"
-    end
-  end
-
-  def transfer_table( table_name, options = {} )
-    options = OpenStruct.new( {page_size: 10000, dry_run: false}.merge( options ) )
-    total_records = @src_db[table_name].count
-    logger.info "transferring #{total_records}"
-    column_names = @src_db.schema(table_name.to_sym).map( &:first )
-
-    @src_db[table_name].each_page(options.page_size) do |page|
-      logger.info "#{page.sql} of #{total_records}"
-      unless options.dry_run
-        @dst_db.transaction do
-          rows_ary = []
-          page.each do |row_hash|
-            rows_ary << row_hash.values
-          end
-          @dst_db[table_name.to_sym].import column_names, rows_ary
-        end
-      end
-    end
-  end
-
-  # copy the data in the tables
-  def transfer
-    create
-    transfer_tables
-    index
-  end
-
-  def dump_schema( container, options = {codec: :marshal} )
-    (container + '001_schema.rb').open('w') do |io|
-      io.write schema_migration
-    end
-
-    (container + '002_populate_tables.rb').open('w') do |io|
-      io.write restore_migration
-    end
-
-    (container + '003_indexes.rb').open('w') do |io|
-      io.write index_migration
-    end
-
-    (container + '004_foreign keys.rb').open('w') do |io|
-      io.write fk_migration
-    end
-  end
-
-  def load_migrations( container )
-    @schema_migration = eval (container + '001_schema.rb').read
-    @index_migration = eval (container + '003_indexes.rb').read
-    @fk_migration = eval (container + '004_foreign keys.rb').read
-  end
-
-  def dump_one_table( table_name, pathname, db_pump )
-    logger.info "dumping #{table_name} to #{pathname}"
-    fio = pathname.open('w')
-    # open subprocess in read-write mode
-    zio = IO.popen( "pbzip2 -z", 'r+' )
-    copier = Thread.new do
-      begin
-        IO.copy_stream zio, fio
-        logger.debug "finished stream copy"
-      ensure
-        fio.close
-      end
-    end
-
-    # generate the dump
-    db_pump.dump table_name, db: src_db, io: zio
-
-    # signal the copier thread to stop
-    zio.close_write
-    logger.debug 'finished dumping'
-    # wait for copier thread to
-    copier.join
-    logger.debug 'stream copy thread finished'
-  ensure
-    zio.close unless zio.closed?
-    fio.close unless fio.closed?
-  end
-
-  def dump_tables( container, options = {:codec => :marshal} )
-    container = Pathname(container)
-    db_pump = DbPump.new( options[:codec] )
-
-    src_db.tables.each do |table_name|
-      filename = container + "#{table_name}.dbp.bz2"
-      dump_one_table table_name, filename, db_pump
-    end
-  end
-
-  def restore_one_table( table_file, db_pump )
-    logger.info "restoring from #{table_file}"
-    table_name = table_file.basename.sub_ext('').sub_ext('').to_s.to_sym
-    # check if table has been restored already, and has the correct rows,
-    # otherwise pass in a start row.
-    db_pump.from_bz2 table_file, dst_db, table_name
-  end
-
-  def restore_tables( container, options = {:codec => :marshal} )
-    db_pump = DbPump.new( options[:codec] )
-    table_files = Pathname.glob Pathname(container) + '*dbp.bz2'
-    table_files.each{|table_file| restore_one_table table_file, db_pump}
-  end
-
-  def restore_tables( container, options = {:codec => :marshal} )
-    container = Pathname(container)
-    container.children
-  end
-
-  def self.transfer( src_db, dst_db )
-    new( src_db, dst_db ).transfer
-  end
-end
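A rough usage sketch (not part of the diff above): DbPump now carries db, table_name, io, codec, page_size and dry_run as instance state instead of threading an options hash through every call, so one pump can be re-pointed at successive tables. A minimal example assuming the wyrm gem is loaded and a Sequel connection is available; the database URL, output directory and file names are illustrative:

  require 'sequel'
  require 'wyrm/db_pump'

  db = Sequel.connect 'sqlite://example.sqlite3'   # hypothetical source db
  pump = DbPump.new db, nil, codec: :marshal       # table_name assigned per table, as DumpSchema does

  db.tables.each do |table|
    File.open( "/tmp/#{table}.dbp", 'wb' ) do |io|
      pump.table_name = table   # setter clears the cached primary_keys / table_dataset
      pump.io = io
      pump.dump                 # picks paginated_dump / min_max_dump / inner_dump from the pks, then flushes io
    end
  end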
data/lib/wyrm/dump_schema.rb CHANGED
@@ -1,17 +1,33 @@
+require 'logger'
+require 'wyrm/db_pump'
+
+class Object
+  def call_or_self( maybe_callable )
+    if maybe_callable.respond_to? :call
+      maybe_callable.call( self )
+    else
+      maybe_callable
+    end
+  end
+end
+
 # Dump a schema and compressed data from a db to a set of files
 # src_db = Sequel.connect "postgres://localhost:5454/lots"
 # ds = DumpSchema.new src_db, Pathname('/var/data/lots')
 # ds.dump_schema
 # ds.dump_tables
 class DumpSchema
-  def initialize( src_db, container = nil, options = {} )
-    @options = {:codec => :marshal}.merge( options )
-
+  def initialize( src_db, container = nil, pump: nil )
    @src_db = src_db
    @container = Pathname(container)
+    @pump = make_pump( pump )
  end
 
-  attr_reader :src_db, :container, :codec
+  attr_reader :src_db, :container, :pump
+
+  def make_pump( pump_thing )
+    call_or_self(pump_thing) || DbPump.new( src_db, nil )
+  end
 
   def schema_migration
     @schema_migration ||= src_db.dump_schema_migration(:indexes=>false, :same_db => same_db)
@@ -72,8 +88,7 @@ class DumpSchema
     end
   end
 
-  def dump_one_table( table_name, pathname, db_pump )
-    logger.info "dumping #{table_name} to #{pathname}"
+  def open_bz2( pathname )
    fio = pathname.open('w')
    # open subprocess in read-write mode
    zio = IO.popen( "pbzip2 -z", 'r+' )
@@ -86,8 +101,7 @@ class DumpSchema
      end
    end
 
-    # generate the dump
-    db_pump.dump table_name, db: src_db, io: zio
+    yield zio
 
    # signal the copier thread to stop
    zio.close_write
@@ -101,11 +115,15 @@ class DumpSchema
   end
 
   def dump_tables
-    db_pump = DbPump.new( @options[:codec] )
-
     src_db.tables.each do |table_name|
       filename = container + "#{table_name}.dbp.bz2"
-      dump_one_table table_name, filename, db_pump
+      logger.info "dumping #{table_name} to #{filename}"
+      open_bz2 filename do |zio|
+        # generate the dump
+        pump.table_name = table_name
+        pump.io = zio
+        pump.dump
+      end
     end
   end
 end
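The Object#call_or_self patch added at the top of this file is what lets DumpSchema accept either a ready-made pump or a factory as its pump: argument. Inside make_pump the receiver is the DumpSchema instance itself, so a lambda factory receives the dump schema and can read src_db from it, exactly as the new snippets/console.rb below does. A small sketch of the two call paths (not part of the diff), using an arbitrary receiver since the method is patched onto Object:

  require 'wyrm/dump_schema'   # defines Object#call_or_self

  # a non-callable argument is returned unchanged
  :anything.call_or_self( :marshal )          # => :marshal

  # a callable is invoked with the receiver as its argument
  :anything.call_or_self( ->(x){ x.to_s } )   # => "anything"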
data/lib/wyrm/restore_schema.rb CHANGED
@@ -17,7 +17,7 @@ class RestoreSchema
   attr_reader :dst_db
   attr_reader :options
   attr_reader :container
-  attr_reader :schema_migration, :index_migration
+  attr_reader :schema_migration, :index_migration, :fk_migration
 
   def logger
     @logger ||= Logger.new STDERR
@@ -48,17 +48,16 @@ class RestoreSchema
     eval( schema_migration ).apply dst_db, :up
   end
 
-  def restore_one_table( table_file, db_pump )
+  def restore_one_table( table_file )
     logger.info "restoring from #{table_file}"
     table_name = table_file.basename.sub_ext('').sub_ext('').to_s.to_sym
     # check if table has been restored already, and has the correct rows,
     # otherwise pass in a start row.
-    db_pump.from_bz2 table_file, dst_db, table_name
+    DbPump.from_bz2 table_file, dst_db, table_name
   end
 
   def restore_tables
-    db_pump = DbPump.new( options[:codec] )
     table_files = Pathname.glob Pathname(container) + '*dbp.bz2'
-    table_files.sort_by{|tf| tf.stat.size}.each{|table_file| restore_one_table table_file, db_pump}
+    table_files.sort_by{|tf| tf.stat.size}.each{|table_file| restore_one_table table_file}
   end
 end
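On the restore side, from_bz2 is now a class method that builds its own DbPump around the decompression stream, which is why restore_one_table no longer needs a pump argument. A minimal sketch (not part of the diff) assuming pbzip2 is installed and a dump file produced by DumpSchema exists; the connection URL, path and table name are illustrative:

  require 'sequel'
  require 'wyrm/db_pump'

  dst_db = Sequel.connect 'sqlite://restored.sqlite3'   # hypothetical destination db
  # spawns "pbzip2 -d -c", wraps the stream in a new DbPump and calls restore
  DbPump.from_bz2 '/tmp/test/positions.dbp.bz2', dst_db, :positions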
data/lib/wyrm/version.rb CHANGED
@@ -1,3 +1,3 @@
 module Wyrm
-  VERSION = "0.1.1"
+  VERSION = "0.1.2"
 end
data/snippets/console.rb ADDED
@@ -0,0 +1,12 @@
+require 'sequel'
+require 'sqlite3'
+require 'pathname'
+require 'wyrm/dump_schema.rb'
+
+db = Sequel.connect 'sqlite:/home/panic/.qtstalker/new-trading.sqlite3'
+
+# pump = DbPump.new db, :positions, codec: :yaml
+dumper = DumpSchema.new db, '/tmp/test', pump: lambda{|_| DbPump.new db, nil, codec: :yaml}
+dumper = DumpSchema.new db, '/tmp/test', pump: ->(dump_schema){ DbPump.new dump_schema.src_db, nil, codec: :yaml}
+dumper.dump_tables
+
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: wyrm
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.1.2
 platform: ruby
 authors:
 - John Anderson
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-05-13 00:00:00.000000000 Z
+date: 2013-07-02 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: sequel
86
86
  - lib/wyrm/restore_schema.rb
87
87
  - lib/wyrm/transferer.rb
88
88
  - lib/wyrm/version.rb
89
+ - snippets/console.rb
89
90
  - wyrm.gemspec
90
91
  homepage: https://github.com/djellemah/wyrm
91
92
  licenses:
@@ -107,7 +108,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.0.0.rc.2
+rubygems_version: 2.0.3
 signing_key:
 specification_version: 4
 summary: Transfer from one SQL database to another