wyrm 0.1.1 → 0.1.2

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries, and is provided for informational purposes only.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: a446734aac434cad29e28523bbf7ed431b796b29
-  data.tar.gz: ab49b40fcb5d172c2d588e53ab8eafb19eb5d53b
+  metadata.gz: 25ce5387e1498b4e6e76915889bbe11f5cb4e008
+  data.tar.gz: dc86f010e4fbb7da91f58ea7a0bb99b2faf80591
 SHA512:
-  metadata.gz: 3a574b2ceda6bb849b9dc0a61ba6ce7b2d9f22b6b1ab17aa2ca421c30a2158a72ee65ddb5a5dc50f6a22d50b9b20a32c090b58d6e78cf62acca415f439b685a8
-  data.tar.gz: 1bcfc34aa0177d4b4e1831a79323840ca8f3a2306407bfebfaa45f5a9b300984e71637dab21790801933be1dcfa4f43dfa0873173a4107d91a9d595ce493ec3a
+  metadata.gz: c533c2238d722afdcb4c43c1e395df198e9ec37ce47f680b12f6b1d208a6fef7d2186919f356082371f28a716407a58c008f8482c50d0d33529828e7a27d4790
+  data.tar.gz: d8161add55bb09b8a6a44052640342beba85f74fb3c6aee124a48816628247aa7df3bb672d9061579d95d82a0e92b13cd3d2a25e22bff744802214af79d95661
data/Gemfile CHANGED
@@ -3,6 +3,7 @@ source 'file:///var/cache/rubygems'
 
 gem 'sequel'
 gem 'fastandand'
+gem 'pry'
 
 # Specify your gem's dependencies in wyrm.gemspec
 gemspec
@@ -6,12 +6,49 @@ require 'fastandand'
 
 Sequel.extension :migration, :schema_dumper, :pagination
 
-
 # TODO possibly use Gem::Package::TarWriter to write tar files
 # TODO when restoring, could use a SizeQueue to make sure the db is kept busy
 
 # TODO need to version the dumps, or something like that.
+# So the slowest-changing variables are the db, the io stream
+# and the page size.
+# table will change every call. Will IO stream change between
+# table changes? No. So a currying type approach will work.
+# Somebody must have done this before.
+# But table and io are often related (ie table going to one file)
+# TODO This really should be Wyrm::Hole. Or maybe Wyrm::Hole should
+# be the codec that connects two DbPumps, for direct transfer?
 class DbPump
+  # some codecs might ignore io, eg if a dbpump is talking to another dbpump
+  def initialize( db, table_name, io: STDOUT, codec: :marshal, page_size: 10000, dry_run: false )
+    self.codec = codec
+    self.db = db
+    self.table_name = table_name
+    self.io = io
+    self.page_size = page_size
+    self.dry_run = dry_run
+    yield self if block_given?
+  end
+
+  attr_accessor :io, :page_size, :dry_run
+
+  # These affect cached values
+  attr_reader :db, :table_name
+
+  def table_name=( name_sym )
+    @primary_keys = nil
+    @table_dataset = nil
+    @table_name = name_sym
+  end
+
+  def db=( other_db )
+    @primary_keys = nil
+    @table_dataset = nil
+    @db = other_db
+  end
+
+  def dry_run?; dry_run; end
+
   class RespondsTo
     def initialize( *methods )
       @methods = methods
@@ -22,9 +59,9 @@ class DbPump
     end
   end
 
-  def initialize( codec = :marshal )
+  def codec=( codec_thing )
     @codec =
-    case codec
+    case codec_thing
     when :yaml; YamlCodec.new
     when :marshal; MarshalCodec.new
     when Class
@@ -80,25 +117,26 @@ class DbPump
     @logger ||= Logger.new STDERR
   end
 
-  def primary_keys( db, table_name )
-    db.schema(table_name).select{|df| df.last[:primary_key]}.map{|df| df.first}
+  def primary_keys
+    @primary_keys ||= db.schema(table_name).select{|df| df.last[:primary_key]}.map{|df| df.first}
+  end
+
+  def table_dataset
+    @table_dataset ||= db[table_name.to_sym]
   end
 
   # TODO possibly use select from outer / inner join to
   # http://www.numerati.com/2012/06/26/reading-large-result-sets-with-hibernate-and-mysql/
   # because mysql is useless
-  def paginated_dump( table_name, options = {} )
-    options = OpenStruct.new( {io: STDOUT, page_size: 10000, dry_run: false}.merge( options.to_h ) )
-    pk = primary_keys options.db, table_name
-    options.db[table_name].order(*pk).each_page(options[:page_size]) do |page|
+  def paginated_dump
+    table_dataset.order(*primary_keys).each_page(page_size) do |page|
       logger.info page.sql
       page.each do |row|
-        unless options[:dry_run]
-          codec.encode row.values, options.io
+        unless dry_run?
+          codec.encode row.values, io
         end
       end
     end
-    options.io.flush
   end
 
   # have to use this for non-integer pks
@@ -111,23 +149,18 @@ class DbPump
   # inner join (select id from massive order by whatever limit m, n) limit
   # on full.id = limit.id
   # order by full.whatever
-  def inner_dump( table_name, options = {} )
-    options = OpenStruct.new( {io: STDOUT, page_size: 10000, dry_run: false}.merge( options.to_h ) )
-    pk = primary_keys options.db, table_name
-
-    table_dataset = options.db[table_name]
+  def inner_dump
     # could possibly overrride Dataset#paginate(page_no, page_size, record_count=nil)
-    0.step(table_dataset.count, options.page_size).each do |offset|
-      limit_dataset = table_dataset.select( *pk ).limit( options.page_size, offset ).order( *pk )
-      page = table_dataset.join( limit_dataset, Hash[ pk.map{|f| [f,f]} ] ).order( *pk ).qualify_to(table_name)
+    0.step(table_dataset.count, page_size).each do |offset|
+      limit_dataset = table_dataset.select( *primary_keys ).limit( page_size, offset ).order( *primary_keys )
+      page = table_dataset.join( limit_dataset, Hash[ primary_keys.map{|f| [f,f]} ] ).order( *primary_keys ).qualify_to(table_name)
       logger.info page.sql
      page.each do |row|
-        unless options[:dry_run]
-          codec.encode row.values, options.io
+        unless dry_run?
+          codec.encode row.values, io
         end
       end
     end
-    options.io.flush
   end
 
   # TODO need to also dump a first row containing useful stuff:
@@ -136,68 +169,61 @@ class DbPump
   # - source db url
   # - permissions?
   # These should all be in one object that can be Marshall.load-ed easily.
-  def dump( table_name, options = {} )
-    pk = primary_keys options[:db], table_name
+  def dump
     case
-    when pk.empty?
-      paginated_dump( table_name, options )
-    when pk.all?{|i| i == :id }
-      min_max_dump( table_name, options )
+    when primary_keys.empty?
+      paginated_dump
+    when primary_keys.all?{|i| i == :id }
+      min_max_dump
     else
-      inner_dump( table_name, options )
+      inner_dump
     end
+    io.flush
   end
 
   # could use this for integer pks
-  def min_max_dump( table_name, options = {} )
+  def min_max_dump
     # select max(id), min(id) from patents
     # and then split that up into 10000 size chunks. Not really important if there aren't exactly 10000
-    options = OpenStruct.new( {io: STDOUT, page_size: 10000, dry_run: false}.merge( options.to_h ) )
-    pk = primary_keys options.db, table_name
-
-    table_dataset = options.db[table_name]
     min, max = table_dataset.select{[min(id), max(id)]}.first.values
     return unless min && max
     # could possibly overrride Dataset#paginate(page_no, page_size, record_count=nil)
     # TODO definitely need to refactor this
 
     # will always include the last item because
-    (min..max).step(options.page_size).each do |offset|
-      page = table_dataset.where( id: offset...(offset+options.page_size) )
+    (min..max).step(page_size).each do |offset|
+      page = table_dataset.where( id: offset...(offset + page_size) )
       logger.info page.sql
       page.each do |row|
-        unless options[:dry_run]
-          codec.encode row.values, options.io
+        unless dry_run?
+          codec.encode row.values, io
         end
       end
     end
-    options.io.flush
   end
 
   # TODO possible memory issues here if the rows are big. May need to fork this.
   # TODO lazy evaluation
-  def restore( table_name, options = {} )
+  def restore( start_row: 0 )
     logger.info "restoring #{table_name}"
-    options = OpenStruct.new( {io: STDIN, page_size: 10000, start_row: 0, dry_run: false}.merge( options ) )
-    dataset = options.db[table_name.to_sym]
     # destination db should be same structure as incoming data
-    column_names = options.db.schema(table_name.to_sym).map( &:first )
+    column_names = db.schema(table_name.to_sym).map( &:first )
     first = ->(row){raise "schema mismatch" if row.size != column_names.size}
 
     rows_restored = 0
 
     # skip this many rows
-    options.start_row.times do
-      codec.decode( options.io ) {|row|}
+    start_row.times do
+      codec.decode( io ) {|row|}
     end
 
     # copy rows into db
-    while !options.io.eof?
+    while !io.eof?
       # fetch a page of rows
       rows_ary = []
       begin
-        options.page_size.times do |i|
-          codec.decode( options.io ) do |row|
+        page_size.times do |i|
+          codec.decode( io ) do |row|
             rows_ary << row
           end
           rows_restored += 1
@@ -207,8 +233,8 @@ class DbPump
       end
 
       # insert to db. Hopeful db support bulk insert, which Sequel will figure out
-      options.db.transaction do
-        dataset.import column_names, rows_ary
+      db.transaction do
+        table_dataset.import column_names, rows_ary
         yield rows_restored if block_given?
         logger.info "restored #{rows_restored}"
       end
@@ -217,194 +243,10 @@ class DbPump
     rows_restored
   end
 
-  def from_bz2( filename, db, table_name, options = {} )
+  def self.from_bz2( filename, db, table_name, options = {} )
     IO.popen( "pbzip2 -d -c #{filename}" ) do |io|
-      restore table_name, options.merge( io: io, db: db )
+      dbpump = DbPump.new db, table_name, io: io
+      dbpump.restore
     end
   end
 end
-
-# There are actually 2 sources for this:
-# one is the src db, the other is the dumped files
-# And the one that transfers live is another version
-class Schema
-  def initialize( src_db, dst_db = nil )
-    @src_db = src_db
-    @dst_db = dst_db
-  end
-
-  def schema_migration
-    @schema_migration ||= src_db.dump_schema_migration(:indexes=>false, :same_db => same_db)
-  end
-
-  def index_migration
-    @index_migration ||= src_db.dump_indexes_migration(:same_db => same_db)
-  end
-
-  def fk_migration
-    @fk_migration ||= src_db.dump_foreign_key_migration(:same_db => same_db)
-  end
-
-  def restore_migration
-    <<-EOF
-      require 'restore_migration'
-      Sequel.migration do
-        def db_pump
-        end
-
-        up do
-          restore_tables
-        end
-
-        down do
-          # from each table clear table
-          each_table do |table_name|
-            db_pump.restore table_name, io: io, db: db
-          end
-        end
-      end
-    EOF
-  end
-
-  attr_accessor :dst_db
-  attr_reader :src_db
-
-  def same_db
-    @dst_db.andand.database_type == @src_db.andand.database_type
-  end
-
-  def logger
-    @logger ||= Logger.new STDERR
-  end
-
-  # create the destination schema
-  def create
-    eval( @schema_migration ).apply dst_db, :up
-  end
-
-  # create indexes and foreign keys, and reset sequences
-  def index
-    logger.info "creating indexes"
-    eval(@index_migration).apply dst, :up
-    logger.info "creating foreign keys"
-    eval(@fk_migration).apply dst, :up
-
-    if dst.database_type == :postgres
-      logger.info "reset primary key sequences"
-      dst.tables.each{|t| dst.reset_primary_key_sequence(t)}
-      logger.info "Primary key sequences reset successfully"
-    end
-  end
-
-  def transfer_table( table_name, options = {} )
-    options = OpenStruct.new( {page_size: 10000, dry_run: false}.merge( options ) )
-    total_records = @src_db[table_name].count
-    logger.info "transferring #{total_records}"
-    column_names = @src_db.schema(table_name.to_sym).map( &:first )
-
-    @src_db[table_name].each_page(options.page_size) do |page|
-      logger.info "#{page.sql} of #{total_records}"
-      unless options.dry_run
-        @dst_db.transaction do
-          rows_ary = []
-          page.each do |row_hash|
-            rows_ary << row_hash.values
-          end
-          @dst_db[table_name.to_sym].import column_names, rows_ary
-        end
-      end
-    end
-  end
-
-  # copy the data in the tables
-  def transfer
-    create
-    transfer_tables
-    index
-  end
-
-  def dump_schema( container, options = {codec: :marshal} )
-    (container + '001_schema.rb').open('w') do |io|
-      io.write schema_migration
-    end
-
-    (container + '002_populate_tables.rb').open('w') do |io|
-      io.write restore_migration
-    end
-
-    (container + '003_indexes.rb').open('w') do |io|
-      io.write index_migration
-    end
-
-    (container + '004_foreign keys.rb').open('w') do |io|
-      io.write fk_migration
-    end
-  end
-
-  def load_migrations( container )
-    @schema_migration = eval (container + '001_schema.rb').read
-    @index_migration = eval (container + '003_indexes.rb').read
-    @fk_migration = eval (container + '004_foreign keys.rb').read
-  end
-
-  def dump_one_table( table_name, pathname, db_pump )
-    logger.info "dumping #{table_name} to #{pathname}"
-    fio = pathname.open('w')
-    # open subprocess in read-write mode
-    zio = IO.popen( "pbzip2 -z", 'r+' )
-    copier = Thread.new do
-      begin
-        IO.copy_stream zio, fio
-        logger.debug "finished stream copy"
-      ensure
-        fio.close
-      end
-    end
-
-    # generate the dump
-    db_pump.dump table_name, db: src_db, io: zio
-
-    # signal the copier thread to stop
-    zio.close_write
-    logger.debug 'finished dumping'
-    # wait for copier thread to
-    copier.join
-    logger.debug 'stream copy thread finished'
-  ensure
-    zio.close unless zio.closed?
-    fio.close unless fio.closed?
-  end
-
-  def dump_tables( container, options = {:codec => :marshal} )
-    container = Pathname(container)
-    db_pump = DbPump.new( options[:codec] )
-
-    src_db.tables.each do |table_name|
-      filename = container + "#{table_name}.dbp.bz2"
-      dump_one_table table_name, filename, db_pump
-    end
-  end
-
-  def restore_one_table( table_file, db_pump )
-    logger.info "restoring from #{table_file}"
-    table_name = table_file.basename.sub_ext('').sub_ext('').to_s.to_sym
-    # check if table has been restored already, and has the correct rows,
-    # otherwise pass in a start row.
-    db_pump.from_bz2 table_file, dst_db, table_name
-  end
-
-  def restore_tables( container, options = {:codec => :marshal} )
-    db_pump = DbPump.new( options[:codec] )
-    table_files = Pathname.glob Pathname(container) + '*dbp.bz2'
-    table_files.each{|table_file| restore_one_table table_file, db_pump}
-  end
-
-  def restore_tables( container, options = {:codec => :marshal} )
-    container = Pathname(container)
-    container.children
-  end
-
-  def self.transfer( src_db, dst_db )
-    new( src_db, dst_db ).transfer
-  end
-end
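The net effect of the DbPump changes above is that the old per-call options hash is gone: db, table_name, io, codec, page_size and dry_run are now constructor arguments and plain accessors on the instance. A minimal usage sketch against the new API (the connection URL, file names and table names here are hypothetical, not part of the gem):

    require 'sequel'
    require 'wyrm/db_pump'

    src_db = Sequel.connect 'sqlite://src.sqlite3'        # hypothetical source db

    # state that used to travel in an options hash now lives on the pump
    pump = DbPump.new src_db, :positions, io: File.open('positions.dbp', 'wb'), codec: :yaml

    pump.dump   # picks paginated / min-max / inner dump from the table's primary keys, then flushes io

    # retarget the same pump; the table_name= setter clears the cached primary keys and dataset
    pump.table_name = :orders
    pump.io = File.open('orders.dbp', 'wb')
    pump.dump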
@@ -1,17 +1,33 @@
+require 'logger'
+require 'wyrm/db_pump'
+
+class Object
+  def call_or_self( maybe_callable )
+    if maybe_callable.respond_to? :call
+      maybe_callable.call( self )
+    else
+      maybe_callable
+    end
+  end
+end
+
 # Dump a schema and compressed data from a db to a set of files
 # src_db = Sequel.connect "postgres://localhost:5454/lots"
 # ds = DumpSchema.new src_db, Pathname('/var/data/lots')
 # ds.dump_schema
 # ds.dump_tables
 class DumpSchema
-  def initialize( src_db, container = nil, options = {} )
-    @options = {:codec => :marshal}.merge( options )
-
+  def initialize( src_db, container = nil, pump: nil )
     @src_db = src_db
     @container = Pathname(container)
+    @pump = make_pump( pump )
   end
 
-  attr_reader :src_db, :container, :codec
+  attr_reader :src_db, :container, :pump
+
+  def make_pump( pump_thing )
+    call_or_self(pump_thing) || DbPump.new( src_db, nil )
+  end
 
   def schema_migration
     @schema_migration ||= src_db.dump_schema_migration(:indexes=>false, :same_db => same_db)
@@ -72,8 +88,7 @@ class DumpSchema
     end
   end
 
-  def dump_one_table( table_name, pathname, db_pump )
-    logger.info "dumping #{table_name} to #{pathname}"
+  def open_bz2( pathname )
     fio = pathname.open('w')
     # open subprocess in read-write mode
     zio = IO.popen( "pbzip2 -z", 'r+' )
@@ -86,8 +101,7 @@ class DumpSchema
       end
     end
 
-    # generate the dump
-    db_pump.dump table_name, db: src_db, io: zio
+    yield zio
 
     # signal the copier thread to stop
     zio.close_write
@@ -101,11 +115,15 @@ class DumpSchema
   end
 
   def dump_tables
-    db_pump = DbPump.new( @options[:codec] )
-
     src_db.tables.each do |table_name|
       filename = container + "#{table_name}.dbp.bz2"
-      dump_one_table table_name, filename, db_pump
+      logger.info "dumping #{table_name} to #{filename}"
+      open_bz2 filename do |zio|
+        # generate the dump
+        pump.table_name = table_name
+        pump.io = zio
+        pump.dump
+      end
     end
   end
 end
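The Object#call_or_self patch added at the top of this file is what lets the new pump: keyword accept either a ready-made DbPump or a callable; make_pump passes the argument through it and falls back to a default DbPump when it comes back nil. A small illustration of the idiom (the receiver and values are arbitrary examples, not code from the gem):

    # with the Object#call_or_self patch loaded:
    5.call_or_self( ->(n){ n * 2 } )   # => 10  (callables are invoked with the receiver)
    5.call_or_self( 42 )               # => 42  (non-callables pass straight through)
    5.call_or_self( nil )              # => nil (so make_pump can fall back with `|| DbPump.new( src_db, nil )`)

The callable form receives the DumpSchema instance itself, as the new snippets/console.rb further down demonstrates.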
@@ -17,7 +17,7 @@ class RestoreSchema
   attr_reader :dst_db
   attr_reader :options
   attr_reader :container
-  attr_reader :schema_migration, :index_migration
+  attr_reader :schema_migration, :index_migration, :fk_migration
 
   def logger
     @logger ||= Logger.new STDERR
@@ -48,17 +48,16 @@ class RestoreSchema
     eval( schema_migration ).apply dst_db, :up
   end
 
-  def restore_one_table( table_file, db_pump )
+  def restore_one_table( table_file )
     logger.info "restoring from #{table_file}"
     table_name = table_file.basename.sub_ext('').sub_ext('').to_s.to_sym
     # check if table has been restored already, and has the correct rows,
     # otherwise pass in a start row.
-    db_pump.from_bz2 table_file, dst_db, table_name
+    DbPump.from_bz2 table_file, dst_db, table_name
   end
 
   def restore_tables
-    db_pump = DbPump.new( options[:codec] )
     table_files = Pathname.glob Pathname(container) + '*dbp.bz2'
-    table_files.sort_by{|tf| tf.stat.size}.each{|table_file| restore_one_table table_file, db_pump}
+    table_files.sort_by{|tf| tf.stat.size}.each{|table_file| restore_one_table table_file}
   end
 end
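With from_bz2 now a class method that builds its own DbPump, restoring a single dumped table no longer needs a pre-built pump, which is exactly what restore_one_table above relies on. A sketch with a hypothetical destination database and dump path (pbzip2 must be on the PATH, as on the dump side); note that the options argument of from_bz2 is accepted but not forwarded in this version, so the default :marshal codec is always used:

    require 'sequel'
    require 'wyrm/db_pump'

    dst_db = Sequel.connect 'sqlite://restored.sqlite3'   # hypothetical destination db

    # decompresses via `pbzip2 -d -c` and streams the rows into dst_db[:positions]
    DbPump.from_bz2 '/tmp/dump/positions.dbp.bz2', dst_db, :positions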
@@ -1,3 +1,3 @@
 module Wyrm
-  VERSION = "0.1.1"
+  VERSION = "0.1.2"
 end
@@ -0,0 +1,12 @@
+require 'sequel'
+require 'sqlite3'
+require 'pathname'
+require 'wyrm/dump_schema.rb'
+
+db = Sequel.connect 'sqlite:/home/panic/.qtstalker/new-trading.sqlite3'
+
+# pump = DbPump.new db, :positions, codec: :yaml
+dumper = DumpSchema.new db, '/tmp/test', pump: lambda{|_| DbPump.new db, nil, codec: :yaml}
+dumper = DumpSchema.new db, '/tmp/test', pump: ->(dump_schema){ DbPump.new dump_schema.src_db, nil, codec: :yaml}
+dumper.dump_tables
+
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: wyrm
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.1.2
 platform: ruby
 authors:
 - John Anderson
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-05-13 00:00:00.000000000 Z
+date: 2013-07-02 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: sequel
@@ -86,6 +86,7 @@ files:
 - lib/wyrm/restore_schema.rb
 - lib/wyrm/transferer.rb
 - lib/wyrm/version.rb
+- snippets/console.rb
 - wyrm.gemspec
 homepage: https://github.com/djellemah/wyrm
 licenses:
@@ -107,7 +108,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.0.0.rc.2
+rubygems_version: 2.0.3
 signing_key:
 specification_version: 4
 summary: Transfer from one SQL database to another