wyrm 0.1.1 → 0.1.2

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: a446734aac434cad29e28523bbf7ed431b796b29
-  data.tar.gz: ab49b40fcb5d172c2d588e53ab8eafb19eb5d53b
+  metadata.gz: 25ce5387e1498b4e6e76915889bbe11f5cb4e008
+  data.tar.gz: dc86f010e4fbb7da91f58ea7a0bb99b2faf80591
 SHA512:
-  metadata.gz: 3a574b2ceda6bb849b9dc0a61ba6ce7b2d9f22b6b1ab17aa2ca421c30a2158a72ee65ddb5a5dc50f6a22d50b9b20a32c090b58d6e78cf62acca415f439b685a8
-  data.tar.gz: 1bcfc34aa0177d4b4e1831a79323840ca8f3a2306407bfebfaa45f5a9b300984e71637dab21790801933be1dcfa4f43dfa0873173a4107d91a9d595ce493ec3a
+  metadata.gz: c533c2238d722afdcb4c43c1e395df198e9ec37ce47f680b12f6b1d208a6fef7d2186919f356082371f28a716407a58c008f8482c50d0d33529828e7a27d4790
+  data.tar.gz: d8161add55bb09b8a6a44052640342beba85f74fb3c6aee124a48816628247aa7df3bb672d9061579d95d82a0e92b13cd3d2a25e22bff744802214af79d95661
data/Gemfile CHANGED
@@ -3,6 +3,7 @@ source 'file:///var/cache/rubygems'
 
 gem 'sequel'
 gem 'fastandand'
+gem 'pry'
 
 # Specify your gem's dependencies in wyrm.gemspec
 gemspec
data/lib/wyrm/db_pump.rb CHANGED
@@ -6,12 +6,49 @@ require 'fastandand'
 
 Sequel.extension :migration, :schema_dumper, :pagination
 
-
 # TODO possibly use Gem::Package::TarWriter to write tar files
 # TODO when restoring, could use a SizeQueue to make sure the db is kept busy
 
 # TODO need to version the dumps, or something like that.
+# So the slowest-changing variables are the db, the io stream
+# and the page size.
+# table will change every call. Will IO stream change between
+# table changes? No. So a currying type approach will work.
+# Somebody must have done this before.
+# But table and io are often related (ie table going to one file)
+# TODO This really should be Wyrm::Hole. Or maybe Wyrm::Hole should
+# be the codec that connects two DbPumps, for direct transfer?
 class DbPump
+  # some codecs might ignore io, eg if a dbpump is talking to another dbpump
+  def initialize( db, table_name, io: STDOUT, codec: :marshal, page_size: 10000, dry_run: false )
+    self.codec = codec
+    self.db = db
+    self.table_name = table_name
+    self.io = io
+    self.page_size = page_size
+    self.dry_run = dry_run
+    yield self if block_given?
+  end
+
+  attr_accessor :io, :page_size, :dry_run
+
+  # These affect cached values
+  attr_reader :db, :table_name
+
+  def table_name=( name_sym )
+    @primary_keys = nil
+    @table_dataset = nil
+    @table_name = name_sym
+  end
+
+  def db=( other_db )
+    @primary_keys = nil
+    @table_dataset = nil
+    @db = other_db
+  end
+
+  def dry_run?; dry_run; end
+
   class RespondsTo
     def initialize( *methods )
       @methods = methods
@@ -22,9 +59,9 @@ class DbPump
     end
   end
 
-  def initialize( codec = :marshal )
+  def codec=( codec_thing )
     @codec =
-    case codec
+    case codec_thing
     when :yaml; YamlCodec.new
     when :marshal; MarshalCodec.new
     when Class
@@ -80,25 +117,26 @@ class DbPump
     @logger ||= Logger.new STDERR
   end
 
-  def primary_keys( db, table_name )
-    db.schema(table_name).select{|df| df.last[:primary_key]}.map{|df| df.first}
+  def primary_keys
+    @primary_keys ||= db.schema(table_name).select{|df| df.last[:primary_key]}.map{|df| df.first}
+  end
+
+  def table_dataset
+    @table_dataset ||= db[table_name.to_sym]
   end
 
   # TODO possibly use select from outer / inner join to
   # http://www.numerati.com/2012/06/26/reading-large-result-sets-with-hibernate-and-mysql/
   # because mysql is useless
-  def paginated_dump( table_name, options = {} )
-    options = OpenStruct.new( {io: STDOUT, page_size: 10000, dry_run: false}.merge( options.to_h ) )
-    pk = primary_keys options.db, table_name
-    options.db[table_name].order(*pk).each_page(options[:page_size]) do |page|
+  def paginated_dump
+    table_dataset.order(*primary_keys).each_page(page_size) do |page|
      logger.info page.sql
      page.each do |row|
-        unless options[:dry_run]
-          codec.encode row.values, options.io
+        unless dry_run?
+          codec.encode row.values, io
        end
      end
    end
-    options.io.flush
  end
 
   # have to use this for non-integer pks
@@ -111,23 +149,18 @@ class DbPump
   # inner join (select id from massive order by whatever limit m, n) limit
   # on full.id = limit.id
   # order by full.whatever
-  def inner_dump( table_name, options = {} )
-    options = OpenStruct.new( {io: STDOUT, page_size: 10000, dry_run: false}.merge( options.to_h ) )
-    pk = primary_keys options.db, table_name
-
-    table_dataset = options.db[table_name]
+  def inner_dump
     # could possibly overrride Dataset#paginate(page_no, page_size, record_count=nil)
-    0.step(table_dataset.count, options.page_size).each do |offset|
-      limit_dataset = table_dataset.select( *pk ).limit( options.page_size, offset ).order( *pk )
-      page = table_dataset.join( limit_dataset, Hash[ pk.map{|f| [f,f]} ] ).order( *pk ).qualify_to(table_name)
+    0.step(table_dataset.count, page_size).each do |offset|
+      limit_dataset = table_dataset.select( *primary_keys ).limit( page_size, offset ).order( *primary_keys )
+      page = table_dataset.join( limit_dataset, Hash[ primary_keys.map{|f| [f,f]} ] ).order( *primary_keys ).qualify_to(table_name)
      logger.info page.sql
      page.each do |row|
-        unless options[:dry_run]
-          codec.encode row.values, options.io
+        unless dry_run?
+          codec.encode row.values, io
        end
      end
    end
-    options.io.flush
  end
 
   # TODO need to also dump a first row containing useful stuff:
@@ -136,68 +169,61 @@ class DbPump
   # - source db url
   # - permissions?
   # These should all be in one object that can be Marshall.load-ed easily.
-  def dump( table_name, options = {} )
-    pk = primary_keys options[:db], table_name
+  def dump
     case
-    when pk.empty?
-      paginated_dump( table_name, options )
-    when pk.all?{|i| i == :id }
-      min_max_dump( table_name, options )
+    when primary_keys.empty?
+      paginated_dump
+    when primary_keys.all?{|i| i == :id }
+      min_max_dump
     else
-      inner_dump( table_name, options )
+      inner_dump
     end
+    io.flush
   end
 
   # could use this for integer pks
-  def min_max_dump( table_name, options = {} )
+  def min_max_dump
     # select max(id), min(id) from patents
     # and then split that up into 10000 size chunks. Not really important if there aren't exactly 10000
-    options = OpenStruct.new( {io: STDOUT, page_size: 10000, dry_run: false}.merge( options.to_h ) )
-    pk = primary_keys options.db, table_name
-
-    table_dataset = options.db[table_name]
     min, max = table_dataset.select{[min(id), max(id)]}.first.values
     return unless min && max
     # could possibly overrride Dataset#paginate(page_no, page_size, record_count=nil)
     # TODO definitely need to refactor this
 
     # will always include the last item because
-    (min..max).step(options.page_size).each do |offset|
-      page = table_dataset.where( id: offset...(offset+options.page_size) )
+    (min..max).step(page_size).each do |offset|
+      page = table_dataset.where( id: offset...(offset + page_size) )
      logger.info page.sql
      page.each do |row|
-        unless options[:dry_run]
-          codec.encode row.values, options.io
+        unless dry_run?
+          codec.encode row.values, io
        end
      end
    end
-    options.io.flush
  end
 
   # TODO possible memory issues here if the rows are big. May need to fork this.
   # TODO lazy evaluation
-  def restore( table_name, options = {} )
+  def restore( start_row: 0 )
     logger.info "restoring #{table_name}"
-    options = OpenStruct.new( {io: STDIN, page_size: 10000, start_row: 0, dry_run: false}.merge( options ) )
-    dataset = options.db[table_name.to_sym]
     # destination db should be same structure as incoming data
-    column_names = options.db.schema(table_name.to_sym).map( &:first )
+    column_names = db.schema(table_name.to_sym).map( &:first )
     first = ->(row){raise "schema mismatch" if row.size != column_names.size}
 
     rows_restored = 0
 
     # skip this many rows
-    options.start_row.times do
-      codec.decode( options.io ) {|row|}
+    start_row.times do
+      codec.decode( io ) {|row|}
     end
 
     # copy rows into db
-    while !options.io.eof?
+    while !io.eof?
      # fetch a page of rows
      rows_ary = []
      begin
-        options.page_size.times do |i|
-          codec.decode( options.io ) do |row|
+        page_size.times do |i|
+          codec.decode( io ) do |row|
            rows_ary << row
          end
          rows_restored += 1
@@ -207,8 +233,8 @@ class DbPump
      end
 
      # insert to db. Hopeful db support bulk insert, which Sequel will figure out
-      options.db.transaction do
-        dataset.import column_names, rows_ary
+      db.transaction do
+        table_dataset.import column_names, rows_ary
        yield rows_restored if block_given?
        logger.info "restored #{rows_restored}"
      end
@@ -217,194 +243,10 @@ class DbPump
     rows_restored
   end
 
-  def from_bz2( filename, db, table_name, options = {} )
+  def self.from_bz2( filename, db, table_name, options = {} )
     IO.popen( "pbzip2 -d -c #{filename}" ) do |io|
-      restore table_name, options.merge( io: io, db: db )
+      dbpump = DbPump.new db, table_name, io: io
+      dbpump.restore
     end
   end
 end
-
-# There are actually 2 sources for this:
-# one is the src db, the other is the dumped files
-# And the one that transfers live is another version
-class Schema
-  def initialize( src_db, dst_db = nil )
-    @src_db = src_db
-    @dst_db = dst_db
-  end
-
-  def schema_migration
-    @schema_migration ||= src_db.dump_schema_migration(:indexes=>false, :same_db => same_db)
-  end
-
-  def index_migration
-    @index_migration ||= src_db.dump_indexes_migration(:same_db => same_db)
-  end
-
-  def fk_migration
-    @fk_migration ||= src_db.dump_foreign_key_migration(:same_db => same_db)
-  end
-
-  def restore_migration
-    <<-EOF
-      require 'restore_migration'
-      Sequel.migration do
-        def db_pump
-        end
-
-        up do
-          restore_tables
-        end
-
-        down do
-          # from each table clear table
-          each_table do |table_name|
-            db_pump.restore table_name, io: io, db: db
-          end
-        end
-      end
-    EOF
-  end
-
-  attr_accessor :dst_db
-  attr_reader :src_db
-
-  def same_db
-    @dst_db.andand.database_type == @src_db.andand.database_type
-  end
-
-  def logger
-    @logger ||= Logger.new STDERR
-  end
-
-  # create the destination schema
-  def create
-    eval( @schema_migration ).apply dst_db, :up
-  end
-
-  # create indexes and foreign keys, and reset sequences
-  def index
-    logger.info "creating indexes"
-    eval(@index_migration).apply dst, :up
-    logger.info "creating foreign keys"
-    eval(@fk_migration).apply dst, :up
-
-    if dst.database_type == :postgres
-      logger.info "reset primary key sequences"
-      dst.tables.each{|t| dst.reset_primary_key_sequence(t)}
-      logger.info "Primary key sequences reset successfully"
-    end
-  end
-
-  def transfer_table( table_name, options = {} )
-    options = OpenStruct.new( {page_size: 10000, dry_run: false}.merge( options ) )
-    total_records = @src_db[table_name].count
-    logger.info "transferring #{total_records}"
-    column_names = @src_db.schema(table_name.to_sym).map( &:first )
-
-    @src_db[table_name].each_page(options.page_size) do |page|
-      logger.info "#{page.sql} of #{total_records}"
-      unless options.dry_run
-        @dst_db.transaction do
-          rows_ary = []
-          page.each do |row_hash|
-            rows_ary << row_hash.values
-          end
-          @dst_db[table_name.to_sym].import column_names, rows_ary
-        end
-      end
-    end
-  end
-
-  # copy the data in the tables
-  def transfer
-    create
-    transfer_tables
-    index
-  end
-
-  def dump_schema( container, options = {codec: :marshal} )
-    (container + '001_schema.rb').open('w') do |io|
-      io.write schema_migration
-    end
-
-    (container + '002_populate_tables.rb').open('w') do |io|
-      io.write restore_migration
-    end
-
-    (container + '003_indexes.rb').open('w') do |io|
-      io.write index_migration
-    end
-
-    (container + '004_foreign keys.rb').open('w') do |io|
-      io.write fk_migration
-    end
-  end
-
-  def load_migrations( container )
-    @schema_migration = eval (container + '001_schema.rb').read
-    @index_migration = eval (container + '003_indexes.rb').read
-    @fk_migration = eval (container + '004_foreign keys.rb').read
-  end
-
-  def dump_one_table( table_name, pathname, db_pump )
-    logger.info "dumping #{table_name} to #{pathname}"
-    fio = pathname.open('w')
-    # open subprocess in read-write mode
-    zio = IO.popen( "pbzip2 -z", 'r+' )
-    copier = Thread.new do
-      begin
-        IO.copy_stream zio, fio
-        logger.debug "finished stream copy"
-      ensure
-        fio.close
-      end
-    end
-
-    # generate the dump
-    db_pump.dump table_name, db: src_db, io: zio
-
-    # signal the copier thread to stop
-    zio.close_write
-    logger.debug 'finished dumping'
-    # wait for copier thread to
-    copier.join
-    logger.debug 'stream copy thread finished'
-  ensure
-    zio.close unless zio.closed?
-    fio.close unless fio.closed?
-  end
-
-  def dump_tables( container, options = {:codec => :marshal} )
-    container = Pathname(container)
-    db_pump = DbPump.new( options[:codec] )
-
-    src_db.tables.each do |table_name|
-      filename = container + "#{table_name}.dbp.bz2"
-      dump_one_table table_name, filename, db_pump
-    end
-  end
-
-  def restore_one_table( table_file, db_pump )
-    logger.info "restoring from #{table_file}"
-    table_name = table_file.basename.sub_ext('').sub_ext('').to_s.to_sym
-    # check if table has been restored already, and has the correct rows,
-    # otherwise pass in a start row.
-    db_pump.from_bz2 table_file, dst_db, table_name
-  end
-
-  def restore_tables( container, options = {:codec => :marshal} )
-    db_pump = DbPump.new( options[:codec] )
-    table_files = Pathname.glob Pathname(container) + '*dbp.bz2'
-    table_files.each{|table_file| restore_one_table table_file, db_pump}
-  end
-
-  def restore_tables( container, options = {:codec => :marshal} )
-    container = Pathname(container)
-    container.children
-  end
-
-  def self.transfer( src_db, dst_db )
-    new( src_db, dst_db ).transfer
-  end
-end
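A rough usage sketch (not part of the diff above): DbPump now carries db, table_name, io, codec, page_size and dry_run as instance state instead of threading an options hash through every call, so one pump can be re-pointed at successive tables. A minimal example assuming the wyrm gem is loaded and a Sequel connection is available; the database URL, output directory and file names are illustrative:

  require 'sequel'
  require 'wyrm/db_pump'

  db = Sequel.connect 'sqlite://example.sqlite3'   # hypothetical source db
  pump = DbPump.new db, nil, codec: :marshal       # table_name assigned per table, as DumpSchema does

  db.tables.each do |table|
    File.open( "/tmp/#{table}.dbp", 'wb' ) do |io|
      pump.table_name = table   # setter clears the cached primary_keys / table_dataset
      pump.io = io
      pump.dump                 # picks paginated_dump / min_max_dump / inner_dump from the pks, then flushes io
    end
  end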
data/lib/wyrm/dump_schema.rb CHANGED
@@ -1,17 +1,33 @@
+require 'logger'
+require 'wyrm/db_pump'
+
+class Object
+  def call_or_self( maybe_callable )
+    if maybe_callable.respond_to? :call
+      maybe_callable.call( self )
+    else
+      maybe_callable
+    end
+  end
+end
+
 # Dump a schema and compressed data from a db to a set of files
 # src_db = Sequel.connect "postgres://localhost:5454/lots"
 # ds = DumpSchema.new src_db, Pathname('/var/data/lots')
 # ds.dump_schema
 # ds.dump_tables
 class DumpSchema
-  def initialize( src_db, container = nil, options = {} )
-    @options = {:codec => :marshal}.merge( options )
-
+  def initialize( src_db, container = nil, pump: nil )
    @src_db = src_db
    @container = Pathname(container)
+    @pump = make_pump( pump )
  end
 
-  attr_reader :src_db, :container, :codec
+  attr_reader :src_db, :container, :pump
+
+  def make_pump( pump_thing )
+    call_or_self(pump_thing) || DbPump.new( src_db, nil )
+  end
 
   def schema_migration
     @schema_migration ||= src_db.dump_schema_migration(:indexes=>false, :same_db => same_db)
@@ -72,8 +88,7 @@ class DumpSchema
     end
   end
 
-  def dump_one_table( table_name, pathname, db_pump )
-    logger.info "dumping #{table_name} to #{pathname}"
+  def open_bz2( pathname )
    fio = pathname.open('w')
    # open subprocess in read-write mode
    zio = IO.popen( "pbzip2 -z", 'r+' )
@@ -86,8 +101,7 @@ class DumpSchema
      end
    end
 
-    # generate the dump
-    db_pump.dump table_name, db: src_db, io: zio
+    yield zio
 
    # signal the copier thread to stop
    zio.close_write
@@ -101,11 +115,15 @@ class DumpSchema
   end
 
   def dump_tables
-    db_pump = DbPump.new( @options[:codec] )
-
     src_db.tables.each do |table_name|
       filename = container + "#{table_name}.dbp.bz2"
-      dump_one_table table_name, filename, db_pump
+      logger.info "dumping #{table_name} to #{filename}"
+      open_bz2 filename do |zio|
+        # generate the dump
+        pump.table_name = table_name
+        pump.io = zio
+        pump.dump
+      end
     end
   end
 end
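The Object#call_or_self patch added at the top of this file is what lets DumpSchema accept either a ready-made pump or a factory as its pump: argument. Inside make_pump the receiver is the DumpSchema instance itself, so a lambda factory receives the dump schema and can read src_db from it, exactly as the new snippets/console.rb below does. A small sketch of the two call paths (not part of the diff), using an arbitrary receiver since the method is patched onto Object:

  require 'wyrm/dump_schema'   # defines Object#call_or_self

  # a non-callable argument is returned unchanged
  :anything.call_or_self( :marshal )          # => :marshal

  # a callable is invoked with the receiver as its argument
  :anything.call_or_self( ->(x){ x.to_s } )   # => "anything"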
data/lib/wyrm/restore_schema.rb CHANGED
@@ -17,7 +17,7 @@ class RestoreSchema
   attr_reader :dst_db
   attr_reader :options
   attr_reader :container
-  attr_reader :schema_migration, :index_migration
+  attr_reader :schema_migration, :index_migration, :fk_migration
 
   def logger
     @logger ||= Logger.new STDERR
@@ -48,17 +48,16 @@ class RestoreSchema
     eval( schema_migration ).apply dst_db, :up
   end
 
-  def restore_one_table( table_file, db_pump )
+  def restore_one_table( table_file )
     logger.info "restoring from #{table_file}"
     table_name = table_file.basename.sub_ext('').sub_ext('').to_s.to_sym
     # check if table has been restored already, and has the correct rows,
     # otherwise pass in a start row.
-    db_pump.from_bz2 table_file, dst_db, table_name
+    DbPump.from_bz2 table_file, dst_db, table_name
   end
 
   def restore_tables
-    db_pump = DbPump.new( options[:codec] )
     table_files = Pathname.glob Pathname(container) + '*dbp.bz2'
-    table_files.sort_by{|tf| tf.stat.size}.each{|table_file| restore_one_table table_file, db_pump}
+    table_files.sort_by{|tf| tf.stat.size}.each{|table_file| restore_one_table table_file}
   end
 end
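On the restore side, from_bz2 is now a class method that builds its own DbPump around the decompression stream, which is why restore_one_table no longer needs a pump argument. A minimal sketch (not part of the diff) assuming pbzip2 is installed and a dump file produced by DumpSchema exists; the connection URL, path and table name are illustrative:

  require 'sequel'
  require 'wyrm/db_pump'

  dst_db = Sequel.connect 'sqlite://restored.sqlite3'   # hypothetical destination db
  # spawns "pbzip2 -d -c", wraps the stream in a new DbPump and calls restore
  DbPump.from_bz2 '/tmp/test/positions.dbp.bz2', dst_db, :positions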
data/lib/wyrm/version.rb CHANGED
@@ -1,3 +1,3 @@
 module Wyrm
-  VERSION = "0.1.1"
+  VERSION = "0.1.2"
 end
data/snippets/console.rb ADDED
@@ -0,0 +1,12 @@
+require 'sequel'
+require 'sqlite3'
+require 'pathname'
+require 'wyrm/dump_schema.rb'
+
+db = Sequel.connect 'sqlite:/home/panic/.qtstalker/new-trading.sqlite3'
+
+# pump = DbPump.new db, :positions, codec: :yaml
+dumper = DumpSchema.new db, '/tmp/test', pump: lambda{|_| DbPump.new db, nil, codec: :yaml}
+dumper = DumpSchema.new db, '/tmp/test', pump: ->(dump_schema){ DbPump.new dump_schema.src_db, nil, codec: :yaml}
+dumper.dump_tables
+
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: wyrm
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.1.2
 platform: ruby
 authors:
 - John Anderson
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-05-13 00:00:00.000000000 Z
+date: 2013-07-02 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: sequel
86
86
  - lib/wyrm/restore_schema.rb
87
87
  - lib/wyrm/transferer.rb
88
88
  - lib/wyrm/version.rb
89
+ - snippets/console.rb
89
90
  - wyrm.gemspec
90
91
  homepage: https://github.com/djellemah/wyrm
91
92
  licenses:
@@ -107,7 +108,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.0.0.rc.2
+rubygems_version: 2.0.3
 signing_key:
 specification_version: 4
 summary: Transfer from one SQL database to another