mysql_truck 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/TODO +1 -4
- data/bin/mysql_truck +6 -1
- data/lib/mysql_truck/dumper.rb +175 -0
- data/lib/mysql_truck/helper.rb +63 -0
- data/lib/mysql_truck/loader.rb +179 -0
- data/lib/mysql_truck/version.rb +1 -1
- data/lib/mysql_truck.rb +8 -269
- metadata +7 -4
data/TODO
CHANGED
@@ -1,8 +1,5 @@
 # TODO
 
-* Download/extract/import one table at a time.
-* Rework backup so that table schema and indexes are separated.
-* Rework restore so that schema is applied, data is imported, then indexes are
-  applied.
 * Add ability to manage number of backups on S3
 * Better error handling messages rather than stack dumps
+* Add ability to import a table as a tmp table, then rename.
data/bin/mysql_truck
CHANGED
@@ -58,7 +58,12 @@ parser = OptionParser.new do |opts|
 
   opts.on("-t", "--skip-tables TABLES",
           "List of tables to skip separated by commas.") do |tables|
-    options[:
+    options[:skip_data_for_tables] = tables.split(",")
+  end
+
+  opts.on("-e", "--exec-smartly",
+          "On dumping, do not dump tables that have already been dumped. On loading, if the files were already downloaded, do not redownload. This option allows for resuming a previous load/dump that failed.") do
+    options[:smartly] = true
   end
 
   opts.on_tail("-h", "--help", "Show this message") do
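The new -e / --exec-smartly switch simply sets options[:smartly]; both Dumper and Loader consult it through a private smartly? method (see the new files below). A minimal sketch of driving a resumable dump through the library API, with placeholder credentials and names (the bin script's plumbing from options to config is not shown in this hunk):

require "mysql_truck"

config = {
  :username => "root",
  :host     => "localhost",
  :database => "app_production",
  :bucket   => "my-backups",
  :s3_access_key        => ENV["S3_ACCESS_KEY"],
  :s3_secret_access_key => ENV["S3_SECRET_ACCESS_KEY"],
  :smartly  => true # set by -e / --exec-smartly
}

# With :smartly set, Dumper#dump_data re-checks which per-table files already
# exist and only redoes the missing stages, so a failed run can be resumed.
MysqlTruck::Dumper.new(config).dump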
data/lib/mysql_truck/dumper.rb
ADDED
@@ -0,0 +1,175 @@
+module MysqlTruck
+  class Dumper
+    include FileUtils
+    include Helper
+
+    REGEX = /,?\s*(UNIQUE)?\s*KEY\s`[A-Za-z\d_]+`\s*\([A-Za-z\d_,`]+\),?\s*/m
+
+    def initialize(config)
+      @config = config
+      @time = Time.now # Sets the directory for dump
+
+      initialize_s3
+      initialize_directories
+    end
+
+    def dump
+      dump_data
+      upload
+      remove_directories
+    end
+
+    def dump_data
+      tables.each do |table|
+        puts "Dumping #{table}..."
+        next if gzip_files_exist?(table) && smartly?
+
+        if dump_table?(table)
+
+          # This command creates a table_name.sql and a table_name.txt file
+          cmd = "mysqldump --quick -T #{tmp_path} "
+          cmd += csv_options
+          cmd += "#{db_connection_options} #{table}"
+          puts cmd
+          `#{cmd}`
+
+          # `mysqldump` creates files with .txt extensions, so we rename it.
+          mv filename(table)[:txt_file], filename(table)[:csv_file]
+        end
+
+        if split_schema_file?(table)
+          schema_contents = filename(table)[:schema_file].read
+
+          # Create schema with no indexes
+          File.open(filename(table)[:no_index_sql_file], 'w') do |f|
+            f.write(schema_contents.gsub(REGEX, ''))
+          end
+
+          # Create an alter table
+          indices = []
+          File.open(filename(table)[:index_sql_file], 'w') do |f|
+            f.write("ALTER TABLE #{table}\n")
+
+            schema_contents.gsub(/^,?\s*((UNIQUE)?\s*KEY\s`[A-Za-z\d_]+`\s*\([A-Za-z\d_,`]+\)),?\s*$/) do |part|
+              indices << $1
+            end
+            f.write(indices.collect {|i| "ADD #{i}"}.join(",\n"))
+          end
+        end
+
+        if gzip_files?(table)
+          puts "gzipping #{filename(table)[:no_index_sql_file]}."
+          `gzip #{filename(table)[:no_index_sql_file]}`
+
+          puts "gzipping #{filename(table)[:index_sql_file]}."
+          `gzip #{filename(table)[:index_sql_file]}`
+
+          puts "gziping #{filename(table)[:csv_file]}."
+          `gzip #{filename(table)[:csv_file]}`
+        end
+
+        puts "#{table} dumped.\n\n"
+      end
+    end
+
+    def upload
+      Dir["#{tmp_path}/*"].each do |file|
+        next if File.extname(file) != ".gz"
+        puts "Uploading #{file} ..."
+        upload_file file
+      end
+      puts "Finished uploading backups."
+    end
+
+    private
+
+    def smartly?
+      config[:smartly]
+    end
+
+    def upload_file(local_file)
+      path = Pathname.new(local_file)
+      s3_path = bucket_path.join(path.basename)
+      @bucket.put(s3_path, open(path), {}, nil, {
+        'x-amz-storage-class' => 'REDUCED_REDUNDANCY'
+      })
+    end
+
+    def tables
+      return config[:only_tables] if config[:only_tables]
+      unless @tables
+        res = `mysql #{db_connection_options} -e "SHOW TABLES"`
+        @tables = res.split[1..-1]
+      end
+      @tables
+    end
+
+    def bucket_path
+      @bucket_path ||= Pathname.new(bucket_dir).join(@time.strftime("%Y-%m-%d-%H-%M"))
+    end
+
+    def filename(table)
+      @table_filenames ||= {}
+      @table_filenames[table] ||= {
+        :schema_file => tmp_path.join("#{table}.sql"),
+        :no_index_sql_file => tmp_path.join("#{table}.no_index.sql"),
+        :index_sql_file => tmp_path.join("#{table}.indices.sql"),
+        :txt_file => tmp_path.join("#{table}.txt"),
+        :csv_file => tmp_path.join("#{table}.csv"),
+        :gz_no_index_sql_file => tmp_path.join("#{table}.no_index.sql.gz"),
+        :gz_index_sql_file => tmp_path.join("#{table}.indices.sql.gz"),
+        :gz_csv_file => tmp_path.join("#{table}.csv.gz"),
+      }
+    end
+
+    def gzip_files_exist?(table)
+      tmp_path.join("#{table}.sql.gz").file? &&
+        tmp_path.join("#{table}.no_index.sql.gz") &&
+        tmp_path.join("#{table}.indexes.sql.gz") &&
+        tmp_path.join("#{table}.csv.gz")
+    end
+
+
+    def dump_table?(table)
+      !smartly? ||
+        (
+          smartly? &&
+          !filename(table)[:schema_file].exist? &&
+          !filename(table)[:csv_file].exist? &&
+          !filename(table)[:no_index_sql_file].exist? &&
+          !filename(table)[:index_sql_file].exist? &&
+          !filename(table)[:gz_no_index_sql_file].exist? &&
+          !filename(table)[:gz_index_sql_file].exist? &&
+          !filename(table)[:gz_csv_file].exist?
+        )
+    end
+
+    def split_schema_file?(table)
+      !smartly? ||
+        (
+          smartly? &&
+          filename(table)[:schema_file].exist? &&
+          filename(table)[:csv_file].exist? &&
+          !filename(table)[:no_index_sql_file].exist? &&
+          !filename(table)[:index_sql_file].exist? &&
+          !filename(table)[:gz_no_index_sql_file].exist? &&
+          !filename(table)[:gz_index_sql_file].exist? &&
+          !filename(table)[:gz_csv_file].exist?
+        )
+    end
+
+    def gzip_files?(table)
+      !smartly? ||
+        (
+          smartly? &&
+          filename(table)[:schema_file].exist? &&
+          filename(table)[:csv_file].exist? &&
+          filename(table)[:no_index_sql_file].exist? &&
+          filename(table)[:index_sql_file].exist? &&
+          !filename(table)[:gz_no_index_sql_file].exist? &&
+          !filename(table)[:gz_index_sql_file].exist? &&
+          !filename(table)[:gz_csv_file].exist?
+        )
+    end
+  end # class Dumper
+end
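The heart of the new Dumper is the index split: REGEX removes KEY and UNIQUE KEY definitions from the CREATE TABLE that mysqldump produced, and the second pattern (with a capture group) collects those same definitions into a single ALTER TABLE so indexes can be applied after the data import. A standalone sketch of the stripping half, using a made-up table:

regex = /,?\s*(UNIQUE)?\s*KEY\s`[A-Za-z\d_]+`\s*\([A-Za-z\d_,`]+\),?\s*/m

schema = <<-SQL
CREATE TABLE `users` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `email` varchar(255) DEFAULT NULL,
  PRIMARY KEY (`id`),
  UNIQUE KEY `index_users_on_email` (`email`)
) ENGINE=InnoDB;
SQL

puts schema.gsub(regex, '')
# CREATE TABLE `users` (
#   `id` int(11) NOT NULL AUTO_INCREMENT,
#   `email` varchar(255) DEFAULT NULL,
#   PRIMARY KEY (`id`)) ENGINE=InnoDB;

PRIMARY KEY survives because the pattern requires a backtick-quoted key name right after KEY. One caveat visible in the diff itself: in gzip_files_exist?, only the first clause calls .file?; the other three build Pathname objects, which are always truthy, and the names it probes (#{table}.sql.gz and #{table}.indexes.sql.gz) are never produced by dump_data, which gzips the .no_index.sql, .indices.sql and .csv files. As far as this diff shows, the first clause is therefore always false, so the `next if gzip_files_exist?(table) && smartly?` guard never skips a table; resumption actually works through dump_table?, split_schema_file? and gzip_files?, which re-check the per-table files at each stage.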
data/lib/mysql_truck/helper.rb
ADDED
@@ -0,0 +1,63 @@
+module MysqlTruck
+  module Helper
+    include FileUtils
+
+    def config
+      @config
+    end
+
+    def initialize_s3
+      @s3 = RightAws::S3.new(
+        config[:s3_access_key],
+        config[:s3_secret_access_key])
+      @bucket = @s3.bucket(config[:bucket])
+    end
+
+    def db_connection_options
+      opts = %Q[ -u #{config[:username]} ]
+      opts += %Q[ -p"#{config[:password]}" ] unless config[:password].nil?
+      opts += %Q[ -h #{config[:host]} --default-character-set=utf8 ]
+      opts += %Q[ #{config[:database]} ]
+      opts
+    end
+
+    def local_host?
+      config[:host] == '127.0.0.1' || config[:host] == 'localhost'
+    end
+
+    def remote_host?
+      !local_host?
+    end
+
+    def csv_options
+      " --fields-enclosed-by=\\\" --fields-terminated-by=, "
+    end
+
+    def initialize_directories
+      mkdir_p base_path
+      mkdir_p tmp_path
+      chmod 0777, tmp_path
+    end
+
+    def remove_directories
+      rm_r tmp_path, :force => true
+    end
+
+    def tmp_path
+      raise "@time not initialized" unless @time
+      base_path.join(@time.strftime("%Y-%m-%d"))
+    end
+
+    def base_path
+      if config[:dump_dir]
+        config[:dump_dir].is_a?(Pathname) ? config[:dump_dir].join("mysqltruck") : Pathname.new(config[:dump_dir]).join("mysqltruck")
+      else
+        Pathname.new("/tmp/mysqltruck")
+      end
+    end
+
+    def bucket_dir
+      "mysql/#{config[:bucket_dir] || config[:database]}/"
+    end
+  end
+end
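Note the asymmetry between the two path schemes Helper and Dumper produce: S3 prefixes are minute-granular (judging by Dumper#bucket_path) while the local working directory is day-granular (tmp_path). Roughly, with placeholder names:

# On S3, one prefix per dump:
#   mysql/<database>/2012-05-30-04-15/users.no_index.sql.gz
#   mysql/<database>/2012-05-30-04-15/users.indices.sql.gz
#   mysql/<database>/2012-05-30-04-15/users.csv.gz
#
# Locally, under the default base path:
#   /tmp/mysqltruck/2012-05-30/users.sql          # raw mysqldump schema
#   /tmp/mysqltruck/2012-05-30/users.no_index.sql # schema minus KEY definitions
#   /tmp/mysqltruck/2012-05-30/users.indices.sql  # ALTER TABLE adding the keys
#   /tmp/mysqltruck/2012-05-30/users.csv          # table data

This is also why Loader#load_latest (below) splits all five date components out of the chosen backup prefix before calling initialize_directories: tmp_path only formats the first three.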
data/lib/mysql_truck/loader.rb
ADDED
@@ -0,0 +1,179 @@
+module MysqlTruck
+  class Loader
+    include Helper
+    include FileUtils
+
+    def initialize(config)
+      @config = config
+      initialize_s3
+    end
+
+    # only import schema for these tables
+    def skip_data_for_tables
+      config[:skip_data_for_tables] || []
+    end
+
+    # only import these tables schema+data
+    def only_tables
+      config[:only_tables] || []
+    end
+
+    def load_latest
+      prefix = backups.first
+
+      # Set directory where backup is downloaded to
+      @time = Time.new(*prefix.split("/").last.split("-"))
+      initialize_directories
+
+      puts "Downloading backups"
+      puts "-------------------"
+      @bucket.keys(:prefix => prefix).each do |key|
+        puts "\n#{key}"
+        next unless (filename = download_file(key))
+
+        # gunzip file
+        if tmp_path.join(filename).exist?
+          print " - Inflating #{filename} ... "
+          `gunzip -f #{tmp_path.join(filename)}`
+          print "complete.\n"
+        end
+      end
+
+      # Load data
+      puts "\nLoading schema and data by table"
+      puts "--------------------------------"
+      if remote_host?
+        import_cmd = "mysqlimport --local --compress #{db_connection_options}"
+      else
+        import_cmd = "mysqlimport #{db_connection_options}"
+      end
+      import_cmd += csv_options
+
+      # Find all .no_index.sql files and process
+      Dir["#{tmp_path}/*.no_index.sql"].each do |file|
+        table = File.basename(file, ".no_index.sql")
+        puts "\nProcessing #{table}"
+
+        schema_file = Pathname.new(file)
+        index_file = tmp_path.join("#{table}.indices.sql")
+        csv_file = tmp_path.join("#{table}.csv")
+
+
+        print " - Loading schema for #{table} ... "
+        cmd = "cat #{schema_file} | mysql #{db_connection_options}"
+        `#{cmd}`
+        print "complete.\n"
+
+        if csv_file.exist?
+          print " - Importing #{schema_file.basename(".sql")} ... "
+          `#{import_cmd} #{csv_file}`
+          print "complete.\n"
+        end
+
+        if index_file.exist?
+          print " - Adding indices for #{schema_file.basename(".no_index.sql")} ... "
+          cmd = "cat #{index_file} | mysql #{db_connection_options}"
+          `#{cmd}`
+          print "complete.\n"
+        end
+
+        schema_file.delete if schema_file.exist?
+        index_file.delete if index_file.exist?
+        csv_file.delete if csv_file.exist?
+      end
+
+      puts "Backup loaded."
+
+      # This isn't in an ensure block because we want to keep around
+      # downloads if there's a failure importing a table.
+      # remove_directories
+
+    rescue Exception => e
+      puts e.message
+      puts e.backtrace.join("\n")
+    end
+
+    def download_file(key)
+      filename = File.basename(key.name)
+
+      unless should_download_file?(filename)
+        puts " [ SKIP ]"
+        return
+      end
+
+      file = tmp_path.join(filename)
+      unzipped_file = tmp_path.join(file.basename(".gz"))
+      if !smartly? || (smartly? && !unzipped_file.exist?)
+        print " - Downloading... "
+
+        file.open("wb") do |f|
+          @bucket.s3.interface.get(@bucket.name, key.name) do |chunk|
+            f.write chunk
+          end
+        end
+
+        puts "complete."
+      else
+        puts " already downloaded."
+      end
+
+      filename
+    end
+
+    def should_download_file?(filename)
+      table_name = filename.gsub(/\..*\..*$/, '')
+
+      if only_tables.empty? and skip_data_for_tables.empty?
+        return true
+      end
+
+      # If we're targetting specific tables, then we always want both
+      # schema and csv files.
+      if !only_tables.empty?
+        return only_tables.include?(table_name)
+      end
+
+      if filename.match(/\.csv\.gz$/)
+        is_data = true
+        is_schema = false
+      else
+        is_data = false
+        is_schema = true
+      end
+
+      if !skip_data_for_tables.empty?
+        if is_schema or (is_data and !skip_data_for_tables.include?(table_name))
+          return true
+        end
+      end
+
+      false
+    end
+
+    # Get a list of backups stored on S3.
+    #
+    # Returns an array of s3 paths that look like:
+    #
+    #   mysql/YYYY-MM-DD
+    #
+    # Array elements are sorted with the latest date first.
+    def backups
+      unless @backups
+        @backups = []
+        # Backups are stored in the mysql/ directory
+        @bucket.s3.interface.incrementally_list_bucket(@bucket.name, {
+          :prefix => "#{bucket_dir}", :delimiter => "/"
+        }) do |item|
+          @backups += item[:common_prefixes]
+        end
+        @backups = @backups.sort { |a,b| b <=> a }
+      end
+      @backups
+    end
+
+    def smartly?
+      config[:smartly]
+    end
+
+  end # class Loader
+end
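Putting the restore side together, a minimal sketch against the API above (credentials, bucket and table names are placeholders):

require "mysql_truck"

loader = MysqlTruck::Loader.new(
  :username => "root",
  :host     => "127.0.0.1", # local_host?, so plain mysqlimport is used
  :database => "app_development",
  :bucket   => "my-backups",
  :s3_access_key        => ENV["S3_ACCESS_KEY"],
  :s3_secret_access_key => ENV["S3_SECRET_ACCESS_KEY"],
  :skip_data_for_tables => ["audit_logs"], # schema only for this table
  :smartly  => true # reuse files that were already downloaded and inflated
)

# Finds the newest prefix under mysql/app_development/, downloads and inflates
# the files, then per table: load the .no_index.sql schema, mysqlimport the
# .csv, and finally apply the .indices.sql ALTER TABLE.
loader.load_latest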
data/lib/mysql_truck/version.rb
CHANGED
data/lib/mysql_truck.rb
CHANGED
@@ -1,7 +1,12 @@
-require "mysql_truck/version"
 require "right_aws"
-require
-require
+require "fileutils"
+require "pathname"
+
+require "mysql_truck/version"
+require "mysql_truck/helper"
+require "mysql_truck/dumper"
+require "mysql_truck/loader"
+
 
 # MysqlTruck
 #
@@ -21,270 +26,4 @@ module MysqlTruck
       puts "Unknown action #{action}"
     end
   end
-
-  module Helper
-    include FileUtils
-
-    def config
-      @config
-    end
-
-    def initialize_s3
-      @s3 = RightAws::S3.new(
-        config[:s3_access_key],
-        config[:s3_secret_access_key])
-      @bucket = @s3.bucket(config[:bucket])
-    end
-
-    def db_connection_options
-      opts = %Q[ -u #{config[:username]} ]
-      opts += %Q[ -p"#{config[:password]}" ] unless config[:password].nil?
-      opts += %Q[ -h #{config[:host]} --default-character-set=utf8 ]
-      opts += %Q[ #{config[:database]} ]
-      opts
-    end
-
-    def csv_options
-      " --fields-enclosed-by=\\\" --fields-terminated-by=, "
-    end
-
-    def initialize_directories
-      mkdir_p base_path
-      mkdir_p tmp_path
-      chmod 0777, tmp_path
-    end
-
-    def remove_directories
-      rm_r tmp_path, :force => true
-    end
-
-    def tmp_path
-      raise "@time not initialized yet" unless @time
-      base_path.join(@time.strftime("%Y-%m-%d-%H-%M"))
-    end
-
-    def base_path
-      if config[:dump_dir]
-        config[:dump_dir].is_a?(Pathname) ? config[:dump_dir].join("mysqltruck") : Pathname.new(config[:dump_dir]).join("mysqltruck")
-      else
-        Pathname.new("/tmp/mysqltruck")
-      end
-    end
-
-    def bucket_dir
-      "mysql/#{config[:bucket_dir] || config[:database]}/"
-    end
-  end
-
-  class Dumper
-    include FileUtils
-    include Helper
-
-    def initialize(config)
-      @config = config
-      @time = Time.now # Sets the directory for dump
-
-      initialize_s3
-      initialize_directories
-    end
-
-    def dump
-      dump_data
-      upload
-
-    ensure
-      remove_directories
-    end
-
-    def dump_data
-      tables.each do |table|
-        schema_file = tmp_path.join("#{table}.sql")
-        csv_file = tmp_path.join("#{table}.txt")
-        puts "Dumping #{table}."
-
-        # This command creates a table_name.sql and a table_name.txt file
-        cmd = "mysqldump --quick -T #{tmp_path} "
-        cmd += csv_options
-        cmd += "#{db_connection_options} #{table}"
-        puts cmd
-        `#{cmd}`
-
-        # `mysqldump` creates files with .txt extensions, so we rename it.
-        path, file = csv_file.split
-        csv_file = path.join("#{file.basename(".txt")}.csv")
-        mv path.join(file), csv_file
-
-        puts "gziping #{schema_file}."
-        `gzip #{schema_file}`
-
-        puts "gziping #{csv_file}."
-        `gzip #{csv_file}`
-
-        puts "#{table} dumped.\n\n"
-      end
-    end
-
-    def upload
-      Dir["#{tmp_path}/*"].each do |file|
-        upload_file file
-      end
-      puts "Finished uploading backups."
-    end
-
-    private
-
-
-    def upload_file(local_file)
-      path = Pathname.new(local_file)
-      s3_path = bucket_path.join(path.basename)
-      @bucket.put(s3_path, open(path), {}, nil, {
-        'x-amz-storage-class' => 'REDUCED_REDUNDANCY'
-      })
-    end
-
-    def tables
-      unless @tables
-        res = `mysql #{db_connection_options} -e "SHOW TABLES"`
-        @tables = res.split[1..-1]
-      end
-      @tables
-    end
-
-    def bucket_path
-      @bucket_path ||= Pathname.new(bucket_dir).join(@time.strftime("%Y-%m-%d-%H-%M"))
-    end
-  end # class Dumper
-
-
-  class Loader
-    include Helper
-    include FileUtils
-
-    def initialize(config)
-      @config = config
-      initialize_s3
-    end
-
-    # only import schema for these tables
-    def skip_data_for_tables
-      config[:skip_data_for_tables] || []
-    end
-
-    # only import these tables schema+data
-    def only_tables
-      config[:only_tables] || []
-    end
-
-    def load_latest
-      prefix = backups.first
-
-      # Set directory where backup is downloaded to
-      @time = Time.new(*prefix.split("/").last.split("-"))
-      initialize_directories
-
-      puts "Downloading backups ..."
-      @bucket.keys(:prefix => prefix).each do |key|
-        next unless (filename = download_file(key))
-
-        # gunzip file
-        print " -- Inflating #{filename} ... "
-        `gunzip #{tmp_path.join(filename)}`
-        print "complete.\n"
-      end
-
-      # Load data
-      puts "Loading schema and data by table"
-      import_cmd = "mysqlimport #{db_connection_options}"
-      import_cmd += csv_options
-      Dir["#{tmp_path}/*.sql"].each do |file|
-        print " - Loading schema for #{File.basename(file, ".sql")} ... "
-        cmd = "cat #{file} | mysql #{db_connection_options}"
-        `#{cmd}`
-        print "complete.\n"
-
-        csv_file = "#{tmp_path}/#{File.basename(file, ".sql")}.csv"
-        if File.exists?(csv_file)
-          print " - Importing #{File.basename(csv_file, ".csv")} ... "
-          `#{import_cmd} #{csv_file}`
-          print "complete.\n"
-        end
-      end
-
-      puts "Backup loaded."
-
-    rescue Exception => e
-      puts e.message
-      puts e.backtrace.join("\n")
-    ensure
-      remove_directories
-    end
-
-    def download_file(key)
-      filename = File.basename(key.name)
-      print "#{filename}... "
-
-      unless should_download_file?(filename)
-        puts " [ SKIP ]"
-        return
-      end
-
-      print " Downloading... "
-
-      File.open(tmp_path.join(filename), "wb") do |f|
-        @bucket.s3.interface.get(@bucket.name, key.name) do |chunk|
-          f.write chunk
-        end
-      end
-
-      puts "complete."
-      filename
-    end
-
-    def should_download_file?(filename)
-      table_name = filename.gsub(/\..*\..*$/, '')
-
-      if only_tables.empty? and skip_data_for_tables.empty?
-        return true
-      end
-
-      if filename.match(/\.csv\.gz$/)
-        is_data = true
-        is_schema = false
-      else
-        is_data = false
-        is_schema = true
-      end
-
-      if !only_tables.empty?
-        return only_tables.include?(table_name)
-      end
-
-      if !skip_data_for_tables.empty?
-        if is_schema or (is_data and !skip_data_for_tables.include?(table_name))
-          return true
-        end
-      end
-    end
-
-    # Get a list of backups stored on S3.
-    #
-    # Returns an array of s3 paths that look like:
-    #
-    #   mysql/YYYY-MM-DD-HH-MM
-    #
-    # Array elements are sorted with the latest date first.
-    def backups
-      unless @backups
-        @backups = []
-        # Backups are stored in the mysql/ directory
-        @bucket.s3.interface.incrementally_list_bucket(@bucket.name, {
-          :prefix => "#{bucket_dir}", :delimiter => "/"
-        }) do |item|
-          @backups += item[:common_prefixes]
-        end
-        @backups = @backups.sort { |a,b| b <=> a }
-      end
-      @backups
-    end
-  end # class Loader
 end
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: mysql_truck
 version: !ruby/object:Gem::Version
-  version: 0.
+  version: 0.2.0
 prerelease:
 platform: ruby
 authors:
@@ -11,11 +11,11 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2012-05-30 00:00:00.000000000Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: right_aws
-  requirement: &
+  requirement: &70339584428840 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -23,7 +23,7 @@ dependencies:
         version: '0'
   type: :runtime
   prerelease: false
-  version_requirements: *
+  version_requirements: *70339584428840
 description: Mysql database backup tool. Dumps/Loads to/from S3.
 email:
 - peter@paydrotalks.com
@@ -39,6 +39,9 @@ files:
 - TODO
 - bin/mysql_truck
 - lib/mysql_truck.rb
+- lib/mysql_truck/dumper.rb
+- lib/mysql_truck/helper.rb
+- lib/mysql_truck/loader.rb
 - lib/mysql_truck/version.rb
 - mysql_truck.gemspec
 homepage: ''