data_sampler 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +5 -0
 - data/Gemfile +9 -0
 - data/README +47 -0
 - data/Rakefile +2 -0
 - data/bin/data_sampler +34 -0
 - data/data_sampler.gemspec +25 -0
 - data/lib/data_sampler/dependency.rb +21 -0
 - data/lib/data_sampler/sample.rb +41 -0
 - data/lib/data_sampler/table_sample.rb +117 -0
 - data/lib/data_sampler/version.rb +3 -0
 - data/lib/data_sampler.rb +5 -0
 - metadata +87 -0
 
    
        data/Gemfile
    ADDED
    
    
    
        data/README
    ADDED
    
    | 
         @@ -0,0 +1,47 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
             
     | 
| 
      
 2 
     | 
    
         
            +
            Ever found yourself wanting a modest amount of fresh rows from a production database for development purposes, but
         
     | 
| 
      
 3 
     | 
    
         
            +
            put off by the need to maintain referential integrity in the extracted data sample? This data sampler utility will
         
     | 
| 
      
 4 
     | 
    
         
            +
            take care that referential dependencies are fulfilled by recursively fetching any rows referred to by the sample.
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
              COMMANDS:
         
     | 
| 
      
 7 
     | 
    
         
            +
             
     | 
| 
      
 8 
     | 
    
         
            +
                help                 Display global or [command] help documentation.
         
     | 
| 
      
 9 
     | 
    
         
            +
                sample               Extract a sample from the given connection
         
     | 
| 
      
 10 
     | 
    
         
            +
             
     | 
| 
      
 11 
     | 
    
         
            +
              OPTIONS:
         
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
      
 13 
     | 
    
         
            +
                --adapter NAME
         
     | 
| 
      
 14 
     | 
    
         
            +
                    ActiveRecord adapter to use
         
     | 
| 
      
 15 
     | 
    
         
            +
             
     | 
| 
      
 16 
     | 
    
         
            +
                --database NAME
         
     | 
| 
      
 17 
     | 
    
         
            +
                    Name of database to sample
         
     | 
| 
      
 18 
     | 
    
         
            +
             
     | 
| 
      
 19 
     | 
    
         
            +
                --username USER
         
     | 
| 
      
 20 
     | 
    
         
            +
                    Username for connection
         
     | 
| 
      
 21 
     | 
    
         
            +
             
     | 
| 
      
 22 
     | 
    
         
            +
                --password PASSWORD
         
     | 
| 
      
 23 
     | 
    
         
            +
                    Password for connection
         
     | 
| 
      
 24 
     | 
    
         
            +
             
     | 
| 
      
 25 
     | 
    
         
            +
                --encoding ENCODING
         
     | 
| 
      
 26 
     | 
    
         
            +
                    Encoding for connection
         
     | 
| 
      
 27 
     | 
    
         
            +
             
     | 
| 
      
 28 
     | 
    
         
            +
                --socket PATH
         
     | 
| 
      
 29 
     | 
    
         
            +
                    Socket for connection
         
     | 
| 
      
 30 
     | 
    
         
            +
             
     | 
| 
      
 31 
     | 
    
         
            +
                --rows NUM
         
     | 
| 
      
 32 
     | 
    
         
            +
                    Number of rows to sample per table
         
     | 
| 
      
 33 
     | 
    
         
            +
             
     | 
| 
      
 34 
     | 
    
         
            +
                --log PATH
         
     | 
| 
      
 35 
     | 
    
         
            +
                    Log queries to PATH
         
     | 
| 
      
 36 
     | 
    
         
            +
             
     | 
| 
      
 37 
     | 
    
         
            +
              GLOBAL OPTIONS:
         
     | 
| 
      
 38 
     | 
    
         
            +
             
     | 
| 
      
 39 
     | 
    
         
            +
                -h, --help
         
     | 
| 
      
 40 
     | 
    
         
            +
                    Display help documentation
         
     | 
| 
      
 41 
     | 
    
         
            +
             
     | 
| 
      
 42 
     | 
    
         
            +
                -v, --version
         
     | 
| 
      
 43 
     | 
    
         
            +
                    Display version information
         
     | 
| 
      
 44 
     | 
    
         
            +
             
     | 
| 
      
 45 
     | 
    
         
            +
                -t, --trace
         
     | 
| 
      
 46 
     | 
    
         
            +
                    Display backtrace when an error occurs
         
     | 
| 
      
 47 
     | 
    
         
            +
             
     | 
    
        data/Rakefile
    ADDED
    
    
    
        data/bin/data_sampler
    ADDED
    
    | 
         @@ -0,0 +1,34 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #!/usr/bin/env ruby
         
     | 
| 
      
 2 
     | 
    
         
            +
            require "data_sampler"
         
     | 
| 
      
 3 
     | 
    
         
            +
            require "commander/import"
         
     | 
| 
      
 4 
     | 
    
         
            +
            require "logger"
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
            # Program metadata registered through the commander DSL.
            program :version, DataSampler::VERSION
            program :description, 'Extract a sample of desired size from a database while ensuring referential integrity.'
            default_command :sample

            command :sample do |c|
              c.description = 'Extract a sample from the given connection'

              # Each entry is [switch, coerced type, help text]; registered in this order.
              [
                ['--adapter NAME',      String,  'ActiveRecord adapter to use'],
                ['--database NAME',     String,  'Name of database to sample'],
                ['--username USER',     String,  'Username for connection'],
                ['--password PASSWORD', String,  'Password for connection'],
                ['--encoding ENCODING', String,  'Encoding for connection'],
                ['--socket PATH',       String,  'Socket for connection'],
                ['--rows NUM',          Integer, 'Number of rows to sample per table'],
                ['--log PATH',          String,  'Log queries to PATH']
              ].each do |switch, type, help|
                c.option switch, type, help
              end

              c.when_called do |args, options|
                # Values used for any option that was not given on the command line.
                options.default(
                  :adapter  => 'mysql',
                  :database => 'test',
                  :username => 'root',
                  :encoding => 'utf8',
                  :socket   => '/opt/local/var/run/mysql5/mysqld.sock',
                  :rows     => 1000
                )

                # Query logging is opt-in via --log.
                log_path = options.log
                ActiveRecord::Base.logger = Logger.new(log_path) if log_path

                # The option hash is handed to ActiveRecord as the connection spec;
                # the resulting SQL dump goes to standard output.
                pool = ActiveRecord::Base.establish_connection(options.__hash__)
                pool.with_connection do |conn|
                  puts DataSampler::Sample.new(conn, options.rows).to_sql
                end
              end
            end
         
     | 
| 
      
 34 
     | 
    
         
            +
             
     | 
| 
         @@ -0,0 +1,25 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # -*- encoding: utf-8 -*-
         
     | 
| 
      
 2 
     | 
    
         
            +
            $:.push File.expand_path("../lib", __FILE__)
         
     | 
| 
      
 3 
     | 
    
         
            +
            require "data_sampler/version"
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            # Gem specification for data_sampler.
            Gem::Specification.new do |s|
              s.name        = "data_sampler"
              s.version     = DataSampler::VERSION
              s.authors     = ["Christian Rishoj"]
              s.email       = ["christian@rishoj.net"]
              s.homepage    = "https://github.com/crishoj/data_sampler"
              s.summary     = %q{Extract a sample of records from a database while maintaining referential integrity.}
              s.description = %q{Ever found yourself wanting a modest amount of fresh rows from a production database for development purposes, but
            put back by the need to maintain referential integrity in the extracted data sample? This data sampler utility will
            take care that referential dependencies are fulfilled by recursively fetching any rows referred to by the sample.}

              s.rubyforge_project = "data_sampler"

              # File lists are taken from the git index at build time.
              s.files         = `git ls-files`.split("\n")
              s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
              s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
              s.require_paths = ["lib"]

              s.add_dependency "schema_plus"
              s.add_dependency "activerecord"
              # bin/data_sampler requires "commander/import", so commander must be
              # installed together with the gem for the executable to start.
              s.add_dependency "commander"
            end
         
     | 
| 
         @@ -0,0 +1,21 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            module DataSampler
              # A reference to a row that must be present in a sample: the name of the
              # table holding the row plus a hash of column => value pairs identifying it.
              #
              # Instances are value objects: two dependencies with the same table name
              # and keys are equal and hash identically, so they collapse to a single
              # entry when stored in a Set or used as Hash keys.
              class Dependency

                attr_reader :table_name
                attr_reader :keys

                # table_name - name of the referenced table
                # keys       - Hash mapping referenced column names to required values
                def initialize(table_name, keys)
                  @table_name = table_name
                  @keys = keys
                end

                # Value equality; any object that is not a Dependency is never equal.
                def eql?(other)
                  other.is_a?(Dependency) &&
                    table_name == other.table_name &&
                    keys == other.keys
                end
                alias_method :==, :eql?

                # Must agree with eql? -- Set and Hash compare hash values first and
                # only then call eql?, so without this override equal dependencies
                # would never be recognised as duplicates.
                def hash
                  [table_name, keys].hash
                end

                def to_s
                  "#{keys} in table #{table_name}"
                end

              end
            end
         
     | 
| 
         @@ -0,0 +1,41 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require "data_sampler/table_sample"
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            module DataSampler

              # A sample spanning every table reported by a connection: one TableSample
              # per table, extended until no table reports newly discovered
              # dependencies.
              class Sample

                # connection     - object providing #tables and whatever TableSample needs
                # rows_per_table - number of rows initially requested for each table
                def initialize(connection, rows_per_table = 1000)
                  @connection = connection
                  @rows_per_table = rows_per_table
                  @table_samples = {}
                  @computed = false
                end

                # Builds the per-table samples, then repeatedly asks each of them to
                # resolve its pending dependencies until a full pass discovers nothing
                # new. Progress messages are written with `warn`.
                def compute!
                  @connection.tables.each do |table_name|
                    # Workaround for inconsistent casing in table definitions (http://bugs.mysql.com/bug.php?id=60773)
                    # A downcased copy is used: the strings belong to the connection
                    # and may be frozen, so they must not be modified in place.
                    normalized_name = table_name.downcase
                    @table_samples[normalized_name] = TableSample.new(@connection, normalized_name, @rows_per_table)
                  end
                  warn "Sampling #{@table_samples.count} tables..."
                  @table_samples.values.each(&:sample!)
                  warn "Ensuring referential integrity..."
                  begin
                    # Counts the tables that reported new dependencies in this pass.
                    new_dependencies = 0
                    @table_samples.values.each do |table_sample|
                      new_dependencies += 1 if table_sample.ensure_referential_integrity(@table_samples)
                    end
                    warn " - discovered #{new_dependencies} new dependencies" if new_dependencies > 0
                  end while new_dependencies > 0
                  warn " - referential integrity obtained"
                  @computed = true
                end

                # Returns the SQL of every table sample joined by newlines, computing
                # the sample first if that has not happened yet.
                def to_sql
                  compute! unless @computed
                  @table_samples.values.collect(&:to_sql) * "\n"
                end

              end

            end
         
     | 
| 
         @@ -0,0 +1,117 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require "data_sampler/dependency"
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            module DataSampler
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
              class TableSample
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
                attr_reader :table_name
         
     | 
| 
      
 8 
     | 
    
         
            +
                attr_reader :pending_dependencies
         
     | 
| 
      
 9 
     | 
    
         
            +
             
     | 
| 
      
 10 
     | 
    
         
            +
                def initialize(connection, table_name, size = 1000)
         
     | 
| 
      
 11 
     | 
    
         
            +
                  @table_name = table_name
         
     | 
| 
      
 12 
     | 
    
         
            +
                  @connection = connection
         
     | 
| 
      
 13 
     | 
    
         
            +
                  @size = size
         
     | 
| 
      
 14 
     | 
    
         
            +
                  @pending_dependencies = Set.new
         
     | 
| 
      
 15 
     | 
    
         
            +
                  @sample = Set.new
         
     | 
| 
      
 16 
     | 
    
         
            +
                  @sampled = false
         
     | 
| 
      
 17 
     | 
    
         
            +
                  @sampled_ids = Set.new
         
     | 
| 
      
 18 
     | 
    
         
            +
                end
         
     | 
| 
      
 19 
     | 
    
         
            +
             
     | 
| 
      
 20 
     | 
    
         
            +
                def sample!
         
     | 
| 
      
 21 
     | 
    
         
            +
                  fetch_sample(@size) unless @sampled
         
     | 
| 
      
 22 
     | 
    
         
            +
                  @sample
         
     | 
| 
      
 23 
     | 
    
         
            +
                end
         
     | 
| 
      
 24 
     | 
    
         
            +
             
     | 
| 
      
 25 
     | 
    
         
            +
                def fulfil(dependency)
         
     | 
| 
      
 26 
     | 
    
         
            +
                  return if fulfilled?(dependency)
         
     | 
| 
      
 27 
     | 
    
         
            +
                  where = dependency.keys.collect { |col, val| "#{@connection.quote_column_name col} = #{@connection.quote val}" } * ' AND '
         
     | 
| 
      
 28 
     | 
    
         
            +
                  sql = "SELECT * FROM #{@connection.quote_table_name @table_name} WHERE " + where
         
     | 
| 
      
 29 
     | 
    
         
            +
                  add @connection.select_one(sql)
         
     | 
| 
      
 30 
     | 
    
         
            +
                end
         
     | 
| 
      
 31 
     | 
    
         
            +
             
     | 
| 
      
 32 
     | 
    
         
            +
                def fulfilled?(dependency)
         
     | 
| 
      
 33 
     | 
    
         
            +
                  # FIXME: Only checks id column
         
     | 
| 
      
 34 
     | 
    
         
            +
                  if dependency.keys.values.size == 1
         
     | 
| 
      
 35 
     | 
    
         
            +
                    dependency.keys.each_pair do |key, val|
         
     | 
| 
      
 36 
     | 
    
         
            +
                      if key == 'id'
         
     | 
| 
      
 37 
     | 
    
         
            +
                        return true if @sampled_ids.include?(val)
         
     | 
| 
      
 38 
     | 
    
         
            +
                      end
         
     | 
| 
      
 39 
     | 
    
         
            +
                    end
         
     | 
| 
      
 40 
     | 
    
         
            +
                  end
         
     | 
| 
      
 41 
     | 
    
         
            +
                  false
         
     | 
| 
      
 42 
     | 
    
         
            +
                end
         
     | 
| 
      
 43 
     | 
    
         
            +
             
     | 
| 
      
 44 
     | 
    
         
            +
                def add(row)
         
     | 
| 
      
 45 
     | 
    
         
            +
                  return false unless @sample.add? row
         
     | 
| 
      
 46 
     | 
    
         
            +
                  @sampled_ids.add row['id'] if row['id']
         
     | 
| 
      
 47 
     | 
    
         
            +
                  any_new = false
         
     | 
| 
      
 48 
     | 
    
         
            +
                  dependencies_for(row).each do |dep|
         
     | 
| 
      
 49 
     | 
    
         
            +
                    any_new = true if @pending_dependencies.add?(dep)
         
     | 
| 
      
 50 
     | 
    
         
            +
                  end
         
     | 
| 
      
 51 
     | 
    
         
            +
                  any_new
         
     | 
| 
      
 52 
     | 
    
         
            +
                rescue ActiveRecord::StatementInvalid => e
         
     | 
| 
      
 53 
     | 
    
         
            +
                  # Don't choke on unknown table engines, such as Sphinx
         
     | 
| 
      
 54 
     | 
    
         
            +
                end
         
     | 
| 
      
 55 
     | 
    
         
            +
             
     | 
| 
      
 56 
     | 
    
         
            +
                def ensure_referential_integrity(table_samples)
         
     | 
| 
      
 57 
     | 
    
         
            +
                  any_new = false
         
     | 
| 
      
 58 
     | 
    
         
            +
                  deps_in_progress = @pending_dependencies
         
     | 
| 
      
 59 
     | 
    
         
            +
                  @pending_dependencies = Set.new
         
     | 
| 
      
 60 
     | 
    
         
            +
                  deps_in_progress.each do |dependency|
         
     | 
| 
      
 61 
     | 
    
         
            +
                    any_new = true if table_samples[dependency.table_name].fulfil(dependency)
         
     | 
| 
      
 62 
     | 
    
         
            +
                  end
         
     | 
| 
      
 63 
     | 
    
         
            +
                  any_new
         
     | 
| 
      
 64 
     | 
    
         
            +
                end
         
     | 
| 
      
 65 
     | 
    
         
            +
             
     | 
| 
      
 66 
     | 
    
         
            +
                def to_sql
         
     | 
| 
      
 67 
     | 
    
         
            +
                  ret = ["-- #{@table_name}: #{@sample.count} rows"]
         
     | 
| 
      
 68 
     | 
    
         
            +
                  unless @sample.empty?
         
     | 
| 
      
 69 
     | 
    
         
            +
                    quoted_cols = @sample.first.keys.collect { |col| @connection.quote_column_name col }
         
     | 
| 
      
 70 
     | 
    
         
            +
                    sql = "INSERT INTO #{@connection.quote_table_name @table_name} (#{quoted_cols * ','})"
         
     | 
| 
      
 71 
     | 
    
         
            +
                    @sample.each do |row|
         
     | 
| 
      
 72 
     | 
    
         
            +
                      quoted_vals = row.values.collect { |val| @connection.quote val }
         
     | 
| 
      
 73 
     | 
    
         
            +
                      ret << sql + " VALUES (#{quoted_vals * ','})"
         
     | 
| 
      
 74 
     | 
    
         
            +
                    end
         
     | 
| 
      
 75 
     | 
    
         
            +
                  end
         
     | 
| 
      
 76 
     | 
    
         
            +
                  ret * "\n"
         
     | 
| 
      
 77 
     | 
    
         
            +
                end
         
     | 
| 
      
 78 
     | 
    
         
            +
             
     | 
| 
      
 79 
     | 
    
         
            +
                protected
         
     | 
| 
      
 80 
     | 
    
         
            +
             
     | 
| 
      
 81 
     | 
    
         
            +
                def fetch_sample(count)
         
     | 
| 
      
 82 
     | 
    
         
            +
                  sql = "SELECT * FROM #{@connection.quote_table_name @table_name}"
         
     | 
| 
      
 83 
     | 
    
         
            +
                  pk = @connection.primary_key(@table_name)
         
     | 
| 
      
 84 
     | 
    
         
            +
                  sql += " ORDER BY #{@connection.quote_column_name pk} DESC" unless pk.nil?
         
     | 
| 
      
 85 
     | 
    
         
            +
                  sql += " LIMIT #{count}"
         
     | 
| 
      
 86 
     | 
    
         
            +
                  @connection.select_all(sql).each { |row| add(row) }
         
     | 
| 
      
 87 
     | 
    
         
            +
                rescue ActiveRecord::StatementInvalid => e
         
     | 
| 
      
 88 
     | 
    
         
            +
                  # Don't choke on unknown table engines, such as Sphinx
         
     | 
| 
      
 89 
     | 
    
         
            +
                  []
         
     | 
| 
      
 90 
     | 
    
         
            +
                end
         
     | 
| 
      
 91 
     | 
    
         
            +
             
     | 
| 
      
 92 
     | 
    
         
            +
                def samplable?
         
     | 
| 
      
 93 
     | 
    
         
            +
                  # We shouldn't be sampling views
         
     | 
| 
      
 94 
     | 
    
         
            +
                  @connection.views.grep(@table_name).empty?
         
     | 
| 
      
 95 
     | 
    
         
            +
                end
         
     | 
| 
      
 96 
     | 
    
         
            +
             
     | 
| 
      
 97 
     | 
    
         
            +
                def dependency_for(fk, row)
         
     | 
| 
      
 98 
     | 
    
         
            +
                  ref = {}
         
     | 
| 
      
 99 
     | 
    
         
            +
                  cols = fk.column_names.dup
         
     | 
| 
      
 100 
     | 
    
         
            +
                  raise "No column names in foreign key #{fk.inspect}" if cols.empty?
         
     | 
| 
      
 101 
     | 
    
         
            +
                  fk.references_column_names.each do |ref_col|
         
     | 
| 
      
 102 
     | 
    
         
            +
                    col = cols.shift
         
     | 
| 
      
 103 
     | 
    
         
            +
                    ref[ref_col] = row[col] unless row[col].nil?
         
     | 
| 
      
 104 
     | 
    
         
            +
                  end
         
     | 
| 
      
 105 
     | 
    
         
            +
                  Dependency.new(fk.references_table_name, ref) unless ref.empty?
         
     | 
| 
      
 106 
     | 
    
         
            +
                end
         
     | 
| 
      
 107 
     | 
    
         
            +
             
     | 
| 
      
 108 
     | 
    
         
            +
                def dependencies_for(row)
         
     | 
| 
      
 109 
     | 
    
         
            +
                  foreign_keys.collect { |fk| dependency_for(fk, row) }.compact
         
     | 
| 
      
 110 
     | 
    
         
            +
                end
         
     | 
| 
      
 111 
     | 
    
         
            +
             
     | 
| 
      
 112 
     | 
    
         
            +
                # Returns the result of @connection.foreign_keys(@table_name).
                # The connection is only asked while @fks is still unset (nil or
                # false); afterwards the cached value in @fks is returned.
                def foreign_keys
                  @fks = @connection.foreign_keys(@table_name) unless @fks
                  @fks
                end
         
     | 
| 
      
 115 
     | 
    
         
            +
             
     | 
| 
      
 116 
     | 
    
         
            +
              end
         
     | 
| 
      
 117 
     | 
    
         
            +
            end
         
     | 
    
        data/lib/data_sampler.rb
    ADDED
    
    
    
        metadata
    ADDED
    
    | 
         @@ -0,0 +1,87 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            --- !ruby/object:Gem::Specification
         
     | 
| 
      
 2 
     | 
    
         
            +
            name: data_sampler
         
     | 
| 
      
 3 
     | 
    
         
            +
            version: !ruby/object:Gem::Version
         
     | 
| 
      
 4 
     | 
    
         
            +
              version: 0.0.1
         
     | 
| 
      
 5 
     | 
    
         
            +
              prerelease: 
         
     | 
| 
      
 6 
     | 
    
         
            +
            platform: ruby
         
     | 
| 
      
 7 
     | 
    
         
            +
            authors:
         
     | 
| 
      
 8 
     | 
    
         
            +
            - Christian Rishoj
         
     | 
| 
      
 9 
     | 
    
         
            +
            autorequire: 
         
     | 
| 
      
 10 
     | 
    
         
            +
            bindir: bin
         
     | 
| 
      
 11 
     | 
    
         
            +
            cert_chain: []
         
     | 
| 
      
 12 
     | 
    
         
            +
            date: 2011-08-03 00:00:00.000000000Z
         
     | 
| 
      
 13 
     | 
    
         
            +
            dependencies:
         
     | 
| 
      
 14 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 15 
     | 
    
         
            +
              name: schema_plus
         
     | 
| 
      
 16 
     | 
    
         
            +
              requirement: &70132762292640 !ruby/object:Gem::Requirement
         
     | 
| 
      
 17 
     | 
    
         
            +
                none: false
         
     | 
| 
      
 18 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 19 
     | 
    
         
            +
                - - ! '>='
         
     | 
| 
      
 20 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 21 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
      
 22 
     | 
    
         
            +
              type: :runtime
         
     | 
| 
      
 23 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 24 
     | 
    
         
            +
              version_requirements: *70132762292640
         
     | 
| 
      
 25 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 26 
     | 
    
         
            +
              name: activerecord
         
     | 
| 
      
 27 
     | 
    
         
            +
              requirement: &70132762292220 !ruby/object:Gem::Requirement
         
     | 
| 
      
 28 
     | 
    
         
            +
                none: false
         
     | 
| 
      
 29 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 30 
     | 
    
         
            +
                - - ! '>='
         
     | 
| 
      
 31 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 32 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
      
 33 
     | 
    
         
            +
              type: :runtime
         
     | 
| 
      
 34 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 35 
     | 
    
         
            +
              version_requirements: *70132762292220
         
     | 
| 
      
 36 
     | 
    
         
            +
            description: ! 'Ever found yourself wanting a modest amount of fresh rows from a production
         
     | 
| 
      
 37 
     | 
    
         
            +
              database for development purposes, but
         
     | 
| 
      
 38 
     | 
    
         
            +
             
     | 
| 
      
 39 
     | 
    
         
            +
              put off by the need to maintain referential integrity in the extracted data sample?
         
     | 
| 
      
 40 
     | 
    
         
            +
              This data sampler utility will
         
     | 
| 
      
 41 
     | 
    
         
            +
             
     | 
| 
      
 42 
     | 
    
         
            +
              take care that referential dependencies are fulfilled by recursively fetching any
         
     | 
| 
      
 43 
     | 
    
         
            +
              rows referred to by the sample.'
         
     | 
| 
      
 44 
     | 
    
         
            +
            email:
         
     | 
| 
      
 45 
     | 
    
         
            +
            - christian@rishoj.net
         
     | 
| 
      
 46 
     | 
    
         
            +
            executables:
         
     | 
| 
      
 47 
     | 
    
         
            +
            - data_sampler
         
     | 
| 
      
 48 
     | 
    
         
            +
            extensions: []
         
     | 
| 
      
 49 
     | 
    
         
            +
            extra_rdoc_files: []
         
     | 
| 
      
 50 
     | 
    
         
            +
            files:
         
     | 
| 
      
 51 
     | 
    
         
            +
            - .gitignore
         
     | 
| 
      
 52 
     | 
    
         
            +
            - Gemfile
         
     | 
| 
      
 53 
     | 
    
         
            +
            - README
         
     | 
| 
      
 54 
     | 
    
         
            +
            - Rakefile
         
     | 
| 
      
 55 
     | 
    
         
            +
            - bin/data_sampler
         
     | 
| 
      
 56 
     | 
    
         
            +
            - data_sampler.gemspec
         
     | 
| 
      
 57 
     | 
    
         
            +
            - lib/data_sampler.rb
         
     | 
| 
      
 58 
     | 
    
         
            +
            - lib/data_sampler/dependency.rb
         
     | 
| 
      
 59 
     | 
    
         
            +
            - lib/data_sampler/sample.rb
         
     | 
| 
      
 60 
     | 
    
         
            +
            - lib/data_sampler/table_sample.rb
         
     | 
| 
      
 61 
     | 
    
         
            +
            - lib/data_sampler/version.rb
         
     | 
| 
      
 62 
     | 
    
         
            +
            homepage: https://github.com/crishoj/data_sampler
         
     | 
| 
      
 63 
     | 
    
         
            +
            licenses: []
         
     | 
| 
      
 64 
     | 
    
         
            +
            post_install_message: 
         
     | 
| 
      
 65 
     | 
    
         
            +
            rdoc_options: []
         
     | 
| 
      
 66 
     | 
    
         
            +
            require_paths:
         
     | 
| 
      
 67 
     | 
    
         
            +
            - lib
         
     | 
| 
      
 68 
     | 
    
         
            +
            required_ruby_version: !ruby/object:Gem::Requirement
         
     | 
| 
      
 69 
     | 
    
         
            +
              none: false
         
     | 
| 
      
 70 
     | 
    
         
            +
              requirements:
         
     | 
| 
      
 71 
     | 
    
         
            +
              - - ! '>='
         
     | 
| 
      
 72 
     | 
    
         
            +
                - !ruby/object:Gem::Version
         
     | 
| 
      
 73 
     | 
    
         
            +
                  version: '0'
         
     | 
| 
      
 74 
     | 
    
         
            +
            required_rubygems_version: !ruby/object:Gem::Requirement
         
     | 
| 
      
 75 
     | 
    
         
            +
              none: false
         
     | 
| 
      
 76 
     | 
    
         
            +
              requirements:
         
     | 
| 
      
 77 
     | 
    
         
            +
              - - ! '>='
         
     | 
| 
      
 78 
     | 
    
         
            +
                - !ruby/object:Gem::Version
         
     | 
| 
      
 79 
     | 
    
         
            +
                  version: '0'
         
     | 
| 
      
 80 
     | 
    
         
            +
            requirements: []
         
     | 
| 
      
 81 
     | 
    
         
            +
            rubyforge_project: data_sampler
         
     | 
| 
      
 82 
     | 
    
         
            +
            rubygems_version: 1.8.6
         
     | 
| 
      
 83 
     | 
    
         
            +
            signing_key: 
         
     | 
| 
      
 84 
     | 
    
         
            +
            specification_version: 3
         
     | 
| 
      
 85 
     | 
    
         
            +
            summary: Extract a sample of records from a database while maintaining referential
         
     | 
| 
      
 86 
     | 
    
         
            +
              integrity.
         
     | 
| 
      
 87 
     | 
    
         
            +
            test_files: []
         
     |