shiba 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,100 @@
1
+ require 'shiba'
2
+ require 'shiba/query'
3
+ require 'json'
4
+ require 'logger'
5
+
6
module Shiba
  class Analyzer

    # Convenience wrapper: build an Analyzer and run it in one call.
    def self.analyze(file, output, stats, options)
      new(file, output, stats, options).analyze
    end

    # file:    IO-like object read line-by-line via #gets (e.g. a query log)
    # output:  IO-like object that receives one JSON document per explained query
    # stats:   index statistics (see Shiba::Index) used to cost queries
    # options: hash with optional 'limit', 'index' and 'debug' string keys
    def initialize(file, output, stats, options)
      @file = file
      @output = output
      @stats = stats
      @options = options
      # fingerprint => true; used to explain each distinct query shape only once
      @fingerprints = {}
    end

    # Scans @file for SELECT statements, EXPLAINs each distinct query once,
    # writes each explain result to @output as JSON, and returns the array
    # of explain hashes.
    def analyze
      idx = 0
      queries = []
      while line = @file.gets
        # strip out ANSI color escape sequences
        begin
          line.gsub!(/\e\[?.*?[\@-~]/, '')
        rescue ArgumentError => e
          # line contains invalid byte sequences; ignore it
          next
        end

        if line =~ /(select.*from.*)/i
          sql = $1
        else
          next
        end

        if @options['limit']
          # BUG FIX: previously a bare `return`, which returned nil and
          # discarded every query collected so far once the limit was hit.
          return queries if idx == @options['limit']
        end

        # when 'index' is given, only analyze the query at that position
        if @options['index']
          next unless idx == @options['index']
        end

        sql.chomp!
        query = Shiba::Query.new(sql, @stats)

        if !@fingerprints[query.fingerprint]
          if sql.downcase.start_with?("select")
            if @options['debug']
              require 'byebug'
              debugger
            end

            explain = analyze_query(query)
            if explain
              idx += 1
              queries << explain
            end
          end
        end

        @fingerprints[query.fingerprint] = true
      end
      queries
    end

    protected

    # Report an unexpected explain failure on stderr without aborting the run.
    def dump_error(e, query)
      $stderr.puts "got exception trying to explain: #{e.message}"
      $stderr.puts "query: #{query.sql} (index #{query.index})"
      $stderr.puts e.backtrace.join("\n")
    end

    # EXPLAINs a single query. Returns the explain hash, or nil when the
    # query could not be explained (bad SQL scraped from the log, etc).
    def analyze_query(query)
      explain = nil
      begin
        explain = query.explain
      rescue Mysql2::Error => e
        # we're picking up crap on the command-line that's not good SQL. ignore it.
        if !(e.message =~ /You have an error in your SQL syntax/)
          dump_error(e, query)
        end
      rescue StandardError => e
        dump_error(e, query)
      end
      return nil unless explain

      json = JSON.dump(explain.as_json)
      write(json)
      explain.as_json
    end

    def write(line)
      @output.puts(line)
    end
  end
end
@@ -0,0 +1,31 @@
1
+ require 'pathname'
2
+
3
module Shiba
  module Configure

    # avoiding Rails dependency on the cli tools for now.
    # yanked from https://github.com/rails/rails/blob/v5.0.5/railties/lib/rails/application/configuration.rb
    #
    # Returns the parsed config/database.yml hash (relative to the current
    # working directory), {} when only ENV['DATABASE_URL'] is present, or
    # nil when neither exists.
    def self.activerecord_configuration
      yaml = Pathname.new("config/database.yml")

      config = if yaml && yaml.exist?
        require "yaml"
        require "erb"
        # ERB first, then YAML — database.yml commonly embeds <%= %> tags.
        YAML.load(ERB.new(yaml.read).result) || {}
      elsif ENV['DATABASE_URL']
        # Value from ENV['DATABASE_URL'] is set to default database connection
        # by Active Record.
        {}
      end

      config
    rescue Psych::SyntaxError => e
      raise "YAML syntax error occurred while parsing #{yaml.to_s}. " \
            "Please note that YAML must be consistently indented using spaces. Tabs are not allowed. " \
            "Error: #{e.message}"
    rescue => e
      # BUG FIX: was `#{path}`, an undefined local variable — any error here
      # raised NameError instead of the intended diagnostic message.
      raise e, "Cannot load `#{yaml}`:\n#{e.message}", e.backtrace
    end

  end
end
@@ -0,0 +1,234 @@
1
+ require 'json'
2
+ require 'shiba/index'
3
+
4
module Shiba
  # Wraps MySQL's `EXPLAIN FORMAT=JSON` output for a single SELECT and
  # estimates its row cost from pre-collected index statistics, so queries
  # can be costed against production-like cardinalities.
  class Explain
    # sql:     the query; may end with a " /*shiba ... */" marker comment whose
    #          body is a JSON backtrace (split off below)
    # stats:   index statistics in the Shiba::Index format
    # options: :force_key — rewrite the query with FORCE INDEX(<key>) before
    #          explaining (used by estimate_row_count_with_key)
    def initialize(sql, stats, options = {})
      @sql = sql

      # Split off the shiba marker comment; everything after " /*shiba" is
      # treated as the backtrace payload (still ending in "*/").
      @sql, _, @backtrace = @sql.partition(" /*shiba")
      if options[:force_key]
        @sql = @sql.sub(/(FROM\s*\S+)/i, '\1' + " FORCE INDEX(`#{options[:force_key]}`)")
      end

      @options = options
      ex = Shiba.connection.query("EXPLAIN FORMAT=JSON #{@sql}").to_a
      json = JSON.parse(ex.first['EXPLAIN'])
      @rows = self.class.transform_json(json['query_block'])
      @stats = stats
      run_checks!
    end

    # Serializable summary of the explain result.
    # NOTE(review): mutates @backtrace (chomp!) and assumes it is valid JSON
    # once the trailing "*/" is removed — verify against the producer of the
    # /*shiba ... */ comment.
    def as_json
      @backtrace.chomp!("*/")

      {
        sql: @sql,
        table: get_table,
        key: first_key,
        tags: messages,
        cost: @cost,
        used_key_parts: first['used_key_parts'],
        possible_keys: first['possible_keys'],
        backtrace: JSON.parse(@backtrace)
      }
    end

    # Best-effort regex extraction of the first table named in the FROM
    # clause, lowercased, with backticks and any schema qualifier stripped.
    def get_table
      @sql =~ /\s+from\s*([^\s,]+)/i
      table = $1
      return nil unless table

      table = table.downcase
      table.gsub!('`', '')
      # drop a "schema." prefix, keeping only the table name
      table.gsub!(/.*\.(.*)/, '\1')
      table
    end

    # Flattens one `table` node of EXPLAIN FORMAT=JSON into the compact
    # row-hash shape the rest of this class consumes.
    def self.transform_table(table)
      t = table
      res = {}
      res['table'] = t['table_name']
      res['access_type'] = t['access_type']
      res['key'] = t['key']
      res['used_key_parts'] = t['used_key_parts'] if t['used_key_parts']
      res['rows'] = t['rows_examined_per_scan']
      res['filtered'] = t['filtered']

      # only keep possible_keys when it adds information beyond the chosen key
      if t['possible_keys'] && t['possible_keys'] != [res['key']]
        res['possible_keys'] = t['possible_keys']
      end
      res['using_index'] = t['using_index'] if t['using_index']
      res
    end

    # Recursively walks the EXPLAIN JSON query_block, unwrapping
    # ordering/duplicate-removal wrappers and flattening nested_loop joins
    # into an array of per-table row hashes (in join order).
    # NOTE(review): the ordering_operation/duplicates_removal branches restart
    # with a fresh `res` rather than passing the accumulator through — appears
    # fine for top-level wrappers, but verify for wrappers nested inside joins.
    def self.transform_json(json, res = [])
      rows = []

      if json['ordering_operation']
        return transform_json(json['ordering_operation'])
      elsif json['duplicates_removal']
        return transform_json(json['duplicates_removal'])
      elsif !json['nested_loop'] && !json['table']
        # no table access at all; MySQL explains why in 'message'
        return [{'Extra' => json['message']}]
      elsif json['nested_loop']
        json['nested_loop'].map do |nested|
          transform_json(nested, res)
        end
      elsif json['table']
        res << transform_table(json['table'])
      end
      res
    end

    # Example row shape:
    # [{"id"=>1, "select_type"=>"SIMPLE", "table"=>"interwiki", "partitions"=>nil, "type"=>"const", "possible_keys"=>"PRIMARY", "key"=>"PRIMARY", "key_len"=>"34", "ref"=>"const", "rows"=>1, "filtered"=>100.0, "Extra"=>nil}]

    # Estimated row cost, computed once by run_checks!.
    attr_reader :cost

    # The first (driving) table of the plan.
    def first
      @rows.first
    end

    def first_table
      first["table"]
    end

    def first_key
      first["key"]
    end

    def first_extra
      first["Extra"]
    end

    # Accumulated tag strings describing the plan (access type, tablescan, …).
    def messages
      @messages ||= []
    end

    # One-line log summary, e.g.:
    # shiba: {"possible_keys"=>nil, "key"=>nil, "key_len"=>nil, "ref"=>nil, "rows"=>6, "filtered"=>16.67, "Extra"=>"Using where"}
    # NOTE(review): symbolize_keys is an ActiveSupport extension, not core
    # Ruby Hash — this method assumes ActiveSupport is loaded.
    def to_log
      plan = first.symbolize_keys
      "possible: #{plan[:possible_keys]}, rows: #{plan[:rows]}, filtered: #{plan[:filtered]}, cost: #{self.cost}, access: #{plan[:access_type]}"
    end

    def to_h
      first.merge(cost: cost, messages: messages)
    end

    # 'Extra' messages that mean the query is trivially cheap and can be
    # costed at zero.
    IGNORE_PATTERNS = [
      /No tables used/,
      /Impossible WHERE/,
      /Select tables optimized away/,
      /No matching min\/max row/
    ]

    def table_size
      Shiba::Index.count(first["table"], @stats)
    end

    def no_matching_row_in_const_table?
      first_extra && first_extra =~ /no matching row in const table/
    end

    def ignore_explain?
      first_extra && IGNORE_PATTERNS.any? { |p| first_extra =~ p }
    end

    # True when the driving "table" is a derived subquery (<derivedN>).
    def derived?
      first['table'] =~ /<derived.*?>/
    end

    # TODO: need to parse SQL here I think
    # Heuristic: single-table query with no WHERE (or WHERE 1=1) and no
    # ORDER BY — i.e. a plain scan whose cost is bounded by LIMIT/table size.
    def simple_table_scan?
      @rows.size == 1 && (@sql !~ /where/i || @sql =~ /where\s*1=1/i) && (@sql !~ /order by/i)
    end

    # The trailing LIMIT value, or nil when the query has none.
    def limit
      if @sql =~ /limit\s*(\d+)\s*(offset \d+)?$/i
        $1.to_i
      else
        nil
      end
    end

    # Tags the plan with its access type ('ALL' is renamed 'tablescan').
    def tag_query_type
      access_type = first['access_type']

      return unless access_type
      access_type = 'tablescan' if access_type == 'ALL'
      messages << "access_type_" + access_type
    end

    # Estimates how many rows this plan examines, tagging `messages` along
    # the way. Uses the stats tables (Shiba::Index) rather than MySQL's own
    # row estimates so dev-sized databases still produce useful costs.
    def estimate_row_count
      if no_matching_row_in_const_table?
        messages << "access_type_const"
        first['key'] = 'PRIMARY'
        return 0
      end

      return 0 if ignore_explain?

      messages << "fuzzed_data" if Shiba::Index.fuzzed?(first_table, @stats)

      if simple_table_scan?
        if limit
          messages << 'limited_tablescan'
        else
          messages << 'access_type_tablescan'
        end

        return limit || table_size
      end

      if derived?
        # select count(*) from ( select 1 from foo where blah )
        # cost the inner query instead of the derived wrapper
        @rows.shift
        return estimate_row_count
      end

      tag_query_type

      # TODO: if possible_keys but mysql chooses NULL, this could be a test-data issue,
      # pick the best key from the list of possibilities.
      #
      if first_key
        Shiba::Index.estimate_key(first_table, first_key, first['used_key_parts'], @stats)
      else
        if first['possible_keys'].nil?
          # if no possibile we're table scanning, use PRIMARY to indicate that cost.
          # note that this can be wildly inaccurate bcs of WHERE + LIMIT stuff.
          Shiba::Index.count(first_table, @stats)
        else
          if @options[:force_key]
            # we were asked to force a key, but mysql still refused to use it
            # (no index used).
            #
            # there seems to be cases where mysql lists `possible_key` values
            # that it then cannot use, seen this in OR queries.
            return Shiba::Index.count(first_table, @stats)
          end

          # try each possible key and take the cheapest estimate, with a
          # full-table count as the upper bound
          possibilities = [Shiba::Index.count(first_table, @stats)]
          possibilities += first['possible_keys'].map do |key|
            estimate_row_count_with_key(key)
          end
          possibilities.compact.min
        end
      end
    end

    # Re-explains the query with FORCE INDEX(key); nil when the key does not
    # actually exist on the table.
    def estimate_row_count_with_key(key)
      Explain.new(@sql, @stats, force_key: key).estimate_row_count
    rescue Mysql2::Error => e
      if /Key .+? doesn't exist in table/ =~ e.message
        return nil
      end

      raise e
    end

    def run_checks!
      @cost = estimate_row_count
    end
  end
end
234
+
@@ -0,0 +1,159 @@
1
module Shiba
  module Index

    # Given the path to the information_schema.statistics output, returns index statistics keyed by table name.
    # Examples:
    # Exploring the schema:
    #
    # schema_stats = Index.parse("./shiba/schema_stats.tsv")
    # schema_stats.keys
    # => :users, :posts, :comments
    # schema_stats[:users]
    # => {:table_schema=>"blog_test", :table_name=>"users", :non_unique=>"0", :column_name=>"id", :cardinality=>"2", :is_visible=>"YES", :"expression\n"=>"NULL\n"}
    #
    def self.parse(path)
      tables = {}
      records = read(path)
      headers = records.shift.map { |header| header.downcase }
      records.each do |r|
        h = Hash[headers.zip(r)]
        h["cardinality"] = h["cardinality"].to_i
        table = tables[h['table_name']] ||= []
        table.push(h)
      end
      tables
    end

    # Getting a row count for a table:
    #
    # schema_stats = Index.parse("./shiba/schema_stats.tsv")
    # users_count = Index.count(:users, schema_stats)
    # => 2
    # Returns nil when the table is unknown.
    def self.count(table, schema)
      return nil unless schema[table]
      primary = schema[table].detect { |index| index['index_name'] == "PRIMARY" }
      if primary.nil?
        # find the highest cardinality of a unique index, if it exists
        schema[table].map do |index|
          if index['non_unique'].to_i == 0
            index['cardinality']
          else
            nil
          end
        end.compact.max
      else
        primary['cardinality'].to_i
      end
    end

    # True-ish when the stats for this table were generated by fuzz!
    # rather than read from a real database.
    def self.fuzzed?(table, schema)
      return nil unless schema[table]
      schema[table].first['fuzzed']
    end

    # Rough rows-per-key estimate for a lookup on `key` using `parts`
    # (the used_key_parts from EXPLAIN): table count divided by the
    # cardinality of the deepest used column. nil when unknown.
    def self.estimate_key(table, key, parts, schema)
      table_count = count(table, schema)
      return nil unless table_count

      key_stat = schema[table].detect do |i|
        i["index_name"] == key && i["column_name"] == parts.last
      end

      return nil unless key_stat

      return 0 if key_stat['cardinality'] == 0
      table_count / key_stat['cardinality']
    end

    # Builds the same table_name => [index rows] structure as parse, but
    # straight from a live connection's information_schema.
    def self.query(connection)
      records = connection.query("select * from information_schema.statistics where table_schema = DATABASE()")
      tables = {}
      records.each do |h|
        h.keys.each { |k| h[k.downcase] = h.delete(k) }
        h["cardinality"] = h["cardinality"].to_i
        table = tables[h['table_name']] ||= []
        table.push(h)
      end
      tables
    end


    # Up the cardinality on our indexes.
    # Non uniques have a little less cardinality.
    def self.fuzz!(stats)
      db = stats.values.first.first['table_schema']
      table_sizes = self.guess_table_sizes(db)

      stats.each do |table, indexes|
        indexes.each do |idx|
          idx['cardinality'] = table_sizes[table]

          # BUG FIX: non_unique is a String ("1") when stats came from
          # parse(), an Integer when from query(); `== 1` only matched the
          # Integer form. to_i handles both.
          if idx['non_unique'].to_i == 1
            idx['cardinality'] = (idx['cardinality'] * 0.7).round
          end

          idx['fuzzed'] = true
        end
      end
    end

    MINIMUM_TABLE_SIZE = 500

    # Approximate median size of the tables is less than 500.
    def self.insufficient_stats?(stats)
      if stats.length == 0
        return true
      end

      # Calculate a rough median.
      primary_keys = stats.map do |_, indexes|
        indexes.detect { |idx| idx['index_name'] == 'PRIMARY' } || {}
      end

      # BUG FIX: the counts must be sorted before picking the middle element,
      # otherwise this is an arbitrary table's size, not a median.
      table_counts = primary_keys.map { |pk| pk['cardinality'].to_i }.sort
      median = table_counts[table_counts.size / 2]

      return median < MINIMUM_TABLE_SIZE
    end

    STANDARD_FUZZ_SIZE = 5_000

    # Create fake table sizes based on the table's index count.
    # The more indexes, the bigger the table. Seems to rank tables fairly well.
    def self.guess_table_sizes(db)
      db = Shiba.connection.escape(db)
      index_count_query = "select TABLE_NAME as table_name, count(*) as index_count
      from information_schema.statistics where table_schema = '#{db}'
      and seq_in_index = 1 and index_name not like 'fk_rails%'
      group by table_name order by index_count"

      index_counts = Shiba.connection.query(index_count_query).to_a

      # 80th table percentile based on number of indexes
      large_table_idx = (index_counts.size * 0.8).round
      large_table = index_counts[large_table_idx]

      sizes = Hash[index_counts.map(&:values)]

      sizes.each do |table_name, index_count|
        if index_count == 0
          index_count = 1
        end

        sizes[table_name] = STANDARD_FUZZ_SIZE * (index_count / large_table['index_count'].to_f)
      end

      sizes
    end

    protected

    # Reads the TSV into an array of row arrays.
    # BUG FIX: the old `l.gsub!("\n", "")` returns nil when a line has no
    # trailing newline (the last line of many files), crashing on .split.
    # chomp is nil-safe and equivalent for trailing-newline removal.
    def self.read(path)
      IO.foreach(path).map { |l| l.chomp.split("\t") }
    end

  end
end