RubyGems - shiba - Versions diffs - 0.3.0 → 0.4.0 - Mend

shiba 0.3.0 → 0.4.0

Files changed (25) hide show

checksums.yaml +5 -5
data/Gemfile.lock +1 -1
data/README.md +22 -0
data/bin/dump_stats +21 -0
data/bin/mysql_dump_stats +2 -19
data/bin/shiba +2 -1
data/lib/shiba/activerecord_integration.rb +3 -1
data/lib/shiba/analyzer.rb +25 -25
data/lib/shiba/configure.rb +3 -7
data/lib/shiba/connection/mysql.rb +7 -0
data/lib/shiba/connection/postgres.rb +1 -0
data/lib/shiba/explain.rb +84 -163
data/lib/shiba/explain/check_support.rb +24 -0
data/lib/shiba/explain/checks.rb +133 -0
data/lib/shiba/explain/mysql_explain.rb +4 -0
data/lib/shiba/explain/result.rb +18 -0
data/lib/shiba/fuzzer.rb +1 -0
data/lib/shiba/output.rb +3 -1
data/lib/shiba/output/tags.yaml +18 -10
data/lib/shiba/review/comment_renderer.rb +23 -13
data/lib/shiba/reviewer.rb +7 -4
data/lib/shiba/version.rb +1 -1
data/web/main.css +24 -5
data/web/results.html.erb +99 -48
metadata +7 -3

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
-SHA1:
-  metadata.gz: 8aaef4cac972cd661d5398bd510d0a78f4d1e078
-  data.tar.gz: 61d077a4f31b4ff21eb652c2c866e8c5f3bb9e49
+SHA256:
+  metadata.gz: 4b4f464acdc517169589f38206c835a77b160ae7fe5293f3c1c96bef2a736911
+  data.tar.gz: ab4af7bcb0e55e042372c5579f8829f751c76efbb0ac3c0b17ae1fd97547362b
 SHA512:
-  metadata.gz: 80e2b32747df07efbbd89227b86347530ad955fdc4520f9179bbfe274440397b1063d6dfb50bf2c737f8e88460609e80ae86361b712f9c2e3b0d7ae86d55d728
-  data.tar.gz: 4b540f27e5033c153621a0f2292cba50786857f018d2de54f8d9a5f58755d0a6b8872b2dae5af11e097a98f9b18935499d176ed5aedcedb4a859ce7219c7fc0c
+  metadata.gz: 0c2809905f330b3e1e8874297e66d84c24a938fe4c532b29cfe87b88b8983a1d6700a03f60382e190fe171c5f7cc90ad81d0ca4aa2d1770cf2eb0afdd10621f5
+  data.tar.gz: 36e6639d67a0b333c8faac40c45d092c7fe13b5acc0053066e1e68c0b3f9f70377fc19bb2d1fa0e724fd9163ea913b778bdf07ac8fbbd15d073678c31ac28b16

data/Gemfile.lock CHANGED

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    shiba (0.3.0)
+    shiba (0.4.0)
       activesupport
       mysql2
       pg

data/README.md CHANGED

@@ -134,3 +134,25 @@ users:
         rows_per: 20% # each organization has, on average, 20% or 2000 users.
       unique: false
 ```
+## Automatic pull request reviews
+Shiba can automatically comment on GitHub pull requests when code changes appear to introduce a query issue. The comments are similar to those in the query report dashboard. This guide will walk through setup on Travis CI, but other CI services should work in a similar fashion.
+Once Shiba is installed, the `shiba review` command needs to be run after the tests are finished. On Travis, this goes in an after_script setting:
+```yml
+# .travis.yml
+after_script:
+ - bundle exec shiba review --submit
+ ```
+The `--submit` option tells Shiba to comment on the relevant PR when an issue is found. To do this, it will need the GitHub API token of a user that has access to the repo. Shiba's comments will appear to come from that user, so you'll likely want to set up a bot account on GitHub with repo access for this.
+By default, the review script looks for an environment variable named `GITHUB_TOKEN`, which can be set at https://travis-ci.com/{organization}/{repo}/settings. The token can be generated on GitHub at https://github.com/settings/tokens. If your GitHub token is stored under a different environment variable name, pass its value explicitly with the `--token` flag.
+```yml
+# .travis.yml
+after_script:
+ - bundle exec shiba review --token $MY_GITHUB_API_TOKEN --submit
+ ```

data/bin/dump_stats ADDED

@@ -0,0 +1,21 @@
+#!/usr/bin/env ruby
+require 'bundler/setup'
+require 'shiba'
+require 'shiba/configure'
+require 'shiba/fuzzer'
+options = {}
+parser = Shiba::Configure.make_options_parser(options, only_basics: true)
+parser.banner = "Dump database statistics into yaml file."
+parser.parse!
+Shiba.configure(options) do |errmsg|
+  $stderr.puts(errmsg)
+  $stderr.puts(parser.help)
+  exit 1
+end
+Shiba.connection.analyze!
+index = Shiba::Fuzzer.new(Shiba.connection).fetch_index
+puts index.to_yaml

data/bin/mysql_dump_stats CHANGED

@@ -1,20 +1,3 @@
-#!/usr/bin/env ruby
+#!/bin/bash
-require 'bundler/setup'
-require 'shiba'
-require 'shiba/configure'
-require 'shiba/fuzzer'
-options = {}
-parser = Shiba::Configure.make_options_parser(options, only_basics: true)
-parser.banner = "Dump database statistics into yaml file."
-parser.parse!
-Shiba.configure(options) do |errmsg|
-  $stderr.puts(errmsg)
-  $stderr.puts(parser.help)
-  exit 1
-end
-index = Shiba::Fuzzer.new(Shiba.connection).fetch_index
-puts index.to_yaml
+exec `dirname $0`/dump_stats --server mysql $*

data/bin/shiba CHANGED

@@ -6,7 +6,8 @@ APP = File.basename(__FILE__)
 commands = {
     "explain" => "Generate a report from logged SQL queries",
-    "review"   => "Review changed files for query problems",
+    "review"  => "Review changed files for query problems",
+    "dump_stats" => "Collect database statistics for more accurate analysis"
 }
 global = OptionParser.new do |opts|

data/lib/shiba/activerecord_integration.rb CHANGED

@@ -50,7 +50,9 @@ module Shiba
       puts ""
       cmd = "shiba explain #{database_args} --file #{path}"
-      if Shiba::Configure.ci?
+      if ENV['SHIBA_OUT']
+        cmd << " --json #{File.join(Shiba.path, "#{ENV["SHIBA_OUT"]}.json")}"
+      elsif Shiba::Configure.ci?
         cmd << " --json #{File.join(Shiba.path, 'ci.json')}"
       end

data/lib/shiba/analyzer.rb CHANGED

@@ -16,11 +16,17 @@ module Shiba
       @stats = stats
       @options = options
       @fingerprints = {}
+      @queries = []
     end
     def analyze
       idx = 0
-      queries = []
+      if @options['sql']
+        analyze_sql(@options['sql'])
+        return @queries
+      end
       while line = @file.gets
         # strip out colors
         begin
@@ -35,35 +41,25 @@ module Shiba
           next
         end
-        if @options['limit']
-          return if idx == @options['limit']
-        end
+        sql.chomp!
+        analyze_sql(sql)
+      end
+      @queries
+    end
-        if @options['index']
-          next unless idx == @options['index']
-        end
+    def analyze_sql(sql)
+      query = Shiba::Query.new(sql, @stats)
-        sql.chomp!
-        query = Shiba::Query.new(sql, @stats)
-        if !@fingerprints[query.fingerprint]
-          if sql.downcase.start_with?("select")
-            if @options['debug']
-              require 'byebug'
-              debugger
-            end
-            explain = analyze_query(query)
-            if explain
-              idx += 1
-              queries << explain
-            end
+      if !@fingerprints[query.fingerprint]
+        if sql.downcase.start_with?("select")
+          explain = analyze_query(query)
+          if explain
+            @queries << explain
           end
         end
-        @fingerprints[query.fingerprint] = true
       end
-      queries
+      @fingerprints[query.fingerprint] = true
     end
     protected
@@ -85,6 +81,10 @@ module Shiba
       end
       return nil unless explain
+      if explain.other_paths.any?
+        paths = [explain] + explain.other_paths
+        explain = paths.sort { |a, b| a.cost - b.cost }.first
+      end
       json = JSON.dump(explain.as_json)
       write(json)
       explain.as_json

data/lib/shiba/configure.rb CHANGED

@@ -101,8 +101,8 @@ module Shiba
         next if only_basics
-        opts.on("-l", "--limit NUM", "stop after processing NUM queries") do |l|
-          options["limit"] = l.to_i
+        opts.on("--sql SQL", "analyze this sql") do |s|
+          options["sql"] = s
         end
         opts.on("-f", "--file FILE", "location of file containing queries") do |f|
@@ -117,14 +117,10 @@ module Shiba
           end
         end
-        opts.on("-h", "--html FILE", "write html report here. Default to /tmp/explain.html") do |h|
+        opts.on("-h", "--html FILE", "write html report here.") do |h|
           options["html"] = h
         end
-        opts.on("-t", "--test", "analyze queries at --file instead of analyzing a process") do |f|
-          options["test"] = true
-        end
         opts.on("-v", "--verbose", "print internal runtime information") do
            options["verbose"] = true
         end

data/lib/shiba/connection/mysql.rb CHANGED

@@ -21,6 +21,13 @@ module Shiba
         @connection.query(sql)
       end
+      def analyze!
+        @connection.query("show tables").each do |row|
+          t = row.values.first
+          @connection.query("analyze table `#{t}`") rescue nil
+        end
+      end
       def count_indexes_by_table
         sql =<<-EOL
           select TABLE_NAME as table_name, count(*) as index_count

data/lib/shiba/connection/postgres.rb CHANGED

@@ -7,6 +7,7 @@ module Shiba
         @connection = PG.connect( dbname: h['database'], host: h['host'], user: h['username'], password: h['password'], port: h['port'] )
         @connection.type_map_for_results = PG::BasicTypeMapForResults.new(@connection)
         query("SET enable_seqscan = OFF")
+        query("SET random_page_cost = 0.01")
       end
       def query(sql)

data/lib/shiba/explain.rb CHANGED

@@ -1,10 +1,15 @@
 require 'json'
 require 'shiba/index'
+require 'shiba/explain/check_support'
+require 'shiba/explain/checks'
+require 'shiba/explain/result'
 require 'shiba/explain/mysql_explain'
 require 'shiba/explain/postgres_explain'
 module Shiba
   class Explain
+    include CheckSupport
+    extend CheckSupport::ClassMethods
     def initialize(sql, stats, backtrace, options = {})
       @sql = sql
       @backtrace = backtrace
@@ -21,7 +26,9 @@ module Shiba
       else
         @rows = Shiba::Explain::PostgresExplain.new(@explain_json).transform
       end
+      @result = Result.new
       @stats = stats
       run_checks!
     end
@@ -29,19 +36,22 @@ module Shiba
       {
         sql: @sql,
         table: get_table,
-        table_size: table_size,
-        key: first_key,
-        tags: messages,
-        cost: @cost,
-        return_size: @return_size,
+        messages: @result.messages,
+        cost: @result.cost,
         severity: severity,
-        used_key_parts: first['used_key_parts'],
-        possible_keys: first['possible_keys'],
         raw_explain: humanized_explain,
         backtrace: @backtrace
       }
     end
+    def messages
+      @result.messages
+    end
+    def cost
+      @result.cost
+    end
     def get_table
       @sql =~ /\s+from\s*([^\s,]+)/i
       table = $1
@@ -53,66 +63,20 @@ module Shiba
       table
     end
-    # [{"id"=>1, "select_type"=>"SIMPLE", "table"=>"interwiki", "partitions"=>nil, "type"=>"const", "possible_keys"=>"PRIMARY", "key"=>"PRIMARY", "key_len"=>"34", "ref"=>"const", "rows"=>1, "filtered"=>100.0, "Extra"=>nil}]
-    attr_reader :cost
     def first
       @rows.first
     end
-    def first_table
-      first["table"]
-    end
-    def first_key
-      first["key"]
-    end
     def first_extra
       first["Extra"]
     end
-    def messages
-      @messages ||= []
-    end
-    # shiba: {"possible_keys"=>nil, "key"=>nil, "key_len"=>nil, "ref"=>nil, "rows"=>6, "filtered"=>16.67, "Extra"=>"Using where"}
-    def to_log
-      plan = first.symbolize_keys
-      "possible: #{plan[:possible_keys]}, rows: #{plan[:rows]}, filtered: #{plan[:filtered]}, cost: #{self.cost}, access: #{plan[:access_type]}"
-    end
-    def to_h
-      first.merge(cost: cost, messages: messages)
-    end
-    def table_size
-      @stats.table_count(first['table'])
-    end
-    def fuzzed?(table)
-      @stats.fuzzed?(first['table'])
-    end
     def no_matching_row_in_const_table?
       first_extra && first_extra =~ /no matching row in const table/
     end
-    def ignore_explain?
-    end
-    def derived?
-      first['table'] =~ /<derived.*?>/
-    end
-    # TODO: need to parse SQL here I think
-    def simple_table_scan?
-      @rows.size == 1 &&  (@sql !~ /order by/i) &&
-        (first['using_index'] || !(@sql =~ /\s+WHERE\s+/i))
-    end
     def severity
-      case @cost
+      case @result.cost
       when 0..100
         "low"
       when 100..1000
@@ -133,22 +97,33 @@ module Shiba
     def aggregation?
       @sql =~ /select\s*(.*?)from/i
       select_fields = $1
-      select_fields =~ /min|max|avg|count|sum|group_concat\s*\(.*?\)/i
+      select_fields =~ /(min|max|avg|count|sum|group_concat)\s*\(.*?\)/i
     end
-    def self.check(c)
-      @checks ||= []
-      @checks << c
+    def ignore?
+      !!ignore_line_and_backtrace_line
     end
-    def self.get_checks
-      @checks
+    def ignore_line_and_backtrace_line
+      ignore_files = Shiba.config['ignore']
+      if ignore_files
+        ignore_files.each do |i|
+          file, method = i.split('#')
+          @backtrace.each do |b|
+            next unless b.include?(file)
+            next if method && !b.include?(method)
+            return [i, b]
+          end
+        end
+      end
+      nil
     end
     check :check_query_is_ignored
     def check_query_is_ignored
       if ignore?
-        messages << "ignored"
+        @result.messages << { tag: "ignored" }
         @cost = 0
       end
     end
@@ -156,7 +131,7 @@ module Shiba
     check :check_no_matching_row_in_const_table
     def check_no_matching_row_in_const_table
       if no_matching_row_in_const_table?
-        messages << "access_type_const"
+        @result.messages << { tag: "access_type_const", table:  get_table }
         first['key'] = 'PRIMARY'
         @cost = 1
       end
@@ -176,9 +151,10 @@ module Shiba
       end
     end
-    check :check_fuzzed
-    def check_fuzzed
-      messages << "fuzzed_data" if fuzzed?(first_table)
+    # TODO: need to parse SQL here I think
+    def simple_table_scan?
+      @rows.size == 1 &&  (@sql !~ /order by/i) &&
+        (@rows.first['using_index'] || !(@sql =~ /\s+WHERE\s+/i))
     end
     # TODO: we don't catch some cases like SELECT * from foo where index_col = 1 limit 1
@@ -187,129 +163,60 @@ module Shiba
     def check_simple_table_scan
       if simple_table_scan?
         if limit
-          messages << 'limited_scan'
+          @result.messages << { tag: 'limited_scan', cost: limit, table: @rows.first['table'] }
           @cost = limit
         end
       end
     end
-    check :check_derived
-    def check_derived
-      if derived?
-        # select count(*) from ( select 1 from foo where blah )
-        @rows.shift
-        return run_checks!
-      end
-    end
-    check :tag_query_type
-    def tag_query_type
-      access_type = first['access_type']
-      if access_type.nil?
-        @cost = 0
-        return
-      end
-      access_type = 'tablescan' if access_type == 'ALL'
-      messages << "access_type_" + access_type
-    end
-    #check :check_index_walk
-    # disabling this one for now, it's not quite good enough and has a high
-    # false-negative rate.
-    def check_index_walk
-      if first['index_walk']
-        @cost = limit
-        messages << 'index_walk'
-      end
-    end
-    check :check_key_size
-    def check_key_size
-      # TODO: if possible_keys but mysql chooses NULL, this could be a test-data issue,
-      # pick the best key from the list of possibilities.
-      #
-      if first_key
-        @cost = @stats.estimate_key(first_table, first_key, first['used_key_parts'])
-      else
-        if first['possible_keys'].nil?
-          # if no possibile we're table scanning, use PRIMARY to indicate that cost.
-          # note that this can be wildly inaccurate bcs of WHERE + LIMIT stuff.
-          @cost = table_size
-        else
-          if @options[:force_key]
-            # we were asked to force a key, but mysql still told us to fuck ourselves.
-            # (no index used)
-            #
-            # there seems to be cases where mysql lists `possible_key` values
-            # that it then cannot use, seen this in OR queries.
-            @cost = table_size
-          else
-            possibilities = [table_size]
-            possibilities += first['possible_keys'].map do |key|
-              estimate_row_count_with_key(key)
-            end
-            @cost = possibilities.compact.min
-          end
+    check :check_fuzzed
+    def check_fuzzed
+      h = {}
+      @rows.each do |row|
+        t = row['table']
+        if @stats.fuzzed?(t)
+          h[t] = @stats.table_count(t)
         end
       end
+      if h.any?
+        @result.messages << { tag: "fuzzed_data", tables: h }
+      end
     end
     def check_return_size
       if limit
-        @return_size = limit
+        return_size = limit
       elsif aggregation?
-        @return_size = 1
+        return_size = 1
       else
-        @return_size = @cost
+        return_size = @result.result_size
       end
-      if @return_size && @return_size > 100
-        messages << "retsize_bad"
+      if return_size && return_size > 100
+        @result.messages << { tag: "retsize_bad", result_size: return_size }
       else
-        messages << "retsize_good"
+        @result.messages << { tag: "retsize_good", result_size: return_size }
       end
     end
-    def estimate_row_count_with_key(key)
-      explain = Explain.new(@sql, @stats, @backtrace, force_key: key)
-      explain.run_checks!
-    rescue Mysql2::Error => e
-      if /Key .+? doesn't exist in table/ =~ e.message
-        return nil
+    def run_checks!
+      # first run top-level checks
+      _run_checks! do
+        :stop if @cost
       end
-      raise e
-    end
-    def ignore?
-      !!ignore_line_and_backtrace_line
-    end
-    def ignore_line_and_backtrace_line
-      ignore_files = Shiba.config['ignore']
-      if ignore_files
-        ignore_files.each do |i|
-          file, method = i.split('#')
-          @backtrace.each do |b|
-            next unless b.include?(file)
-            next if method && !b.include?(method)
-            return [i, b]
-          end
+      if @cost
+        # we've decided to stop further analysis at the query level
+        @result.cost = @cost
+      else
+        # run per-table checks
+        0.upto(@rows.size - 1) do |i|
+          check = Checks.new(@rows, i, @stats, @options, @result)
+          check.run_checks!
         end
       end
-      nil
-    end
-    def run_checks!
-      self.class.get_checks.each do |check|
-        res = send(check)
-        break if @cost
-      end
       check_return_size
-      @cost
     end
     def humanized_explain
@@ -318,5 +225,19 @@ module Shiba
       #h
       @explain_json
     end
+    def other_paths
+      if Shiba.connection.mysql?
+        @rows.map do |r|
+          next [] unless r['possible_keys'] && r['key'].nil?
+          possible = r['possible_keys'] - [r['key']]
+          possible.map do |p|
+            Explain.new(@sql, @stats, @backtrace, force_key: p) rescue nil
+          end.compact
+        end.flatten
+      else
+        []
+      end
+    end
   end
 end

data/lib/shiba/explain/check_support.rb ADDED

@@ -0,0 +1,24 @@
+module Shiba
+  class Explain
+    module CheckSupport
+      module ClassMethods
+        def check(c)
+          @checks ||= []
+          @checks << c
+        end
+        def get_checks
+          @checks
+        end
+      end
+      def _run_checks!(&block)
+        self.class.get_checks.each do |check|
+          res = send(check)
+          break if yield == :stop
+        end
+      end
+    end
+  end
+end

data/lib/shiba/explain/checks.rb ADDED

@@ -0,0 +1,133 @@
+require 'shiba/explain/check_support'
+module Shiba
+  class Explain
+    class Checks
+      include CheckSupport
+      extend CheckSupport::ClassMethods
+      def initialize(rows, index, stats, options, result)
+        @rows = rows
+        @row = rows[index]
+        @index = index
+        @stats = stats
+        @options = options
+        @result = result
+        @tbl_message = {}
+      end
+      attr_reader :cost
+      def table
+        @row['table']
+      end
+      def table_size
+        @stats.table_count(table)
+      end
+      def add_message(tag, extra = {})
+        @result.messages << { tag: tag, table_size: table_size, table: table }.merge(extra)
+      end
+      check :check_derived
+      def check_derived
+        if table =~ /<derived.*?>/
+          # select count(*) from ( select 1 from foo where blah )
+          add_message('derived_table', size: nil)
+          @cost = 0
+        end
+      end
+      check :tag_query_type
+      def tag_query_type
+        @access_type = @row['access_type']
+        if @access_type.nil?
+          @cost = 0
+          return
+        end
+        @access_type = 'tablescan' if @access_type == 'ALL'
+        @access_type = "access_type_" + @access_type
+      end
+      check :check_join
+      def check_join
+        if @row['join_ref']
+          @access_type.sub!("access_type", "join_type")
+          # TODO MAYBE: are multiple-table joins possible?  or does it just ref one table?
+          ref = @row['join_ref'].find { |r| r != 'const' }
+          table = ref.split('.')[1]
+          @tbl_message['join_to'] = table
+        end
+      end
+      #check :check_index_walk
+      # disabling this one for now, it's not quite good enough and has a high
+      # false-negative rate.
+      def check_index_walk
+        if first['index_walk']
+          @cost = limit
+          add_message("index_walk")
+        end
+      end
+      check :check_key_size
+      def check_key_size
+        if @row['key']
+          rows_read = @stats.estimate_key(table, @row['key'], @row['used_key_parts'])
+        else
+          rows_read = table_size
+        end
+        # TBD: this appears to come from a couple of bugs.
+        # one is we're not handling mysql index-merges, the other is that
+        # we're not handling mysql table aliasing.
+        if rows_read.nil?
+          rows_read = 1
+        end
+        if @row['join_ref']
+          # when joining, we'll say we read "@cost" rows -- but up to
+          # a max of the table size.  I'm not sure this assumption is *exactly*
+          # true but it feels good enough to start; a decent hash join should
+          # nullify the cost of re-reading rows.  I think.
+          @cost = [@result.result_size * rows_read, table_size || 2**32].min
+          # poke holes in this.  Is this even remotely accurate?
+          # We're saying that if we join to a a table with 100 rows per item
+          # in the index, for each row we'll be joining in 100 more rows.  Is that true?
+          @result.result_size *= rows_read
+        else
+          @cost = rows_read
+          @result.result_size += rows_read
+        end
+        @result.cost += @cost
+        @tbl_message['cost'] = @cost
+        @tbl_message['index'] = @row['key']
+        @tbl_message['index_used'] = @row['used_key_parts']
+        add_message(@access_type, @tbl_message)
+      end
+      def estimate_row_count_with_key(key)
+        explain = Explain.new(@sql, @stats, @backtrace, force_key: key)
+        explain.run_checks!
+      rescue Mysql2::Error => e
+        if /Key .+? doesn't exist in table/ =~ e.message
+          return nil
+        end
+        raise e
+      end
+      def run_checks!
+        _run_checks! do
+          :stop if @cost
+        end
+      end
+    end
+  end
+end

data/lib/shiba/explain/mysql_explain.rb CHANGED

@@ -11,6 +11,10 @@ module Shiba
         res['rows'] = t['rows_examined_per_scan']
         res['filtered'] = t['filtered']
+        if t['ref'] && t['ref'].any? { |r| r != "const" }
+          res['join_ref'] = t['ref']
+        end
         if t['possible_keys'] && t['possible_keys'] != [res['key']]
           res['possible_keys'] = t['possible_keys']
         end

data/lib/shiba/explain/result.rb ADDED

@@ -0,0 +1,18 @@
+module Shiba
+  class Explain
+    class Result
+      # cost: total rows read
+      # result_size: approximate rows returned to the client
+      # messages: list of hashes detailing the operations
+      def initialize
+        @messages = []
+        @cost = 0
+        @result_size = 0
+      end
+      attr_accessor :messages, :cost, :result_size
+    end
+  end
+end

data/lib/shiba/fuzzer.rb CHANGED

@@ -48,6 +48,7 @@ module Shiba
     # The more indexes, the bigger the table. Seems to rank tables fairly well.
     def guess_table_sizes
       index_counts = connection.count_indexes_by_table
+      return if index_counts.empty?
       # 90th table percentile based on number of indexes
       # round down so we don't blow up on small tables

data/lib/shiba/output.rb CHANGED

@@ -45,7 +45,9 @@ module Shiba
       url.chomp!
       url.gsub!('git@github.com:', 'https://github.com/')
       url.gsub!(/\.git$/, '')
-      url + '/blob/master/'
+      branch = `git symbolic-ref HEAD`.strip.split('/').last
+      url + "/blob/#{branch}"
     end
     def make_web!

data/lib/shiba/output/tags.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 fuzzed_data:
   title: Fuzzed Data
-  summary: Shiba doesn't know the size of <b>{{table}}</b>.  For these purposes we set the table size to <b>{{table_size}}</b>.
+  summary: "Table sizes estimated as follows -- {{ fuzz_table_sizes }}"
   description: |
     We're not sure how much data this table will hold in the future, so we've pretended
     there's 6000 rows in it.  This can lead to a lot of false positives. To
@@ -21,24 +21,32 @@ access_type_const:
     This query selects at *most* one row, which is about as good as things get.
   level: success
 access_type_ref:
-  title: Indexed
-  summary: The database reads {{ cost }} rows in <b>{{ table }}</b> via the <b>{{ key }}</b> index ({{ key_parts }}).
+  title: Index Scan
+  summary: The database reads {{ formatted_cost }} rows in <b>{{ table }}</b> via the <i>{{ index }}</i> index ({{ key_parts }}).
   description: |
     This query uses an index to find rows that match a single value.  Often this
     has very good performance, but it depends on how many rows match that value.
   level: success
+join_type_eq_ref:
+  title: Indexed Join
+  summary: <b>{{ table }}</b> is joined to <b>{{ join_to }}</b> via the <i>{{ index }}</i> index, reading 1 row per joined item.
+  level: success
+join_type_ref:
+  title: Indexed Join
+  summary: <b>{{ table }}</b> is joined to <b>{{ join_to }}</b> via the <i>{{ index }}</i> index, reading {{ formatted_cost }} rows per joined item.
+  level: success
 access_type_range:
   title: Indexed
-  summary: The database uses a "range scan" to read more than {{ cost }} rows in {{ table }} via the <b>{{ key }}</b> index ({{ key_parts }})
+  summary: The database uses a "range scan" to read more than {{ formatted_cost }} rows in {{ table }} via the <b>{{ index }}</b> index ({{ key_parts }})
   description: |
     This query uses an index to find rows that match a range of values, for instance
     `WHERE indexed_value in (1,2,5,6)` or `WHERE indexed_value >= 5 AND indexed_value <= 15`.
     It's very hard to estimate how many rows this query will consider in production, so we've
-    upped the cost of this query.
+    upped the cost of this query.
   level: info
 access_type_tablescan:
   title: Table Scan
-  summary: The database reads <b>100%</b> ({{ table_size }}) of the rows in <b>{{ table }}</b>, skipping any indexes.
+  summary: The database reads {{ formatted_cost }} of the rows in <b>{{ table }}</b>, skipping any indexes.
   description: |
     This query doesn't use any indexes to find data, meaning this query will need to evaluate
     every single row in the table.  This is about the worst of all possible worlds.
@@ -49,10 +57,10 @@ access_type_tablescan:
   level: danger
 limited_scan:
   title: Limited Scan
-  summary: The database reads {{ query.cost }} rows from {{ query.table }}.
+  summary: The database reads {{ formatted_cost }} rows from {{ table }}.
   description: |
     This query doesn't use any indexes to find data, but since it doesn't care about
-    ordering and it doesn't have any conditions, it only ever reads {{ query.cost }} rows.
+    ordering and it doesn't have any conditions, it only ever reads {{ formatted_cost }} rows.
   level: info
 ignored:
   title: Ignored
@@ -68,9 +76,9 @@ index_walk:
   level: success
 retsize_bad:
   title: Big Results
-  summary: The database returns {{ return_size }} rows to the client.
+  summary: The database returns {{ result_size }} rows to the client.
   level: danger
 retsize_good:
   title: Small Results
-  summary: The database returns {{ return_size }} row(s) to the client.
+  summary: The database returns {{ result_size }} row(s) to the client.
   level: success

data/lib/shiba/review/comment_renderer.rb CHANGED

@@ -13,8 +13,9 @@ module Shiba
       def render(explain)
         body = ""
-        data = present(explain)
-        explain["tags"].each do |tag|
+        explain["messages"].each do |message|
+          tag = message['tag']
+          data = present(message)
           body << @templates[tag]["title"]
           body << ": "
           body << render_template(@templates[tag]["summary"], data)
@@ -32,22 +33,25 @@ module Shiba
         end
         # convert to markdown
         rendered.gsub!(/<\/?b>/, "**")
+        rendered.gsub!(/<\/?i>/, "_")
         rendered
       end
-      def present(explain)
-        used_key_parts = explain["used_key_parts"] || []
-        { "table"       => explain["table"],
-          "table_size"  => explain["table_size"],
-          "key"         => explain["key"],
-          "return_size" => explain["return_size"],
-          "key_parts"   => used_key_parts.join(","),
-          "cost"        => cost(explain)
+      def present(message)
+        {
+          "fuzz_table_sizes" => fuzzed_sizes(message),
+          "table"           => message["table"],
+          "table_size"      => message["table_size"],
+          "result_size"     => message["result_size"],
+          "index"           => message["index"],
+          "key_parts"       => (message["index_used"] || []).join(','),
+          "size"            => message["size"],
+          "formatted_cost"  => formatted_cost(message)
         }
       end
-      def cost(explain)
+      def formatted_cost(explain)
+        return nil unless explain["cost"] && explain["table_size"]
         percentage = (explain["cost"] / explain["table_size"]) * 100.0;
         if explain["cost"] > 100 && percentage > 1
@@ -57,6 +61,12 @@ module Shiba
         end
       end
+      def fuzzed_sizes(message)
+        return nil unless message["tables"]
+        message['tables'].group_by { |k, v| v }.map do |size, arr|
+          size.to_s + ": " + arr.map(&:first).join(', ')
+        end.join(". ")
+      end
     end
   end
-end
+end

data/lib/shiba/reviewer.rb CHANGED

@@ -35,7 +35,7 @@ module Shiba
         position = diff.find_position(file, line_number.to_i)
         if options["submit"]
-          explain = keep_only_dangerous_tags(explain)
+          explain = keep_only_dangerous_messages(explain)
         end
         { body: renderer.render(explain),
@@ -90,9 +90,12 @@ module Shiba
       end
     end
-    def keep_only_dangerous_tags(explain)
+    def keep_only_dangerous_messages(explain)
       explain_b = explain.dup
-      explain_b["tags"] = explain_b["tags"].select { |tag| tags[tag]["level"] == "danger" }
+      explain_b["messages"] = explain_b["messages"].select do |message|
+        tag = message['tag']
+        tags[tag]["level"] == "danger"
+      end
       explain_b
     end
@@ -133,4 +136,4 @@ module Shiba
     end
   end
-end
+end

data/lib/shiba/version.rb CHANGED

@@ -1,3 +1,3 @@
 module Shiba
-  VERSION = "0.3.0"
+  VERSION = "0.4.0"
 end

data/web/main.css CHANGED

@@ -2,7 +2,7 @@
 .query-info-box {
   border: 1px solid black;
   padding: 10px;
-  margin: 20px;
+  margin: 5px;
 }
 .backtrace {
@@ -41,11 +41,30 @@
   border-color: #ffb100;
 }
-.shiba-info-list {
-  list-style-type:none;
-  margin: 5px;
-  padding: 0;
+.shiba-badge-td {
+  width: 100px;
+}
+.shiba-messages {
+  margin: 0px;
+  margin-top: 10px;
+  width: 100%;
+}
+.shiba-messages td {
+  padding-top: 5px;
 }
+.shiba-message {
+  padding-right: 10px;
+  width: 90%;
+}
+.running-totals {
+  align: right;
+  font-family: monospace;
+}
 [v-cloak] { display: none }

data/web/results.html.erb CHANGED

@@ -43,11 +43,21 @@
         Object.assign(this, obj);
         this.severityIndex = severityIndexes[this.severity];
         this.splitSQL();
+        this.makeSearchString();
       };
       Query.prototype = {
+        makeSearchString: function() {
+          var arr = [this.sql];
+          arr = arr.concat(this.messages.map(function(m) { return m.tag }).join(':'));
+          arr = arr.concat(this.backtrace.join(':'));
+          this.searchString = arr.join(':').toLowerCase();
+        },
         hasTag: function(tag) {
-          return this.tags.includes(tag);
+          return this.messages.find(function(m) {
+            return m.tag == tag;
+          });
         },
         splitSQL: function() {
           this.sqlFragments = this.sql.match(/(SELECT\s)(.*?)(\s+FROM .*)/i);
@@ -72,10 +82,20 @@
           queriesByTable.push(q);
         }
-        if ( q.hasTag("fuzzed_data" ) )
+        if ( q.hasTag("fuzzed_data") )
           queriesHaveFuzzed = true;
         q.expandSelect = false;
+        var rCost = 0;
+        q.messages.forEach(function(m) {
+          if ( m.cost ) {
+            rCost += m.cost;
+            m.running_cost = rCost;
+          } else {
+            m.running_cost = '';
+          }
+        });
       });
       var f = sortByFunc(['severityIndex', 'table']);
@@ -106,11 +126,9 @@
                   <div v-for="backtrace in query.backtrace" v-html="makeURL(backtrace, backtrace)"></div>
                 </div>
               </div>
-              <ul class="shiba-info-list">
-              <li v-for="tag in query.tags">
-                <component v-bind:is="'tag-' + tag" v-bind:query="query"></component>
-              </li>
-              </ul>
+              <table class="shiba-messages">
+                <component v-for="message in query.messages" v-bind:is="'tag-' + message.tag" v-bind="message"></component>
+              </table>
               <div v-if="!rawExpanded">
                 <a href="#" v-on:click.prevent="rawExpanded = !rawExpanded">See full EXPLAIN</a>
               </div>
@@ -124,47 +142,82 @@
       </div>
     </script>
+  <script>
+    var greenToRedGradient = [
+      '#57bb8a','#63b682', '#73b87e', '#84bb7b', '#94bd77', '#a4c073', '#b0be6e',
+      '#c4c56d', '#d4c86a', '#e2c965', '#f5ce62', '#f3c563', '#e9b861', '#e6ad61',
+      '#ecac67', '#e9a268', '#e79a69', '#e5926b', '#e2886c', '#e0816d', '#dd776e'];
+      var templateComputedFunctions = {
+        key_parts: function() {
+          if ( this.index_used && this.index_used.length > 0 )
+            return this.index_used.join(',');
+          else
+            return "";
+        },
+        fuzz_table_sizes: function() {
+          var h = {};
+          var tables = this.tables;
+          Object.keys(tables).forEach(function(k) {
+            console.log(k);
+            var size = tables[k];
+            if ( !h[size] )
+              h[size] = [];
+            h[size].push(k);
+          });
+          var sizesDesc = Object.keys(h).sort(function(a, b) { return b - a });
+          var str = "";
+          sizesDesc.forEach(function(size) {
+            str = str + h[size].join(", ") + ": " + size.toLocaleString() + " rows.  ";
+          });
+          return str;
+        },
+        formatted_cost: function() {
+          var costPercentage = (this.cost / this.table_size) * 100.0;
+          if ( this.cost > 100 && costPercentage > 1 ) // todo: make better
+            return `${costPercentage.toFixed()}% (${this.cost.toLocaleString()}) of the`;
+          else
+            return this.cost.toLocaleString();
+        },
+        costToColor: function() {
+          var goodColor = [34, 160, 60];
+          var endColor = [255, 0, 0];
+          var costScale = this.cost ? this.cost / 5000 : 0;
+          if ( costScale > 1 )
+            costScale = 1;
+          var pos =  (costScale * (greenToRedGradient.length - 1)).toFixed();
+          debugger;
+          return "border-color: " + greenToRedGradient[pos];
+        }
+      }
+    </script>
     <% data[:tags].each do |tag, h| %>
-    <script type="text/x-template" id="tag-<%= tag %>-template">
-      <span><a class="badge shiba-badge-<%= h['level'] %>"><%= h['title'] %></a><%= h['summary'] %></span>
+      <script type="text/x-template" id="tag-<%= tag %>-template">
+        <tr>
+        <td class="shiba-badge-td">
+          <a class="badge" v-bind:style="costToColor"><%= h['title'] %></a>
+        </td>
+        <td class="shiba-message">
+          <%= h['summary'] %>
+        </td>
+        <td class="running-totals">
+          {{ running_cost.toLocaleString() }}
+        </td>
+        </tr>
     </script>
     <script>
       Vue.component('tag-<%= tag %>', {
         template: '#tag-<%= tag %>-template',
-        props: [ 'query' ],
-        computed: {
-          table: function() {
-            return this.query.table;
-          },
-          table_size: function() {
-            return this.query.table_size;
-          },
-          key: function() {
-            return this.query.key;
-          },
-          return_size: function() {
-            return this.query.return_size.toLocaleString();
-          },
-          key_parts: function() {
-            if ( this.query.used_key_parts && this.query.used_key_parts.length > 0 )
-              return this.query.used_key_parts.join(',');
-            else
-              return "";
-          },
-          cost: function() {
-            var costPercentage = (this.query.cost / this.query.table_size) * 100.0;
-            if ( this.query.cost > 100 && costPercentage > 1 ) // todo: make better
-              return `${costPercentage.toFixed()}% (${this.query.cost.toLocaleString()}) of the`;
-            else
-              return this.query.cost.toLocaleString();
-          }
-        }
+        props: [ 'table_size', 'result_size', 'table', 'cost', 'index', 'join_to', 'index_used', 'running_cost', 'tables' ],
+        computed: templateComputedFunctions
       });
     </script>
     <% end %>
@@ -269,13 +322,13 @@
           this.expanded = !this.expanded;
         },
         shortLocation: function(query) {
-          if ( !query.backtrace )
+          if ( !query.backtrace || query.backtrace.length == 0 )
             return null;
           var location = query.backtrace[0];
           return location.match(/([^\/]+:\d+):/)[1];
         },
         makeURL: function(line, content) {
-          if ( !data.url )
+          if ( !data.url || !line )
             return content;
           var matches = line.match(/(.+):(\d+):/);
@@ -304,7 +357,6 @@
       },
       methods: {
         updateSearch: _.debounce(function (e) {
-          console.log("ok, updating...");
           this.search = e.target.value;
         }, 500)
       },
@@ -314,8 +366,7 @@
             var filtered = [];
             var lcSearch = this.search.toLowerCase();
             this.highQ.concat(this.lowQ).forEach(function(q) {
-              var searchString = q.sql + ":" + q.tags.join(":") + q.backtrace.join(":");
-              if ( searchString.toLowerCase().includes(lcSearch) )
+              if ( q.searchString.includes(lcSearch) )
                 filtered.push(q);
             });
             return filtered;

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: shiba
 version: !ruby/object:Gem::Version
-  version: 0.3.0
+  version: 0.4.0
 platform: ruby
 authors:
 - Ben Osheroff
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2019-02-23 00:00:00.000000000 Z
+date: 2019-02-27 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: activesupport
@@ -101,6 +101,7 @@ files:
 - Rakefile
 - TODO
 - bin/console
+- bin/dump_stats
 - bin/explain
 - bin/fingerprint
 - bin/mysql_dump_stats
@@ -127,9 +128,12 @@ files:
 - lib/shiba/connection/postgres.rb
 - lib/shiba/diff.rb
 - lib/shiba/explain.rb
+- lib/shiba/explain/check_support.rb
+- lib/shiba/explain/checks.rb
 - lib/shiba/explain/mysql_explain.rb
 - lib/shiba/explain/postgres_explain.rb
 - lib/shiba/explain/postgres_explain_index_conditions.rb
+- lib/shiba/explain/result.rb
 - lib/shiba/fuzzer.rb
 - lib/shiba/index.rb
 - lib/shiba/index_stats.rb
@@ -174,7 +178,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.5.1
+rubygems_version: 2.7.6
 signing_key:
 specification_version: 4
 summary: A gem that attempts to find bad queries before you shoot self in foot