RubyGems - shiba - Versions diffs - 0.2.3 → 0.3.0 - Mend

shiba 0.2.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

checksums.yaml +4 -4
data/.gitignore +1 -0
data/.travis.yml +11 -2
data/Gemfile +1 -2
data/Gemfile.lock +4 -2
data/README.md +1 -1
data/bin/explain +10 -41
data/bin/mysql_dump_stats +20 -0
data/bin/postgres_dump_stats +3 -0
data/bin/review +181 -0
data/bin/shiba +3 -3
data/lib/shiba.rb +65 -4
data/lib/shiba/activerecord_integration.rb +30 -13
data/lib/shiba/checker.rb +89 -25
data/lib/shiba/configure.rb +22 -5
data/lib/shiba/connection.rb +25 -0
data/lib/shiba/connection/mysql.rb +45 -0
data/lib/shiba/connection/postgres.rb +91 -0
data/lib/shiba/diff.rb +21 -11
data/lib/shiba/explain.rb +18 -53
data/lib/shiba/explain/mysql_explain.rb +47 -0
data/lib/shiba/explain/postgres_explain.rb +91 -0
data/lib/shiba/explain/postgres_explain_index_conditions.rb +137 -0
data/lib/shiba/fuzzer.rb +16 -16
data/lib/shiba/index_stats.rb +9 -5
data/lib/shiba/output.rb +1 -1
data/lib/shiba/output/tags.yaml +14 -8
data/lib/shiba/query_watcher.rb +13 -1
data/lib/shiba/review/api.rb +100 -0
data/lib/shiba/review/comment_renderer.rb +62 -0
data/lib/shiba/reviewer.rb +136 -0
data/lib/shiba/version.rb +1 -1
data/shiba.gemspec +2 -0
data/web/dist/bundle.js +23 -1
data/web/main.css +3 -0
data/web/main.js +1 -0
data/web/package-lock.json +5 -0
data/web/package.json +1 -0
data/web/results.html.erb +77 -20
metadata +43 -5
data/bin/check +0 -75
data/bin/dump_stats +0 -44

data/lib/shiba/fuzzer.rb CHANGED Viewed

@@ -5,14 +5,14 @@ module Shiba
     def initialize(connection)
       @connection = connection
-      @index_stats = IndexStats.new
     end
     attr_reader :connection
     def fuzz!
-      fetch_index!
+      @index_stats = fetch_index
       table_sizes = guess_table_sizes
       @index_stats.tables.each do |name, table|
         table.count = table_sizes[name]
         table.indexes.each do |name, index|
@@ -21,33 +21,33 @@ module Shiba
           end
         end
       end
       @index_stats
     end
-    private
-    BIG_FUZZ_SIZE   = 5_000
-    SMALL_FUZZ_SIZE = 100
-    def fetch_index!
-      records = connection.query("select * from information_schema.statistics where table_schema = DATABASE()")
+    def fetch_index
+      stats = Shiba::IndexStats.new
+      records = connection.fetch_indexes
       tables = {}
       records.each do |h|
         h.keys.each { |k| h[k.downcase] = h.delete(k) }
         h["cardinality"] = h["cardinality"].to_i
-        @index_stats.add_index_column(h['table_name'], h['index_name'], h['column_name'], h['cardinality'], h['non_unique'] == "0")
+        stats.add_index_column(h['table_name'], h['index_name'], h['column_name'], h['cardinality'], h['non_unique'] == 0)
       end
+      stats
     end
+    private
+    BIG_FUZZ_SIZE   = 5_000
+    SMALL_FUZZ_SIZE = 100
     # Create fake table sizes based on the table's index count.
     # The more indexes, the bigger the table. Seems to rank tables fairly well.
     def guess_table_sizes
-      index_count_query = "select TABLE_NAME as table_name, count(*) as index_count
-        from information_schema.statistics where table_schema = DATABASE()
-        and seq_in_index = 1 and index_name not like 'fk_rails%'
-        group by table_name order by index_count"
-      index_counts = connection.query(index_count_query).to_a
+      index_counts = connection.count_indexes_by_table
       # 90th table percentile based on number of indexes
       # round down so we don't blow up on small tables

data/lib/shiba/index_stats.rb CHANGED Viewed

@@ -97,8 +97,7 @@ module Shiba
         count = table_count
         count = 1 if count == 0
-        ratio_per_item = self.rows_per / count.to_f rescue debugger
+        ratio_per_item = self.rows_per / count.to_f
         if count <= 10
           ratio_threshold = 1_000_0000 # always show a number
@@ -158,11 +157,16 @@ module Shiba
       return nil unless index
-      index_part = index.columns.detect do |p|
-        p.column == parts.last
+      index_part = nil
+      index.columns.each do |c|
+        break unless parts.include?(c.column)
+        index_part = c
       end
-      return nil unless index_part
+      # postgres can claim to use the right hand side of an index
+      # in a bitmap scan, which seems to be a side-effect of forcing
+      # seq-scan off.  In these cases we'll say it's a full scan.
+      return table_count(table_name) unless index_part
       index_part.rows_per
     end

data/lib/shiba/output.rb CHANGED Viewed

@@ -30,7 +30,7 @@ module Shiba
         FileUtils.mkdir_p(File.join(logdir, "shiba_results"))
         File.join(Dir.pwd, "log", "shiba_results", default_filename)
       else
-        File.join(Dir.tmpdir, default_filename)
+        File.join(Shiba.path, default_filename)
       end
     end

data/lib/shiba/output/tags.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 fuzzed_data:
   title: Fuzzed Data
-  summary: Shiba doesn't know the size of <b>{{query.table}}</b>.  For these purposes we set the table size to <b>{{query.table_size}}</b>.
+  summary: Shiba doesn't know the size of <b>{{table}}</b>.  For these purposes we set the table size to <b>{{table_size}}</b>.
   description: |
     We're not sure how much data this table will hold in the future, so we've pretended
     there's 6000 rows in it.  This can lead to a lot of false positives. To
@@ -16,20 +16,20 @@ possible_key_check:
   level: info
 access_type_const:
   title: One row
-  summary: The database only needs to read a single row from <b>{{query.table}}</b>.
+  summary: The database only needs to read a single row from <b>{{table}}</b>.
   description: |
     This query selects at *most* one row, which is about as good as things get.
   level: success
 access_type_ref:
   title: Indexed
-  summary: The database reads {{ formattedCost }} rows in <b>{{ query.table }}</b> via the <b>{{ query.key }}</b> index ({{ key_parts }}).
+  summary: The database reads {{ cost }} rows in <b>{{ table }}</b> via the <b>{{ key }}</b> index ({{ key_parts }}).
   description: |
     This query uses an index to find rows that match a single value.  Often this
     has very good performance, but it depends on how many rows match that value.
   level: success
 access_type_range:
   title: Indexed
-  summary: The database uses a "range scan" to read more than {{ formattedCost }} rows in {{ query.table }} via the <b>{{ query.key }}</b> index ({{ key_parts }})
+  summary: The database uses a "range scan" to read more than {{ cost }} rows in {{ table }} via the <b>{{ key }}</b> index ({{ key_parts }})
   description: |
     This query uses an index to find rows that match a range of values, for instance
     `WHERE indexed_value in (1,2,5,6)` or `WHERE indexed_value >= 5 AND indexed_value <= 15`.
@@ -38,7 +38,7 @@ access_type_range:
   level: info
 access_type_tablescan:
   title: Table Scan
-  summary: The database reads <b>100%</b> ({{ query.table_size }}) of the rows in <b>{{ query.table }}</b>, skipping any indexes.
+  summary: The database reads <b>100%</b> ({{ table_size }}) of the rows in <b>{{ table }}</b>, skipping any indexes.
   description: |
     This query doesn't use any indexes to find data, meaning this query will need to evaluate
     every single row in the table.  This is about the worst of all possible worlds.
@@ -47,6 +47,13 @@ access_type_tablescan:
     but be aware that if this table is not effectively tiny or constant-sized you're entering
     a world of pain.
   level: danger
+limited_scan:
+  title: Limited Scan
+  summary: The database reads {{ cost }} rows from {{ table }}.
+  description: |
+    This query doesn't use any indexes to find data, but since it doesn't care about
+    ordering and it doesn't have any conditions, it only ever reads {{ query.cost }} rows.
+  level: info
 ignored:
   title: Ignored
   summary: This query matched an "ignore" rule in shiba.yml.  Any further analysis was skipped.
@@ -61,10 +68,9 @@ index_walk:
   level: success
 retsize_bad:
   title: Big Results
-  summary: The database returns {{ query.return_size.toLocaleString() }} rows to the client.
+  summary: The database returns {{ return_size }} rows to the client.
   level: danger
 retsize_good:
   title: Small Results
-  summary: The database returns {{ query.return_size.toLocaleString() }} row(s) to the client.
+  summary: The database returns {{ return_size }} row(s) to the client.
   level: success

data/lib/shiba/query_watcher.rb CHANGED Viewed

@@ -17,6 +17,12 @@ module Shiba
       sql = payload[:sql]
       return if !sql.start_with?("SELECT")
+      if sql.include?("$1")
+        sql = interpolate(sql, payload[:type_casted_binds])
+      end
+      sql = sql.gsub(/\n/, ' ')
       fingerprint = Query.get_fingerprint(sql)
       return if @queries[fingerprint]
@@ -27,5 +33,11 @@ module Shiba
       @queries[fingerprint] = true
     end
+    def interpolate(sql, binds)
+      binds.each_with_index do |val, i|
+        sql = sql.sub("$#{i +1}", ActiveRecord::Base.connection.quote(val))
+      end
+      sql
+    end
   end
-end
+end

data/lib/shiba/review/api.rb ADDED Viewed

@@ -0,0 +1,100 @@
+require 'uri'
+require 'json'
+require 'net/http'
+module Shiba
+  module Review
+    class API
+      attr_reader :repo_url, :token, :pull_request
+      # options "token", "pull_request"
+      def initialize(repo_url, options)
+        @repo_url = repo_url
+        @http = nil
+        @token = options.fetch("token")
+        @pull_request = options.fetch("pull_request")
+      end
+      def connect
+        Net::HTTP.start(uri.hostname, uri.port, :use_ssl => true) do |http|
+          begin
+            @http = http
+            yield
+          ensure
+            @http = nil
+          end
+        end
+      end
+      # https://developer.github.com/v3/pulls/comments/#create-a-comment
+      def comment_on_pull_request(comment)
+        req = Net::HTTP::Post.new(uri)
+        req.body = JSON.dump(comment)
+        request(req)
+      end
+      # https://developer.github.com/v3/pulls/comments/#list-comments-on-a-pull-request
+      def previous_comments
+        req = Net::HTTP::Get.new(uri)
+        request(req)
+      end
+      def uri
+        return @uri if @uri
+        repo_host, repo_path = host_and_path
+        url = if repo_host == 'github.com'
+          'https://api.github.com'
+        else
+          "https://#{repo_host}/api/v3"
+        end
+        url << "/repos/#{repo_path}/pulls/#{pull_request}/comments"
+        @uri = URI(url)
+      end
+      def host_and_path
+         host, path = nil
+         # git@github.com:burrito-brothers/shiba.git
+         if repo_url.index('@')
+           host, path = repo_url.split(':')
+           host.sub!('git@', '')
+           path.chomp!('.git')
+         # https://github.com/burrito-brothers/shiba.git
+         else
+           uri = URI.parse(repo_url)
+           host = uri.host
+           path = uri.path.chomp('.git')
+           path.reverse!.chomp!("/").reverse!
+         end
+         return host, path
+      end
+      protected
+      def request(req)
+        verify_connection!
+        req['Authorization'] = "token #{token}"
+        req['Content-Type']  = "application/json"
+        res = @http.request(req)
+        case res
+        when Net::HTTPSuccess
+          JSON.parse(res.body)
+        else
+          raise Shiba::Error.new, "API request failed: #{res} #{res.body}"
+        end
+      end
+      def verify_connection!
+        return true if @http
+        raise Shiba::Error.new("API requests must be wrapped in a #connect { ... } block")
+      end
+    end
+  end
+end

data/lib/shiba/review/comment_renderer.rb ADDED Viewed

@@ -0,0 +1,62 @@
+require 'yaml'
+module Shiba
+  module Review
+    class CommentRenderer
+      # {{ variable }}
+      VAR_PATTERN = /{{\s?([a-z_]+)\s?}}/
+      def initialize(templates)
+        @templates = templates
+      end
+      def render(explain)
+        body = ""
+        data = present(explain)
+        explain["tags"].each do |tag|
+          body << @templates[tag]["title"]
+          body << ": "
+          body << render_template(@templates[tag]["summary"], data)
+          body << "\n"
+        end
+        body
+      end
+      protected
+      def render_template(template, data)
+        rendered = template.gsub(VAR_PATTERN) do
+          data[$1]
+        end
+        # convert to markdown
+        rendered.gsub!(/<\/?b>/, "**")
+        rendered
+      end
+      def present(explain)
+        used_key_parts = explain["used_key_parts"] || []
+        { "table"       => explain["table"],
+          "table_size"  => explain["table_size"],
+          "key"         => explain["key"],
+          "return_size" => explain["return_size"],
+          "key_parts"   => used_key_parts.join(","),
+          "cost"        => cost(explain)
+        }
+      end
+      def cost(explain)
+        percentage = (explain["cost"] / explain["table_size"]) * 100.0;
+        if explain["cost"] > 100 && percentage > 1
+          "#{percentage.floor}% (#{explain["cost"]}) of the"
+        else
+          explain["cost"]
+        end
+      end
+    end
+  end
+end

data/lib/shiba/reviewer.rb ADDED Viewed

@@ -0,0 +1,136 @@
+require 'open3'
+require 'shiba'
+require 'shiba/diff'
+require 'shiba/review/api'
+require 'shiba/review/comment_renderer'
+module Shiba
+  # TODO:
+  # 1. Properly handle more than a handful of review failures
+  # 2. May make sense to edit the comment on a commit line when the code
+  # is semi-corrected but still a problem
+  class Reviewer
+    TEMPLATE_FILE = File.join(Shiba.root, 'lib/shiba/output/tags.yaml')
+    attr_reader :repo_url, :problems, :options
+    def initialize(repo_url, problems, options)
+      @repo_url = repo_url
+      @problems = problems
+      @options = options
+      @commit_id = options.fetch("branch") do
+        raise Shiba::Error.new("Must specify a branch") if !options['diff']
+      end
+    end
+    def comments
+      return @comments if @comments
+      @comments = problems.map do |path, explain|
+        file, line_number = path.split(":")
+        if path.empty? || line_number.nil?
+          raise Shiba::Error.new("Bad path received: #{line_number}")
+        end
+        position = diff.find_position(file, line_number.to_i)
+        if options["submit"]
+          explain = keep_only_dangerous_tags(explain)
+        end
+        { body: renderer.render(explain),
+          commit_id: @commit_id,
+          path: file,
+          line: line_number,
+          position: position }
+      end
+    end
+    # FIXME: Only submit 10 comments for now. The rest just vanish.
+    # Submits comments, checking to make sure the line doesn't already have a review.
+    def submit
+      report("Connecting to #{api.uri}")
+      api.connect do
+        previous_reviews = api.previous_comments.map { |c| c['body'] }
+        comments[0,10].each do |c|
+          if previous_reviews.any? { |r| r == c[:body] }
+            report("skipped duplicate comment")
+            next
+          end
+          # :line isn't part of the github api
+          comment = c.dup.tap { |dc| dc.delete(:line) }
+          if options[:verbose]
+            comment[:body] += " (verbose mode ts=#{Time.now.to_i})"
+          end
+          res = api.comment_on_pull_request(comment)
+          report("API success #{res.inspect}")
+        end
+      end
+      report("HTTP request finished")
+    end
+    def repo_host
+      @repo_host ||= api.host_and_path.first
+    end
+    def repo_path
+      @repo_path ||= api.host_and_path.last
+    end
+    protected
+    def report(message)
+      if options["verbose"]
+        $stderr.puts message
+      end
+    end
+    def keep_only_dangerous_tags(explain)
+      explain_b = explain.dup
+      explain_b["tags"] = explain_b["tags"].select { |tag| tags[tag]["level"] == "danger" }
+      explain_b
+    end
+    def diff
+      return @diff if @diff
+      output = options['diff'] ? file_diff : git_diff
+      @diff = Shiba::Diff.new(output)
+    end
+    def git_diff
+      cmd ="git diff origin/HEAD..#{@commit_id}"
+      report("Finding PR position using: #{cmd}")
+      output = StringIO.new(`#{cmd}`)
+    end
+    def file_diff
+      report("Finding PR position using file: #{options['diff']}")
+      File.open(options['diff'], 'r')
+    end
+    def api
+      @api ||= begin
+        api_options = {
+          "token"        => options["token"],
+          "pull_request" => options["pull_request"]
+        }
+        Review::API.new(repo_url, api_options)
+      end
+    end
+    def renderer
+      @renderer ||= Review::CommentRenderer.new(tags)
+    end
+    def tags
+      @tags ||=  YAML.load_file(TEMPLATE_FILE)
+    end
+  end
+end