shiba 0.2.3 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.travis.yml +11 -2
- data/Gemfile +1 -2
- data/Gemfile.lock +4 -2
- data/README.md +1 -1
- data/bin/explain +10 -41
- data/bin/mysql_dump_stats +20 -0
- data/bin/postgres_dump_stats +3 -0
- data/bin/review +181 -0
- data/bin/shiba +3 -3
- data/lib/shiba.rb +65 -4
- data/lib/shiba/activerecord_integration.rb +30 -13
- data/lib/shiba/checker.rb +89 -25
- data/lib/shiba/configure.rb +22 -5
- data/lib/shiba/connection.rb +25 -0
- data/lib/shiba/connection/mysql.rb +45 -0
- data/lib/shiba/connection/postgres.rb +91 -0
- data/lib/shiba/diff.rb +21 -11
- data/lib/shiba/explain.rb +18 -53
- data/lib/shiba/explain/mysql_explain.rb +47 -0
- data/lib/shiba/explain/postgres_explain.rb +91 -0
- data/lib/shiba/explain/postgres_explain_index_conditions.rb +137 -0
- data/lib/shiba/fuzzer.rb +16 -16
- data/lib/shiba/index_stats.rb +9 -5
- data/lib/shiba/output.rb +1 -1
- data/lib/shiba/output/tags.yaml +14 -8
- data/lib/shiba/query_watcher.rb +13 -1
- data/lib/shiba/review/api.rb +100 -0
- data/lib/shiba/review/comment_renderer.rb +62 -0
- data/lib/shiba/reviewer.rb +136 -0
- data/lib/shiba/version.rb +1 -1
- data/shiba.gemspec +2 -0
- data/web/dist/bundle.js +23 -1
- data/web/main.css +3 -0
- data/web/main.js +1 -0
- data/web/package-lock.json +5 -0
- data/web/package.json +1 -0
- data/web/results.html.erb +77 -20
- metadata +43 -5
- data/bin/check +0 -75
- data/bin/dump_stats +0 -44
data/lib/shiba/fuzzer.rb
CHANGED
@@ -5,14 +5,14 @@ module Shiba
|
|
5
5
|
|
6
6
|
def initialize(connection)
|
7
7
|
@connection = connection
|
8
|
-
@index_stats = IndexStats.new
|
9
8
|
end
|
10
9
|
|
11
10
|
attr_reader :connection
|
12
11
|
|
13
12
|
def fuzz!
|
14
|
-
fetch_index
|
13
|
+
@index_stats = fetch_index
|
15
14
|
table_sizes = guess_table_sizes
|
15
|
+
|
16
16
|
@index_stats.tables.each do |name, table|
|
17
17
|
table.count = table_sizes[name]
|
18
18
|
table.indexes.each do |name, index|
|
@@ -21,33 +21,33 @@ module Shiba
|
|
21
21
|
end
|
22
22
|
end
|
23
23
|
end
|
24
|
+
|
24
25
|
@index_stats
|
25
26
|
end
|
26
27
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
SMALL_FUZZ_SIZE = 100
|
31
|
-
|
32
|
-
def fetch_index!
|
33
|
-
records = connection.query("select * from information_schema.statistics where table_schema = DATABASE()")
|
28
|
+
# Builds a fresh Shiba::IndexStats from the index metadata reported by the
# connection.
#
# Every record returned by `connection.fetch_indexes` is read with
# lower-cased keys, and its "cardinality" value is coerced with `to_i`.
# An index column is registered as unique when "non_unique" equals 0.
#
# @return [Shiba::IndexStats] the stats object populated through
#   `add_index_column`
def fetch_index
  stats = Shiba::IndexStats.new

  connection.fetch_indexes.each do |record|
    # Normalise key case on a copy rather than rewriting the record's keys
    # while walking them.
    row = record.map { |key, value| [key.downcase, value] }.to_h

    stats.add_index_column(
      row['table_name'],
      row['index_name'],
      row['column_name'],
      row['cardinality'].to_i,
      row['non_unique'] == 0
    )
  end

  stats
end
|
41
40
|
|
41
|
+
private
|
42
|
+
|
43
|
+
BIG_FUZZ_SIZE = 5_000
|
44
|
+
SMALL_FUZZ_SIZE = 100
|
45
|
+
|
46
|
+
|
42
47
|
# Create fake table sizes based on the table's index count.
|
43
48
|
# The more indexes, the bigger the table. Seems to rank tables fairly well.
|
44
49
|
def guess_table_sizes
|
45
|
-
|
46
|
-
from information_schema.statistics where table_schema = DATABASE()
|
47
|
-
and seq_in_index = 1 and index_name not like 'fk_rails%'
|
48
|
-
group by table_name order by index_count"
|
49
|
-
|
50
|
-
index_counts = connection.query(index_count_query).to_a
|
50
|
+
index_counts = connection.count_indexes_by_table
|
51
51
|
|
52
52
|
# 90th table percentile based on number of indexes
|
53
53
|
# round down so we don't blow up on small tables
|
data/lib/shiba/index_stats.rb
CHANGED
@@ -97,8 +97,7 @@ module Shiba
|
|
97
97
|
|
98
98
|
count = table_count
|
99
99
|
count = 1 if count == 0
|
100
|
-
ratio_per_item = self.rows_per / count.to_f
|
101
|
-
|
100
|
+
ratio_per_item = self.rows_per / count.to_f
|
102
101
|
|
103
102
|
if count <= 10
|
104
103
|
ratio_threshold = 1_000_0000 # always show a number
|
@@ -158,11 +157,16 @@ module Shiba
|
|
158
157
|
|
159
158
|
return nil unless index
|
160
159
|
|
161
|
-
index_part =
|
162
|
-
|
160
|
+
index_part = nil
|
161
|
+
index.columns.each do |c|
|
162
|
+
break unless parts.include?(c.column)
|
163
|
+
index_part = c
|
163
164
|
end
|
164
165
|
|
165
|
-
|
166
|
+
# postgres can claim to use the right hand side of an index
|
167
|
+
# in a bitmap scan, which seems to be a side-effect of forcing
|
168
|
+
# seq-scan off. In these cases we'll say it's a full scan.
|
169
|
+
return table_count(table_name) unless index_part
|
166
170
|
|
167
171
|
index_part.rows_per
|
168
172
|
end
|
data/lib/shiba/output.rb
CHANGED
data/lib/shiba/output/tags.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
fuzzed_data:
|
3
3
|
title: Fuzzed Data
|
4
|
-
summary: Shiba doesn't know the size of <b>{{
|
4
|
+
summary: Shiba doesn't know the size of <b>{{table}}</b>. For these purposes we set the table size to <b>{{table_size}}</b>.
|
5
5
|
description: |
|
6
6
|
We're not sure how much data this table will hold in the future, so we've pretended
|
7
7
|
there's 6000 rows in it. This can lead to a lot of false positives. To
|
@@ -16,20 +16,20 @@ possible_key_check:
|
|
16
16
|
level: info
|
17
17
|
access_type_const:
|
18
18
|
title: One row
|
19
|
-
summary: The database only needs to read a single row from <b>{{
|
19
|
+
summary: The database only needs to read a single row from <b>{{table}}</b>.
|
20
20
|
description: |
|
21
21
|
This query selects at *most* one row, which is about as good as things get.
|
22
22
|
level: success
|
23
23
|
access_type_ref:
|
24
24
|
title: Indexed
|
25
|
-
summary: The database reads {{
|
25
|
+
summary: The database reads {{ cost }} rows in <b>{{ table }}</b> via the <b>{{ key }}</b> index ({{ key_parts }}).
|
26
26
|
description: |
|
27
27
|
This query uses an index to find rows that match a single value. Often this
|
28
28
|
has very good performance, but it depends on how many rows match that value.
|
29
29
|
level: success
|
30
30
|
access_type_range:
|
31
31
|
title: Indexed
|
32
|
-
summary: The database uses a "range scan" to read more than {{
|
32
|
+
summary: The database uses a "range scan" to read more than {{ cost }} rows in {{ table }} via the <b>{{ key }}</b> index ({{ key_parts }})
|
33
33
|
description: |
|
34
34
|
This query uses an index to find rows that match a range of values, for instance
|
35
35
|
`WHERE indexed_value in (1,2,5,6)` or `WHERE indexed_value >= 5 AND indexed_value <= 15`.
|
@@ -38,7 +38,7 @@ access_type_range:
|
|
38
38
|
level: info
|
39
39
|
access_type_tablescan:
|
40
40
|
title: Table Scan
|
41
|
-
summary: The database reads <b>100%</b> ({{
|
41
|
+
summary: The database reads <b>100%</b> ({{ table_size }}) of the rows in <b>{{ table }}</b>, skipping any indexes.
|
42
42
|
description: |
|
43
43
|
This query doesn't use any indexes to find data, meaning this query will need to evaluate
|
44
44
|
every single row in the table. This is about the worst of all possible worlds.
|
@@ -47,6 +47,13 @@ access_type_tablescan:
|
|
47
47
|
but be aware that if this table is not effectively tiny or constant-sized you're entering
|
48
48
|
a world of pain.
|
49
49
|
level: danger
|
50
|
+
limited_scan:
|
51
|
+
title: Limited Scan
|
52
|
+
summary: The database reads {{ query.cost }} rows from {{ query.table }}.
|
53
|
+
description: |
|
54
|
+
This query doesn't use any indexes to find data, but since it doesn't care about
|
55
|
+
ordering and it doesn't have any conditions, it only ever reads {{ query.cost }} rows.
|
56
|
+
level: info
|
50
57
|
ignored:
|
51
58
|
title: Ignored
|
52
59
|
summary: This query matched an "ignore" rule in shiba.yml. Any further analysis was skipped.
|
@@ -61,10 +68,9 @@ index_walk:
|
|
61
68
|
level: success
|
62
69
|
retsize_bad:
|
63
70
|
title: Big Results
|
64
|
-
summary: The database returns {{
|
71
|
+
summary: The database returns {{ return_size }} rows to the client.
|
65
72
|
level: danger
|
66
73
|
retsize_good:
|
67
74
|
title: Small Results
|
68
|
-
summary: The database returns {{
|
75
|
+
summary: The database returns {{ return_size }} row(s) to the client.
|
69
76
|
level: success
|
70
|
-
|
data/lib/shiba/query_watcher.rb
CHANGED
@@ -17,6 +17,12 @@ module Shiba
|
|
17
17
|
sql = payload[:sql]
|
18
18
|
return if !sql.start_with?("SELECT")
|
19
19
|
|
20
|
+
if sql.include?("$1")
|
21
|
+
sql = interpolate(sql, payload[:type_casted_binds])
|
22
|
+
end
|
23
|
+
|
24
|
+
sql = sql.gsub(/\n/, ' ')
|
25
|
+
|
20
26
|
fingerprint = Query.get_fingerprint(sql)
|
21
27
|
return if @queries[fingerprint]
|
22
28
|
|
@@ -27,5 +33,11 @@ module Shiba
|
|
27
33
|
@queries[fingerprint] = true
|
28
34
|
end
|
29
35
|
|
36
|
+
# Replaces numbered bind placeholders ($1, $2, ...) in +sql+ with the quoted
# form of the corresponding entry in +binds+ ($1 maps to binds[0]).
#
# The substitution is done in a single pass so that:
#   * "$1" is never matched inside "$10",
#   * placeholder-looking text inside an already substituted value is left
#     untouched,
#   * backslashes in the quoted value are inserted literally (a string
#     replacement passed to String#sub would interpret them),
#   * a placeholder that appears more than once is replaced every time.
#
# Placeholders with no matching bind are left as-is.
#
# @param sql [String] statement containing $n placeholders
# @param binds [Array, nil] bind values, in placeholder order
# @return [String] the statement with bind values inlined
def interpolate(sql, binds)
  return sql if binds.nil? || binds.empty?

  sql.gsub(/\$(\d+)/) do |placeholder|
    # Capture the index before calling out, in case quoting touches $~.
    position = Regexp.last_match(1).to_i
    next placeholder unless position.between?(1, binds.length)

    ActiveRecord::Base.connection.quote(binds[position - 1]).to_s
  end
end
|
30
42
|
end
|
31
|
-
end
|
43
|
+
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
require 'uri'
|
2
|
+
require 'json'
|
3
|
+
require 'net/http'
|
4
|
+
|
5
|
+
module Shiba
  module Review
    # Small client for the pull request comments endpoint of the GitHub
    # (or GitHub Enterprise style `/api/v3`) REST API.
    #
    # Requests must be issued inside a #connect block, which owns the HTTP
    # session for its duration.
    class API

      attr_reader :repo_url, :token, :pull_request

      # @param repo_url [String] git remote, either scp-style
      #   ("git@github.com:org/repo.git") or a URL
      #   ("https://github.com/org/repo.git")
      # @param options [Hash] must contain the string keys "token" and
      #   "pull_request"
      # @raise [KeyError] when either option is missing
      def initialize(repo_url, options)
        @repo_url = repo_url
        @http = nil
        @uri = nil
        @token = options.fetch("token")
        @pull_request = options.fetch("pull_request")
      end

      # Opens an SSL HTTP session to the API host and yields. The session is
      # only available to #request while the block runs.
      def connect
        Net::HTTP.start(uri.hostname, uri.port, :use_ssl => true) do |http|
          begin
            @http = http
            yield
          ensure
            # Always drop the handle so later calls fail loudly in
            # verify_connection! instead of using a closed session.
            @http = nil
          end
        end
      end

      # https://developer.github.com/v3/pulls/comments/#create-a-comment
      #
      # @param comment [Hash] payload serialised to JSON as the request body
      # @return [Object] the parsed JSON response
      def comment_on_pull_request(comment)
        req = Net::HTTP::Post.new(uri)
        req.body = JSON.dump(comment)
        request(req)
      end

      # https://developer.github.com/v3/pulls/comments/#list-comments-on-a-pull-request
      #
      # @return [Object] the parsed JSON response
      def previous_comments
        request(Net::HTTP::Get.new(uri))
      end

      # The comments endpoint for this pull request. github.com repositories
      # use api.github.com; any other host is addressed as "<host>/api/v3".
      #
      # @return [URI]
      def uri
        @uri ||= begin
          repo_host, repo_path = host_and_path
          base = if repo_host == 'github.com'
            'https://api.github.com'
          else
            "https://#{repo_host}/api/v3"
          end

          URI("#{base}/repos/#{repo_path}/pulls/#{pull_request}/comments")
        end
      end

      # Splits the remote into its host and "owner/repo" path.
      #
      # @return [Array(String, String)] host and path, without a ".git"
      #   suffix or surrounding slashes
      # @raise [Shiba::Error] when no host or path can be extracted
      def host_and_path
        # git@github.com:burrito-brothers/shiba.git
        # Only scheme-less remotes are scp-style; a URL carrying user info
        # (https://user@host/...) also contains "@" but must go through
        # the URI parser.
        if repo_url.include?('@') && !repo_url.include?('://')
          host, path = repo_url.split(':', 2)
          host = host.sub(/\A[^@]*@/, '')
          path = path.to_s.chomp('.git')
        # https://github.com/burrito-brothers/shiba.git
        else
          parsed = URI.parse(repo_url)
          host = parsed.host
          path = parsed.path.to_s.sub(%r{\A/+}, '').sub(%r{/+\z}, '').chomp('.git')
        end

        if host.nil? || host.empty? || path.empty?
          raise Shiba::Error, "Could not parse repository url: #{repo_url.inspect}"
        end

        return host, path
      end

      protected

      # Sends +req+ over the current session with auth and JSON headers.
      #
      # @return [Object] the parsed JSON body on a 2xx response
      # @raise [Shiba::Error] outside a #connect block or on a non-2xx response
      def request(req)
        verify_connection!

        req['Authorization'] = "token #{token}"
        req['Content-Type'] = "application/json"

        res = @http.request(req)

        case res
        when Net::HTTPSuccess
          JSON.parse(res.body)
        else
          raise Shiba::Error, "API request failed: #{res} #{res.body}"
        end
      end

      # @raise [Shiba::Error] when no HTTP session is open
      def verify_connection!
        return true if @http
        raise Shiba::Error, "API requests must be wrapped in a #connect { ... } block"
      end

    end
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
3
|
+
module Shiba
  module Review
    # Renders the tags of an explain result into a markdown comment body,
    # using the "title" and "summary" entries of the supplied templates.
    class CommentRenderer
      # {{ variable }}
      # NOTE(review): dotted names such as `{{ query.cost }}` are not matched
      # by this pattern and are emitted verbatim.
      VAR_PATTERN = /{{\s?([a-z_]+)\s?}}/

      # @param templates [Hash] tag name => { "title" => ..., "summary" => ... }
      def initialize(templates)
        @templates = templates
      end

      # @param explain [Hash] explain result; reads "tags", "table",
      #   "table_size", "key", "return_size", "used_key_parts" and "cost"
      # @return [String] one "Title: summary\n" line per tag
      def render(explain)
        data = present(explain)

        explain["tags"].map do |tag|
          template = @templates[tag]
          "#{template["title"]}: #{render_template(template["summary"], data)}\n"
        end.join
      end

      protected

      # Fills the {{ variable }} slots of +template+ from +data+ and turns
      # <b> tags into markdown bold markers.
      def render_template(template, data)
        filled = template.gsub(VAR_PATTERN) { data[Regexp.last_match(1)].to_s }
        # convert to markdown
        filled.gsub(/<\/?b>/, "**")
      end

      # Flattens the explain result into the variables templates may use.
      def present(explain)
        used_key_parts = explain["used_key_parts"] || []

        { "table"       => explain["table"],
          "table_size"  => explain["table_size"],
          "key"         => explain["key"],
          "return_size" => explain["return_size"],
          "key_parts"   => used_key_parts.join(","),
          "cost"        => cost(explain)
        }
      end

      # Describes the cost either as the raw row count or, for scans of more
      # than 100 rows that cover over 1% of the table, as
      # "<pct>% (<rows>) of the".
      #
      # The ratio is computed in floating point: with integer inputs an
      # integer division would truncate every partial scan to 0%. When the
      # table size is missing or not positive the raw cost is returned
      # instead of raising.
      def cost(explain)
        rows = explain["cost"]
        table_size = explain["table_size"]
        return rows unless rows.is_a?(Numeric) && table_size.is_a?(Numeric) && table_size > 0

        percentage = rows.to_f / table_size * 100.0

        if rows > 100 && percentage > 1
          "#{percentage.floor}% (#{rows}) of the"
        else
          rows
        end
      end

    end
  end
end
|
@@ -0,0 +1,136 @@
|
|
1
|
+
require 'open3'
|
2
|
+
require 'shiba'
|
3
|
+
require 'shiba/diff'
|
4
|
+
require 'shiba/review/api'
|
5
|
+
require 'shiba/review/comment_renderer'
|
6
|
+
|
7
|
+
module Shiba
  # Turns a set of query problems into pull request review comments and
  # optionally submits them through Review::API.
  #
  # TODO:
  # 1. Properly handle more than a handful of review failures
  # 2. May make sense to edit the comment on a commit line when the code
  #    is semi-corrected but still a problem
  class Reviewer
    TEMPLATE_FILE = File.join(Shiba.root, 'lib/shiba/output/tags.yaml')

    attr_reader :repo_url, :problems, :options

    # @param repo_url [String] git remote of the repository under review
    # @param problems [Enumerable] pairs of "file:line" path and explain Hash
    # @param options [Hash] string-keyed options; reads "branch", "diff",
    #   "submit", "verbose", "token" and "pull_request"
    # @raise [Shiba::Error] when neither "branch" nor "diff" is given
    def initialize(repo_url, problems, options)
      @repo_url = repo_url
      @problems = problems
      @options = options
      # Without a branch the commit id stays nil, which is only acceptable
      # when a diff file is supplied.
      @commit_id = options.fetch("branch") do
        raise Shiba::Error, "Must specify a branch" unless options['diff']
      end
    end

    # Builds (and memoizes) one comment Hash per problem with the keys
    # :body, :commit_id, :path, :line and :position.
    #
    # @raise [Shiba::Error] when a problem path is not of the form "file:line"
    def comments
      @comments ||= problems.map do |path, explain|
        file, line_number = path.split(":")
        if path.empty? || line_number.nil?
          raise Shiba::Error, "Bad path received: #{path.inspect}"
        end

        position = diff.find_position(file, line_number.to_i)

        # Submitted comments only carry the tags whose level is "danger".
        explain = keep_only_dangerous_tags(explain) if options["submit"]

        { body: renderer.render(explain),
          commit_id: @commit_id,
          path: file,
          line: line_number,
          position: position }
      end
    end

    # FIXME: Only submit 10 comments for now. The rest just vanish.
    # Submits comments, checking to make sure the same comment body has not
    # already been posted on the pull request.
    def submit
      report("Connecting to #{api.uri}")

      api.connect do
        previous_reviews = api.previous_comments.map { |c| c['body'] }

        comments.first(10).each do |c|
          if previous_reviews.include?(c[:body])
            report("skipped duplicate comment")
            next
          end

          # :line isn't part of the github api
          comment = c.reject { |key, _| key == :line }
          # Options are string-keyed everywhere else in this class, so the
          # verbose marker is looked up the same way.
          if options["verbose"]
            comment[:body] += " (verbose mode ts=#{Time.now.to_i})"
          end

          res = api.comment_on_pull_request(comment)
          report("API success #{res.inspect}")
        end
      end

      report("HTTP request finished")
    end

    def repo_host
      @repo_host ||= api.host_and_path.first
    end

    def repo_path
      @repo_path ||= api.host_and_path.last
    end

    protected

    # Writes +message+ to stderr when the "verbose" option is set.
    def report(message)
      $stderr.puts message if options["verbose"]
    end

    # Returns a shallow copy of +explain+ whose "tags" only include tags
    # with a "danger" level in the templates.
    def keep_only_dangerous_tags(explain)
      explain_b = explain.dup
      explain_b["tags"] = explain_b["tags"].select { |tag| tags[tag]["level"] == "danger" }
      explain_b
    end

    # The parsed diff, read from the "diff" option file when given and from
    # git otherwise.
    def diff
      @diff ||= Shiba::Diff.new(options['diff'] ? file_diff : git_diff)
    end

    # Runs `git diff origin/HEAD..<branch>` and returns its output as an IO.
    # The command is executed without a shell so the branch name cannot be
    # interpreted as shell syntax.
    def git_diff
      range = "origin/HEAD..#{@commit_id}"
      report("Finding PR position using: git diff #{range}")

      output, _status = Open3.capture2("git", "diff", range)
      StringIO.new(output)
    end

    # Reads the diff file fully and returns it as an in-memory IO, so no
    # file handle is left open.
    def file_diff
      report("Finding PR position using file: #{options['diff']}")
      StringIO.new(File.read(options['diff']))
    end

    def api
      @api ||= begin
        api_options = {
          "token"        => options["token"],
          "pull_request" => options["pull_request"]
        }
        Review::API.new(repo_url, api_options)
      end
    end

    def renderer
      @renderer ||= Review::CommentRenderer.new(tags)
    end

    # Tag templates loaded from TEMPLATE_FILE.
    def tags
      @tags ||= YAML.load_file(TEMPLATE_FILE)
    end

  end
end
|