shiba 0.2.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.travis.yml +11 -2
- data/Gemfile +1 -2
- data/Gemfile.lock +4 -2
- data/README.md +1 -1
- data/bin/explain +10 -41
- data/bin/mysql_dump_stats +20 -0
- data/bin/postgres_dump_stats +3 -0
- data/bin/review +181 -0
- data/bin/shiba +3 -3
- data/lib/shiba.rb +65 -4
- data/lib/shiba/activerecord_integration.rb +30 -13
- data/lib/shiba/checker.rb +89 -25
- data/lib/shiba/configure.rb +22 -5
- data/lib/shiba/connection.rb +25 -0
- data/lib/shiba/connection/mysql.rb +45 -0
- data/lib/shiba/connection/postgres.rb +91 -0
- data/lib/shiba/diff.rb +21 -11
- data/lib/shiba/explain.rb +18 -53
- data/lib/shiba/explain/mysql_explain.rb +47 -0
- data/lib/shiba/explain/postgres_explain.rb +91 -0
- data/lib/shiba/explain/postgres_explain_index_conditions.rb +137 -0
- data/lib/shiba/fuzzer.rb +16 -16
- data/lib/shiba/index_stats.rb +9 -5
- data/lib/shiba/output.rb +1 -1
- data/lib/shiba/output/tags.yaml +14 -8
- data/lib/shiba/query_watcher.rb +13 -1
- data/lib/shiba/review/api.rb +100 -0
- data/lib/shiba/review/comment_renderer.rb +62 -0
- data/lib/shiba/reviewer.rb +136 -0
- data/lib/shiba/version.rb +1 -1
- data/shiba.gemspec +2 -0
- data/web/dist/bundle.js +23 -1
- data/web/main.css +3 -0
- data/web/main.js +1 -0
- data/web/package-lock.json +5 -0
- data/web/package.json +1 -0
- data/web/results.html.erb +77 -20
- metadata +43 -5
- data/bin/check +0 -75
- data/bin/dump_stats +0 -44
data/lib/shiba/fuzzer.rb
CHANGED
@@ -5,14 +5,14 @@ module Shiba
|
|
5
5
|
|
6
6
|
def initialize(connection)
|
7
7
|
@connection = connection
|
8
|
-
@index_stats = IndexStats.new
|
9
8
|
end
|
10
9
|
|
11
10
|
attr_reader :connection
|
12
11
|
|
13
12
|
def fuzz!
|
14
|
-
fetch_index
|
13
|
+
@index_stats = fetch_index
|
15
14
|
table_sizes = guess_table_sizes
|
15
|
+
|
16
16
|
@index_stats.tables.each do |name, table|
|
17
17
|
table.count = table_sizes[name]
|
18
18
|
table.indexes.each do |name, index|
|
@@ -21,33 +21,33 @@ module Shiba
|
|
21
21
|
end
|
22
22
|
end
|
23
23
|
end
|
24
|
+
|
24
25
|
@index_stats
|
25
26
|
end
|
26
27
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
SMALL_FUZZ_SIZE = 100
|
31
|
-
|
32
|
-
def fetch_index!
|
33
|
-
records = connection.query("select * from information_schema.statistics where table_schema = DATABASE()")
|
28
|
+
def fetch_index
|
29
|
+
stats = Shiba::IndexStats.new
|
30
|
+
records = connection.fetch_indexes
|
34
31
|
tables = {}
|
35
32
|
records.each do |h|
|
36
33
|
h.keys.each { |k| h[k.downcase] = h.delete(k) }
|
37
34
|
h["cardinality"] = h["cardinality"].to_i
|
38
|
-
|
35
|
+
|
36
|
+
stats.add_index_column(h['table_name'], h['index_name'], h['column_name'], h['cardinality'], h['non_unique'] == 0)
|
39
37
|
end
|
38
|
+
stats
|
40
39
|
end
|
41
40
|
|
41
|
+
private
|
42
|
+
|
43
|
+
BIG_FUZZ_SIZE = 5_000
|
44
|
+
SMALL_FUZZ_SIZE = 100
|
45
|
+
|
46
|
+
|
42
47
|
# Create fake table sizes based on the table's index count.
|
43
48
|
# The more indexes, the bigger the table. Seems to rank tables fairly well.
|
44
49
|
def guess_table_sizes
|
45
|
-
|
46
|
-
from information_schema.statistics where table_schema = DATABASE()
|
47
|
-
and seq_in_index = 1 and index_name not like 'fk_rails%'
|
48
|
-
group by table_name order by index_count"
|
49
|
-
|
50
|
-
index_counts = connection.query(index_count_query).to_a
|
50
|
+
index_counts = connection.count_indexes_by_table
|
51
51
|
|
52
52
|
# 90th table percentile based on number of indexes
|
53
53
|
# round down so we don't blow up on small tables
|
data/lib/shiba/index_stats.rb
CHANGED
@@ -97,8 +97,7 @@ module Shiba
|
|
97
97
|
|
98
98
|
count = table_count
|
99
99
|
count = 1 if count == 0
|
100
|
-
ratio_per_item = self.rows_per / count.to_f
|
101
|
-
|
100
|
+
ratio_per_item = self.rows_per / count.to_f
|
102
101
|
|
103
102
|
if count <= 10
|
104
103
|
ratio_threshold = 1_000_0000 # always show a number
|
@@ -158,11 +157,16 @@ module Shiba
|
|
158
157
|
|
159
158
|
return nil unless index
|
160
159
|
|
161
|
-
index_part =
|
162
|
-
|
160
|
+
index_part = nil
|
161
|
+
index.columns.each do |c|
|
162
|
+
break unless parts.include?(c.column)
|
163
|
+
index_part = c
|
163
164
|
end
|
164
165
|
|
165
|
-
|
166
|
+
# postgres can claim to use the right hand side of an index
|
167
|
+
# in a bitmap scan, which seems to be a side-effect of forcing
|
168
|
+
# seq-scan off. In these cases we'll say it's a full scan.
|
169
|
+
return table_count(table_name) unless index_part
|
166
170
|
|
167
171
|
index_part.rows_per
|
168
172
|
end
|
data/lib/shiba/output.rb
CHANGED
data/lib/shiba/output/tags.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
fuzzed_data:
|
3
3
|
title: Fuzzed Data
|
4
|
-
summary: Shiba doesn't know the size of <b>{{
|
4
|
+
summary: Shiba doesn't know the size of <b>{{table}}</b>. For these purposes we set the table size to <b>{{table_size}}</b>.
|
5
5
|
description: |
|
6
6
|
We're not sure how much data this table will hold in the future, so we've pretended
|
7
7
|
there's 6000 rows in it. This can lead to a lot of false positives. To
|
@@ -16,20 +16,20 @@ possible_key_check:
|
|
16
16
|
level: info
|
17
17
|
access_type_const:
|
18
18
|
title: One row
|
19
|
-
summary: The database only needs to read a single row from <b>{{
|
19
|
+
summary: The database only needs to read a single row from <b>{{table}}</b>.
|
20
20
|
description: |
|
21
21
|
This query selects at *most* one row, which is about as good as things get.
|
22
22
|
level: success
|
23
23
|
access_type_ref:
|
24
24
|
title: Indexed
|
25
|
-
summary: The database reads {{
|
25
|
+
summary: The database reads {{ cost }} rows in <b>{{ table }}</b> via the <b>{{ key }}</b> index ({{ key_parts }}).
|
26
26
|
description: |
|
27
27
|
This query uses an index to find rows that match a single value. Often this
|
28
28
|
has very good performance, but it depends on how many rows match that value.
|
29
29
|
level: success
|
30
30
|
access_type_range:
|
31
31
|
title: Indexed
|
32
|
-
summary: The database uses a "range scan" to read more than {{
|
32
|
+
summary: The database uses a "range scan" to read more than {{ cost }} rows in {{ table }} via the <b>{{ key }}</b> index ({{ key_parts }})
|
33
33
|
description: |
|
34
34
|
This query uses an index to find rows that match a range of values, for instance
|
35
35
|
`WHERE indexed_value in (1,2,5,6)` or `WHERE indexed_value >= 5 AND indexed_value <= 15`.
|
@@ -38,7 +38,7 @@ access_type_range:
|
|
38
38
|
level: info
|
39
39
|
access_type_tablescan:
|
40
40
|
title: Table Scan
|
41
|
-
summary: The database reads <b>100%</b> ({{
|
41
|
+
summary: The database reads <b>100%</b> ({{ table_size }}) of the rows in <b>{{ table }}</b>, skipping any indexes.
|
42
42
|
description: |
|
43
43
|
This query doesn't use any indexes to find data, meaning this query will need to evaluate
|
44
44
|
every single row in the table. This is about the worst of all possible worlds.
|
@@ -47,6 +47,13 @@ access_type_tablescan:
|
|
47
47
|
but be aware that if this table is not effectively tiny or constant-sized you're entering
|
48
48
|
a world of pain.
|
49
49
|
level: danger
|
50
|
+
limited_scan:
|
51
|
+
title: Limited Scan
|
52
|
+
summary: The database reads {{ query.cost }} rows from {{ query.table }}.
|
53
|
+
description: |
|
54
|
+
This query doesn't use any indexes to find data, but since it doesn't care about
|
55
|
+
ordering and it doesn't have any conditions, it only ever reads {{ query.cost }} rows.
|
56
|
+
level: info
|
50
57
|
ignored:
|
51
58
|
title: Ignored
|
52
59
|
summary: This query matched an "ignore" rule in shiba.yml. Any further analysis was skipped.
|
@@ -61,10 +68,9 @@ index_walk:
|
|
61
68
|
level: success
|
62
69
|
retsize_bad:
|
63
70
|
title: Big Results
|
64
|
-
summary: The database returns {{
|
71
|
+
summary: The database returns {{ return_size }} rows to the client.
|
65
72
|
level: danger
|
66
73
|
retsize_good:
|
67
74
|
title: Small Results
|
68
|
-
summary: The database returns {{
|
75
|
+
summary: The database returns {{ return_size }} row(s) to the client.
|
69
76
|
level: success
|
70
|
-
|
data/lib/shiba/query_watcher.rb
CHANGED
@@ -17,6 +17,12 @@ module Shiba
|
|
17
17
|
sql = payload[:sql]
|
18
18
|
return if !sql.start_with?("SELECT")
|
19
19
|
|
20
|
+
if sql.include?("$1")
|
21
|
+
sql = interpolate(sql, payload[:type_casted_binds])
|
22
|
+
end
|
23
|
+
|
24
|
+
sql = sql.gsub(/\n/, ' ')
|
25
|
+
|
20
26
|
fingerprint = Query.get_fingerprint(sql)
|
21
27
|
return if @queries[fingerprint]
|
22
28
|
|
@@ -27,5 +33,11 @@ module Shiba
|
|
27
33
|
@queries[fingerprint] = true
|
28
34
|
end
|
29
35
|
|
36
|
+
# Replaces numbered bind placeholders ($1, $2, ...) in +sql+ with the
# quoted value of the corresponding entry in +binds+.
#
# sql   - String SQL text containing "$N" placeholders.
# binds - Array of bind values; binds[0] fills $1, binds[1] fills $2, etc.
#         nil is treated as an empty list.
#
# The substitution is done in a single pass so that:
#   * "$1" is never matched as the prefix of "$10", "$11", ...
#   * a placeholder that is used more than once is replaced everywhere
#   * text inside an already-substituted value is never re-scanned
#   * backslashes in quoted values are inserted literally
#
# Placeholders without a matching bind are left untouched.
#
# Returns a new String; +sql+ itself is not modified.
def interpolate(sql, binds)
  values = Array(binds)

  sql.gsub(/\$(\d+)/) do |placeholder|
    position = Regexp.last_match(1).to_i - 1

    if position >= 0 && position < values.length
      ActiveRecord::Base.connection.quote(values[position])
    else
      placeholder
    end
  end
end
|
30
42
|
end
|
31
|
-
end
|
43
|
+
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
require 'uri'
|
2
|
+
require 'json'
|
3
|
+
require 'net/http'
|
4
|
+
|
5
|
+
module Shiba
  module Review
    # Small HTTP client for the review-comment endpoint of one GitHub pull
    # request. Requests are only valid inside a #connect block, which owns
    # the underlying Net::HTTP session.
    class API

      attr_reader :repo_url, :token, :pull_request

      # repo_url - String git remote, e.g. "git@github.com:org/repo.git" or
      #            "https://github.com/org/repo.git".
      # options  - Hash with the required String keys "token" and
      #            "pull_request".
      #
      # Raises KeyError when either option is missing.
      def initialize(repo_url, options)
        @repo_url = repo_url
        @http = nil
        @uri = nil
        @token = options.fetch("token")
        @pull_request = options.fetch("pull_request")
      end

      # Opens an SSL HTTP session to the API host and yields with it active.
      # The session reference is always cleared when the block exits, even
      # if the block raises.
      def connect
        Net::HTTP.start(uri.hostname, uri.port, :use_ssl => true) do |http|
          begin
            @http = http
            yield
          ensure
            @http = nil
          end
        end
      end

      # https://developer.github.com/v3/pulls/comments/#create-a-comment
      #
      # comment - Hash that is serialized to JSON and sent as the POST body.
      #
      # Returns the parsed JSON response.
      def comment_on_pull_request(comment)
        req = Net::HTTP::Post.new(uri)
        req.body = JSON.dump(comment)
        request(req)
      end

      # https://developer.github.com/v3/pulls/comments/#list-comments-on-a-pull-request
      #
      # Returns the parsed JSON response of a single GET request.
      # NOTE(review): no pagination parameters are sent, so presumably only
      # the first page of comments is returned - confirm against the API docs.
      def previous_comments
        req = Net::HTTP::Get.new(uri)
        request(req)
      end

      # Memoized URI of the pull request comments endpoint. github.com
      # repositories use api.github.com; any other host is addressed through
      # its /api/v3 prefix.
      def uri
        return @uri if @uri

        repo_host, repo_path = host_and_path
        base = if repo_host == 'github.com'
          'https://api.github.com'
        else
          "https://#{repo_host}/api/v3"
        end

        @uri = URI("#{base}/repos/#{repo_path}/pulls/#{pull_request}/comments")
      end

      # Splits repo_url into [host, "owner/repo"].
      #
      # Handles scp-style remotes (git@github.com:burrito-brothers/shiba.git)
      # and URL remotes (https://github.com/burrito-brothers/shiba.git).
      # A URL with a scheme is always parsed as a URL, even when it carries
      # user info ("https://user@host/..."). Leading/trailing slashes and a
      # trailing ".git" are removed from the path.
      #
      # Raises Shiba::Error for an scp-style remote without a ":path" part.
      def host_and_path
        scp_style = repo_url.include?('@') && !repo_url.include?('://')
        host, path = scp_style ? split_scp_remote : split_url_remote

        return host, normalize_repo_path(path)
      end

      protected

      # Sends +req+ over the active session with the auth and content-type
      # headers set.
      #
      # Returns the parsed JSON body on a 2xx response.
      # Raises Shiba::Error on any other response, or when called outside
      # of a #connect block.
      def request(req)
        verify_connection!

        req['Authorization'] = "token #{token}"
        req['Content-Type'] = "application/json"

        res = @http.request(req)

        case res
        when Net::HTTPSuccess
          JSON.parse(res.body)
        else
          raise Shiba::Error, "API request failed: #{res} #{res.body}"
        end
      end

      def verify_connection!
        return true if @http
        raise Shiba::Error.new("API requests must be wrapped in a #connect { ... } block")
      end

      # "user@host:owner/repo.git" => ["host", "owner/repo.git"]
      def split_scp_remote
        location = repo_url.split('@', 2).last
        host, path = location.split(':', 2)
        raise Shiba::Error.new("Unrecognized repository URL: #{repo_url}") if path.nil?

        [host, path]
      end

      # "https://host/owner/repo.git" => ["host", "/owner/repo.git"]
      def split_url_remote
        parsed = URI.parse(repo_url)
        [parsed.host, parsed.path]
      end

      # Strips surrounding slashes and the ".git" suffix. Avoids chained
      # bang methods, which return nil when nothing changes.
      def normalize_repo_path(path)
        path.to_s.sub(%r{\A/+}, '').sub(%r{/+\z}, '').chomp('.git')
      end

    end
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
3
|
+
module Shiba
  module Review
    # Renders the tags of an explain result into a Markdown comment body,
    # one "Title: summary" line per tag, using the given tag templates.
    class CommentRenderer
      # {{ variable }}
      VAR_PATTERN = /{{\s?([a-z_]+)\s?}}/

      # templates - Hash of tag name => Hash with "title" and "summary"
      #             Strings; summaries may contain {{ variable }} markers.
      def initialize(templates)
        @templates = templates
      end

      # explain - Hash with a "tags" Array plus the values read by #present
      #           ("table", "table_size", "key", "return_size",
      #           "used_key_parts", "cost").
      #
      # Returns the rendered String, one newline-terminated line per tag.
      # Raises KeyError when a tag has no template.
      def render(explain)
        body = ""

        data = present(explain)
        explain["tags"].each do |tag|
          template = @templates.fetch(tag)

          body << template["title"]
          body << ": "
          body << render_template(template["summary"], data)
          body << "\n"
        end

        body
      end

      protected

      # Substitutes {{ variable }} markers from +data+ (unknown or nil
      # variables render as an empty string) and turns <b> tags into
      # Markdown bold markers.
      def render_template(template, data)
        rendered = template.gsub(VAR_PATTERN) { data[Regexp.last_match(1)].to_s }
        # convert to markdown
        rendered.gsub(/<\/?b>/, "**")
      end

      # Builds the variable table available to templates.
      def present(explain)
        used_key_parts = explain["used_key_parts"] || []

        { "table"       => explain["table"],
          "table_size"  => explain["table_size"],
          "key"         => explain["key"],
          "return_size" => explain["return_size"],
          "key_parts"   => used_key_parts.join(","),
          "cost"        => cost(explain)
        }
      end

      # Describes the query cost. Costs above 100 rows that touch more than
      # 1% of the table are shown as "<pct>% (<rows>) of the"; anything else
      # is returned as the raw cost value.
      #
      # The ratio is computed in floating point: integer division would
      # truncate every partial scan to 0%. A missing, non-numeric or zero
      # table size falls back to the raw cost instead of raising.
      def cost(explain)
        rows = explain["cost"]
        table_size = explain["table_size"]

        unless rows.is_a?(Numeric) && table_size.is_a?(Numeric) && table_size > 0
          return rows
        end

        percentage = (rows.to_f / table_size) * 100.0

        if rows > 100 && percentage > 1
          "#{percentage.floor}% (#{rows}) of the"
        else
          rows
        end
      end

    end
  end
end
|
@@ -0,0 +1,136 @@
|
|
1
|
+
require 'open3'
|
2
|
+
require 'shiba'
|
3
|
+
require 'shiba/diff'
|
4
|
+
require 'shiba/review/api'
|
5
|
+
require 'shiba/review/comment_renderer'
|
6
|
+
|
7
|
+
module Shiba
  # Turns Shiba query problems into review comments on a GitHub pull request.
  #
  # TODO:
  # 1. Properly handle more than a handful of review failures
  # 2. May make sense to edit the comment on a commit line when the code
  #    is semi-corrected but still a problem
  class Reviewer
    TEMPLATE_FILE = File.join(Shiba.root, 'lib/shiba/output/tags.yaml')

    attr_reader :repo_url, :problems, :options

    # repo_url - String git remote of the repository under review.
    # problems - Enumerable of ["file:line", explain Hash] pairs.
    # options  - Hash with String keys. Read here: "branch", "diff",
    #            "submit", "verbose", "token", "pull_request".
    #
    # Raises Shiba::Error when neither "branch" nor "diff" is given.
    def initialize(repo_url, problems, options)
      @repo_url = repo_url
      @problems = problems
      @options = options
      # A branch is only optional when a diff file is supplied; in that
      # case @commit_id stays nil.
      @commit_id = options.fetch("branch") do
        raise Shiba::Error.new("Must specify a branch") if !options['diff']
      end
    end

    # Builds (and memoizes) one comment Hash per problem with the keys
    # :body, :commit_id, :path, :line and :position.
    #
    # Raises Shiba::Error when a problem path is not of the form "file:line".
    def comments
      return @comments if @comments

      @comments = problems.map do |path, explain|
        file, line_number = path.split(":")
        if path.empty? || line_number.nil?
          # Report the offending path itself; line_number is nil here.
          raise Shiba::Error.new("Bad path received: #{path}")
        end

        position = diff.find_position(file, line_number.to_i)

        if options["submit"]
          explain = keep_only_dangerous_tags(explain)
        end

        { body: renderer.render(explain),
          commit_id: @commit_id,
          path: file,
          line: line_number,
          position: position }
      end
    end

    # FIXME: Only submit 10 comments for now. The rest just vanish.
    # Submits comments, checking to make sure the line doesn't already have a review.
    def submit
      report("Connecting to #{api.uri}")

      api.connect do
        previous_reviews = api.previous_comments.map { |c| c['body'] }

        comments[0,10].each do |c|
          if previous_reviews.include?(c[:body])
            report("skipped duplicate comment")
            next
          end

          # :line isn't part of the github api
          comment = c.reject { |key, _| key == :line }
          # Options use String keys throughout, so look up "verbose" the
          # same way #report does.
          if options["verbose"]
            comment[:body] += " (verbose mode ts=#{Time.now.to_i})"
          end

          res = api.comment_on_pull_request(comment)
          report("API success #{res.inspect}")
        end
      end

      report("HTTP request finished")
    end

    def repo_host
      @repo_host ||= api.host_and_path.first
    end

    def repo_path
      @repo_path ||= api.host_and_path.last
    end

    protected

    # Writes +message+ to stderr when the "verbose" option is set.
    def report(message)
      if options["verbose"]
        $stderr.puts message
      end
    end

    # Returns a copy of +explain+ whose "tags" only include tags with
    # level "danger" in the tag templates.
    def keep_only_dangerous_tags(explain)
      explain_b = explain.dup
      explain_b["tags"] = explain_b["tags"].select { |tag| tags[tag]["level"] == "danger" }
      explain_b
    end

    # Memoized Shiba::Diff, built from the "diff" option file when given,
    # otherwise from `git diff`.
    def diff
      return @diff if @diff
      output = options['diff'] ? file_diff : git_diff
      @diff = Shiba::Diff.new(output)
    end

    # Runs `git diff origin/HEAD..<branch>` and returns its output as a
    # StringIO. The command is executed without a shell so the branch name
    # cannot be interpreted as shell syntax.
    #
    # Raises Shiba::Error when git exits unsuccessfully.
    def git_diff
      range = "origin/HEAD..#{@commit_id}"
      report("Finding PR position using: git diff #{range}")

      output, status = Open3.capture2("git", "diff", range)
      unless status.success?
        raise Shiba::Error.new("git diff #{range} failed (exit status #{status.exitstatus})")
      end

      StringIO.new(output)
    end

    # Returns the contents of the "diff" option file as a StringIO, the
    # same IO type #git_diff produces, so no File handle is left open.
    def file_diff
      report("Finding PR position using file: #{options['diff']}")
      StringIO.new(File.read(options['diff']))
    end

    def api
      @api ||= begin
        api_options = {
          "token" => options["token"],
          "pull_request" => options["pull_request"]
        }
        Review::API.new(repo_url, api_options)
      end
    end

    def renderer
      @renderer ||= Review::CommentRenderer.new(tags)
    end

    # Tag templates loaded from TEMPLATE_FILE (memoized).
    def tags
      @tags ||= YAML.load_file(TEMPLATE_FILE)
    end

  end
end
|