shiba 0.2.3 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/shiba/fuzzer.rb CHANGED
@@ -5,14 +5,14 @@ module Shiba
5
5
 
6
6
  def initialize(connection)
7
7
  @connection = connection
8
- @index_stats = IndexStats.new
9
8
  end
10
9
 
11
10
  attr_reader :connection
12
11
 
13
12
  def fuzz!
14
- fetch_index!
13
+ @index_stats = fetch_index
15
14
  table_sizes = guess_table_sizes
15
+
16
16
  @index_stats.tables.each do |name, table|
17
17
  table.count = table_sizes[name]
18
18
  table.indexes.each do |name, index|
@@ -21,33 +21,33 @@ module Shiba
21
21
  end
22
22
  end
23
23
  end
24
+
24
25
  @index_stats
25
26
  end
26
27
 
27
- private
28
-
29
- BIG_FUZZ_SIZE = 5_000
30
- SMALL_FUZZ_SIZE = 100
31
-
32
- def fetch_index!
33
- records = connection.query("select * from information_schema.statistics where table_schema = DATABASE()")
28
+ def fetch_index
29
+ stats = Shiba::IndexStats.new
30
+ records = connection.fetch_indexes
34
31
  tables = {}
35
32
  records.each do |h|
36
33
  h.keys.each { |k| h[k.downcase] = h.delete(k) }
37
34
  h["cardinality"] = h["cardinality"].to_i
38
- @index_stats.add_index_column(h['table_name'], h['index_name'], h['column_name'], h['cardinality'], h['non_unique'] == "0")
35
+
36
+ stats.add_index_column(h['table_name'], h['index_name'], h['column_name'], h['cardinality'], h['non_unique'] == 0)
39
37
  end
38
+ stats
40
39
  end
41
40
 
41
+ private
42
+
43
+ BIG_FUZZ_SIZE = 5_000
44
+ SMALL_FUZZ_SIZE = 100
45
+
46
+
42
47
  # Create fake table sizes based on the table's index count.
43
48
  # The more indexes, the bigger the table. Seems to rank tables fairly well.
44
49
  def guess_table_sizes
45
- index_count_query = "select TABLE_NAME as table_name, count(*) as index_count
46
- from information_schema.statistics where table_schema = DATABASE()
47
- and seq_in_index = 1 and index_name not like 'fk_rails%'
48
- group by table_name order by index_count"
49
-
50
- index_counts = connection.query(index_count_query).to_a
50
+ index_counts = connection.count_indexes_by_table
51
51
 
52
52
  # 90th table percentile based on number of indexes
53
53
  # round down so we don't blow up on small tables
@@ -97,8 +97,7 @@ module Shiba
97
97
 
98
98
  count = table_count
99
99
  count = 1 if count == 0
100
- ratio_per_item = self.rows_per / count.to_f rescue debugger
101
-
100
+ ratio_per_item = self.rows_per / count.to_f
102
101
 
103
102
  if count <= 10
104
103
  ratio_threshold = 1_000_0000 # always show a number
@@ -158,11 +157,16 @@ module Shiba
158
157
 
159
158
  return nil unless index
160
159
 
161
- index_part = index.columns.detect do |p|
162
- p.column == parts.last
160
+ index_part = nil
161
+ index.columns.each do |c|
162
+ break unless parts.include?(c.column)
163
+ index_part = c
163
164
  end
164
165
 
165
- return nil unless index_part
166
+ # postgres can claim to use the right hand side of an index
167
+ # in a bitmap scan, which seems to be a side-effect of forcing
168
+ # seq-scan off. In these cases we'll say it's a full scan.
169
+ return table_count(table_name) unless index_part
166
170
 
167
171
  index_part.rows_per
168
172
  end
data/lib/shiba/output.rb CHANGED
@@ -30,7 +30,7 @@ module Shiba
30
30
  FileUtils.mkdir_p(File.join(logdir, "shiba_results"))
31
31
  File.join(Dir.pwd, "log", "shiba_results", default_filename)
32
32
  else
33
- File.join(Dir.tmpdir, default_filename)
33
+ File.join(Shiba.path, default_filename)
34
34
  end
35
35
  end
36
36
 
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  fuzzed_data:
3
3
  title: Fuzzed Data
4
- summary: Shiba doesn't know the size of <b>{{query.table}}</b>. For these purposes we set the table size to <b>{{query.table_size}}</b>.
4
+ summary: Shiba doesn't know the size of <b>{{table}}</b>. For these purposes we set the table size to <b>{{table_size}}</b>.
5
5
  description: |
6
6
  We're not sure how much data this table will hold in the future, so we've pretended
7
7
  there's 6000 rows in it. This can lead to a lot of false positives. To
@@ -16,20 +16,20 @@ possible_key_check:
16
16
  level: info
17
17
  access_type_const:
18
18
  title: One row
19
- summary: The database only needs to read a single row from <b>{{query.table}}</b>.
19
+ summary: The database only needs to read a single row from <b>{{table}}</b>.
20
20
  description: |
21
21
  This query selects at *most* one row, which is about as good as things get.
22
22
  level: success
23
23
  access_type_ref:
24
24
  title: Indexed
25
- summary: The database reads {{ formattedCost }} rows in <b>{{ query.table }}</b> via the <b>{{ query.key }}</b> index ({{ key_parts }}).
25
+ summary: The database reads {{ cost }} rows in <b>{{ table }}</b> via the <b>{{ key }}</b> index ({{ key_parts }}).
26
26
  description: |
27
27
  This query uses an index to find rows that match a single value. Often this
28
28
  has very good performance, but it depends on how many rows match that value.
29
29
  level: success
30
30
  access_type_range:
31
31
  title: Indexed
32
- summary: The database uses a "range scan" to read more than {{ formattedCost }} rows in {{ query.table }} via the <b>{{ query.key }}</b> index ({{ key_parts }})
32
+ summary: The database uses a "range scan" to read more than {{ cost }} rows in {{ table }} via the <b>{{ key }}</b> index ({{ key_parts }})
33
33
  description: |
34
34
  This query uses an index to find rows that match a range of values, for instance
35
35
  `WHERE indexed_value in (1,2,5,6)` or `WHERE indexed_value >= 5 AND indexed_value <= 15`.
@@ -38,7 +38,7 @@ access_type_range:
38
38
  level: info
39
39
  access_type_tablescan:
40
40
  title: Table Scan
41
- summary: The database reads <b>100%</b> ({{ query.table_size }}) of the rows in <b>{{ query.table }}</b>, skipping any indexes.
41
+ summary: The database reads <b>100%</b> ({{ table_size }}) of the rows in <b>{{ table }}</b>, skipping any indexes.
42
42
  description: |
43
43
  This query doesn't use any indexes to find data, meaning this query will need to evaluate
44
44
  every single row in the table. This is about the worst of all possible worlds.
@@ -47,6 +47,13 @@ access_type_tablescan:
47
47
  but be aware that if this table is not effectively tiny or constant-sized you're entering
48
48
  a world of pain.
49
49
  level: danger
50
# Placeholders use the bare variable names (cost, table) like every other
# entry in this file, so any renderer that only understands {{ name }} can
# fill them in.
limited_scan:
  title: Limited Scan
  summary: The database reads {{ cost }} rows from {{ table }}.
  description: |
    This query doesn't use any indexes to find data, but since it doesn't care about
    ordering and it doesn't have any conditions, it only ever reads {{ cost }} rows.
  level: info
50
57
  ignored:
51
58
  title: Ignored
52
59
  summary: This query matched an "ignore" rule in shiba.yml. Any further analysis was skipped.
@@ -61,10 +68,9 @@ index_walk:
61
68
  level: success
62
69
  retsize_bad:
63
70
  title: Big Results
64
- summary: The database returns {{ query.return_size.toLocaleString() }} rows to the client.
71
+ summary: The database returns {{ return_size }} rows to the client.
65
72
  level: danger
66
73
  retsize_good:
67
74
  title: Small Results
68
- summary: The database returns {{ query.return_size.toLocaleString() }} row(s) to the client.
75
+ summary: The database returns {{ return_size }} row(s) to the client.
69
76
  level: success
70
-
@@ -17,6 +17,12 @@ module Shiba
17
17
  sql = payload[:sql]
18
18
  return if !sql.start_with?("SELECT")
19
19
 
20
+ if sql.include?("$1")
21
+ sql = interpolate(sql, payload[:type_casted_binds])
22
+ end
23
+
24
+ sql = sql.gsub(/\n/, ' ')
25
+
20
26
  fingerprint = Query.get_fingerprint(sql)
21
27
  return if @queries[fingerprint]
22
28
 
@@ -27,5 +33,11 @@ module Shiba
27
33
  @queries[fingerprint] = true
28
34
  end
29
35
 
36
# Replaces numbered bind placeholders ($1, $2, ...) in +sql+ with the quoted
# value found at the matching position in +binds+.
#
# sql   - SQL string containing $N placeholders.
# binds - Array of bind values; $1 maps to binds[0], $2 to binds[1], etc.
#
# Returns a new String; +sql+ itself is not modified.
def interpolate(sql, binds)
  connection = ActiveRecord::Base.connection

  # A single pass over the whole placeholder token (all of its digits) means:
  #   * $1 can never match the front of $10,
  #   * a placeholder that appears more than once is replaced every time,
  #   * text inserted for one bind is never rescanned for later placeholders.
  # The block form is used so backslashes in the quoted value are inserted
  # verbatim instead of being read as back-references.
  sql.gsub(/\$(\d+)/) do |placeholder|
    position = Regexp.last_match(1).to_i

    # Leave anything that does not map to a supplied bind untouched.
    next placeholder unless position.between?(1, binds.size)

    connection.quote(binds[position - 1])
  end
end
30
42
  end
31
- end
43
+ end
@@ -0,0 +1,100 @@
1
+ require 'uri'
2
+ require 'json'
3
+ require 'net/http'
4
+
5
module Shiba
  module Review
    # Minimal client for the pull request review comment endpoint of GitHub
    # (github.com or a GitHub Enterprise host).
    #
    # All requests must be issued inside a #connect block, which holds one
    # HTTPS connection open for the duration of the block.
    class API

      attr_reader :repo_url, :token, :pull_request

      # repo_url - git remote URL of the repository, either scp-style
      #            (git@host:owner/repo.git) or with a scheme
      #            (https://host/owner/repo.git).
      # options  - Hash with the required String keys "token" and
      #            "pull_request". Raises KeyError if either is missing.
      def initialize(repo_url, options)
        @repo_url     = repo_url
        @http         = nil
        @token        = options.fetch("token")
        @pull_request = options.fetch("pull_request")
      end

      # Opens an HTTPS connection to the API host and yields. The connection
      # is only available to #comment_on_pull_request / #previous_comments
      # while the block is running.
      def connect
        Net::HTTP.start(uri.hostname, uri.port, use_ssl: true) do |http|
          begin
            @http = http
            yield
          ensure
            # Never keep a reference to a connection Net::HTTP is about to close.
            @http = nil
          end
        end
      end

      # https://developer.github.com/v3/pulls/comments/#create-a-comment
      #
      # comment - Hash that is serialized to JSON and sent as the request body.
      #
      # Returns the parsed JSON response.
      def comment_on_pull_request(comment)
        req = Net::HTTP::Post.new(uri)
        req.body = JSON.dump(comment)
        request(req)
      end

      # https://developer.github.com/v3/pulls/comments/#list-comments-on-a-pull-request
      #
      # Returns the parsed JSON response.
      def previous_comments
        request(Net::HTTP::Get.new(uri))
      end

      # Comments endpoint for the configured pull request, memoized.
      # github.com is served from api.github.com; any other host is assumed
      # to expose the API under /api/v3.
      def uri
        return @uri if @uri

        repo_host, repo_path = host_and_path
        base = if repo_host == 'github.com'
          'https://api.github.com'
        else
          "https://#{repo_host}/api/v3"
        end

        # Build a new string rather than appending to the literal above, so
        # this keeps working when string literals are frozen.
        @uri = URI("#{base}/repos/#{repo_path}/pulls/#{pull_request}/comments")
      end

      # Splits repo_url into [host, "owner/repo"].
      #
      #   git@github.com:burrito-brothers/shiba.git
      #   https://github.com/burrito-brothers/shiba.git
      #   https://user@github.com/burrito-brothers/shiba.git
      #
      # all yield ["github.com", "burrito-brothers/shiba"].
      def host_and_path
        # Only treat the URL as scp-style when it has no scheme; a URL such as
        # https://user@host/... also contains an "@" but must go through URI.
        scp_style = repo_url.include?('@') && !repo_url.include?('://')

        if scp_style
          host, path = repo_url.split(':', 2)
          host = host.sub(/\A[^@]*@/, '') # drop the "user@" part
          path = path.to_s.chomp('.git')
        else
          parsed = URI.parse(repo_url)
          host = parsed.host
          # Strip the leading slash without relying on bang methods, which
          # return nil when there is nothing to change.
          path = parsed.path.chomp('.git').sub(%r{\A/}, '')
        end

        return host, path
      end

      protected

      # Sends +req+ over the open connection with auth and content-type
      # headers set.
      #
      # Returns the parsed JSON body on a 2xx response.
      # Raises Shiba::Error on any other response, or when called outside
      # of a #connect block.
      def request(req)
        verify_connection!

        req['Authorization'] = "token #{token}"
        req['Content-Type'] = "application/json"

        res = @http.request(req)

        case res
        when Net::HTTPSuccess
          JSON.parse(res.body)
        else
          raise Shiba::Error, "API request failed: #{res} #{res.body}"
        end
      end

      # Raises Shiba::Error unless a connection opened by #connect is active.
      def verify_connection!
        return true if @http
        raise Shiba::Error.new("API requests must be wrapped in a #connect { ... } block")
      end

    end
  end
end
@@ -0,0 +1,62 @@
1
+ require 'yaml'
2
+
3
module Shiba
  module Review
    # Renders the tags of an explain result into a plain-text/markdown
    # comment body, using the "title" and "summary" of each tag's template.
    class CommentRenderer
      # {{ variable }} - any amount of whitespace padding inside the braces.
      VAR_PATTERN = /\{\{\s*([a-z_]+)\s*\}\}/

      # templates - Hash of tag name => Hash with "title" and "summary" keys.
      def initialize(templates)
        @templates = templates
      end

      # explain - Hash with a "tags" Array plus the values read by #present.
      #
      # Returns a String with one "Title: summary\n" line per tag.
      def render(explain)
        data = present(explain)

        explain["tags"].map do |tag|
          template = @templates[tag]
          "#{template["title"]}: #{render_template(template["summary"], data)}\n"
        end.join
      end

      protected

      # Substitutes every {{ name }} in +template+ with data[name] (missing
      # names render as an empty string) and converts <b> tags to markdown.
      def render_template(template, data)
        rendered = template.gsub(VAR_PATTERN) { data[Regexp.last_match(1)] }
        # convert to markdown
        rendered.gsub(/<\/?b>/, "**")
      end

      # Builds the variables available to templates from an explain Hash.
      def present(explain)
        used_key_parts = explain["used_key_parts"] || []

        { "table"       => explain["table"],
          "table_size"  => explain["table_size"],
          "key"         => explain["key"],
          "return_size" => explain["return_size"],
          "key_parts"   => used_key_parts.join(","),
          "cost"        => cost(explain)
        }
      end

      # Formats the cost for display. When the cost is more than 100 rows and
      # more than 1% of the table, it is shown as a percentage of the table
      # ("50% (5000) of the"); otherwise the raw cost is returned.
      def cost(explain)
        rows = explain["cost"]
        # Convert before dividing: with two Integers the quotient would be
        # truncated to 0 for every cost smaller than the table.
        table_size = explain["table_size"].to_f

        # No cost, or an unknown/empty table: a percentage is meaningless.
        return rows if rows.nil? || table_size.zero?

        percentage = (rows / table_size) * 100.0

        if rows > 100 && percentage > 1
          "#{percentage.floor}% (#{rows}) of the"
        else
          rows
        end
      end

    end
  end
end
@@ -0,0 +1,136 @@
1
+ require 'open3'
2
+ require 'shiba'
3
+ require 'shiba/diff'
4
+ require 'shiba/review/api'
5
+ require 'shiba/review/comment_renderer'
6
+
7
module Shiba
  # Turns a set of query problems into pull request review comments and,
  # optionally, submits them through Review::API.
  #
  # TODO:
  # 1. Properly handle more than a handful of review failures
  # 2. May make sense to edit the comment on a commit line when the code
  #    is semi-corrected but still a problem
  class Reviewer
    TEMPLATE_FILE = File.join(Shiba.root, 'lib/shiba/output/tags.yaml')

    # Upper bound on comments sent per #submit call.
    MAX_COMMENTS = 10

    attr_reader :repo_url, :problems, :options

    # repo_url - git remote URL of the repository under review.
    # problems - Enumerable of ["file:line", explain Hash] pairs.
    # options  - Hash with String keys. Read here and elsewhere in the class:
    #            "branch", "diff", "submit", "verbose", "token",
    #            "pull_request".
    #
    # Raises Shiba::Error when neither "branch" nor "diff" is given.
    def initialize(repo_url, problems, options)
      @repo_url = repo_url
      @problems = problems
      @options  = options
      @commit_id = options.fetch("branch") do
        raise Shiba::Error.new("Must specify a branch") if !options['diff']
      end
    end

    # Builds (and memoizes) one comment Hash per problem, with the keys
    # :body, :commit_id, :path, :line and :position.
    #
    # Raises Shiba::Error if a problem's path is not of the form "file:line".
    def comments
      @comments ||= problems.map do |path, explain|
        file, line_number = path.split(":")
        if path.empty? || line_number.nil?
          raise Shiba::Error.new("Bad path received: #{path.inspect}")
        end

        position = diff.find_position(file, line_number.to_i)

        # Submitted comments only mention tags whose level is "danger".
        explain = keep_only_dangerous_tags(explain) if options["submit"]

        { body: renderer.render(explain),
          commit_id: @commit_id,
          path: file,
          line: line_number,
          position: position }
      end
    end

    # FIXME: Only submit 10 comments for now. The rest just vanish.
    # Submits comments, checking to make sure an identical review comment
    # hasn't already been posted on the pull request.
    def submit
      report("Connecting to #{api.uri}")

      api.connect do
        previous_reviews = api.previous_comments.map { |c| c['body'] }

        comments.first(MAX_COMMENTS).each do |c|
          if previous_reviews.include?(c[:body])
            report("skipped duplicate comment")
            next
          end

          # :line isn't part of the github api
          comment = c.reject { |key, _| key == :line }
          # Options use String keys throughout this class.
          if options["verbose"]
            comment[:body] += " (verbose mode ts=#{Time.now.to_i})"
          end

          res = api.comment_on_pull_request(comment)
          report("API success #{res.inspect}")
        end
      end

      report("HTTP request finished")
    end

    # Host part of repo_url, as parsed by Review::API#host_and_path.
    def repo_host
      @repo_host ||= api.host_and_path.first
    end

    # "owner/repo" part of repo_url, as parsed by Review::API#host_and_path.
    def repo_path
      @repo_path ||= api.host_and_path.last
    end

    protected

    # Writes +message+ to stderr when the "verbose" option is set.
    def report(message)
      $stderr.puts message if options["verbose"]
    end

    # Returns a shallow copy of +explain+ whose "tags" only include tags
    # with level "danger" in the template file. +explain+ is not modified.
    def keep_only_dangerous_tags(explain)
      explain.dup.tap do |filtered|
        filtered["tags"] = filtered["tags"].select { |tag| tags[tag]["level"] == "danger" }
      end
    end

    # Shiba::Diff built from the "diff" option's file when given, otherwise
    # from `git diff` against origin/HEAD. Memoized.
    def diff
      @diff ||= Shiba::Diff.new(options['diff'] ? file_diff : git_diff)
    end

    # Runs git diff for the configured branch and returns its output as a
    # StringIO.
    def git_diff
      range = "origin/HEAD..#{@commit_id}"
      report("Finding PR position using: git diff #{range}")

      # Arguments are passed as a list, so no shell is involved and the
      # branch name cannot be interpreted as shell syntax.
      output, _status = Open3.capture2("git", "diff", range)
      StringIO.new(output)
    end

    # Returns the contents of the "diff" option's file as a StringIO, the
    # same kind of object #git_diff hands to Shiba::Diff.
    def file_diff
      report("Finding PR position using file: #{options['diff']}")
      # Read eagerly so no file handle is left open.
      StringIO.new(File.read(options['diff']))
    end

    # Memoized Review::API client configured from the reviewer's options.
    def api
      @api ||= begin
        api_options = {
          "token" => options["token"],
          "pull_request" => options["pull_request"]
        }
        Review::API.new(repo_url, api_options)
      end
    end

    # Memoized comment renderer backed by the tag templates.
    def renderer
      @renderer ||= Review::CommentRenderer.new(tags)
    end

    # Tag templates loaded from TEMPLATE_FILE, memoized.
    def tags
      @tags ||= YAML.load_file(TEMPLATE_FILE)
    end

  end
end