shiba 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 8aaef4cac972cd661d5398bd510d0a78f4d1e078
4
- data.tar.gz: 61d077a4f31b4ff21eb652c2c866e8c5f3bb9e49
2
+ SHA256:
3
+ metadata.gz: 4b4f464acdc517169589f38206c835a77b160ae7fe5293f3c1c96bef2a736911
4
+ data.tar.gz: ab4af7bcb0e55e042372c5579f8829f751c76efbb0ac3c0b17ae1fd97547362b
5
5
  SHA512:
6
- metadata.gz: 80e2b32747df07efbbd89227b86347530ad955fdc4520f9179bbfe274440397b1063d6dfb50bf2c737f8e88460609e80ae86361b712f9c2e3b0d7ae86d55d728
7
- data.tar.gz: 4b540f27e5033c153621a0f2292cba50786857f018d2de54f8d9a5f58755d0a6b8872b2dae5af11e097a98f9b18935499d176ed5aedcedb4a859ce7219c7fc0c
6
+ metadata.gz: 0c2809905f330b3e1e8874297e66d84c24a938fe4c532b29cfe87b88b8983a1d6700a03f60382e190fe171c5f7cc90ad81d0ca4aa2d1770cf2eb0afdd10621f5
7
+ data.tar.gz: 36e6639d67a0b333c8faac40c45d092c7fe13b5acc0053066e1e68c0b3f9f70377fc19bb2d1fa0e724fd9163ea913b778bdf07ac8fbbd15d073678c31ac28b16
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- shiba (0.3.0)
4
+ shiba (0.4.0)
5
5
  activesupport
6
6
  mysql2
7
7
  pg
data/README.md CHANGED
@@ -134,3 +134,25 @@ users:
134
134
  rows_per: 20% # each organization has, on average, 20% or 2000 users.
135
135
  unique: false
136
136
  ```
137
+
138
+ ## Automatic pull request reviews
139
+
140
+ Shiba can automatically comment on GitHub pull requests when code changes appear to introduce a query issue. The comments are similar to those in the query report dashboard. This guide will walk through setup on Travis CI, but other CI services should work in a similar fashion.
141
+
142
+ Once Shiba is installed, the `shiba review` command needs to be run after the tests are finished. On Travis, this goes in an after_script setting:
143
+
144
+ ```yml
145
+ # .travis.yml
146
+ after_script:
147
+ - bundle exec shiba review --submit
148
+ ```
149
+
150
+ The `--submit` option tells Shiba to comment on the relevant PR when an issue is found. To do this, it will need the GitHub API token of a user that has access to the repo. Shiba's comments will appear to come from that user, so you'll likely want to set up a bot account on GitHub with repo access for this.
151
+
152
+ By default, the review script looks for an environment variable named `GITHUB_TOKEN` that can be specified at https://travis-ci.com/{organization}/{repo}/settings. The token can be generated on GitHub at https://github.com/settings/tokens. If you have another environment variable name for your GitHub token, it can be manually configured using the `--token` flag.
153
+
154
+ ```yml
155
+ # .travis.yml
156
+ after_script:
157
+ - bundle exec shiba review --token $MY_GITHUB_API_TOKEN --submit
158
+ ```
@@ -0,0 +1,21 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'bundler/setup'
4
+ require 'shiba'
5
+ require 'shiba/configure'
6
+ require 'shiba/fuzzer'
7
+
8
+ options = {}
9
+ parser = Shiba::Configure.make_options_parser(options, only_basics: true)
10
+ parser.banner = "Dump database statistics into yaml file."
11
+ parser.parse!
12
+
13
+ Shiba.configure(options) do |errmsg|
14
+ $stderr.puts(errmsg)
15
+ $stderr.puts(parser.help)
16
+ exit 1
17
+ end
18
+
19
+ Shiba.connection.analyze!
20
+ index = Shiba::Fuzzer.new(Shiba.connection).fetch_index
21
+ puts index.to_yaml
@@ -1,20 +1,3 @@
1
- #!/usr/bin/env ruby
1
+ #!/bin/bash
2
2
 
3
- require 'bundler/setup'
4
- require 'shiba'
5
- require 'shiba/configure'
6
- require 'shiba/fuzzer'
7
-
8
- options = {}
9
- parser = Shiba::Configure.make_options_parser(options, only_basics: true)
10
- parser.banner = "Dump database statistics into yaml file."
11
- parser.parse!
12
-
13
- Shiba.configure(options) do |errmsg|
14
- $stderr.puts(errmsg)
15
- $stderr.puts(parser.help)
16
- exit 1
17
- end
18
-
19
- index = Shiba::Fuzzer.new(Shiba.connection).fetch_index
20
- puts index.to_yaml
3
+ exec `dirname $0`/dump_stats --server mysql $*
data/bin/shiba CHANGED
@@ -6,7 +6,8 @@ APP = File.basename(__FILE__)
6
6
 
7
7
  commands = {
8
8
  "explain" => "Generate a report from logged SQL queries",
9
- "review" => "Review changed files for query problems",
9
+ "review" => "Review changed files for query problems",
10
+ "dump_stats" => "Collect database statistics for more accurate analysis"
10
11
  }
11
12
 
12
13
  global = OptionParser.new do |opts|
@@ -50,7 +50,9 @@ module Shiba
50
50
  puts ""
51
51
 
52
52
  cmd = "shiba explain #{database_args} --file #{path}"
53
- if Shiba::Configure.ci?
53
+ if ENV['SHIBA_OUT']
54
+ cmd << " --json #{File.join(Shiba.path, "#{ENV["SHIBA_OUT"]}.json")}"
55
+ elsif Shiba::Configure.ci?
54
56
  cmd << " --json #{File.join(Shiba.path, 'ci.json')}"
55
57
  end
56
58
 
@@ -16,11 +16,17 @@ module Shiba
16
16
  @stats = stats
17
17
  @options = options
18
18
  @fingerprints = {}
19
+ @queries = []
19
20
  end
20
21
 
21
22
  def analyze
22
23
  idx = 0
23
- queries = []
24
+
25
+ if @options['sql']
26
+ analyze_sql(@options['sql'])
27
+ return @queries
28
+ end
29
+
24
30
  while line = @file.gets
25
31
  # strip out colors
26
32
  begin
@@ -35,35 +41,25 @@ module Shiba
35
41
  next
36
42
  end
37
43
 
38
- if @options['limit']
39
- return if idx == @options['limit']
40
- end
44
+ sql.chomp!
45
+ analyze_sql(sql)
46
+ end
47
+ @queries
48
+ end
41
49
 
42
- if @options['index']
43
- next unless idx == @options['index']
44
- end
50
+ def analyze_sql(sql)
51
+ query = Shiba::Query.new(sql, @stats)
45
52
 
46
- sql.chomp!
47
- query = Shiba::Query.new(sql, @stats)
48
-
49
- if !@fingerprints[query.fingerprint]
50
- if sql.downcase.start_with?("select")
51
- if @options['debug']
52
- require 'byebug'
53
- debugger
54
- end
55
-
56
- explain = analyze_query(query)
57
- if explain
58
- idx += 1
59
- queries << explain
60
- end
53
+ if !@fingerprints[query.fingerprint]
54
+ if sql.downcase.start_with?("select")
55
+ explain = analyze_query(query)
56
+ if explain
57
+ @queries << explain
61
58
  end
62
59
  end
63
-
64
- @fingerprints[query.fingerprint] = true
65
60
  end
66
- queries
61
+
62
+ @fingerprints[query.fingerprint] = true
67
63
  end
68
64
 
69
65
  protected
@@ -85,6 +81,10 @@ module Shiba
85
81
  end
86
82
  return nil unless explain
87
83
 
84
+ if explain.other_paths.any?
85
+ paths = [explain] + explain.other_paths
86
+ explain = paths.sort { |a, b| a.cost - b.cost }.first
87
+ end
88
88
  json = JSON.dump(explain.as_json)
89
89
  write(json)
90
90
  explain.as_json
@@ -101,8 +101,8 @@ module Shiba
101
101
 
102
102
  next if only_basics
103
103
 
104
- opts.on("-l", "--limit NUM", "stop after processing NUM queries") do |l|
105
- options["limit"] = l.to_i
104
+ opts.on("--sql SQL", "analyze this sql") do |s|
105
+ options["sql"] = s
106
106
  end
107
107
 
108
108
  opts.on("-f", "--file FILE", "location of file containing queries") do |f|
@@ -117,14 +117,10 @@ module Shiba
117
117
  end
118
118
  end
119
119
 
120
- opts.on("-h", "--html FILE", "write html report here. Default to /tmp/explain.html") do |h|
120
+ opts.on("-h", "--html FILE", "write html report here.") do |h|
121
121
  options["html"] = h
122
122
  end
123
123
 
124
- opts.on("-t", "--test", "analyze queries at --file instead of analyzing a process") do |f|
125
- options["test"] = true
126
- end
127
-
128
124
  opts.on("-v", "--verbose", "print internal runtime information") do
129
125
  options["verbose"] = true
130
126
  end
@@ -21,6 +21,13 @@ module Shiba
21
21
  @connection.query(sql)
22
22
  end
23
23
 
24
+ def analyze!
25
+ @connection.query("show tables").each do |row|
26
+ t = row.values.first
27
+ @connection.query("analyze table `#{t}`") rescue nil
28
+ end
29
+ end
30
+
24
31
  def count_indexes_by_table
25
32
  sql =<<-EOL
26
33
  select TABLE_NAME as table_name, count(*) as index_count
@@ -7,6 +7,7 @@ module Shiba
7
7
  @connection = PG.connect( dbname: h['database'], host: h['host'], user: h['username'], password: h['password'], port: h['port'] )
8
8
  @connection.type_map_for_results = PG::BasicTypeMapForResults.new(@connection)
9
9
  query("SET enable_seqscan = OFF")
10
+ query("SET random_page_cost = 0.01")
10
11
  end
11
12
 
12
13
  def query(sql)
@@ -1,10 +1,15 @@
1
1
  require 'json'
2
2
  require 'shiba/index'
3
+ require 'shiba/explain/check_support'
4
+ require 'shiba/explain/checks'
5
+ require 'shiba/explain/result'
3
6
  require 'shiba/explain/mysql_explain'
4
7
  require 'shiba/explain/postgres_explain'
5
8
 
6
9
  module Shiba
7
10
  class Explain
11
+ include CheckSupport
12
+ extend CheckSupport::ClassMethods
8
13
  def initialize(sql, stats, backtrace, options = {})
9
14
  @sql = sql
10
15
  @backtrace = backtrace
@@ -21,7 +26,9 @@ module Shiba
21
26
  else
22
27
  @rows = Shiba::Explain::PostgresExplain.new(@explain_json).transform
23
28
  end
29
+ @result = Result.new
24
30
  @stats = stats
31
+
25
32
  run_checks!
26
33
  end
27
34
 
@@ -29,19 +36,22 @@ module Shiba
29
36
  {
30
37
  sql: @sql,
31
38
  table: get_table,
32
- table_size: table_size,
33
- key: first_key,
34
- tags: messages,
35
- cost: @cost,
36
- return_size: @return_size,
39
+ messages: @result.messages,
40
+ cost: @result.cost,
37
41
  severity: severity,
38
- used_key_parts: first['used_key_parts'],
39
- possible_keys: first['possible_keys'],
40
42
  raw_explain: humanized_explain,
41
43
  backtrace: @backtrace
42
44
  }
43
45
  end
44
46
 
47
+ def messages
48
+ @result.messages
49
+ end
50
+
51
+ def cost
52
+ @result.cost
53
+ end
54
+
45
55
  def get_table
46
56
  @sql =~ /\s+from\s*([^\s,]+)/i
47
57
  table = $1
@@ -53,66 +63,20 @@ module Shiba
53
63
  table
54
64
  end
55
65
 
56
- # [{"id"=>1, "select_type"=>"SIMPLE", "table"=>"interwiki", "partitions"=>nil, "type"=>"const", "possible_keys"=>"PRIMARY", "key"=>"PRIMARY", "key_len"=>"34", "ref"=>"const", "rows"=>1, "filtered"=>100.0, "Extra"=>nil}]
57
- attr_reader :cost
58
-
59
66
  def first
60
67
  @rows.first
61
68
  end
62
69
 
63
- def first_table
64
- first["table"]
65
- end
66
-
67
- def first_key
68
- first["key"]
69
- end
70
-
71
70
  def first_extra
72
71
  first["Extra"]
73
72
  end
74
73
 
75
- def messages
76
- @messages ||= []
77
- end
78
-
79
- # shiba: {"possible_keys"=>nil, "key"=>nil, "key_len"=>nil, "ref"=>nil, "rows"=>6, "filtered"=>16.67, "Extra"=>"Using where"}
80
- def to_log
81
- plan = first.symbolize_keys
82
- "possible: #{plan[:possible_keys]}, rows: #{plan[:rows]}, filtered: #{plan[:filtered]}, cost: #{self.cost}, access: #{plan[:access_type]}"
83
- end
84
-
85
- def to_h
86
- first.merge(cost: cost, messages: messages)
87
- end
88
-
89
- def table_size
90
- @stats.table_count(first['table'])
91
- end
92
-
93
- def fuzzed?(table)
94
- @stats.fuzzed?(first['table'])
95
- end
96
-
97
74
  def no_matching_row_in_const_table?
98
75
  first_extra && first_extra =~ /no matching row in const table/
99
76
  end
100
77
 
101
- def ignore_explain?
102
- end
103
-
104
- def derived?
105
- first['table'] =~ /<derived.*?>/
106
- end
107
-
108
- # TODO: need to parse SQL here I think
109
- def simple_table_scan?
110
- @rows.size == 1 && (@sql !~ /order by/i) &&
111
- (first['using_index'] || !(@sql =~ /\s+WHERE\s+/i))
112
- end
113
-
114
78
  def severity
115
- case @cost
79
+ case @result.cost
116
80
  when 0..100
117
81
  "low"
118
82
  when 100..1000
@@ -133,22 +97,33 @@ module Shiba
133
97
  def aggregation?
134
98
  @sql =~ /select\s*(.*?)from/i
135
99
  select_fields = $1
136
- select_fields =~ /min|max|avg|count|sum|group_concat\s*\(.*?\)/i
100
+ select_fields =~ /(min|max|avg|count|sum|group_concat)\s*\(.*?\)/i
137
101
  end
138
102
 
139
- def self.check(c)
140
- @checks ||= []
141
- @checks << c
103
+
104
+ def ignore?
105
+ !!ignore_line_and_backtrace_line
142
106
  end
143
107
 
144
- def self.get_checks
145
- @checks
108
+ def ignore_line_and_backtrace_line
109
+ ignore_files = Shiba.config['ignore']
110
+ if ignore_files
111
+ ignore_files.each do |i|
112
+ file, method = i.split('#')
113
+ @backtrace.each do |b|
114
+ next unless b.include?(file)
115
+ next if method && !b.include?(method)
116
+ return [i, b]
117
+ end
118
+ end
119
+ end
120
+ nil
146
121
  end
147
122
 
148
123
  check :check_query_is_ignored
149
124
  def check_query_is_ignored
150
125
  if ignore?
151
- messages << "ignored"
126
+ @result.messages << { tag: "ignored" }
152
127
  @cost = 0
153
128
  end
154
129
  end
@@ -156,7 +131,7 @@ module Shiba
156
131
  check :check_no_matching_row_in_const_table
157
132
  def check_no_matching_row_in_const_table
158
133
  if no_matching_row_in_const_table?
159
- messages << "access_type_const"
134
+ @result.messages << { tag: "access_type_const", table: get_table }
160
135
  first['key'] = 'PRIMARY'
161
136
  @cost = 1
162
137
  end
@@ -176,9 +151,10 @@ module Shiba
176
151
  end
177
152
  end
178
153
 
179
- check :check_fuzzed
180
- def check_fuzzed
181
- messages << "fuzzed_data" if fuzzed?(first_table)
154
+ # TODO: need to parse SQL here I think
155
+ def simple_table_scan?
156
+ @rows.size == 1 && (@sql !~ /order by/i) &&
157
+ (@rows.first['using_index'] || !(@sql =~ /\s+WHERE\s+/i))
182
158
  end
183
159
 
184
160
  # TODO: we don't catch some cases like SELECT * from foo where index_col = 1 limit 1
@@ -187,129 +163,60 @@ module Shiba
187
163
  def check_simple_table_scan
188
164
  if simple_table_scan?
189
165
  if limit
190
- messages << 'limited_scan'
166
+ @result.messages << { tag: 'limited_scan', cost: limit, table: @rows.first['table'] }
191
167
  @cost = limit
192
168
  end
193
169
  end
194
170
  end
195
171
 
196
- check :check_derived
197
- def check_derived
198
- if derived?
199
- # select count(*) from ( select 1 from foo where blah )
200
- @rows.shift
201
- return run_checks!
202
- end
203
- end
204
-
205
-
206
- check :tag_query_type
207
- def tag_query_type
208
- access_type = first['access_type']
209
-
210
- if access_type.nil?
211
- @cost = 0
212
- return
213
- end
214
-
215
- access_type = 'tablescan' if access_type == 'ALL'
216
- messages << "access_type_" + access_type
217
- end
218
-
219
- #check :check_index_walk
220
- # disabling this one for now, it's not quite good enough and has a high
221
- # false-negative rate.
222
- def check_index_walk
223
- if first['index_walk']
224
- @cost = limit
225
- messages << 'index_walk'
226
- end
227
- end
228
-
229
- check :check_key_size
230
- def check_key_size
231
- # TODO: if possible_keys but mysql chooses NULL, this could be a test-data issue,
232
- # pick the best key from the list of possibilities.
233
- #
234
- if first_key
235
- @cost = @stats.estimate_key(first_table, first_key, first['used_key_parts'])
236
- else
237
- if first['possible_keys'].nil?
238
- # if no possibile we're table scanning, use PRIMARY to indicate that cost.
239
- # note that this can be wildly inaccurate bcs of WHERE + LIMIT stuff.
240
- @cost = table_size
241
- else
242
- if @options[:force_key]
243
- # we were asked to force a key, but mysql still told us to fuck ourselves.
244
- # (no index used)
245
- #
246
- # there seems to be cases where mysql lists `possible_key` values
247
- # that it then cannot use, seen this in OR queries.
248
- @cost = table_size
249
- else
250
- possibilities = [table_size]
251
- possibilities += first['possible_keys'].map do |key|
252
- estimate_row_count_with_key(key)
253
- end
254
- @cost = possibilities.compact.min
255
- end
172
+ check :check_fuzzed
173
+ def check_fuzzed
174
+ h = {}
175
+ @rows.each do |row|
176
+ t = row['table']
177
+ if @stats.fuzzed?(t)
178
+ h[t] = @stats.table_count(t)
256
179
  end
257
180
  end
181
+ if h.any?
182
+ @result.messages << { tag: "fuzzed_data", tables: h }
183
+ end
258
184
  end
259
185
 
260
186
  def check_return_size
261
187
  if limit
262
- @return_size = limit
188
+ return_size = limit
263
189
  elsif aggregation?
264
- @return_size = 1
190
+ return_size = 1
265
191
  else
266
- @return_size = @cost
192
+ return_size = @result.result_size
267
193
  end
268
194
 
269
- if @return_size && @return_size > 100
270
- messages << "retsize_bad"
195
+ if return_size && return_size > 100
196
+ @result.messages << { tag: "retsize_bad", result_size: return_size }
271
197
  else
272
- messages << "retsize_good"
198
+ @result.messages << { tag: "retsize_good", result_size: return_size }
273
199
  end
274
200
  end
275
201
 
276
- def estimate_row_count_with_key(key)
277
- explain = Explain.new(@sql, @stats, @backtrace, force_key: key)
278
- explain.run_checks!
279
- rescue Mysql2::Error => e
280
- if /Key .+? doesn't exist in table/ =~ e.message
281
- return nil
202
+ def run_checks!
203
+ # first run top-level checks
204
+ _run_checks! do
205
+ :stop if @cost
282
206
  end
283
207
 
284
- raise e
285
- end
286
-
287
- def ignore?
288
- !!ignore_line_and_backtrace_line
289
- end
290
-
291
- def ignore_line_and_backtrace_line
292
- ignore_files = Shiba.config['ignore']
293
- if ignore_files
294
- ignore_files.each do |i|
295
- file, method = i.split('#')
296
- @backtrace.each do |b|
297
- next unless b.include?(file)
298
- next if method && !b.include?(method)
299
- return [i, b]
300
- end
208
+ if @cost
209
+ # we've decided to stop further analysis at the query level
210
+ @result.cost = @cost
211
+ else
212
+ # run per-table checks
213
+ 0.upto(@rows.size - 1) do |i|
214
+ check = Checks.new(@rows, i, @stats, @options, @result)
215
+ check.run_checks!
301
216
  end
302
217
  end
303
- nil
304
- end
305
218
 
306
- def run_checks!
307
- self.class.get_checks.each do |check|
308
- res = send(check)
309
- break if @cost
310
- end
311
219
  check_return_size
312
- @cost
313
220
  end
314
221
 
315
222
  def humanized_explain
@@ -318,5 +225,19 @@ module Shiba
318
225
  #h
319
226
  @explain_json
320
227
  end
228
+
229
+ def other_paths
230
+ if Shiba.connection.mysql?
231
+ @rows.map do |r|
232
+ next [] unless r['possible_keys'] && r['key'].nil?
233
+ possible = r['possible_keys'] - [r['key']]
234
+ possible.map do |p|
235
+ Explain.new(@sql, @stats, @backtrace, force_key: p) rescue nil
236
+ end.compact
237
+ end.flatten
238
+ else
239
+ []
240
+ end
241
+ end
321
242
  end
322
243
  end
@@ -0,0 +1,24 @@
1
+ module Shiba
2
+ class Explain
3
+ module CheckSupport
4
+ module ClassMethods
5
+ def check(c)
6
+ @checks ||= []
7
+ @checks << c
8
+ end
9
+
10
+ def get_checks
11
+ @checks
12
+ end
13
+ end
14
+
15
+ def _run_checks!(&block)
16
+ self.class.get_checks.each do |check|
17
+ res = send(check)
18
+ break if yield == :stop
19
+ end
20
+ end
21
+ end
22
+ end
23
+ end
24
+
@@ -0,0 +1,133 @@
1
+ require 'shiba/explain/check_support'
2
+
3
+ module Shiba
4
+ class Explain
5
+ class Checks
6
+ include CheckSupport
7
+ extend CheckSupport::ClassMethods
8
+
9
+ def initialize(rows, index, stats, options, result)
10
+ @rows = rows
11
+ @row = rows[index]
12
+ @index = index
13
+ @stats = stats
14
+ @options = options
15
+ @result = result
16
+ @tbl_message = {}
17
+ end
18
+
19
+ attr_reader :cost
20
+
21
+ def table
22
+ @row['table']
23
+ end
24
+
25
+ def table_size
26
+ @stats.table_count(table)
27
+ end
28
+
29
+ def add_message(tag, extra = {})
30
+ @result.messages << { tag: tag, table_size: table_size, table: table }.merge(extra)
31
+ end
32
+
33
+ check :check_derived
34
+ def check_derived
35
+ if table =~ /<derived.*?>/
36
+ # select count(*) from ( select 1 from foo where blah )
37
+ add_message('derived_table', size: nil)
38
+ @cost = 0
39
+ end
40
+ end
41
+
42
+ check :tag_query_type
43
+ def tag_query_type
44
+ @access_type = @row['access_type']
45
+
46
+ if @access_type.nil?
47
+ @cost = 0
48
+ return
49
+ end
50
+
51
+ @access_type = 'tablescan' if @access_type == 'ALL'
52
+ @access_type = "access_type_" + @access_type
53
+ end
54
+
55
+ check :check_join
56
+ def check_join
57
+ if @row['join_ref']
58
+ @access_type.sub!("access_type", "join_type")
59
+ # TODO MAYBE: are multiple-table joins possible? or does it just ref one table?
60
+ ref = @row['join_ref'].find { |r| r != 'const' }
61
+ table = ref.split('.')[1]
62
+ @tbl_message['join_to'] = table
63
+ end
64
+ end
65
+
66
+ #check :check_index_walk
67
+ # disabling this one for now, it's not quite good enough and has a high
68
+ # false-negative rate.
69
+ def check_index_walk
70
+ if first['index_walk']
71
+ @cost = limit
72
+ add_message("index_walk")
73
+ end
74
+ end
75
+
76
+ check :check_key_size
77
+ def check_key_size
78
+ if @row['key']
79
+ rows_read = @stats.estimate_key(table, @row['key'], @row['used_key_parts'])
80
+ else
81
+ rows_read = table_size
82
+ end
83
+
84
+ # TBD: this appears to come from a couple of bugs.
85
+ # one is we're not handling mysql index-merges, the other is that
86
+ # we're not handling mysql table aliasing.
87
+ if rows_read.nil?
88
+ rows_read = 1
89
+ end
90
+
91
+ if @row['join_ref']
92
+ # when joining, we'll say we read "@cost" rows -- but up to
93
+ # a max of the table size. I'm not sure this assumption is *exactly*
94
+ # true but it feels good enough to start; a decent hash join should
95
+ # nullify the cost of re-reading rows. I think.
96
+ @cost = [@result.result_size * rows_read, table_size || 2**32].min
97
+
98
+ # poke holes in this. Is this even remotely accurate?
99
+ # We're saying that if we join to a a table with 100 rows per item
100
+ # in the index, for each row we'll be joining in 100 more rows. Is that true?
101
+ @result.result_size *= rows_read
102
+ else
103
+ @cost = rows_read
104
+ @result.result_size += rows_read
105
+ end
106
+
107
+ @result.cost += @cost
108
+
109
+ @tbl_message['cost'] = @cost
110
+ @tbl_message['index'] = @row['key']
111
+ @tbl_message['index_used'] = @row['used_key_parts']
112
+ add_message(@access_type, @tbl_message)
113
+ end
114
+
115
+ def estimate_row_count_with_key(key)
116
+ explain = Explain.new(@sql, @stats, @backtrace, force_key: key)
117
+ explain.run_checks!
118
+ rescue Mysql2::Error => e
119
+ if /Key .+? doesn't exist in table/ =~ e.message
120
+ return nil
121
+ end
122
+
123
+ raise e
124
+ end
125
+
126
+ def run_checks!
127
+ _run_checks! do
128
+ :stop if @cost
129
+ end
130
+ end
131
+ end
132
+ end
133
+ end
@@ -11,6 +11,10 @@ module Shiba
11
11
  res['rows'] = t['rows_examined_per_scan']
12
12
  res['filtered'] = t['filtered']
13
13
 
14
+ if t['ref'] && t['ref'].any? { |r| r != "const" }
15
+ res['join_ref'] = t['ref']
16
+ end
17
+
14
18
  if t['possible_keys'] && t['possible_keys'] != [res['key']]
15
19
  res['possible_keys'] = t['possible_keys']
16
20
  end
@@ -0,0 +1,18 @@
1
+ module Shiba
2
+ class Explain
3
+ class Result
4
+ # cost: total rows read
5
+ # result_size: approximate rows returned to the client
6
+ # messages: list of hashes detailing the operations
7
+
8
+ def initialize
9
+ @messages = []
10
+ @cost = 0
11
+ @result_size = 0
12
+ end
13
+
14
+ attr_accessor :messages, :cost, :result_size
15
+ end
16
+ end
17
+ end
18
+
@@ -48,6 +48,7 @@ module Shiba
48
48
  # The more indexes, the bigger the table. Seems to rank tables fairly well.
49
49
  def guess_table_sizes
50
50
  index_counts = connection.count_indexes_by_table
51
+ return if index_counts.empty?
51
52
 
52
53
  # 90th table percentile based on number of indexes
53
54
  # round down so we don't blow up on small tables
@@ -45,7 +45,9 @@ module Shiba
45
45
  url.chomp!
46
46
  url.gsub!('git@github.com:', 'https://github.com/')
47
47
  url.gsub!(/\.git$/, '')
48
- url + '/blob/master/'
48
+
49
+ branch = `git symbolic-ref HEAD`.strip.split('/').last
50
+ url + "/blob/#{branch}"
49
51
  end
50
52
 
51
53
  def make_web!
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  fuzzed_data:
3
3
  title: Fuzzed Data
4
- summary: Shiba doesn't know the size of <b>{{table}}</b>. For these purposes we set the table size to <b>{{table_size}}</b>.
4
+ summary: "Table sizes estimated as follows -- {{ fuzz_table_sizes }}"
5
5
  description: |
6
6
  We're not sure how much data this table will hold in the future, so we've pretended
7
7
  there's 6000 rows in it. This can lead to a lot of false positives. To
@@ -21,24 +21,32 @@ access_type_const:
21
21
  This query selects at *most* one row, which is about as good as things get.
22
22
  level: success
23
23
  access_type_ref:
24
- title: Indexed
25
- summary: The database reads {{ cost }} rows in <b>{{ table }}</b> via the <b>{{ key }}</b> index ({{ key_parts }}).
24
+ title: Index Scan
25
+ summary: The database reads {{ formatted_cost }} rows in <b>{{ table }}</b> via the <i>{{ index }}</i> index ({{ key_parts }}).
26
26
  description: |
27
27
  This query uses an index to find rows that match a single value. Often this
28
28
  has very good performance, but it depends on how many rows match that value.
29
29
  level: success
30
+ join_type_eq_ref:
31
+ title: Indexed Join
32
+ summary: <b>{{ table }}</b> is joined to <b>{{ join_to }}</b> via the <i>{{ index }}</i> index, reading 1 row per joined item.
33
+ level: success
34
+ join_type_ref:
35
+ title: Indexed Join
36
+ summary: <b>{{ table }}</b> is joined to <b>{{ join_to }}</b> via the <i>{{ index }}</i> index, reading {{ formatted_cost }} rows per joined item.
37
+ level: success
30
38
  access_type_range:
31
39
  title: Indexed
32
- summary: The database uses a "range scan" to read more than {{ cost }} rows in {{ table }} via the <b>{{ key }}</b> index ({{ key_parts }})
40
+ summary: The database uses a "range scan" to read more than {{ formatted_cost }} rows in {{ table }} via the <b>{{ index }}</b> index ({{ key_parts }})
33
41
  description: |
34
42
  This query uses an index to find rows that match a range of values, for instance
35
43
  `WHERE indexed_value in (1,2,5,6)` or `WHERE indexed_value >= 5 AND indexed_value <= 15`.
36
44
  It's very hard to estimate how many rows this query will consider in production, so we've
37
- upped the cost of this query.
45
+ upped the cost of this query.
38
46
  level: info
39
47
  access_type_tablescan:
40
48
  title: Table Scan
41
- summary: The database reads <b>100%</b> ({{ table_size }}) of the rows in <b>{{ table }}</b>, skipping any indexes.
49
+ summary: The database reads {{ formatted_cost }} of the rows in <b>{{ table }}</b>, skipping any indexes.
42
50
  description: |
43
51
  This query doesn't use any indexes to find data, meaning this query will need to evaluate
44
52
  every single row in the table. This is about the worst of all possible worlds.
@@ -49,10 +57,10 @@ access_type_tablescan:
49
57
  level: danger
50
58
  limited_scan:
51
59
  title: Limited Scan
52
- summary: The database reads {{ query.cost }} rows from {{ query.table }}.
60
+ summary: The database reads {{ formatted_cost }} rows from {{ table }}.
53
61
  description: |
54
62
  This query doesn't use any indexes to find data, but since it doesn't care about
55
- ordering and it doesn't have any conditions, it only ever reads {{ query.cost }} rows.
63
+ ordering and it doesn't have any conditions, it only ever reads {{ formatted_cost }} rows.
56
64
  level: info
57
65
  ignored:
58
66
  title: Ignored
@@ -68,9 +76,9 @@ index_walk:
68
76
  level: success
69
77
  retsize_bad:
70
78
  title: Big Results
71
- summary: The database returns {{ return_size }} rows to the client.
79
+ summary: The database returns {{ result_size }} rows to the client.
72
80
  level: danger
73
81
  retsize_good:
74
82
  title: Small Results
75
- summary: The database returns {{ return_size }} row(s) to the client.
83
+ summary: The database returns {{ result_size }} row(s) to the client.
76
84
  level: success
@@ -13,8 +13,9 @@ module Shiba
13
13
  def render(explain)
14
14
  body = ""
15
15
 
16
- data = present(explain)
17
- explain["tags"].each do |tag|
16
+ explain["messages"].each do |message|
17
+ tag = message['tag']
18
+ data = present(message)
18
19
  body << @templates[tag]["title"]
19
20
  body << ": "
20
21
  body << render_template(@templates[tag]["summary"], data)
@@ -32,22 +33,25 @@ module Shiba
32
33
  end
33
34
  # convert to markdown
34
35
  rendered.gsub!(/<\/?b>/, "**")
36
+ rendered.gsub!(/<\/?i>/, "_")
35
37
  rendered
36
38
  end
37
39
 
38
- def present(explain)
39
- used_key_parts = explain["used_key_parts"] || []
40
-
41
- { "table" => explain["table"],
42
- "table_size" => explain["table_size"],
43
- "key" => explain["key"],
44
- "return_size" => explain["return_size"],
45
- "key_parts" => used_key_parts.join(","),
46
- "cost" => cost(explain)
40
+ def present(message)
41
+ {
42
+ "fuzz_table_sizes" => fuzzed_sizes(message),
43
+ "table" => message["table"],
44
+ "table_size" => message["table_size"],
45
+ "result_size" => message["result_size"],
46
+ "index" => message["index"],
47
+ "key_parts" => (message["index_used"] || []).join(','),
48
+ "size" => message["size"],
49
+ "formatted_cost" => formatted_cost(message)
47
50
  }
48
51
  end
49
52
 
50
- def cost(explain)
53
+ def formatted_cost(explain)
54
+ return nil unless explain["cost"] && explain["table_size"]
51
55
  percentage = (explain["cost"] / explain["table_size"]) * 100.0;
52
56
 
53
57
  if explain["cost"] > 100 && percentage > 1
@@ -57,6 +61,12 @@ module Shiba
57
61
  end
58
62
  end
59
63
 
64
+ def fuzzed_sizes(message)
65
+ return nil unless message["tables"]
66
+ message['tables'].group_by { |k, v| v }.map do |size, arr|
67
+ size.to_s + ": " + arr.map(&:first).join(', ')
68
+ end.join(". ")
69
+ end
60
70
  end
61
71
  end
62
- end
72
+ end
@@ -35,7 +35,7 @@ module Shiba
35
35
  position = diff.find_position(file, line_number.to_i)
36
36
 
37
37
  if options["submit"]
38
- explain = keep_only_dangerous_tags(explain)
38
+ explain = keep_only_dangerous_messages(explain)
39
39
  end
40
40
 
41
41
  { body: renderer.render(explain),
@@ -90,9 +90,12 @@ module Shiba
90
90
  end
91
91
  end
92
92
 
93
- def keep_only_dangerous_tags(explain)
93
+ def keep_only_dangerous_messages(explain)
94
94
  explain_b = explain.dup
95
- explain_b["tags"] = explain_b["tags"].select { |tag| tags[tag]["level"] == "danger" }
95
+ explain_b["messages"] = explain_b["messages"].select do |message|
96
+ tag = message['tag']
97
+ tags[tag]["level"] == "danger"
98
+ end
96
99
  explain_b
97
100
  end
98
101
 
@@ -133,4 +136,4 @@ module Shiba
133
136
  end
134
137
 
135
138
  end
136
- end
139
+ end
@@ -1,3 +1,3 @@
1
1
  module Shiba
2
- VERSION = "0.3.0"
2
+ VERSION = "0.4.0"
3
3
  end
@@ -2,7 +2,7 @@
2
2
  .query-info-box {
3
3
  border: 1px solid black;
4
4
  padding: 10px;
5
- margin: 20px;
5
+ margin: 5px;
6
6
  }
7
7
 
8
8
  .backtrace {
@@ -41,11 +41,30 @@
41
41
  border-color: #ffb100;
42
42
  }
43
43
 
44
- .shiba-info-list {
45
- list-style-type:none;
46
- margin: 5px;
47
- padding: 0;
44
+ .shiba-badge-td {
45
+ width: 100px;
46
+ }
47
+
48
+ .shiba-messages {
49
+ margin: 0px;
50
+ margin-top: 10px;
51
+ width: 100%;
52
+ }
53
+
54
+ .shiba-messages td {
55
+ padding-top: 5px;
48
56
  }
49
57
 
58
+ .shiba-message {
59
+ padding-right: 10px;
60
+ width: 90%;
61
+ }
62
+
63
+ .running-totals {
64
+ align: right;
65
+ font-family: monospace;
66
+ }
67
+
68
+
50
69
  [v-cloak] { display: none }
51
70
 
@@ -43,11 +43,21 @@
43
43
  Object.assign(this, obj);
44
44
  this.severityIndex = severityIndexes[this.severity];
45
45
  this.splitSQL();
46
+ this.makeSearchString();
46
47
  };
47
48
 
48
49
  Query.prototype = {
50
+ makeSearchString: function() {
51
+ var arr = [this.sql];
52
+ arr = arr.concat(this.messages.map(function(m) { return m.tag }).join(':'));
53
+ arr = arr.concat(this.backtrace.join(':'));
54
+
55
+ this.searchString = arr.join(':').toLowerCase();
56
+ },
49
57
  hasTag: function(tag) {
50
- return this.tags.includes(tag);
58
+ return this.messages.find(function(m) {
59
+ return m.tag == tag;
60
+ });
51
61
  },
52
62
  splitSQL: function() {
53
63
  this.sqlFragments = this.sql.match(/(SELECT\s)(.*?)(\s+FROM .*)/i);
@@ -72,10 +82,20 @@
72
82
  queriesByTable.push(q);
73
83
  }
74
84
 
75
- if ( q.hasTag("fuzzed_data" ) )
85
+ if ( q.hasTag("fuzzed_data") )
76
86
  queriesHaveFuzzed = true;
77
87
 
78
88
  q.expandSelect = false;
89
+
90
+ var rCost = 0;
91
+ q.messages.forEach(function(m) {
92
+ if ( m.cost ) {
93
+ rCost += m.cost;
94
+ m.running_cost = rCost;
95
+ } else {
96
+ m.running_cost = '';
97
+ }
98
+ });
79
99
  });
80
100
 
81
101
  var f = sortByFunc(['severityIndex', 'table']);
@@ -106,11 +126,9 @@
106
126
  <div v-for="backtrace in query.backtrace" v-html="makeURL(backtrace, backtrace)"></div>
107
127
  </div>
108
128
  </div>
109
- <ul class="shiba-info-list">
110
- <li v-for="tag in query.tags">
111
- <component v-bind:is="'tag-' + tag" v-bind:query="query"></component>
112
- </li>
113
- </ul>
129
+ <table class="shiba-messages">
130
+ <component v-for="message in query.messages" v-bind:is="'tag-' + message.tag" v-bind="message"></component>
131
+ </table>
114
132
  <div v-if="!rawExpanded">
115
133
  <a href="#" v-on:click.prevent="rawExpanded = !rawExpanded">See full EXPLAIN</a>
116
134
  </div>
@@ -124,47 +142,82 @@
124
142
  </div>
125
143
  </script>
126
144
 
145
+ <script>
146
+ var greenToRedGradient = [
147
+ '#57bb8a','#63b682', '#73b87e', '#84bb7b', '#94bd77', '#a4c073', '#b0be6e',
148
+ '#c4c56d', '#d4c86a', '#e2c965', '#f5ce62', '#f3c563', '#e9b861', '#e6ad61',
149
+ '#ecac67', '#e9a268', '#e79a69', '#e5926b', '#e2886c', '#e0816d', '#dd776e'];
150
+
151
+ var templateComputedFunctions = {
152
+ key_parts: function() {
153
+ if ( this.index_used && this.index_used.length > 0 )
154
+ return this.index_used.join(',');
155
+ else
156
+ return "";
157
+ },
158
+ fuzz_table_sizes: function() {
159
+ var h = {};
160
+ var tables = this.tables;
161
+
162
+ Object.keys(tables).forEach(function(k) {
163
+ console.log(k);
164
+ var size = tables[k];
165
+ if ( !h[size] )
166
+ h[size] = [];
167
+
168
+ h[size].push(k);
169
+ });
170
+
171
+ var sizesDesc = Object.keys(h).sort(function(a, b) { return b - a });
172
+ var str = "";
173
+
174
+ sizesDesc.forEach(function(size) {
175
+ str = str + h[size].join(", ") + ": " + size.toLocaleString() + " rows. ";
176
+ });
177
+
178
+ return str;
179
+ },
180
+ formatted_cost: function() {
181
+ var costPercentage = (this.cost / this.table_size) * 100.0;
182
+ if ( this.cost > 100 && costPercentage > 1 ) // todo: make better
183
+ return `${costPercentage.toFixed()}% (${this.cost.toLocaleString()}) of the`;
184
+ else
185
+ return this.cost.toLocaleString();
186
+ },
187
+ costToColor: function() {
188
+ var goodColor = [34, 160, 60];
189
+ var endColor = [255, 0, 0];
190
+ var costScale = this.cost ? this.cost / 5000 : 0;
191
+
192
+ if ( costScale > 1 )
193
+ costScale = 1;
194
+
195
+ var pos = (costScale * (greenToRedGradient.length - 1)).toFixed();
196
+
197
+ debugger;
198
+ return "border-color: " + greenToRedGradient[pos];
199
+ }
200
+ }
201
+ </script>
127
202
  <% data[:tags].each do |tag, h| %>
128
- <script type="text/x-template" id="tag-<%= tag %>-template">
129
- <span><a class="badge shiba-badge-<%= h['level'] %>"><%= h['title'] %></a><%= h['summary'] %></span>
203
+ <script type="text/x-template" id="tag-<%= tag %>-template">
204
+ <tr>
205
+ <td class="shiba-badge-td">
206
+ <a class="badge" v-bind:style="costToColor"><%= h['title'] %></a>
207
+ </td>
208
+ <td class="shiba-message">
209
+ <%= h['summary'] %>
210
+ </td>
211
+ <td class="running-totals">
212
+ {{ running_cost.toLocaleString() }}
213
+ </td>
214
+ </tr>
130
215
  </script>
131
216
  <script>
132
217
  Vue.component('tag-<%= tag %>', {
133
218
  template: '#tag-<%= tag %>-template',
134
- props: [ 'query' ],
135
- computed: {
136
-
137
- table: function() {
138
- return this.query.table;
139
- },
140
-
141
- table_size: function() {
142
- return this.query.table_size;
143
- },
144
-
145
- key: function() {
146
- return this.query.key;
147
- },
148
-
149
- return_size: function() {
150
- return this.query.return_size.toLocaleString();
151
- },
152
-
153
- key_parts: function() {
154
- if ( this.query.used_key_parts && this.query.used_key_parts.length > 0 )
155
- return this.query.used_key_parts.join(',');
156
- else
157
- return "";
158
- },
159
-
160
- cost: function() {
161
- var costPercentage = (this.query.cost / this.query.table_size) * 100.0;
162
- if ( this.query.cost > 100 && costPercentage > 1 ) // todo: make better
163
- return `${costPercentage.toFixed()}% (${this.query.cost.toLocaleString()}) of the`;
164
- else
165
- return this.query.cost.toLocaleString();
166
- }
167
- }
219
+ props: [ 'table_size', 'result_size', 'table', 'cost', 'index', 'join_to', 'index_used', 'running_cost', 'tables' ],
220
+ computed: templateComputedFunctions
168
221
  });
169
222
  </script>
170
223
  <% end %>
@@ -269,13 +322,13 @@
269
322
  this.expanded = !this.expanded;
270
323
  },
271
324
  shortLocation: function(query) {
272
- if ( !query.backtrace )
325
+ if ( !query.backtrace || query.backtrace.length == 0 )
273
326
  return null;
274
327
  var location = query.backtrace[0];
275
328
  return location.match(/([^\/]+:\d+):/)[1];
276
329
  },
277
330
  makeURL: function(line, content) {
278
- if ( !data.url )
331
+ if ( !data.url || !line )
279
332
  return content;
280
333
 
281
334
  var matches = line.match(/(.+):(\d+):/);
@@ -304,7 +357,6 @@
304
357
  },
305
358
  methods: {
306
359
  updateSearch: _.debounce(function (e) {
307
- console.log("ok, updating...");
308
360
  this.search = e.target.value;
309
361
  }, 500)
310
362
  },
@@ -314,8 +366,7 @@
314
366
  var filtered = [];
315
367
  var lcSearch = this.search.toLowerCase();
316
368
  this.highQ.concat(this.lowQ).forEach(function(q) {
317
- var searchString = q.sql + ":" + q.tags.join(":") + q.backtrace.join(":");
318
- if ( searchString.toLowerCase().includes(lcSearch) )
369
+ if ( q.searchString.includes(lcSearch) )
319
370
  filtered.push(q);
320
371
  });
321
372
  return filtered;
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: shiba
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ben Osheroff
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2019-02-23 00:00:00.000000000 Z
12
+ date: 2019-02-27 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: activesupport
@@ -101,6 +101,7 @@ files:
101
101
  - Rakefile
102
102
  - TODO
103
103
  - bin/console
104
+ - bin/dump_stats
104
105
  - bin/explain
105
106
  - bin/fingerprint
106
107
  - bin/mysql_dump_stats
@@ -127,9 +128,12 @@ files:
127
128
  - lib/shiba/connection/postgres.rb
128
129
  - lib/shiba/diff.rb
129
130
  - lib/shiba/explain.rb
131
+ - lib/shiba/explain/check_support.rb
132
+ - lib/shiba/explain/checks.rb
130
133
  - lib/shiba/explain/mysql_explain.rb
131
134
  - lib/shiba/explain/postgres_explain.rb
132
135
  - lib/shiba/explain/postgres_explain_index_conditions.rb
136
+ - lib/shiba/explain/result.rb
133
137
  - lib/shiba/fuzzer.rb
134
138
  - lib/shiba/index.rb
135
139
  - lib/shiba/index_stats.rb
@@ -174,7 +178,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
174
178
  version: '0'
175
179
  requirements: []
176
180
  rubyforge_project:
177
- rubygems_version: 2.5.1
181
+ rubygems_version: 2.7.6
178
182
  signing_key:
179
183
  specification_version: 4
180
184
  summary: A gem that attempts to find bad queries before you shoot self in foot