shiba 0.1.2 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/shiba.rb CHANGED
@@ -1,12 +1,14 @@
1
1
  require "shiba/version"
2
2
  require "shiba/configure"
3
3
  require "mysql2"
4
+ require "pp"
5
+ require "byebug" if ENV['SHIBA_DEBUG']
4
6
 
5
7
  module Shiba
6
8
  class Error < StandardError; end
7
9
 
8
10
  def self.configure(options)
9
- @connection_hash = options.select { |k, v| ['username', 'database', 'host', 'password'].include?(k) }
11
+ @connection_hash = options.select { |k, v| [ 'default_file', 'default_group', 'username', 'database', 'host', 'password'].include?(k) }
10
12
  @main_config = Configure.read_config_file(options['config'], "config/shiba.yml")
11
13
  @index_config = Configure.read_config_file(options['index'], "config/shiba_index.yml")
12
14
  end
@@ -69,7 +69,7 @@ module Shiba
69
69
  protected
70
70
 
71
71
  def dump_error(e, query)
72
- $stderr.puts "got exception trying to explain: #{e.message}"
72
+ $stderr.puts "got #{e.class.name} exception trying to explain: #{e.message}"
73
73
  $stderr.puts "query: #{query.sql} (index #{query.index})"
74
74
  $stderr.puts e.backtrace.join("\n")
75
75
  end
@@ -79,10 +79,7 @@ module Shiba
79
79
  begin
80
80
  explain = query.explain
81
81
  rescue Mysql2::Error => e
82
- # we're picking up crap on the command-line that's not good SQL. ignore it.
83
- if !(e.message =~ /You have an error in your SQL syntax/)
84
- dump_error(e, query)
85
- end
82
+ dump_error(e, query) if verbose?
86
83
  rescue StandardError => e
87
84
  dump_error(e, query)
88
85
  end
@@ -96,5 +93,9 @@ module Shiba
96
93
  def write(line)
97
94
  @output.puts(line)
98
95
  end
96
+
97
+ def verbose?
98
+ @options['verbose'] == true
99
+ end
99
100
  end
100
101
  end
@@ -0,0 +1,56 @@
1
+ require 'open3'
2
+
3
module Shiba
  # Helpers for reducing a Ruby backtrace to the frames that belong to the
  # application, with well-known root directories stripped from each line.
  module Backtrace
    # Frames whose textual form matches this pattern are not considered
    # application code when looking for the first app caller.
    IGNORE = /\.rvm|gem|vendor\/|rbenv|seed|db|shiba|test|spec/

    # 8 backtrace lines starting from the app caller, cleaned of app/project cruft.
    #
    # @return [Array<String>, nil] cleaned backtrace lines, or nil when no
    #   frame in the current call stack falls outside IGNORE
    def self.from_app
      app_line_idx = caller_locations.index { |loc| loc.to_s !~ IGNORE }
      return unless app_line_idx

      # caller_locations with no arguments starts at depth 1, so index N in
      # that list is the frame at depth N + 1.
      caller_locations(app_line_idx + 1, 8).map { |loc| clean!(loc.to_s) }
    end

    # Strips the first known root directory (and the slash after it) from
    # +line+, modifying the string in place.
    #
    # @param line [String] a backtrace line or file path; must not be frozen
    # @return [String] the same String object that was passed in
    def self.clean!(line)
      line.sub!(backtrace_ignore_pattern, '')
      line
    end

    # NOTE: the original file had a bare `protected` here. It has no effect on
    # methods defined with `def self.`, so the methods below were, and remain,
    # publicly callable.

    # Builds (once) the pattern matching any known root directory followed by
    # an optional slash: gem paths, Rails.root when Rails is loaded, the git
    # repository root and $HOME.
    #
    # @return [Regexp]
    def self.backtrace_ignore_pattern
      @backtrace_ignore_pattern ||= begin
        # Work on a copy: Gem.path hands out RubyGems' own array, and the
        # appends / sorting below must not leak into gem resolution.
        paths = Gem.path.dup
        paths << Rails.root.to_s if defined?(Rails.root)
        paths << repo_root
        paths << ENV['HOME']
        # An empty entry would turn into an empty alternative that matches
        # everywhere, so drop those along with nils and duplicates.
        paths = paths.compact.uniq.reject(&:empty?)
        # match and replace longest path first
        paths = paths.sort_by { |path| -path.size }

        roots = Regexp.union(paths)
        # kill leading slash
        /(#{roots})\/?/
      end
    end

    # /user/git_repo => "/user/git_repo"
    # /user/not_a_repo => nil
    #
    # @return [String, nil] top-level directory of the enclosing git
    #   repository; nil outside a repository or when git cannot be run
    def self.repo_root
      out, status = Open3.capture2('git', 'rev-parse', '--show-toplevel', err: File::NULL)
      return unless status.success?

      root = out.lines.first
      root && root.chomp
    rescue SystemCallError
      # git is not installed or not executable: behave as "not a repository".
      nil
    end
  end
end
@@ -0,0 +1,103 @@
1
+ require 'json'
2
+ require 'open3'
3
+
4
+ require 'shiba/diff'
5
+ require 'shiba/backtrace'
6
+
7
module Shiba
  # Cross-references a log of explained queries (one JSON document per line)
  # with the current git diff and reports the expensive queries whose
  # backtrace touches a changed line.
  class Checker
    Result = Struct.new(:status, :message, :problems)

    attr_reader :options

    # @param options [Hash] string-keyed options; the keys read here are
    #   "verbose", "staged", "unstaged" and "branch"
    def initialize(options)
      @options = options
    end

    # @param log [String] path to the JSON-lines query log
    # @return [Result] :pass (message only set in verbose mode) or :fail with
    #   the offending log entries in +problems+
    # @raise [SystemCallError] when +log+ cannot be read
    def run(log)
      verbose = options['verbose']
      puts cmd if verbose

      if changes.empty?
        return Result.new(:pass, verbose ? "No changes found in git" : nil)
      end

      # NOTE(review): MAGIC_COST is not defined in this class; presumably it
      # is provided by the enclosing Shiba namespace. Confirm.
      problems = select_lines_with_changed_files(log).select do |explain|
        explain["cost"] && explain["cost"] > MAGIC_COST &&
          backtrace_has_updated_line?(explain["backtrace"], updated_lines)
      end

      if problems.empty?
        return Result.new(:pass, verbose ? "No problems found" : nil)
      end

      Result.new(:fail, "Potential problems", problems)
    end

    protected

    # @param backtrace [Array<String>, nil] lines shaped like "path:lineno:..."
    # @param updates [Array<(String, Range)>] changed line numbers per path
    # @return [Boolean] whether any backtrace line points at a changed line
    def backtrace_has_updated_line?(backtrace, updates)
      # Log entries without a backtrace cannot match anything.
      Array(backtrace).any? do |bl|
        # A line without ":<digits>:" yields 0, which never is a real line.
        line_number = bl[/:(\d+):/, 1].to_i
        updates.any? do |path, lines|
          path && bl.start_with?(path) && lines.include?(line_number)
        end
      end
    end

    # Parses the log lines that mention at least one changed file. Paths are
    # matched as literal substrings, in-process, so no path ever reaches a
    # shell.
    #
    # @param log [String] path to the JSON-lines query log
    # @return [Array<Hash>]
    def select_lines_with_changed_files(log)
      paths = changes.split("\n").reject(&:empty?)
      File.foreach(log)
          .select { |line| paths.any? { |path| line.include?(path) } }
          .map { |line| JSON.parse(line) }
    end

    # @return [String] newline-separated paths of files changed in git
    def changes
      @changes ||= git_diff('--name-only', '--diff-filter=d')
    end

    # @return [Array<(String, Range)>] changed line numbers per cleaned path
    def updated_lines
      @updated_lines ||= begin
        diff = git_diff('--unified=0', '--diff-filter=d')
        Shiba::Diff.new(diff).updated_lines.map do |path, lines|
          [ Shiba::Backtrace.clean!(path), lines ]
        end
      end
    end

    # The revision part of the git diff command line, as a display string
    # with a leading space (or "" for the unstaged diff).
    #
    # @return [String]
    def cmd
      revision_args.map { |arg| " #{arg}" }.join
    end

    # @return [Array<String>] the same selection as #cmd, as discrete arguments
    def revision_args
      if options["staged"]
        ["--staged"]
      elsif options["unstaged"]
        []
      elsif options["branch"]
        ["HEAD...#{options["branch"]}"]
      else
        ["HEAD"]
      end
    end

    # Runs `git diff` for the selected revisions without going through a
    # shell, so option values are passed verbatim as single arguments.
    #
    # @param flags [Array<String>] extra git diff flags
    # @return [String] git's standard output
    def git_diff(*flags)
      out, status = Open3.capture2('git', 'diff', *revision_args, *flags)
      error("Failed to read changes", status.exitstatus) unless status.success?
      out
    end

    # Aborts the check. Raises Shiba::Error when that class is loaded and a
    # plain StandardError otherwise.
    #
    # @param message [String]
    # @param status [Integer, nil] exit status of the failed command
    def error(message, status)
      error_class = defined?(Shiba::Error) ? Shiba::Error : StandardError
      raise error_class, "#{message} (git exited with status #{status})"
    end
  end
end
@@ -27,6 +27,13 @@ module Shiba
27
27
  raise e, "Cannot load `#{path}`:\n#{e.message}", e.backtrace
28
28
  end
29
29
 
30
+ # loosely based on https://dev.mysql.com/doc/refman/8.0/en/option-files.html
31
+ def self.mysql_config_path
32
+ paths = [ File.join(Dir.home, '.mylogin.cnf'), File.join(Dir.home, '.my.cnf') ]
33
+
34
+ paths.detect { |p| File.exist?(p) }
35
+ end
36
+
30
37
  def self.read_config_file(option_file, default)
31
38
  file_to_read = nil
32
39
  if option_file
@@ -78,25 +85,38 @@ module Shiba
78
85
  options["limit"] = l.to_i
79
86
  end
80
87
 
81
- opts.on("-s","--stats FILES", "location of index statistics tsv file") do |f|
82
- options["stats"] = f
83
- end
84
-
85
88
  opts.on("-f", "--file FILE", "location of file containing queries") do |f|
86
89
  options["file"] = f
87
90
  end
88
91
 
89
- opts.on("-e", "--explain FILE", "write explain JSON to file. default: stdout") do |f|
90
- options["explain"] = f
92
+ opts.on("-j", "--json [FILE]", "write JSON report here. default: to stdout") do |f|
93
+ if f
94
+ options["json"] = File.open(f, 'w')
95
+ else
96
+ options["json"] = $stdout
97
+ end
91
98
  end
92
99
 
93
- opts.on("-o", "--output PATH", "path to put generated report in. default: /tmp") do |p|
94
- options["output"] = p
100
+ opts.on("-h", "--html FILE", "write html report here. Default to /tmp/explain.html") do |h|
101
+ options["html"] = h
95
102
  end
96
103
 
97
104
  opts.on("-t", "--test", "analyze queries at --file instead of analyzing a process") do |f|
98
105
  options["test"] = true
99
106
  end
107
+
108
+ opts.on("-v", "--verbose", "print internal runtime information") do
109
+ options["verbose"] = true
110
+ end
111
+
112
+ # This naming seems to be mysql convention, maybe we should just do our own thing though.
113
+ opts.on("--login-path", "The option group from the mysql config file to read from") do |f|
114
+ options["default_group"] = f
115
+ end
116
+
117
+ opts.on("--default-extras-file", "The option file to read mysql configuration from") do |f|
118
+ options["default_file"] = f
119
+ end
100
120
  end
101
121
  end
102
122
  end
data/lib/shiba/diff.rb ADDED
@@ -0,0 +1,119 @@
1
module Shiba
  # Parser for the output of `git diff --unified=0`.
  #
  # For simplicity, the default output of git diff is not supported by
  # #updated_lines' callers; the expected format is `git diff --unified=0`.
  #
  #   diff = `git diff --unified=0`
  #   Shiba::Diff.new(StringIO.new(diff)).updated_lines
  #   # => [["hello.rb", 1...3], ["test.rb", 5...6]]
  class Diff
    # +++ b/config/environments/test.rb
    FILE_PATTERN = /\A\+\+\+ b\/(.*?)\Z/

    # @@ -177,0 +178 @@ ...
    # @@ -177,0 +178,5 @@ ...
    # @@ -21 +24 @@ ...
    # @@ -5 +5 @@ ...
    #
    # Captures the destination start line and, when present, its line count.
    LINE_PATTERN = /\A@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@/

    # Outcome of the last #find_position call: :new, :file_not_found,
    # :line_not_found or :found_position.
    attr_reader :status

    # @param file [#each_line] the diff text: an IO, StringIO or String.
    #   Stream-like input is consumed from its current position and is not
    #   rewound.
    def initialize(file)
      @diff = file
      @status = :new
    end

    # Returns the file and line numbers that contain inserts. Deletions are ignored.
    #
    # @return [Array<(String, Range)>] one entry per hunk that adds lines;
    #   the range covers exactly the added destination lines
    def updated_lines
      path = nil
      found = []

      @diff.each_line do |line|
        if (header = FILE_PATTERN.match(line))
          path = header[1]
        elsif hunk_header?(line)
          line_numbers = line_numbers_for_destination(line)
          # A "+N,0" hunk only removes lines; it has nothing to report.
          found << [ path, line_numbers ] unless line_numbers.size.zero?
        end
      end

      found
    end

    # Returns the position in the diff, after the relevant file header,
    # that contains the specified file/lineno modification.
    # Only supports finding the position in the destination / newest version of the file.
    #
    # via https://developer.github.com/v3/pulls/comments/#create-a-comment
    # The position value equals the number of lines down from the first "@@"
    # hunk header in the file you want to add a comment. Here that header is
    # position 0 and every following diff line, later hunk headers included,
    # adds one.
    #
    # @param path [String] destination path as it appears after "+++ b/"
    # @param line_number [Integer] line number in the new version of the file
    # @return [Integer, nil] the position, or nil when the file or the line
    #   is not part of the diff (#status tells which)
    # @raise [StandardError] when the file header is not directly followed
    #   by a hunk header
    def find_position(path, line_number)
      file_header = "+++ b/#{path}"
      in_file = false
      position = -1 # becomes 0 on the first hunk header of the file
      skip = nil    # destination lines left before the target, once its hunk is found

      @diff.each_line do |line|
        unless in_file
          in_file = (line.chomp == file_header)
          next
        end

        position += 1

        if skip
          # Inside the hunk holding the target: only added ("+") and context
          # (" ") lines exist in the destination file; "-" lines and
          # "\ No newline at end of file" markers do not.
          next unless line.start_with?('+', ' ')

          if skip.zero?
            @status = :found_position
            return position
          end
          skip -= 1
        elsif hunk_header?(line)
          skip = line_numbers_for_destination(line).find_index(line_number)
        elsif position.zero?
          raise StandardError, "Expected hunk header to be after file header, but got '#{line}'"
        elsif file_header?(line)
          # Reached the next file without finding the line.
          @status = :line_not_found
          return nil
        end
      end

      @status = in_file ? :line_not_found : :file_not_found
      nil
    end

    protected

    def file_header?(line)
      line.match?(FILE_PATTERN)
    end

    def hunk_header?(line)
      line.match?(LINE_PATTERN)
    end

    # @param diff_line [String] a hunk header line
    # @return [Range] destination line numbers covered by the hunk; empty
    #   for a pure deletion. An omitted count means one line.
    def line_numbers_for_destination(diff_line)
      header = LINE_PATTERN.match(diff_line)
      return 0...0 unless header

      first = header[1].to_i
      count = header[2] ? header[2].to_i : 1
      first...(first + count)
    end
  end
end
data/lib/shiba/explain.rb CHANGED
@@ -13,8 +13,8 @@ module Shiba
13
13
 
14
14
  @options = options
15
15
  ex = Shiba.connection.query("EXPLAIN FORMAT=JSON #{@sql}").to_a
16
- json = JSON.parse(ex.first['EXPLAIN'])
17
- @rows = self.class.transform_json(json['query_block'])
16
+ @explain_json = JSON.parse(ex.first['EXPLAIN'])
17
+ @rows = self.class.transform_json(@explain_json['query_block'])
18
18
  @stats = stats
19
19
  run_checks!
20
20
  end
@@ -23,11 +23,15 @@ module Shiba
23
23
  {
24
24
  sql: @sql,
25
25
  table: get_table,
26
+ table_size: table_size,
26
27
  key: first_key,
27
28
  tags: messages,
28
29
  cost: @cost,
30
+ return_size: @return_size,
31
+ severity: severity,
29
32
  used_key_parts: first['used_key_parts'],
30
33
  possible_keys: first['possible_keys'],
34
+ raw_explain: humanized_explain,
31
35
  backtrace: @backtrace
32
36
  }
33
37
  end
@@ -43,7 +47,7 @@ module Shiba
43
47
  table
44
48
  end
45
49
 
46
- def self.transform_table(table)
50
+ def self.transform_table(table, extra = {})
47
51
  t = table
48
52
  res = {}
49
53
  res['table'] = t['table_name']
@@ -57,24 +61,30 @@ module Shiba
57
61
  res['possible_keys'] = t['possible_keys']
58
62
  end
59
63
  res['using_index'] = t['using_index'] if t['using_index']
64
+
65
+ res.merge!(extra)
66
+
60
67
  res
61
68
  end
62
69
 
63
- def self.transform_json(json, res = [])
70
+ def self.transform_json(json, res = [], extra = {})
64
71
  rows = []
65
72
 
66
- if json['ordering_operation']
67
- return transform_json(json['ordering_operation'])
73
+ if (ordering = json['ordering_operation'])
74
+ index_walk = (ordering['using_filesort'] == false)
75
+ return transform_json(json['ordering_operation'], res, { "index_walk" => index_walk } )
68
76
  elsif json['duplicates_removal']
69
- return transform_json(json['duplicates_removal'])
77
+ return transform_json(json['duplicates_removal'], res, extra)
78
+ elsif json['grouping_operation']
79
+ return transform_json(json['grouping_operation'], res, extra)
70
80
  elsif !json['nested_loop'] && !json['table']
71
81
  return [{'Extra' => json['message']}]
72
82
  elsif json['nested_loop']
73
83
  json['nested_loop'].map do |nested|
74
- transform_json(nested, res)
84
+ transform_json(nested, res, extra)
75
85
  end
76
86
  elsif json['table']
77
- res << transform_table(json['table'])
87
+ res << transform_table(json['table'], extra)
78
88
  end
79
89
  res
80
90
  end
@@ -112,15 +122,12 @@ module Shiba
112
122
  first.merge(cost: cost, messages: messages)
113
123
  end
114
124
 
115
- IGNORE_PATTERNS = [
116
- /No tables used/,
117
- /Impossible WHERE/,
118
- /Select tables optimized away/,
119
- /No matching min\/max row/
120
- ]
121
-
122
125
  def table_size
123
- Shiba::Index.count(first["table"], @stats)
126
+ @stats.table_count(first['table'])
127
+ end
128
+
129
# Whether the statistics for +table+ are flagged as fuzzed, as reported by
# the @stats object.
#
# The table being asked about is the +table+ argument; previously the
# argument was ignored in favour of first['table'].
#
# @param table [String] table name
# @return whatever @stats.fuzzed? returns for that table
def fuzzed?(table)
  @stats.fuzzed?(table)
end
125
132
 
126
133
  def no_matching_row_in_const_table?
@@ -128,7 +135,6 @@ module Shiba
128
135
  end
129
136
 
130
137
  def ignore_explain?
131
- first_extra && IGNORE_PATTERNS.any? { |p| first_extra =~ p }
132
138
  end
133
139
 
134
140
  def derived?
@@ -137,7 +143,18 @@ module Shiba
137
143
 
138
144
  # TODO: need to parse SQL here I think
139
145
  def simple_table_scan?
140
- @rows.size == 1 && (@sql !~ /where/i || @sql =~ /where\s*1=1/i) && (@sql !~ /order by/i)
146
+ @rows.size == 1 && first['using_index'] && (@sql !~ /order by/i)
147
+ end
148
+
149
# Buckets @cost into a severity label.
#
#   0..100     => "low"
#   100..1000  => "medium" (exactly 100 is still "low")
#   above 1000 => "high", with no upper bound
#
# @return [String, nil] nil when @cost is unset or negative
def severity
  return if @cost.nil? || @cost < 0

  if @cost <= 100
    "low"
  elsif @cost <= 1000
    "medium"
  else
    "high"
  end
end
142
159
 
143
160
  def limit
@@ -148,53 +165,115 @@ module Shiba
148
165
  end
149
166
  end
150
167
 
151
- def tag_query_type
152
- access_type = first['access_type']
168
# Whether the select list of @sql (the text between the first "select" and
# the first "from") calls an aggregate function.
#
# A function only counts when its name stands as a whole word directly
# before an opening parenthesis, so columns such as admin_id, account or
# summary do not qualify. The select list may span several lines.
#
# @return [Boolean]
def aggregation?
  select_fields = @sql.to_s[/select\s*(.*?)from/im, 1]
  return false unless select_fields

  select_fields.match?(/\b(?:min|max|avg|count|sum|group_concat)\s*\(/i)
end
153
173
 
154
- return unless access_type
155
- access_type = 'tablescan' if access_type == 'ALL'
156
- messages << "access_type_" + access_type
174
+ def self.check(c)
175
+ @checks ||= []
176
+ @checks << c
177
+ end
178
+
179
+ def self.get_checks
180
+ @checks
181
+ end
182
+
183
+ check :check_query_is_ignored
184
+ def check_query_is_ignored
185
+ if ignore?
186
+ messages << "ignored"
187
+ @cost = 0
188
+ end
157
189
  end
158
190
 
159
- def estimate_row_count
191
+ check :check_no_matching_row_in_const_table
192
+ def check_no_matching_row_in_const_table
160
193
  if no_matching_row_in_const_table?
161
194
  messages << "access_type_const"
162
195
  first['key'] = 'PRIMARY'
163
- return 0
196
+ @cost = 1
164
197
  end
198
+ end
165
199
 
166
- return 0 if ignore_explain?
200
+ IGNORE_PATTERNS = [
201
+ /No tables used/,
202
+ /Impossible WHERE/,
203
+ /Select tables optimized away/,
204
+ /No matching min\/max row/
205
+ ]
167
206
 
168
- messages << "fuzzed_data" if Shiba::Index.fuzzed?(first_table, @stats)
207
+ check :check_query_shortcircuits
208
+ def check_query_shortcircuits
209
+ if first_extra && IGNORE_PATTERNS.any? { |p| first_extra =~ p }
210
+ @cost = 0
211
+ end
212
+ end
213
+
214
+ check :check_fuzzed
215
+ def check_fuzzed
216
+ messages << "fuzzed_data" if fuzzed?(first_table)
217
+ end
169
218
 
219
+ check :check_simple_table_scan
220
+ def check_simple_table_scan
170
221
  if simple_table_scan?
171
222
  if limit
172
223
  messages << 'limited_tablescan'
224
+ @cost = limit
173
225
  else
174
- messages << 'access_type_tablescan'
226
+ tag_query_type
227
+ @cost = @stats.estimate_key(first_table, first_key, first['used_key_parts'])
175
228
  end
176
-
177
- return limit || table_size
178
229
  end
230
+ end
179
231
 
232
+ check :check_derived
233
+ def check_derived
180
234
  if derived?
181
235
  # select count(*) from ( select 1 from foo where blah )
182
236
  @rows.shift
183
- return estimate_row_count
237
+ return run_checks!
238
+ end
239
+ end
240
+
241
+
242
+ check :tag_query_type
243
+ def tag_query_type
244
+ access_type = first['access_type']
245
+
246
+ if access_type.nil?
247
+ @cost = 0
248
+ return
184
249
  end
185
250
 
186
- tag_query_type
251
+ access_type = 'tablescan' if access_type == 'ALL'
252
+ messages << "access_type_" + access_type
253
+ end
187
254
 
255
+ #check :check_index_walk
256
+ # disabling this one for now, it's not quite good enough and has a high
257
+ # false-negative rate.
258
+ def check_index_walk
259
+ if first['index_walk']
260
+ @cost = limit
261
+ messages << 'index_walk'
262
+ end
263
+ end
264
+
265
+ check :check_key_size
266
+ def check_key_size
188
267
  # TODO: if possible_keys but mysql chooses NULL, this could be a test-data issue,
189
268
  # pick the best key from the list of possibilities.
190
269
  #
191
270
  if first_key
192
- Shiba::Index.estimate_key(first_table, first_key, first['used_key_parts'], @stats)
271
+ @cost = @stats.estimate_key(first_table, first_key, first['used_key_parts'])
193
272
  else
194
273
  if first['possible_keys'].nil?
195
274
  # if no possibile we're table scanning, use PRIMARY to indicate that cost.
196
275
  # note that this can be wildly inaccurate bcs of WHERE + LIMIT stuff.
197
- Shiba::Index.count(first_table, @stats)
276
+ @cost = table_size
198
277
  else
199
278
  if @options[:force_key]
200
279
  # we were asked to force a key, but mysql still told us to fuck ourselves.
@@ -202,20 +281,37 @@ module Shiba
202
281
  #
203
282
  # there seems to be cases where mysql lists `possible_key` values
204
283
  # that it then cannot use, seen this in OR queries.
205
- return Shiba::Index.count(first_table, @stats)
284
+ @cost = table_size
285
+ else
286
+ possibilities = [table_size]
287
+ possibilities += first['possible_keys'].map do |key|
288
+ estimate_row_count_with_key(key)
289
+ end
290
+ @cost = possibilities.compact.min
206
291
  end
207
-
208
- possibilities = [Shiba::Index.count(first_table, @stats)]
209
- possibilities += first['possible_keys'].map do |key|
210
- estimate_row_count_with_key(key)
211
- end
212
- possibilities.compact.min
213
292
  end
214
293
  end
215
294
  end
216
295
 
296
+ def check_return_size
297
+ if limit
298
+ @return_size = limit
299
+ elsif aggregation?
300
+ @return_size = 1
301
+ else
302
+ @return_size = @cost
303
+ end
304
+
305
+ if @return_size && @return_size > 100
306
+ messages << "retsize_bad"
307
+ else
308
+ messages << "retsize_good"
309
+ end
310
+ end
311
+
217
312
  def estimate_row_count_with_key(key)
218
- Explain.new(@sql, @stats, @backtrace, force_key: key).estimate_row_count
313
+ explain = Explain.new(@sql, @stats, @backtrace, force_key: key)
314
+ explain.run_checks!
219
315
  rescue Mysql2::Error => e
220
316
  if /Key .+? doesn't exist in table/ =~ e.message
221
317
  return nil
@@ -244,14 +340,18 @@ module Shiba
244
340
  end
245
341
 
246
342
  def run_checks!
247
- if ignore?
248
- @cost = 0
249
- messages << "ignored"
250
- return
343
+ self.class.get_checks.each do |check|
344
+ res = send(check)
345
+ break if @cost
251
346
  end
347
+ check_return_size
348
+ @cost
349
+ end
252
350
 
253
- @cost = estimate_row_count
351
+ def humanized_explain
352
+ h = @explain_json['query_block'].dup
353
+ %w(select_id cost_info).each { |i| h.delete(i) }
354
+ h
254
355
  end
255
356
  end
256
357
  end
257
-