shiba 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/shiba.rb CHANGED
@@ -1,12 +1,14 @@
1
1
  require "shiba/version"
2
2
  require "shiba/configure"
3
3
  require "mysql2"
4
+ require "pp"
5
+ require "byebug" if ENV['SHIBA_DEBUG']
4
6
 
5
7
  module Shiba
6
8
  class Error < StandardError; end
7
9
 
8
10
  def self.configure(options)
9
- @connection_hash = options.select { |k, v| ['username', 'database', 'host', 'password'].include?(k) }
11
+ @connection_hash = options.select { |k, v| [ 'default_file', 'default_group', 'username', 'database', 'host', 'password'].include?(k) }
10
12
  @main_config = Configure.read_config_file(options['config'], "config/shiba.yml")
11
13
  @index_config = Configure.read_config_file(options['index'], "config/shiba_index.yml")
12
14
  end
@@ -69,7 +69,7 @@ module Shiba
69
69
  protected
70
70
 
71
71
  def dump_error(e, query)
72
- $stderr.puts "got exception trying to explain: #{e.message}"
72
+ $stderr.puts "got #{e.class.name} exception trying to explain: #{e.message}"
73
73
  $stderr.puts "query: #{query.sql} (index #{query.index})"
74
74
  $stderr.puts e.backtrace.join("\n")
75
75
  end
@@ -79,10 +79,7 @@ module Shiba
79
79
  begin
80
80
  explain = query.explain
81
81
  rescue Mysql2::Error => e
82
- # we're picking up crap on the command-line that's not good SQL. ignore it.
83
- if !(e.message =~ /You have an error in your SQL syntax/)
84
- dump_error(e, query)
85
- end
82
+ dump_error(e, query) if verbose?
86
83
  rescue StandardError => e
87
84
  dump_error(e, query)
88
85
  end
@@ -96,5 +93,9 @@ module Shiba
96
93
  def write(line)
97
94
  @output.puts(line)
98
95
  end
96
+
97
+ def verbose?
98
+ @options['verbose'] == true
99
+ end
99
100
  end
100
101
  end
@@ -0,0 +1,56 @@
1
+ require 'open3'
2
+
3
module Shiba
  # Helpers for capturing a short application backtrace and for stripping
  # well-known root directories (gem paths, Rails root, git repository root,
  # $HOME) from the front of backtrace lines.
  module Backtrace
    # Frames whose text matches this pattern are skipped when looking for the
    # first application frame.
    IGNORE = /\.rvm|gem|vendor\/|rbenv|seed|db|shiba|test|spec/

    # 8 backtrace lines starting from the app caller, cleaned of app/project cruft.
    #
    # Returns an Array of Strings, or nil when every calling frame matches IGNORE.
    def self.from_app
      # Both caller_locations calls must be made directly from this method
      # (not from inside a block) so that the index found by the first call
      # lines up with the start offset handed to the second one.
      app_line_idx = caller_locations.index { |loc| loc.to_s !~ IGNORE }
      return if app_line_idx.nil?

      caller_locations(app_line_idx + 1, 8).map { |loc| clean!(loc.to_s) }
    end

    # Removes the first occurrence of a known root directory (and the slash
    # following it) from +line+, in place.
    #
    # line - a mutable String; it is modified and also returned.
    def self.clean!(line)
      line.sub!(backtrace_ignore_pattern, '')
      line
    end

    # NOTE: the original file had a bare `protected` here. It has no effect on
    # `def self.` methods, so the helpers below have always been publicly
    # callable; they are left public so no existing caller breaks.

    # Regexp matching any known root directory followed by an optional slash.
    # Built once and memoized.
    def self.backtrace_ignore_pattern
      @backtrace_ignore_pattern ||= begin
        # Work on a copy: Gem.path hands back RubyGems' own array, and
        # appending to it would silently alter the gem search path.
        roots = Gem.path.dup
        roots << Rails.root.to_s if defined?(Rails.root)
        roots << repo_root
        roots << ENV['HOME']

        # Drop nils and empty strings. An empty alternative would match at
        # position 0 of every line and stop the real roots from matching.
        roots = roots.compact.reject(&:empty?).uniq

        # match and replace longest path first
        alternatives = roots.sort_by { |root| -root.size }
                            .map { |root| Regexp.escape(root) }
                            .join("|")

        # kill leading slash
        /(#{alternatives})\/?/
      end
    end

    # /user/git_repo => "/user/git_repo"
    # /user/not_a_repo => nil
    #
    # Also returns nil when the git executable cannot be found.
    def self.repo_root
      out, _err, status = Open3.capture3('git', 'rev-parse', '--show-toplevel')
      return unless status.success?

      root = out.lines.first
      root && root.chomp
    rescue Errno::ENOENT
      nil
    end
  end
end
@@ -0,0 +1,103 @@
1
+ require 'json'
2
+ require 'open3'
3
+
4
+ require 'shiba/diff'
5
+ require 'shiba/backtrace'
6
+
7
module Shiba
  # Cross-references an explain log (one JSON document per line) with the
  # files and lines changed in git, and reports the logged queries whose cost
  # exceeds MAGIC_COST and whose backtrace touches an updated line.
  class Checker
    # status   - :pass or :fail
    # message  - String, or nil when not running verbosely and nothing failed
    # problems - Array of parsed explain Hashes (only set on :fail)
    Result = Struct.new(:status, :message, :problems)

    attr_reader :options

    # options - Hash with String keys. Keys read here: "verbose", "staged",
    #           "unstaged", "branch".
    def initialize(options)
      @options = options
    end

    # Checks the explain log at +log+ against the current git changes.
    #
    # log - path of the explain log file handed to grep.
    #
    # Returns a Result.
    # Raises RuntimeError when `git diff` exits unsuccessfully.
    def run(log)
      verbose = options['verbose']
      puts cmd if verbose

      if changes.empty?
        return Result.new(:pass, verbose ? "No changes found in git" : nil)
      end

      # NOTE(review): MAGIC_COST is not defined in this file; presumably it is
      # provided elsewhere under Shiba — confirm.
      problems = select_lines_with_changed_files(log).select do |explain|
        cost = explain["cost"]
        cost && cost > MAGIC_COST &&
          backtrace_has_updated_line?(explain["backtrace"], updated_lines)
      end

      if problems.empty?
        return Result.new(:pass, verbose ? "No problems found" : nil)
      end

      Result.new(:fail, "Potential problems", problems)
    end

    protected

    # True when any backtrace line starts with an updated path and carries a
    # line number contained in that path's updated lines.
    #
    # backtrace - Array of "path:lineno:..." Strings; nil is treated as empty.
    # updates   - Array of [path, lines] pairs as built by #updated_lines.
    def backtrace_has_updated_line?(backtrace, updates)
      Array(backtrace).any? do |frame|
        lineno = frame[/:(\d+):/, 1]
        next false unless lineno

        updates.any? do |path, lines|
          path && frame.start_with?(path) && lines.include?(lineno.to_i)
        end
      end
    end

    # Returns the parsed JSON of every log line that mentions a changed file.
    def select_lines_with_changed_files(log)
      paths = changes.split("\n").reject(&:empty?)
      return [] if paths.empty?

      # Arguments are passed as an array (no shell involved) and -F makes
      # grep treat the paths as fixed strings rather than regular expressions.
      patterns = paths.flat_map { |path| ["-e", path] }
      json_lines, _status = Open3.capture2("grep", "-F", *patterns, "--", log.to_s)
      json_lines.each_line.map { |line| JSON.parse(line) }
    end

    # Newline-separated list of changed (non-deleted) file names, memoized.
    def changes
      @changes ||= begin
        out, status = Open3.capture2("git", "diff", *diff_args, "--name-only", "--diff-filter=d")
        unless status.success?
          raise "Failed to read changes (git diff exited with status #{status.exitstatus})"
        end

        out
      end
    end

    # Array of [cleaned_path, line_range] pairs for the current diff, memoized.
    def updated_lines
      @updated_lines ||= begin
        out, _err, _status = Open3.capture3("git", "diff", *diff_args, "--unified=0", "--diff-filter=d")

        Shiba::Diff.new(out).updated_lines.map do |path, lines|
          [ path && Shiba::Backtrace.clean!(path), lines ]
        end
      end
    end

    # The revision/selection part of the git diff command line as a single
    # String with a leading space (or "" for unstaged changes).
    def cmd
      diff_args.map { |arg| " #{arg}" }.join
    end

    # The same selection as #cmd, as an argument list.
    # "staged" wins over "unstaged"; otherwise HEAD is compared, optionally
    # against options["branch"].
    def diff_args
      if options["staged"]
        ["--staged"]
      elsif options["unstaged"]
        []
      elsif options["branch"]
        ["HEAD...#{options["branch"]}"]
      else
        ["HEAD"]
      end
    end
  end
end
@@ -27,6 +27,13 @@ module Shiba
27
27
  raise e, "Cannot load `#{path}`:\n#{e.message}", e.backtrace
28
28
  end
29
29
 
30
+ # loosely based on https://dev.mysql.com/doc/refman/8.0/en/option-files.html
31
+ def self.mysql_config_path
32
+ paths = [ File.join(Dir.home, '.mylogin.cnf'), File.join(Dir.home, '.my.cnf') ]
33
+
34
+ paths.detect { |p| File.exist?(p) }
35
+ end
36
+
30
37
  def self.read_config_file(option_file, default)
31
38
  file_to_read = nil
32
39
  if option_file
@@ -78,25 +85,38 @@ module Shiba
78
85
  options["limit"] = l.to_i
79
86
  end
80
87
 
81
- opts.on("-s","--stats FILES", "location of index statistics tsv file") do |f|
82
- options["stats"] = f
83
- end
84
-
85
88
  opts.on("-f", "--file FILE", "location of file containing queries") do |f|
86
89
  options["file"] = f
87
90
  end
88
91
 
89
- opts.on("-e", "--explain FILE", "write explain JSON to file. default: stdout") do |f|
90
- options["explain"] = f
92
+ opts.on("-j", "--json [FILE]", "write JSON report here. default: to stdout") do |f|
93
+ if f
94
+ options["json"] = File.open(f, 'w')
95
+ else
96
+ options["json"] = $stdout
97
+ end
91
98
  end
92
99
 
93
- opts.on("-o", "--output PATH", "path to put generated report in. default: /tmp") do |p|
94
- options["output"] = p
100
+ opts.on("-h", "--html FILE", "write html report here. Default to /tmp/explain.html") do |h|
101
+ options["html"] = h
95
102
  end
96
103
 
97
104
  opts.on("-t", "--test", "analyze queries at --file instead of analyzing a process") do |f|
98
105
  options["test"] = true
99
106
  end
107
+
108
+ opts.on("-v", "--verbose", "print internal runtime information") do
109
+ options["verbose"] = true
110
+ end
111
+
112
+ # This naming seems to be mysql convention, maybe we should just do our own thing though.
113
+ opts.on("--login-path", "The option group from the mysql config file to read from") do |f|
114
+ options["default_group"] = f
115
+ end
116
+
117
+ opts.on("--default-extras-file", "The option file to read mysql configuration from") do |f|
118
+ options["default_file"] = f
119
+ end
100
120
  end
101
121
  end
102
122
  end
data/lib/shiba/diff.rb ADDED
@@ -0,0 +1,119 @@
1
module Shiba
  # Reads the output of `git diff --unified=0` and answers two questions:
  # which destination lines were inserted (#updated_lines), and at which
  # diff position a given file/line sits (#find_position).
  #
  # For simplicity, the default output of git diff is not supported.
  # The expected format is from 'git diff --unified=0'.
  #
  #   diff = `git diff --unified=0`
  #   Shiba::Diff.new(StringIO.new(diff)).updated_lines
  #   # => [["hello.rb", 1..2], ["test.rb", 5..5]]
  class Diff
    # +++ b/config/environments/test.rb
    FILE_PATTERN = /\A\+\+\+ b\/(.*?)\Z/

    # @@ -177,0 +178 @@ ...
    # @@ -177,0 +178,5 @@ ...
    # @@ -21 +24 @@ ...
    # @@ -1 +1 @@ ...
    #
    # Capture 1 is the first destination line, capture 2 the optional
    # destination line count.
    LINE_PATTERN = /\A@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@/

    # :new until #find_position runs; afterwards one of :file_not_found,
    # :line_not_found or :found_position.
    attr_reader :status

    # file - the diff text: anything responding to #each_line.
    #        #find_position additionally needs a stateful stream (IO or
    #        StringIO), because it mixes internal and external iteration
    #        over the same source.
    def initialize(file)
      @diff = file
      @status = :new
    end

    # Returns the file and line numbers that contain inserts. Deletions are ignored.
    #
    # Returns an Array of [path, Range] pairs, one per hunk that adds lines.
    def updated_lines
      path = nil
      found = []

      @diff.each_line do |line|
        if (header = FILE_PATTERN.match(line))
          path = header[1]
        elsif hunk_header?(line)
          line_numbers = line_numbers_for_destination(line)
          # A hunk with a destination count of 0 only deletes lines.
          found << [ path, line_numbers ] if line_numbers.size > 0
        end
      end

      found
    end

    # Returns the position in the diff, after the relevant file header,
    # that contains the specified file/lineno modification.
    # Only supports finding the position in the destination / newest version of the file.
    #
    # via https://developer.github.com/v3/pulls/comments/#create-a-comment
    # The position value equals the number of lines down from the first "@@" hunk header
    # in the file you want to add a comment.
    #
    # Returns an Integer, or nil when the file or line is not part of the
    # diff (#status tells which).
    # Raises StandardError when the file header is not followed by a hunk header.
    def find_position(path, line_number)
      io = @diff.each_line

      # Internal iteration: consumes the stream up to and including the header.
      file_header = "+++ b/#{path}\n"
      return not_found(:file_not_found) unless io.find_index(file_header)

      line = io.peek
      unless hunk_header?(line)
        raise StandardError, "Expected hunk header to be after file header, but got '#{line}'"
      end

      pos = 0

      loop do
        line = io.next
        pos += 1

        # Reached the next file without finding the line.
        return not_found(:line_not_found) if file_header?(line)
        next unless hunk_header?(line)

        destination_position = line_numbers_for_destination(line).find_index(line_number)
        next unless destination_position

        @status = :found_position
        return pos + find_hunk_index(io, destination_position)
      end

      # Kernel#loop swallows the StopIteration raised at the end of the diff.
      not_found(:line_not_found)
    rescue StopIteration
      # io.peek hit the end of the diff right after the file header.
      not_found(:line_not_found)
    end

    protected

    # Records why the lookup failed and returns nil.
    def not_found(status)
      @status = status
      nil
    end

    # Index, counted from the line following the current hunk header, of the
    # (pos + 1)-th line that is not a deletion.
    def find_hunk_index(hunk, pos)
      _line, idx = hunk.with_index.select { |l, _i| !l.start_with?('-') }.take(pos + 1).last
      idx
    end

    def file_header?(line)
      line.match?(FILE_PATTERN)
    end

    def hunk_header?(line)
      line.match?(LINE_PATTERN)
    end

    # Destination line numbers covered by a hunk header, as an inclusive Range.
    # An omitted count means one line; a count of 0 yields an empty Range.
    def line_numbers_for_destination(diff_line)
      header = LINE_PATTERN.match(diff_line)
      first_line = header[1].to_i
      line_count = header[2] ? header[2].to_i : 1
      first_line..(first_line + line_count - 1)
    end
  end
end
data/lib/shiba/explain.rb CHANGED
@@ -13,8 +13,8 @@ module Shiba
13
13
 
14
14
  @options = options
15
15
  ex = Shiba.connection.query("EXPLAIN FORMAT=JSON #{@sql}").to_a
16
- json = JSON.parse(ex.first['EXPLAIN'])
17
- @rows = self.class.transform_json(json['query_block'])
16
+ @explain_json = JSON.parse(ex.first['EXPLAIN'])
17
+ @rows = self.class.transform_json(@explain_json['query_block'])
18
18
  @stats = stats
19
19
  run_checks!
20
20
  end
@@ -23,11 +23,15 @@ module Shiba
23
23
  {
24
24
  sql: @sql,
25
25
  table: get_table,
26
+ table_size: table_size,
26
27
  key: first_key,
27
28
  tags: messages,
28
29
  cost: @cost,
30
+ return_size: @return_size,
31
+ severity: severity,
29
32
  used_key_parts: first['used_key_parts'],
30
33
  possible_keys: first['possible_keys'],
34
+ raw_explain: humanized_explain,
31
35
  backtrace: @backtrace
32
36
  }
33
37
  end
@@ -43,7 +47,7 @@ module Shiba
43
47
  table
44
48
  end
45
49
 
46
- def self.transform_table(table)
50
+ def self.transform_table(table, extra = {})
47
51
  t = table
48
52
  res = {}
49
53
  res['table'] = t['table_name']
@@ -57,24 +61,30 @@ module Shiba
57
61
  res['possible_keys'] = t['possible_keys']
58
62
  end
59
63
  res['using_index'] = t['using_index'] if t['using_index']
64
+
65
+ res.merge!(extra)
66
+
60
67
  res
61
68
  end
62
69
 
63
- def self.transform_json(json, res = [])
70
+ def self.transform_json(json, res = [], extra = {})
64
71
  rows = []
65
72
 
66
- if json['ordering_operation']
67
- return transform_json(json['ordering_operation'])
73
+ if (ordering = json['ordering_operation'])
74
+ index_walk = (ordering['using_filesort'] == false)
75
+ return transform_json(json['ordering_operation'], res, { "index_walk" => index_walk } )
68
76
  elsif json['duplicates_removal']
69
- return transform_json(json['duplicates_removal'])
77
+ return transform_json(json['duplicates_removal'], res, extra)
78
+ elsif json['grouping_operation']
79
+ return transform_json(json['grouping_operation'], res, extra)
70
80
  elsif !json['nested_loop'] && !json['table']
71
81
  return [{'Extra' => json['message']}]
72
82
  elsif json['nested_loop']
73
83
  json['nested_loop'].map do |nested|
74
- transform_json(nested, res)
84
+ transform_json(nested, res, extra)
75
85
  end
76
86
  elsif json['table']
77
- res << transform_table(json['table'])
87
+ res << transform_table(json['table'], extra)
78
88
  end
79
89
  res
80
90
  end
@@ -112,15 +122,12 @@ module Shiba
112
122
  first.merge(cost: cost, messages: messages)
113
123
  end
114
124
 
115
- IGNORE_PATTERNS = [
116
- /No tables used/,
117
- /Impossible WHERE/,
118
- /Select tables optimized away/,
119
- /No matching min\/max row/
120
- ]
121
-
122
125
  def table_size
123
- Shiba::Index.count(first["table"], @stats)
126
+ @stats.table_count(first['table'])
127
+ end
128
+
129
# Asks the stats object whether +table+ is flagged as fuzzed.
#
# table - the table name to look up. The argument is now honoured; before,
#         it was ignored and first['table'] was always used.
def fuzzed?(table)
  @stats.fuzzed?(table)
end
125
132
 
126
133
  def no_matching_row_in_const_table?
@@ -128,7 +135,6 @@ module Shiba
128
135
  end
129
136
 
130
137
  def ignore_explain?
131
- first_extra && IGNORE_PATTERNS.any? { |p| first_extra =~ p }
132
138
  end
133
139
 
134
140
  def derived?
@@ -137,7 +143,18 @@ module Shiba
137
143
 
138
144
  # TODO: need to parse SQL here I think
139
145
  def simple_table_scan?
140
- @rows.size == 1 && (@sql !~ /where/i || @sql =~ /where\s*1=1/i) && (@sql !~ /order by/i)
146
+ @rows.size == 1 && first['using_index'] && (@sql !~ /order by/i)
147
+ end
148
+
149
# Buckets @cost into a severity label.
#
# Returns "low" for 0..100, "medium" up to 1000 and "high" for anything
# above that, with no upper limit. Returns nil when @cost is nil or negative.
def severity
  # The first matching branch wins, so the shared bounds 100 and 1000 fall
  # into the lower bucket.
  case @cost
  when 0..100
    "low"
  when 100..1000
    "medium"
  when 1000..Float::INFINITY
    "high"
  end
end
142
159
 
143
160
  def limit
@@ -148,53 +165,115 @@ module Shiba
148
165
  end
149
166
  end
150
167
 
151
- def tag_query_type
152
- access_type = first['access_type']
168
# True when the select list of @sql (the text between the first "select"
# and the first following "from") calls an aggregate function: min, max,
# avg, count, sum or group_concat.
def aggregation?
  # /m lets the select list span several lines.
  select_fields = @sql[/select\s*(.*?)from/im, 1]
  return false unless select_fields

  # Every function name must be a whole word followed by "(". The original
  # alternation only tied the parenthesis to group_concat, so a column such
  # as admin_id matched on "min".
  select_fields.match?(/\b(?:min|max|avg|count|sum|group_concat)\s*\(/i)
end
153
173
 
154
- return unless access_type
155
- access_type = 'tablescan' if access_type == 'ALL'
156
- messages << "access_type_" + access_type
174
+ def self.check(c)
175
+ @checks ||= []
176
+ @checks << c
177
+ end
178
+
179
+ def self.get_checks
180
+ @checks
181
+ end
182
+
183
+ check :check_query_is_ignored
184
+ def check_query_is_ignored
185
+ if ignore?
186
+ messages << "ignored"
187
+ @cost = 0
188
+ end
157
189
  end
158
190
 
159
- def estimate_row_count
191
+ check :check_no_matching_row_in_const_table
192
+ def check_no_matching_row_in_const_table
160
193
  if no_matching_row_in_const_table?
161
194
  messages << "access_type_const"
162
195
  first['key'] = 'PRIMARY'
163
- return 0
196
+ @cost = 1
164
197
  end
198
+ end
165
199
 
166
- return 0 if ignore_explain?
200
+ IGNORE_PATTERNS = [
201
+ /No tables used/,
202
+ /Impossible WHERE/,
203
+ /Select tables optimized away/,
204
+ /No matching min\/max row/
205
+ ]
167
206
 
168
- messages << "fuzzed_data" if Shiba::Index.fuzzed?(first_table, @stats)
207
+ check :check_query_shortcircuits
208
+ def check_query_shortcircuits
209
+ if first_extra && IGNORE_PATTERNS.any? { |p| first_extra =~ p }
210
+ @cost = 0
211
+ end
212
+ end
213
+
214
+ check :check_fuzzed
215
+ def check_fuzzed
216
+ messages << "fuzzed_data" if fuzzed?(first_table)
217
+ end
169
218
 
219
+ check :check_simple_table_scan
220
+ def check_simple_table_scan
170
221
  if simple_table_scan?
171
222
  if limit
172
223
  messages << 'limited_tablescan'
224
+ @cost = limit
173
225
  else
174
- messages << 'access_type_tablescan'
226
+ tag_query_type
227
+ @cost = @stats.estimate_key(first_table, first_key, first['used_key_parts'])
175
228
  end
176
-
177
- return limit || table_size
178
229
  end
230
+ end
179
231
 
232
+ check :check_derived
233
+ def check_derived
180
234
  if derived?
181
235
  # select count(*) from ( select 1 from foo where blah )
182
236
  @rows.shift
183
- return estimate_row_count
237
+ return run_checks!
238
+ end
239
+ end
240
+
241
+
242
+ check :tag_query_type
243
+ def tag_query_type
244
+ access_type = first['access_type']
245
+
246
+ if access_type.nil?
247
+ @cost = 0
248
+ return
184
249
  end
185
250
 
186
- tag_query_type
251
+ access_type = 'tablescan' if access_type == 'ALL'
252
+ messages << "access_type_" + access_type
253
+ end
187
254
 
255
+ #check :check_index_walk
256
+ # disabling this one for now, it's not quite good enough and has a high
257
+ # false-negative rate.
258
+ def check_index_walk
259
+ if first['index_walk']
260
+ @cost = limit
261
+ messages << 'index_walk'
262
+ end
263
+ end
264
+
265
+ check :check_key_size
266
+ def check_key_size
188
267
  # TODO: if possible_keys but mysql chooses NULL, this could be a test-data issue,
189
268
  # pick the best key from the list of possibilities.
190
269
  #
191
270
  if first_key
192
- Shiba::Index.estimate_key(first_table, first_key, first['used_key_parts'], @stats)
271
+ @cost = @stats.estimate_key(first_table, first_key, first['used_key_parts'])
193
272
  else
194
273
  if first['possible_keys'].nil?
195
274
  # if no possibile we're table scanning, use PRIMARY to indicate that cost.
196
275
  # note that this can be wildly inaccurate bcs of WHERE + LIMIT stuff.
197
- Shiba::Index.count(first_table, @stats)
276
+ @cost = table_size
198
277
  else
199
278
  if @options[:force_key]
200
279
  # we were asked to force a key, but mysql still told us to fuck ourselves.
@@ -202,20 +281,37 @@ module Shiba
202
281
  #
203
282
  # there seems to be cases where mysql lists `possible_key` values
204
283
  # that it then cannot use, seen this in OR queries.
205
- return Shiba::Index.count(first_table, @stats)
284
+ @cost = table_size
285
+ else
286
+ possibilities = [table_size]
287
+ possibilities += first['possible_keys'].map do |key|
288
+ estimate_row_count_with_key(key)
289
+ end
290
+ @cost = possibilities.compact.min
206
291
  end
207
-
208
- possibilities = [Shiba::Index.count(first_table, @stats)]
209
- possibilities += first['possible_keys'].map do |key|
210
- estimate_row_count_with_key(key)
211
- end
212
- possibilities.compact.min
213
292
  end
214
293
  end
215
294
  end
216
295
 
296
+ def check_return_size
297
+ if limit
298
+ @return_size = limit
299
+ elsif aggregation?
300
+ @return_size = 1
301
+ else
302
+ @return_size = @cost
303
+ end
304
+
305
+ if @return_size && @return_size > 100
306
+ messages << "retsize_bad"
307
+ else
308
+ messages << "retsize_good"
309
+ end
310
+ end
311
+
217
312
  def estimate_row_count_with_key(key)
218
- Explain.new(@sql, @stats, @backtrace, force_key: key).estimate_row_count
313
+ explain = Explain.new(@sql, @stats, @backtrace, force_key: key)
314
+ explain.run_checks!
219
315
  rescue Mysql2::Error => e
220
316
  if /Key .+? doesn't exist in table/ =~ e.message
221
317
  return nil
@@ -244,14 +340,18 @@ module Shiba
244
340
  end
245
341
 
246
342
  def run_checks!
247
- if ignore?
248
- @cost = 0
249
- messages << "ignored"
250
- return
343
+ self.class.get_checks.each do |check|
344
+ res = send(check)
345
+ break if @cost
251
346
  end
347
+ check_return_size
348
+ @cost
349
+ end
252
350
 
253
- @cost = estimate_row_count
351
+ def humanized_explain
352
+ h = @explain_json['query_block'].dup
353
+ %w(select_id cost_info).each { |i| h.delete(i) }
354
+ h
254
355
  end
255
356
  end
256
357
  end
257
-