shiba 0.2.3 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,12 +1,13 @@
require 'shellwords'

require 'shiba/query_watcher'
require 'active_support/notifications'
require 'active_support/lazy_load_hooks'
require 'shiba/configure'
4
5
 
5
6
  module Shiba
6
7
  # Integrates ActiveRecord with the Query Watcher by setting up the query log path, and the
7
8
  # connection options for the explain command, which it runs when the process exits.
8
9
  #
9
- # SHIBA_OUT=<log path> and SHIBA_DEBUG=true environment variables may be set.
10
+ # SHIBA_OUT and SHIBA_DEBUG=true environment variables may be set.
10
11
  class ActiveRecordIntegration
11
12
 
12
13
  attr_reader :path, :watcher
@@ -24,12 +25,11 @@ module Shiba
24
25
  protected
25
26
 
26
27
  def self.start_watcher
28
+ path = log_path
27
29
  if ENV['SHIBA_DEBUG']
28
- $stderr.puts("starting shiba watcher")
30
+ $stderr.puts("starting shiba watcher, outputting to #{path}")
29
31
  end
30
32
 
31
- path = ENV['SHIBA_OUT'] || make_tmp_path
32
-
33
33
  file = File.open(path, 'a')
34
34
  watcher = QueryWatcher.new(file)
35
35
 
@@ -40,14 +40,20 @@ module Shiba
40
40
  $stderr.puts(e.message, e.backtrace.join("\n"))
41
41
  end
42
42
 
43
# Resolves the file that captured queries are appended to.
#
# SHIBA_OUT may name the log: a relative name is resolved against Shiba.path,
# while an absolute path is used exactly as given. Without SHIBA_OUT a
# "query.log-<epoch seconds>" name is resolved against Shiba.path.
#
# Returns the log path as a String.
def self.log_path
  name = ENV["SHIBA_OUT"] || "query.log-#{Time.now.to_i}"

  # File.join would nest an absolute SHIBA_OUT underneath Shiba.path
  # (e.g. "<shiba path>/tmp/out.log"); expand_path leaves absolute paths alone.
  File.expand_path(name, Shiba.path)
end
46
47
 
47
48
  def self.run_explain(file, path)
48
49
  file.close
49
50
  puts ""
51
+
50
52
  cmd = "shiba explain #{database_args} --file #{path}"
53
+ if Shiba::Configure.ci?
54
+ cmd << " --json #{File.join(Shiba.path, 'ci.json')}"
55
+ end
56
+
51
57
  if ENV['SHIBA_DEBUG']
52
58
  $stderr.puts("running:")
53
59
  $stderr.puts(cmd)
@@ -56,16 +62,27 @@ module Shiba
56
62
  end
57
63
 
58
64
  def self.database_args
59
- c = ActiveRecord::Base.connection.raw_connection.query_options
65
+ cx = ActiveRecord::Base.connection.raw_connection
66
+ if cx.respond_to?(:query_options)
67
+ # mysql
68
+ c = cx.query_options.merge(server: 'mysql')
69
+ else
70
+ # postgres
71
+ c = { host: cx.host, database: cx.db, username: cx.user, password: cx.pass, port: cx.port, server: 'postgres' }
72
+ end
73
+
60
74
  options = {
61
- 'host': c[:host],
62
- 'database': c[:database],
63
- 'user': c[:username],
64
- 'password': c[:password]
75
+ 'host': c[:host],
76
+ 'database': c[:database],
77
+ 'user': c[:username],
78
+ 'password': c[:password],
79
+ 'port': c[:port],
80
+ 'server': c[:server]
65
81
  }
66
82
 
67
- options.reject { |k,v| v.nil? }.map { |k,v| "--#{k} #{v}" }.join(" ")
83
+ # port can be a Fixnum
84
+ options.reject { |k,v| v.nil? || v.respond_to?(:empty?) && v.empty? }.map { |k,v| "--#{k} #{v}" }.join(" ")
68
85
  end
69
86
 
70
87
  end
71
- end
88
+ end
data/lib/shiba/checker.rb CHANGED
@@ -5,7 +5,11 @@ require 'shiba/diff'
5
5
  require 'shiba/backtrace'
6
6
 
7
7
  module Shiba
8
+ # Given an explain log and a diff, returns any explain logs
9
+ # that appear to be caused by the diff.
8
10
  class Checker
11
+ MAGIC_COST = 100
12
+
9
13
  Result = Struct.new(:status, :message, :problems)
10
14
 
11
15
  attr_reader :options
@@ -14,6 +18,8 @@ module Shiba
14
18
  @options = options
15
19
  end
16
20
 
21
+ # Returns a Result object with a status, message, and any problem queries detected.
22
+ # Query problem format is [ [ "path:lineno", explain ]... ]
17
23
  def run(log)
18
24
  msg = nil
19
25
 
@@ -21,9 +27,9 @@ module Shiba
21
27
  puts cmd
22
28
  end
23
29
 
24
- if changes.empty?
30
+ if changed_files.empty?
25
31
  if options['verbose']
26
- msg = "No changes found in git"
32
+ msg = "No changes found. Are you sure you specified the correct branch?"
27
33
  end
28
34
  return Result.new(:pass, msg)
29
35
  end
@@ -31,15 +37,27 @@ module Shiba
31
37
  explains = select_lines_with_changed_files(log)
32
38
  problems = explains.select { |explain| explain["cost"] && explain["cost"] > MAGIC_COST }
33
39
 
34
- problems.select! do |problem|
35
- backtrace_has_updated_line?(problem["backtrace"], updated_lines)
40
+
41
+ if options["verbose"]
42
+ puts problems
43
+ puts "Updated lines: #{updated_lines}"
36
44
  end
37
45
 
38
46
  if problems.empty?
39
- if options['verbose']
40
- msg = "No problems found"
41
- end
47
+ msg = "No problems found caused by the diff"
48
+ return Result.new(:pass, msg)
49
+ end
50
+
51
+ problems.map! do |problem|
52
+ line = updated_line_from_backtrace(problem["backtrace"], updated_lines)
53
+ next if line.nil?
54
+
55
+ [ line, problem ]
56
+ end
57
+ problems.compact!
42
58
 
59
+ if problems.empty?
60
+ msg = "No problems found caused by the diff"
43
61
  return Result.new(:pass, msg)
44
62
  end
45
63
 
@@ -48,45 +66,91 @@ module Shiba
48
66
 
49
67
  protected
50
68
 
51
# Finds the first backtrace entry that points at a line changed by the diff.
#
# backtrace - Array of "path:lineno:..." Strings.
# updates   - Enumerable of [ path, line numbers ] pairs.
#
# Returns "path:lineno" for the first matching entry, or nil when none match.
def updated_line_from_backtrace(backtrace, updates)
  backtrace.each do |bl|
    updates.each do |path, lines|
      # The line number must follow the path directly. A bare start_with? check
      # let "lib/x.rb" match an entry for "lib/x.rb.erb:3:" and then picked up
      # whatever ":<digits>:" appeared first in the string.
      match = /\A#{Regexp.escape(path)}:(\d+):/.match(bl)
      next if match.nil?
      next if !lines.include?(match[1].to_i)

      return "#{path}:#{match[1]}"
    end
  end

  nil
end
60
82
 
61
83
  def select_lines_with_changed_files(log)
62
- patterns = changes.split("\n").map { |path| "-e #{path}" }.join(" ")
63
- json_lines = `grep #{log} #{patterns}`
84
+ patterns = changed_files.map { |path| "-e #{path}" }.join(" ")
85
+ cmd = "grep #{log} #{patterns}"
86
+ $stderr.puts cmd if options["verbose"]
87
+
88
+ json_lines = `#{cmd}`
64
89
  json_lines.each_line.map { |line| JSON.parse(line) }
65
90
  end
66
91
 
67
# Memoized list of file names touched by the change under review: taken from
# the diff file when options['diff'] is set, otherwise from git.
def changed_files
  return @changed_files if @changed_files

  @changed_files = options['diff'] ? file_diff_names : git_diff_names
end
77
97
 
78
98
  def updated_lines
79
99
  return @updated_lines if @updated_lines
80
100
 
81
- Open3.popen3("git diff#{cmd} --unified=0 --diff-filter=d") {|_,o,_,_|
82
- @updated_lines = Shiba::Diff.new(o).updated_lines
83
- }
101
+
102
+ out = options['diff'] ? file_diff_lines : git_diff_lines
103
+ @updated_lines = Shiba::Diff.new(out).updated_lines
104
+
84
105
 
85
106
  @updated_lines.map! do |path, lines|
86
107
  [ Shiba::Backtrace.clean!(path), lines ]
87
108
  end
88
109
  end
89
110
 
111
# Opens the diff file named by options['diff'] and returns the open File.
# No block is given, so the handle stays open until the caller closes it.
def file_diff_lines
  diff_path = options['diff']
  File.open(diff_path)
end
+
115
# Runs `git diff` with zero context lines for the range chosen by #cmd and
# returns a readable IO over its standard output. The caller is responsible
# for closing the returned stream.
def git_diff_lines
  run = "git diff#{cmd} --unified=0 --diff-filter=d"

  # Options are keyed by String everywhere else in this class; the Symbol key
  # used here before never matched, so the command was never logged.
  $stderr.puts run if options["verbose"]

  # popen3 without a block left stdin and stderr pipes open and unread.
  # popen2 keeps git's stderr attached to ours, so errors stay visible and
  # there is no stderr pipe to fill up.
  stdin, out, _wait_thread = Open3.popen2(run)
  stdin.close
  out
end
124
+
125
# Extracts the changed file names from the "+++ b/<path>" headers of the
# unified diff file named by options['diff'].
#
# Given a diff containing
#
#   index ade9b24..661d522 100644
#   --- a/test/app/app.rb
#   +++ b/test/app/app.rb
#   @@ -24,4 +24,4 @@ ActiveRecord::Base...
#   org = Organization.create!(name: 'test')
#
#   file_diff_names
#   # => [ "test/app/app.rb" ]
def file_diff_names
  file_name_pattern = /^\+\+\+ b\/(.*?)$/

  # foreach closes the file once it has been read; the previous File.open
  # without a block left the handle open.
  File.foreach(options['diff']).map { |line| line[file_name_pattern, 1] }.compact
end
138
+
139
# Lists the files changed in the range chosen by #cmd, as reported by
# `git diff --name-only` (deleted files are filtered out by --diff-filter=d).
#
# Returns an Array of path Strings.
# Raises Shiba::Error when git does not exit successfully.
def git_diff_names
  run = "git diff#{cmd} --name-only --diff-filter=d"

  # Options are keyed by String everywhere else in this class; the Symbol key
  # used here before never matched, so the command was never logged.
  $stderr.puts run if options["verbose"]

  result = `#{run}`
  unless $?.success?
    $stderr.puts result
    raise Shiba::Error, "Failed to read changes"
  end

  result.split("\n")
end
153
+
90
154
  def cmd
91
155
  cmd = case
92
156
  when options["staged"]
@@ -94,7 +158,7 @@ module Shiba
94
158
  when options["unstaged"]
95
159
  ""
96
160
  else
97
- commit = " HEAD"
161
+ commit = " origin/HEAD"
98
162
  commit << "...#{options["branch"]}" if options["branch"]
99
163
  commit
100
164
  end
@@ -1,5 +1,7 @@
1
1
  require 'pathname'
2
2
  require 'pp'
3
+ require 'optionparser'
4
+
3
5
  module Shiba
4
6
  module Configure
5
7
 
@@ -27,6 +29,10 @@ module Shiba
27
29
  raise e, "Cannot load `#{path}`:\n#{e.message}", e.backtrace
28
30
  end
29
31
 
32
# Reports whether the process appears to be running under continuous
# integration, judged by the CI and CONTINUOUS_INTEGRATION environment
# variables.
#
# Unset, empty, "false" and "0" values do not count. Previously any String
# was accepted, so CI=false (or CI= with no value) was treated as CI.
#
# Returns true or false.
def self.ci?
  %w[CI CONTINUOUS_INTEGRATION].any? do |name|
    value = ENV[name].to_s.strip.downcase
    !value.empty? && value != 'false' && value != '0'
  end
end
35
+
30
36
  # loosely based on https://dev.mysql.com/doc/refman/8.0/en/option-files.html
31
37
  def self.mysql_config_path
32
38
  paths = [ File.join(Dir.home, '.mylogin.cnf'), File.join(Dir.home, '.my.cnf') ]
@@ -53,10 +59,14 @@ module Shiba
53
59
  end
54
60
  end
55
61
 
56
- def self.make_options_parser(options)
57
- parser = OptionParser.new do |opts|
62
+ def self.make_options_parser(options, only_basics = false)
63
+ OptionParser.new do |opts|
58
64
  # note that the key to the hash needs to stay the same as the
59
65
  # option name since we re-pass them
66
+ opts.on("-s","--server SERVER_TYPE", "mysql|postgres") do |s|
67
+ options["server"] = s
68
+ end
69
+
60
70
  opts.on("-h","--host HOST", "sql host") do |h|
61
71
  options["host"] = h
62
72
  end
@@ -73,6 +83,10 @@ module Shiba
73
83
  options["password"] = p
74
84
  end
75
85
 
86
+ opts.on("-P","--port PORT", "server port") do |p|
87
+ options["port"] = p
88
+ end
89
+
76
90
  opts.on("-c","--config FILE", "location of shiba.yml") do |f|
77
91
  options["config"] = f
78
92
  end
@@ -81,6 +95,12 @@ module Shiba
81
95
  options["index"] = i.to_i
82
96
  end
83
97
 
98
+ opts.on("--default-extras-file", "The option file to read mysql configuration from") do |f|
99
+ options["default_file"] = f
100
+ end
101
+
102
+ next if only_basics
103
+
84
104
  opts.on("-l", "--limit NUM", "stop after processing NUM queries") do |l|
85
105
  options["limit"] = l.to_i
86
106
  end
@@ -114,9 +134,6 @@ module Shiba
114
134
  options["default_group"] = f
115
135
  end
116
136
 
117
- opts.on("--default-extras-file", "The option file to read mysql configuration from") do |f|
118
- options["default_file"] = f
119
- end
120
137
  end
121
138
  end
122
139
  end
@@ -0,0 +1,25 @@
1
module Shiba
  # Factory for the database-specific connection wrappers.
  class Connection
    # Builds the wrapper matching hash['server'] ("mysql", "postgres" or
    # "postgresql"). When no server type is given it is inferred from
    # hash['port'] (3306 => mysql, 5432 => postgres).
    #
    # The adapter file is required lazily, so only the driver gem for the
    # selected server has to be installed.
    #
    # Raises RuntimeError when the server type cannot be determined or is not
    # recognised. Previously every value other than "mysql" silently produced
    # a Postgres connection.
    def self.build(hash)
      server_type = hash['server']
      server_type = server_type_from_port(hash['port']) if server_type.to_s.empty?

      case server_type
      when 'mysql'
        require 'shiba/connection/mysql'
        Shiba::Connection::Mysql.new(hash)
      when 'postgres', 'postgresql'
        require 'shiba/connection/postgres'
        Shiba::Connection::Postgres.new(hash)
      else
        raise "unknown server type #{server_type.inspect}! please pass --server mysql or --server postgres"
      end
    end

    # Maps a well-known default port to a server type, raising when the port
    # gives no hint.
    def self.server_type_from_port(port)
      case port.to_i
      when 3306 then 'mysql'
      when 5432 then 'postgres'
      else raise "couldn't determine server type! please pass --server"
      end
    end
    private_class_method :server_type_from_port
  end
end
@@ -0,0 +1,45 @@
1
+ require 'mysql2'
2
+ require 'json'
3
+
4
module Shiba
  class Connection
    # Connection wrapper backed by a Mysql2::Client.
    class Mysql
      # hash - connection options handed straight to Mysql2::Client.new.
      def initialize(hash)
        @connection = Mysql2::Client.new(hash)
      end

      # Runs sql on the underlying client and returns its result.
      def query(sql)
        @connection.query(sql)
      end

      # Every information_schema.statistics row for the current database,
      # ordered by table, then index (PRIMARY first), then column position.
      def fetch_indexes
        query(<<-EOL)
          select * from information_schema.statistics where
          table_schema = DATABASE()
          order by table_name, if(index_name = 'PRIMARY', '', index_name), seq_in_index
        EOL
      end

      # Array of { table_name, index_count } rows for the current database,
      # ignoring indexes named like 'fk_rails%', fewest indexes first.
      def count_indexes_by_table
        statement = <<-EOL
          select TABLE_NAME as table_name, count(*) as index_count
          from information_schema.statistics where table_schema = DATABASE()
          and seq_in_index = 1 and index_name not like 'fk_rails%'
          group by table_name order by index_count
        EOL

        query(statement).to_a
      end

      # Runs EXPLAIN FORMAT=JSON for sql and returns the parsed plan.
      def explain(sql)
        first_row = query("EXPLAIN FORMAT=JSON #{sql}").to_a.first
        JSON.parse(first_row['EXPLAIN'])
      end

      def mysql?
        true
      end
    end
  end
end
@@ -0,0 +1,91 @@
1
+ require 'pg'
2
+
3
module Shiba
  class Connection
    # Connection wrapper backed by a PG connection.
    class Postgres
      # h - Hash with 'database', 'host', 'username', 'password' and 'port'.
      #
      # Installs PG::BasicTypeMapForResults on the connection and turns
      # enable_seqscan off for the session.
      def initialize(h)
        @connection = PG.connect( dbname: h['database'], host: h['host'], user: h['username'], password: h['password'], port: h['port'] )
        @connection.type_map_for_results = PG::BasicTypeMapForResults.new(@connection)
        query("SET enable_seqscan = OFF")
      end

      # Runs sql on the underlying connection and returns its result.
      def query(sql)
        @connection.query(sql)
      end

      # One row per indexed column of every table in the "public" schema,
      # ordered by table, primary key first, then index name and column
      # position. Each row is a Hash carrying the selected columns plus the
      # 'non_unique' and 'cardinality' keys, and a primary key's index_name
      # is rewritten to "PRIMARY".
      def fetch_indexes
        sql = <<-EOL
          select
            t.relname as table_name,
            i.relname as index_name,
            a.attname as column_name,
            i.reltuples as numrows,
            ix.indisunique as is_unique,
            ix.indisprimary as is_primary,
            s.n_distinct as numdistinct
          from pg_namespace p
            join pg_class t on t.relnamespace = p.oid
            join pg_index ix on ix.indrelid = t.oid
            join pg_class i on i.oid = ix.indexrelid
            join pg_attribute a on a.attrelid = t.oid
            left join pg_stats s on s.tablename = t.relname
              AND s.attname = a.attname
              AND s.schemaname = p.nspname
          where
            p.nspname = 'public'
            and a.attnum = ANY(ix.indkey)
            and t.relkind = 'r'
          order by
            t.relname,
            ix.indisprimary desc,
            i.relname,
            array_position(ix.indkey, a.attnum)
        EOL
        # The schemaname condition above keeps statistics for a same-named
        # table in another schema from duplicating rows.

        #TODO: estimate multi-index column cardinality
        query(sql).to_a.map { |row| normalize_index_row(row) }
      end

      # Array of { table_name, index_count } rows for the "public" schema,
      # fewest indexes first.
      def count_indexes_by_table
        sql = <<-EOL
          select tablename as table_name, count(*) as index_count from pg_indexes where schemaname='public' group by 1 order by 2
        EOL
        query(sql).to_a
      end

      # Runs EXPLAIN (FORMAT JSON) for sql and returns the "QUERY PLAN" value
      # of the first result row.
      def explain(sql)
        rows = query("EXPLAIN (FORMAT JSON) #{sql}").to_a
        rows.first["QUERY PLAN"]
      end

      def mysql?
        false
      end

      private

      # TBD: do better than this, have them return something objecty
      #
      # Adds the 'non_unique' and 'cardinality' keys to a fetch_indexes row
      # (mutating and returning it).
      def normalize_index_row(row)
        if pg_true?(row['is_primary'])
          row['index_name'] = "PRIMARY"
          row['non_unique'] = 0
        elsif pg_true?(row['is_unique'])
          row['non_unique'] = 0
        else
          # previously left nil; 1 presumably mirrors the 0/1 values of MySQL's
          # information_schema.statistics.non_unique, whose name this key reuses
          row['non_unique'] = 1
        end

        row['cardinality'] = index_cardinality(row['numrows'], row['numdistinct'])
        row
      end

      # Translates pg_stats.n_distinct into an estimated distinct-value count.
      def index_cardinality(numrows, numdistinct)
        if numdistinct.nil?
          # meaning the table's empty.
          0
        elsif numdistinct == 0
          # numdistinct is 0 if there's rows in the table but all values are null
          1
        elsif numdistinct < 0
          # postgres talks about either cardinality or selectivity (depending. what's their heuristic?)
          # in the same way we do in the yaml file!
          # if less than zero, it's negative selectivity.
          -(numrows * numdistinct)
        else
          numdistinct
        end
      end

      # With BasicTypeMapForResults installed, boolean columns arrive as
      # true/false rather than "t"/"f", so comparing against "t" alone never
      # matched and primary keys were not renamed to "PRIMARY". Both forms are
      # accepted; "f" is no longer mistaken for a truthy value.
      def pg_true?(value)
        value == true || value == "t"
      end
    end
  end
end