shiba 0.2.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,13 @@
1
1
  require 'shiba/query_watcher'
2
2
  require 'active_support/notifications'
3
3
  require 'active_support/lazy_load_hooks'
4
+ require 'shiba/configure'
4
5
 
5
6
  module Shiba
6
7
  # Integrates ActiveRecord with the Query Watcher by setting up the query log path, and the
7
8
  # connection options for the explain command, which it runs when the process exits.
8
9
  #
9
- # SHIBA_OUT=<log path> and SHIBA_DEBUG=true environment variables may be set.
10
+ # SHIBA_OUT and SHIBA_DEBUG=true environment variables may be set.
10
11
  class ActiveRecordIntegration
11
12
 
12
13
  attr_reader :path, :watcher
@@ -24,12 +25,11 @@ module Shiba
24
25
  protected
25
26
 
26
27
  def self.start_watcher
28
+ path = log_path
27
29
  if ENV['SHIBA_DEBUG']
28
- $stderr.puts("starting shiba watcher")
30
+ $stderr.puts("starting shiba watcher, outputting to #{path}")
29
31
  end
30
32
 
31
- path = ENV['SHIBA_OUT'] || make_tmp_path
32
-
33
33
  file = File.open(path, 'a')
34
34
  watcher = QueryWatcher.new(file)
35
35
 
@@ -40,14 +40,20 @@ module Shiba
40
40
  $stderr.puts(e.message, e.backtrace.join("\n"))
41
41
  end
42
42
 
43
- def self.make_tmp_path
44
- "/tmp/shiba-query.log-#{Time.now.to_i}"
43
+ def self.log_path
44
+ name = ENV["SHIBA_OUT"] || "query.log-#{Time.now.to_i}"
45
+ File.join(Shiba.path, name)
45
46
  end
46
47
 
47
48
  def self.run_explain(file, path)
48
49
  file.close
49
50
  puts ""
51
+
50
52
  cmd = "shiba explain #{database_args} --file #{path}"
53
+ if Shiba::Configure.ci?
54
+ cmd << " --json #{File.join(Shiba.path, 'ci.json')}"
55
+ end
56
+
51
57
  if ENV['SHIBA_DEBUG']
52
58
  $stderr.puts("running:")
53
59
  $stderr.puts(cmd)
@@ -56,16 +62,27 @@ module Shiba
56
62
  end
57
63
 
58
64
  def self.database_args
59
- c = ActiveRecord::Base.connection.raw_connection.query_options
65
+ cx = ActiveRecord::Base.connection.raw_connection
66
+ if cx.respond_to?(:query_options)
67
+ # mysql
68
+ c = cx.query_options.merge(server: 'mysql')
69
+ else
70
+ # postgres
71
+ c = { host: cx.host, database: cx.db, username: cx.user, password: cx.pass, port: cx.port, server: 'postgres' }
72
+ end
73
+
60
74
  options = {
61
- 'host': c[:host],
62
- 'database': c[:database],
63
- 'user': c[:username],
64
- 'password': c[:password]
75
+ 'host': c[:host],
76
+ 'database': c[:database],
77
+ 'user': c[:username],
78
+ 'password': c[:password],
79
+ 'port': c[:port],
80
+ 'server': c[:server]
65
81
  }
66
82
 
67
- options.reject { |k,v| v.nil? }.map { |k,v| "--#{k} #{v}" }.join(" ")
83
+ # port can be a Fixnum
84
+ options.reject { |k,v| v.nil? || v.respond_to?(:empty?) && v.empty? }.map { |k,v| "--#{k} #{v}" }.join(" ")
68
85
  end
69
86
 
70
87
  end
71
- end
88
+ end
data/lib/shiba/checker.rb CHANGED
@@ -5,7 +5,11 @@ require 'shiba/diff'
5
5
  require 'shiba/backtrace'
6
6
 
7
7
  module Shiba
8
+ # Given an explain log and a diff, returns any explain logs
9
+ # that appear to be caused by the diff.
8
10
  class Checker
11
+ MAGIC_COST = 100
12
+
9
13
  Result = Struct.new(:status, :message, :problems)
10
14
 
11
15
  attr_reader :options
@@ -14,6 +18,8 @@ module Shiba
14
18
  @options = options
15
19
  end
16
20
 
21
+ # Returns a Result object with a status, message, and any problem queries detected.
22
+ # Query problem format is [ [ "path:lineno", explain ]... ]
17
23
  def run(log)
18
24
  msg = nil
19
25
 
@@ -21,9 +27,9 @@ module Shiba
21
27
  puts cmd
22
28
  end
23
29
 
24
- if changes.empty?
30
+ if changed_files.empty?
25
31
  if options['verbose']
26
- msg = "No changes found in git"
32
+ msg = "No changes found. Are you sure you specified the correct branch?"
27
33
  end
28
34
  return Result.new(:pass, msg)
29
35
  end
@@ -31,15 +37,27 @@ module Shiba
31
37
  explains = select_lines_with_changed_files(log)
32
38
  problems = explains.select { |explain| explain["cost"] && explain["cost"] > MAGIC_COST }
33
39
 
34
- problems.select! do |problem|
35
- backtrace_has_updated_line?(problem["backtrace"], updated_lines)
40
+
41
+ if options["verbose"]
42
+ puts problems
43
+ puts "Updated lines: #{updated_lines}"
36
44
  end
37
45
 
38
46
  if problems.empty?
39
- if options['verbose']
40
- msg = "No problems found"
41
- end
47
+ msg = "No problems found caused by the diff"
48
+ return Result.new(:pass, msg)
49
+ end
50
+
51
+ problems.map! do |problem|
52
+ line = updated_line_from_backtrace(problem["backtrace"], updated_lines)
53
+ next if line.nil?
54
+
55
+ [ line, problem ]
56
+ end
57
+ problems.compact!
42
58
 
59
+ if problems.empty?
60
+ msg = "No problems found caused by the diff"
43
61
  return Result.new(:pass, msg)
44
62
  end
45
63
 
@@ -48,45 +66,91 @@ module Shiba
48
66
 
49
67
  protected
50
68
 
51
- def backtrace_has_updated_line?(backtrace, updates)
52
- backtrace.any? do |bl|
53
- updates.any? do |path, lines|
69
+ def updated_line_from_backtrace(backtrace, updates)
70
+ backtrace.each do |bl|
71
+ updates.each do |path, lines|
54
72
  next if !bl.start_with?(path)
55
73
  bl =~ /:(\d+):/
56
- lines.include?($1.to_i)
74
+ next if !lines.include?($1.to_i)
75
+
76
+ return "#{path}:#{$1}"
57
77
  end
58
78
  end
79
+
80
+ return nil
59
81
  end
60
82
 
61
83
  def select_lines_with_changed_files(log)
62
- patterns = changes.split("\n").map { |path| "-e #{path}" }.join(" ")
63
- json_lines = `grep #{log} #{patterns}`
84
+ patterns = changed_files.map { |path| "-e #{path}" }.join(" ")
85
+ cmd = "grep #{log} #{patterns}"
86
+ $stderr.puts cmd if options["verbose"]
87
+
88
+ json_lines = `#{cmd}`
64
89
  json_lines.each_line.map { |line| JSON.parse(line) }
65
90
  end
66
91
 
67
- def changes
68
- @changes ||= begin
69
- result = `git diff#{cmd} --name-only --diff-filter=d`
70
- if $?.exitstatus != 0
71
- error("Failed to read changes", $?.exitstatus)
72
- end
73
-
74
- result
92
+ def changed_files
93
+ @changed_files ||= begin
94
+ options['diff'] ? file_diff_names : git_diff_names
75
95
  end
76
96
  end
77
97
 
78
98
  def updated_lines
79
99
  return @updated_lines if @updated_lines
80
100
 
81
- Open3.popen3("git diff#{cmd} --unified=0 --diff-filter=d") {|_,o,_,_|
82
- @updated_lines = Shiba::Diff.new(o).updated_lines
83
- }
101
+
102
+ out = options['diff'] ? file_diff_lines : git_diff_lines
103
+ @updated_lines = Shiba::Diff.new(out).updated_lines
104
+
84
105
 
85
106
  @updated_lines.map! do |path, lines|
86
107
  [ Shiba::Backtrace.clean!(path), lines ]
87
108
  end
88
109
  end
89
110
 
111
# Opens the diff file named by options['diff'] for reading and returns the
# File handle. The handle is returned still open; nothing in this method
# closes it, so the caller owns it.
def file_diff_lines
  diff_path = options['diff']
  File.open(diff_path)
end
114
+
115
# Starts `git diff` (zero context lines, deleted files filtered out) for the
# revision arguments produced by #cmd and returns the IO connected to the
# command's standard output. The caller reads the diff text from that IO.
#
# When the verbose option is set, the command line is echoed to $stderr first.
def git_diff_lines
  run = "git diff#{cmd} --unified=0 --diff-filter=d"

  # The verbose flag is stored under the string key "verbose"; the symbol key
  # is still honoured for callers that pass symbol-keyed options.
  $stderr.puts run if options['verbose'] || options[:verbose]

  stdin, out, _err, _wait_thread = Open3.popen3(run)
  # `git diff` never reads from stdin, so release that pipe immediately
  # instead of leaving it open for the lifetime of the process.
  stdin.close
  out
end
124
+
125
# Collects the names of the changed files from the unified diff file at
# options['diff']. Only "+++ b/<path>" header lines are considered.
#
# For a diff file containing
#
#   index ade9b24..661d522 100644
#   --- a/test/app/app.rb
#   +++ b/test/app/app.rb
#   @@ -24,4 +24,4 @@ ActiveRecord::Base...
#    org = Organization.create!(name: 'test')
#
#   file_diff_names
#   => ["test/app/app.rb"]
def file_diff_names
  file_name_pattern = /^\+\+\+ b\/(.*?)$/

  # The block form of File.open closes the handle once the names are collected.
  File.open(options['diff']) do |diff|
    # String#[] with a capture index yields the path, or nil for other lines.
    diff.each_line.map { |line| line[file_name_pattern, 1] }.compact
  end
end
138
+
139
# Runs `git diff --name-only` (deleted files filtered out) for the revision
# arguments produced by #cmd and returns the changed file names as an Array
# of Strings, one per output line.
#
# When the verbose option is set, the command line is echoed to $stderr first.
#
# Raises Shiba::Error if git exits unsuccessfully; the command's output is
# written to $stderr before raising.
def git_diff_names
  run = "git diff#{cmd} --name-only --diff-filter=d"

  # The verbose flag is stored under the string key "verbose"; the symbol key
  # is still honoured for callers that pass symbol-keyed options.
  $stderr.puts run if options['verbose'] || options[:verbose]

  result = `#{run}`
  unless $?.success?
    $stderr.puts result
    raise Shiba::Error.new "Failed to read changes"
  end

  result.split("\n")
end
153
+
90
154
  def cmd
91
155
  cmd = case
92
156
  when options["staged"]
@@ -94,7 +158,7 @@ module Shiba
94
158
  when options["unstaged"]
95
159
  ""
96
160
  else
97
- commit = " HEAD"
161
+ commit = " origin/HEAD"
98
162
  commit << "...#{options["branch"]}" if options["branch"]
99
163
  commit
100
164
  end
@@ -1,5 +1,7 @@
1
1
  require 'pathname'
2
2
  require 'pp'
3
+ require 'optionparser'
4
+
3
5
  module Shiba
4
6
  module Configure
5
7
 
@@ -27,6 +29,10 @@ module Shiba
27
29
  raise e, "Cannot load `#{path}`:\n#{e.message}", e.backtrace
28
30
  end
29
31
 
32
# Reports whether the process appears to be running under continuous
# integration, based on the CI and CONTINUOUS_INTEGRATION environment
# variables. Always returns true or false.
#
# A variable that is unset, blank, "false" or "0" does not count: any String
# is truthy in Ruby, so without this check `CI=false` would switch CI mode on.
def self.ci?
  %w[CI CONTINUOUS_INTEGRATION].any? do |name|
    value = ENV[name].to_s.strip.downcase
    !['', 'false', '0'].include?(value)
  end
end
35
+
30
36
  # loosely based on https://dev.mysql.com/doc/refman/8.0/en/option-files.html
31
37
  def self.mysql_config_path
32
38
  paths = [ File.join(Dir.home, '.mylogin.cnf'), File.join(Dir.home, '.my.cnf') ]
@@ -53,10 +59,14 @@ module Shiba
53
59
  end
54
60
  end
55
61
 
56
- def self.make_options_parser(options)
57
- parser = OptionParser.new do |opts|
62
+ def self.make_options_parser(options, only_basics = false)
63
+ OptionParser.new do |opts|
58
64
  # note that the key to the hash needs to stay the same as the
59
65
  # option name since we re-pass them
66
+ opts.on("-s","--server SERVER_TYPE", "mysql|postgres") do |s|
67
+ options["server"] = s
68
+ end
69
+
60
70
  opts.on("-h","--host HOST", "sql host") do |h|
61
71
  options["host"] = h
62
72
  end
@@ -73,6 +83,10 @@ module Shiba
73
83
  options["password"] = p
74
84
  end
75
85
 
86
+ opts.on("-P","--port PORT", "server port") do |p|
87
+ options["port"] = p
88
+ end
89
+
76
90
  opts.on("-c","--config FILE", "location of shiba.yml") do |f|
77
91
  options["config"] = f
78
92
  end
@@ -81,6 +95,12 @@ module Shiba
81
95
  options["index"] = i.to_i
82
96
  end
83
97
 
98
+ opts.on("--default-extras-file", "The option file to read mysql configuration from") do |f|
99
+ options["default_file"] = f
100
+ end
101
+
102
+ next if only_basics
103
+
84
104
  opts.on("-l", "--limit NUM", "stop after processing NUM queries") do |l|
85
105
  options["limit"] = l.to_i
86
106
  end
@@ -114,9 +134,6 @@ module Shiba
114
134
  options["default_group"] = f
115
135
  end
116
136
 
117
- opts.on("--default-extras-file", "The option file to read mysql configuration from") do |f|
118
- options["default_file"] = f
119
- end
120
137
  end
121
138
  end
122
139
  end
@@ -0,0 +1,25 @@
1
module Shiba
  # Factory for the server-specific connection wrappers
  # (Shiba::Connection::Mysql and Shiba::Connection::Postgres).
  class Connection
    # Default server ports, used to guess the server type when none is given.
    DEFAULT_PORT_SERVERS = { 3306 => 'mysql', 5432 => 'postgres' }.freeze

    # Builds the connection wrapper matching the requested server.
    #
    # hash - string-keyed connection options. 'server' selects the wrapper
    #        ('mysql' or 'postgres'); when it is missing, the type is guessed
    #        from 'port' (3306 => mysql, 5432 => postgres). The whole hash is
    #        handed to the wrapper's constructor.
    #
    # Returns a Shiba::Connection::Mysql or Shiba::Connection::Postgres.
    # Raises RuntimeError when the server type cannot be determined, or when
    # 'server' names something other than mysql or postgres. Previously any
    # unrecognised value was silently treated as postgres.
    def self.build(hash)
      server_type = hash['server'] || server_type_for_port(hash['port'])

      case server_type
      when 'mysql'
        # Loaded lazily so only the driver gem that is actually used is required.
        require 'shiba/connection/mysql'
        Shiba::Connection::Mysql.new(hash)
      when 'postgres'
        require 'shiba/connection/postgres'
        Shiba::Connection::Postgres.new(hash)
      else
        raise "unknown server type #{server_type.inspect}! please pass --server mysql or --server postgres"
      end
    end

    # Maps a port (Integer, numeric String or nil) to a server type name.
    def self.server_type_for_port(port)
      DEFAULT_PORT_SERVERS.fetch(port.to_i) do
        raise "couldn't determine server type! please pass --server"
      end
    end
    private_class_method :server_type_for_port
  end
end
@@ -0,0 +1,45 @@
1
+ require 'mysql2'
2
+ require 'json'
3
+
4
module Shiba
  class Connection
    # Wrapper around a Mysql2::Client offering the query helpers used by
    # shiba: raw queries, index metadata and JSON explain plans.
    class Mysql
      # hash - connection options, passed to Mysql2::Client.new unchanged.
      def initialize(hash)
        @connection = Mysql2::Client.new(hash)
      end

      # Runs +sql+ on the underlying client and returns its result.
      def query(sql)
        @connection.query(sql)
      end

      # Returns every information_schema.statistics row for the current
      # database, ordered by table, then index (PRIMARY first), then column
      # position within the index.
      def fetch_indexes
        statistics_sql = <<~EOL
          select * from information_schema.statistics where
          table_schema = DATABASE()
          order by table_name, if(index_name = 'PRIMARY', '', index_name), seq_in_index
        EOL
        @connection.query(statistics_sql)
      end

      # Returns an Array of rows (table_name, index_count) for the current
      # database, ordered by index_count. Indexes whose name starts with
      # "fk_rails" are left out of the count.
      def count_indexes_by_table
        count_sql = <<~EOL
          select TABLE_NAME as table_name, count(*) as index_count
          from information_schema.statistics where table_schema = DATABASE()
          and seq_in_index = 1 and index_name not like 'fk_rails%'
          group by table_name order by index_count
        EOL

        counts = @connection.query(count_sql)
        counts.to_a
      end

      # Runs EXPLAIN FORMAT=JSON for +sql+ and returns the parsed plan.
      def explain(sql)
        plan_rows = query("EXPLAIN FORMAT=JSON #{sql}").to_a
        plan_json = plan_rows.first['EXPLAIN']
        JSON.parse(plan_json)
      end

      # This wrapper always talks to MySQL.
      def mysql?
        true
      end
    end
  end
end
@@ -0,0 +1,91 @@
1
+ require 'pg'
2
+
3
module Shiba
  class Connection
    # Wrapper around a PG connection offering the query helpers used by shiba:
    # raw queries, index metadata and JSON explain plans.
    class Postgres
      # h - string-keyed connection options: 'database', 'host', 'username',
      #     'password' and 'port'.
      #
      # Results are decoded through PG::BasicTypeMapForResults, and sequential
      # scans are disabled for the session.
      def initialize(h)
        @connection = PG.connect( dbname: h['database'], host: h['host'], user: h['username'], password: h['password'], port: h['port'] )
        @connection.type_map_for_results = PG::BasicTypeMapForResults.new(@connection)
        query("SET enable_seqscan = OFF")
      end

      # Runs +sql+ on the underlying connection and returns its result.
      def query(sql)
        @connection.query(sql)
      end

      # Returns one Hash per indexed column of every ordinary table in the
      # "public" schema, ordered by table, primary key first, index name and
      # column position. Each row is augmented with 'cardinality', and for
      # unique indexes 'non_unique' => 0; primary keys are renamed "PRIMARY".
      def fetch_indexes
        # pg_stats is matched on schema as well as table and column, so that a
        # same-named table in another schema cannot duplicate rows.
        result = query(<<~EOL)
          select
          t.relname as table_name,
          i.relname as index_name,
          a.attname as column_name,
          i.reltuples as numrows,
          ix.indisunique as is_unique,
          ix.indisprimary as is_primary,
          s.n_distinct as numdistinct
          from pg_namespace p
          join pg_class t on t.relnamespace = p.oid
          join pg_index ix on ix.indrelid = t.oid
          join pg_class i on i.oid = ix.indexrelid
          join pg_attribute a on a.attrelid = t.oid
          left join pg_stats s on s.schemaname = p.nspname
          AND s.tablename = t.relname
          AND s.attname = a.attname
          where
          p.nspname = 'public'
          and a.attnum = ANY(ix.indkey)
          and t.relkind = 'r'
          order by
          t.relname,
          ix.indisprimary desc,
          i.relname,
          array_position(ix.indkey, a.attnum)
        EOL

        #TODO: estimate multi-index column cardinality
        result.to_a.map { |row| normalize_index_row(row) }
      end

      # Returns an Array of rows (table_name, index_count) for the "public"
      # schema, ordered by index_count.
      def count_indexes_by_table
        sql = <<~EOL
          select tablename as table_name, count(*) as index_count from pg_indexes where schemaname='public' group by 1 order by 2
        EOL
        @connection.query(sql).to_a
      end

      # Runs EXPLAIN (FORMAT JSON) for +sql+ and returns the "QUERY PLAN"
      # value of the first result row.
      def explain(sql)
        rows = query("EXPLAIN (FORMAT JSON) #{sql}").to_a
        rows.first["QUERY PLAN"]
      end

      # This wrapper never talks to MySQL.
      def mysql?
        false
      end

      private

      # Adds the derived keys to one index row (mutating it) and returns it.
      # TBD: do better than this, have them return something objecty
      def normalize_index_row(row)
        if pg_true?(row['is_primary'])
          row['index_name'] = "PRIMARY"
          row['non_unique'] = 0
        elsif pg_true?(row['is_unique'])
          row['non_unique'] = 0
        end

        row['cardinality'] = index_cardinality(row['numrows'], row['numdistinct'])
        row
      end

      # True for a Postgres boolean in either representation. The result type
      # map installed in #initialize decodes booleans to true/false, so
      # comparing only against the text form "t" never matched; the text form
      # is still accepted in case a row arrives undecoded.
      def pg_true?(value)
        value == true || value == 't'
      end

      # Converts the n_distinct statistic into a cardinality estimate.
      def index_cardinality(numrows, numdistinct)
        if numdistinct.nil?
          # meaning the table's empty.
          0
        elsif numdistinct == 0
          # numdistinct is 0 if there's rows in the table but all values are null
          1
        elsif numdistinct < 0
          # a negative value is a (negated) fraction of the row count rather
          # than an absolute number of distinct values.
          -(numrows * numdistinct)
        else
          numdistinct
        end
      end
    end
  end
end