shiba 0.5.0 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -22,6 +22,26 @@ module Shiba
22
22
  @installed = true
23
23
  end
24
24
 
25
# Builds the database settings Shiba needs from the live ActiveRecord
# connection.
#
# The raw driver connection is duck-typed: an object that responds to
# +query_options+ is treated as mysql and its option hash is used as-is;
# anything else is treated as postgres and read through its
# host/db/user/pass/port readers.
#
# Returns a Hash with the string keys 'host', 'database', 'user',
# 'password', 'port' and 'server'. A value the driver does not supply is nil.
def self.connection_options
  raw = ActiveRecord::Base.connection.raw_connection

  settings =
    if raw.respond_to?(:query_options)
      # mysql
      raw.query_options.merge(server: 'mysql')
    else
      # postgres
      { host: raw.host, database: raw.db, username: raw.user,
        password: raw.pass, port: raw.port, server: 'postgres' }
    end

  # Returned directly; the previous version assigned this hash to a local
  # that was never read.
  {
    'host'     => settings[:host],
    'database' => settings[:database],
    'user'     => settings[:username],
    'password' => settings[:password],
    'port'     => settings[:port],
    'server'   => settings[:server],
  }
end
44
+
25
45
  protected
26
46
 
27
47
  def self.start_watcher
@@ -64,26 +84,8 @@ module Shiba
64
84
  end
65
85
 
66
86
  def self.database_args
67
- cx = ActiveRecord::Base.connection.raw_connection
68
- if cx.respond_to?(:query_options)
69
- # mysql
70
- c = cx.query_options.merge(server: 'mysql')
71
- else
72
- # postgres
73
- c = { host: cx.host, database: cx.db, username: cx.user, password: cx.pass, port: cx.port, server: 'postgres' }
74
- end
75
-
76
- options = {
77
- 'host': c[:host],
78
- 'database': c[:database],
79
- 'user': c[:username],
80
- 'password': c[:password],
81
- 'port': c[:port],
82
- 'server': c[:server]
83
- }
84
-
85
87
  # port can be a Fixnum
86
- options.reject { |k,v| v.nil? || v.respond_to?(:empty?) && v.empty? }.map { |k,v| "--#{k} #{v}" }.join(" ")
88
+ connection_options.reject { |k,v| v.nil? || v.respond_to?(:empty?) && v.empty? }.map { |k,v| "--#{k} #{v}" }.join(" ")
87
89
  end
88
90
 
89
91
  end
@@ -81,7 +81,7 @@ module Shiba
81
81
  end
82
82
  return nil unless explain
83
83
 
84
- if explain.other_paths.any?
84
+ if explain.severity != 'none' && explain.other_paths.any?
85
85
  paths = [explain] + explain.other_paths
86
86
  explain = paths.sort { |a, b| a.cost - b.cost }.first
87
87
  end
@@ -7,19 +7,25 @@ module Shiba
7
7
 
8
8
  # avoiding Rails dependency on the cli tools for now.
9
9
  # yanked from https://github.com/rails/rails/blob/v5.0.5/railties/lib/rails/application/configuration.rb
10
- def self.activerecord_configuration
11
- yaml = Pathname.new("config/database.yml")
10
+ def self.activerecord_configuration(config_path = "config/database.yml")
11
+ yaml = Pathname.new(config_path)
12
12
 
13
13
  config = if yaml && yaml.exist?
14
14
  require "yaml"
15
15
  require "erb"
16
16
  YAML.load(ERB.new(yaml.read).result) || {}
17
- elsif ENV['DATABASE_URL']
18
- # Value from ENV['DATABASE_URL'] is set to default database connection
19
- # by Active Record.
20
- {}
21
17
  end
22
18
 
19
+ env = ENV["RAILS_ENV"] || "test"
20
+ config = config[env]
21
+ adapter = config.delete("adapter")
22
+
23
+ if adapter == "mysql2"
24
+ config["server"] = "mysql"
25
+ else
26
+ config["server"] = adapter
27
+ end
28
+
23
29
  config
24
30
  rescue Psych::SyntaxError => e
25
31
  raise "YAML syntax error occurred while parsing #{yaml.to_s}. " \
@@ -1,5 +1,6 @@
1
1
  require 'mysql2'
2
2
  require 'json'
3
+ require 'shiba/parsers/mysql_select_fields'
3
4
 
4
5
  module Shiba
5
6
  class Connection
@@ -21,9 +22,69 @@ module Shiba
21
22
  @connection.query(sql)
22
23
  end
23
24
 
25
# Table names reported by "show tables": the first value of each result row.
def tables
  rows = @connection.query("show tables")
  rows.map { |row| row.values.first }
end
28
+
29
# Yields (table, column, size) for every column of every table, where size
# is the value estimate_column_sizes stored under the row's 'size' key.
#
# One information_schema.columns query is issued per table.
#
# Returns an Enumerator when called without a block. Previously a block-less
# call ran the queries and then raised LocalJumpError at the first yield.
def each_column_size
  return enum_for(:each_column_size) unless block_given?

  tables.each do |t|
    # NOTE(review): the table name is interpolated unescaped; it comes from
    # "show tables" rather than from user input.
    sql = <<-EOL
      select * from information_schema.columns where table_schema = DATABASE()
      and table_name = '#{t}'
    EOL
    columns = @connection.query(sql)
    col_hash = Hash[columns.map { |c| [c['COLUMN_NAME'], c] }]
    # Mutates col_hash in place, adding a 'size' entry to every row.
    estimate_column_sizes(t, col_hash)

    col_hash.each do |c, h|
      yield(t, c, h['size'])
    end
  end
end
44
+
45
# Stores an estimated per-value byte size under row['size'] for every column.
#
# table - name of the table the columns belong to
# hash  - Hash of column name => column row (must carry 'DATA_TYPE');
#         mutated in place
#
# Fixed-width types get a constant size. Every other type is sampled: the
# average LENGTH() over at most 10000 rows, truncated to an integer, or 0
# when the sampled table has no rows.
#
# Returns +hash+ on every path (the early exit used to return nil).
def estimate_column_sizes(table, hash)
  columns_to_sample = []
  hash.each do |name, row|
    row['size'] = case row['DATA_TYPE']
    when 'tinyint', 'year', 'enum', 'bit'
      1
    when 'smallint'
      2
    when 'mediumint', 'date', 'time'
      3
    when 'int', 'decimal', 'float', 'timestamp'
      4
    when 'bigint', 'datetime', 'double'
      8
    else
      columns_to_sample << name
      nil
    end
  end

  return hash if columns_to_sample.empty?

  # Alias the sampled averages by position (c0, c1, ...) rather than by column
  # name. With name-based aliases a column called `cnt` collided with the
  # row-count alias, and writing every result key back into +hash+ overwrote
  # the size of a fixed-width `cnt` column with the row count.
  select_fields = columns_to_sample.each_with_index.map do |column, i|
    "AVG(LENGTH(`#{column}`)) as c#{i}"
  end.join(', ')

  res = @connection.query("select #{select_fields}, count(*) as cnt from ( select * from `#{table}` limit 10000 ) as v").first
  # muggles, no data. impossible to know actual size of blobs/varchars, safer to err on side of 0
  no_data = res['cnt'] == 0

  # Only the sampled columns are updated; AVG() of all-NULL values is nil,
  # which to_i turns into 0.
  columns_to_sample.each_with_index do |column, i|
    hash[column]['size'] = no_data ? 0 : res["c#{i}"].to_i
  end

  hash
end
85
+
24
86
  def analyze!
25
- @connection.query("show tables").each do |row|
26
- t = row.values.first
87
+ tables.each do |t|
27
88
  @connection.query("analyze table `#{t}`") rescue nil
28
89
  end
29
90
  end
@@ -41,7 +102,15 @@ module Shiba
41
102
 
42
103
  def explain(sql)
43
104
  rows = query("EXPLAIN FORMAT=JSON #{sql}").to_a
44
- JSON.parse(rows.first['EXPLAIN'])
105
+ explain = JSON.parse(rows.first['EXPLAIN'])
106
+ warnings = query("show warnings").to_a
107
+ [explain, parse_select_fields(warnings)]
108
+ end
109
+
110
# Extracts the selected fields from the rows of a "show warnings" result.
#
# warnings - Array of row Hashes with "Code" and "Message" keys
#
# The message of the row with code 1003 is handed to
# Parsers::MysqlSelectFields as the normalized SQL.
#
# Returns the parser's result, or an empty Hash when no code-1003 row is
# present (previously that case raised NoMethodError on nil).
def parse_select_fields(warnings)
  note = warnings.detect { |w| w["Code"] == 1003 }
  return {} unless note

  Parsers::MysqlSelectFields.new(note["Message"]).parse_fields
end
46
115
 
47
116
  def mysql?
@@ -81,7 +81,10 @@ module Shiba
81
81
 
82
82
  def explain(sql)
83
83
  rows = query("EXPLAIN (FORMAT JSON) #{sql}").to_a
84
- rows.first["QUERY PLAN"]
84
+ [rows.first["QUERY PLAN"], {}]
85
+ end
86
+
87
# No column sizes are reported by this connection: a block may be passed,
# but it is never called and the method returns nil.
def each_column_size
  nil
end
86
89
 
87
90
  def mysql?
@@ -0,0 +1,165 @@
1
+ require 'shiba'
2
+ require 'shiba/activerecord_integration'
3
+ require 'shiba/configure'
4
+ require 'shiba/analyzer'
5
+ require 'shiba/table_stats'
6
+ require 'shiba/reviewer'
7
+
8
+ module Shiba
9
+ # Provides a 'shiba' command to analyze queries from the console.
10
+ # If required in IRB or Pry, the shiba command will automatically be available,
11
+ # as it's injected into those consoles at the bottom of this file.
12
+ #
13
+ # Example:
14
+ # require 'shiba/console'
15
+ #
16
+ # shiba User.all
17
+ # => <shiba results>
18
+ # shiba "select * from users"
19
+ # => <shiba results>
20
+ module Console
21
+
22
# Analyzes +query+ through a Command that is created on first use and then
# reused for later calls on the same object.
def shiba(query)
  command = (@command ||= Command.new(self))
  command.execute(query)
end
26
+
27
# Read-only wrapper around one analysis result: a Hash with symbol keys such
# as :md5, :severity, :sql, :cost and :raw_explain.
class ExplainRecord

  def initialize(fields)
    @fields = fields
  end

  # Review comments rendered for this result.
  def comments
    # renderer expects json / key strings, hence the JSON round trip
    string_keyed = JSON.parse(JSON.dump(@fields))
    renderer.render(string_keyed)
  end

  def md5
    @fields[:md5]
  end

  def severity
    @fields[:severity]
  end

  def sql
    @fields[:sql]
  end

  # Stored under the :cost key.
  def time
    @fields[:cost]
  end

  def raw_explain
    @fields[:raw_explain]
  end

  # Printable report: the severity line, a divider, then the comments.
  def message
    report = "\nSeverity: #{severity}\n"
    report << "----------------------------\n" << comments << "\n\n"
  end

  # Lists the public methods defined directly on this class.
  def help
    "Available methods: #{self.class.public_instance_methods(false)}"
  end

  def inspect
    "#{to_s}: '#{sql}'. Call the 'help' method on this object for more info."
  end

  protected

  # Comment renderer, built once from the template tags.
  def renderer
    @renderer ||= Review::CommentRenderer.new(tags)
  end

  # Template tags loaded once from Shiba::TEMPLATE_FILE.
  def tags
    @tags ||= YAML.load_file(Shiba::TEMPLATE_FILE)
  end

end
86
+
87
# Runs one console analysis: validates the query, explains it through
# Shiba::Analyzer and prints the resulting report.
class Command

  # context - object the command was created from; output goes to it when
  #           it publicly responds to #puts, otherwise to $stdout.
  def initialize(context)
    @context = context
  end

  # Analyzes +query+ (an object responding to #to_sql, or a select SQL
  # string) and prints the report.
  #
  # Returns the ExplainRecord, or nil after printing an explanation when the
  # query is not valid or could not be analyzed.
  def execute(query)
    unless valid_query?(query)
      msg = "Query does not appear to be a valid relation or select sql string"
      msg << "\n#{usage}"
      puts msg
      return
    end

    result = explain(query)
    if result.nil?
      puts "Unable to analyze query, please check the SQL syntax for typos."
      return
    end

    record = ExplainRecord.new(result)
    puts record.message
    record
  end

  private

  def usage
    "Examples:\n  shiba User.all\n  shiba \"select * from users\""
  end

  # True for anything that can produce SQL via #to_sql, and for string-like
  # input that starts with "select" (case-insensitive). Leading whitespace is
  # tolerated so heredoc / multi-line SQL is accepted; non-string objects
  # such as symbols or nil are rejected.
  def valid_query?(query)
    return true if query.respond_to?(:to_sql)
    return false unless query.respond_to?(:to_str)

    !(query.to_str =~ /\A\s*select/i).nil?
  end

  # Configures Shiba from the available connection settings and returns the
  # first analysis result for the query (nil when there is none).
  def explain(query)
    # Strip string input so the analyzer sees SQL starting at "select".
    sql = query.respond_to?(:to_sql) ? query.to_sql : query.to_str.strip
    Shiba.configure(connection_options)
    analyzer = Shiba::Analyzer.new(nil, null, stats, { 'sql' => sql })
    analyzer.analyze.first
  end

  # Connection settings, in order of preference: the ActiveRecord
  # connection, config/database.yml, then test/database.yml.example.
  #
  # Raises Shiba::Error when none of them is available.
  def connection_options
    if defined?(ActiveRecord)
      ActiveRecordIntegration.connection_options
    elsif File.exist?("config/database.yml")
      Shiba::Configure.activerecord_configuration
    elsif File.exist?("test/database.yml.example")
      Shiba::Configure.activerecord_configuration("test/database.yml.example")
    else
      raise Shiba::Error, "ActiveRecord is currently required to analyze queries from the console."
    end
  end

  def stats
    @stats ||= Shiba::TableStats.new(Shiba.index_config, Shiba.connection, {})
  end

  # Write handle on the null device, opened once and passed to the analyzer.
  def null
    @null ||= File.open(File::NULL, "w")
  end

  # Kernel#puts is private, so plain objects fail the respond_to? check and
  # output falls back to $stdout.
  def puts(message)
    out = @context.respond_to?(:puts) ? @context : $stdout
    out.puts(message)
  end

end
159
+
160
+ end
161
+ end
162
+
163
# Make the `shiba` command available at the console prompt by extending the
# top-level object, but only when Pry or IRB is loaded.
TOPLEVEL_BINDING.eval('self').extend(Shiba::Console) if defined?(Pry) || defined?(IRB)
data/lib/shiba/explain.rb CHANGED
@@ -12,6 +12,8 @@ module Shiba
12
12
  COST_PER_ROW_SORT = 1.0e-07
13
13
  COST_PER_ROW_RETURNED = 3.0e-05
14
14
 
15
+ COST_PER_KB_RETURNED = 0.0004
16
+
15
17
  include CheckSupport
16
18
  extend CheckSupport::ClassMethods
17
19
 
@@ -26,7 +28,8 @@ module Shiba
26
28
  end
27
29
 
28
30
  @options = options
29
- @explain_json = Shiba.connection.explain(@sql)
31
+
32
+ @explain_json, @select_fields = Shiba.connection.explain(@sql)
30
33
 
31
34
  if Shiba.connection.mysql?
32
35
  @rows = Shiba::Explain::MysqlExplain.new.transform_json(@explain_json['query_block'])
@@ -149,18 +152,30 @@ module Shiba
149
152
  end
150
153
  end
151
154
 
155
# Sum of the column sizes of every selected field, across all tables in
# @select_fields. A field with no known size counts as 0.
def select_row_size
  @select_fields.reduce(0) do |total, (table, fields)|
    fields.reduce(total) do |sum, field|
      sum + (@stats.get_column_size(table, field) || 0)
    end
  end
end
164
+
152
165
  def check_return_size
153
166
  if @query.limit
154
- return_size = [@query.limit, @result.result_size].min
167
+ result_size = [@query.limit, @result.result_size].min
155
168
  elsif @query.aggregation?
156
- return_size = 1
169
+ result_size = 1
157
170
  else
158
- return_size = @result.result_size
171
+ result_size = @result.result_size
159
172
  end
160
173
 
161
- cost = COST_PER_ROW_RETURNED * return_size
174
+ result_bytes = select_row_size * result_size
175
+ cost = (result_bytes / 1024.0) * COST_PER_KB_RETURNED
176
+
162
177
  @result.cost += cost
163
- @result.messages << { tag: "retsize", result_size: return_size, cost: cost }
178
+ @result.messages << { tag: "retsize", result_size: result_size, result_bytes: result_bytes, cost: cost }
164
179
  end
165
180
 
166
181
  def run_checks!
@@ -191,7 +206,7 @@ module Shiba
191
206
  def other_paths
192
207
  if Shiba.connection.mysql?
193
208
  @rows.map do |r|
194
- next [] unless r['possible_keys'] && r['key'].nil?
209
+ next [] unless r['possible_keys']
195
210
  possible = r['possible_keys'] - [r['key']]
196
211
  possible.map do |p|
197
212
  Explain.new(@query, @stats, force_key: p) rescue nil
data/lib/shiba/fuzzer.rb CHANGED
@@ -35,6 +35,11 @@ module Shiba
35
35
 
36
36
  stats.add_index_column(h['table_name'], h['index_name'], h['column_name'], h['cardinality'], h['non_unique'] == 0)
37
37
  end
38
+
39
+ connection.each_column_size do |table, column, size|
40
+ stats.set_column_size(table, column, size)
41
+ end
42
+
38
43
  stats
39
44
  end
40
45