shiba 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -22,6 +22,26 @@ module Shiba
22
22
  @installed = true
23
23
  end
24
24
 
25
+ def self.connection_options
26
+ cx = ActiveRecord::Base.connection.raw_connection
27
+ if cx.respond_to?(:query_options)
28
+ # mysql
29
+ c = cx.query_options.merge(server: 'mysql')
30
+ else
31
+ # postgres
32
+ c = { host: cx.host, database: cx.db, username: cx.user, password: cx.pass, port: cx.port, server: 'postgres' }
33
+ end
34
+
35
+ options = {
36
+ 'host' => c[:host],
37
+ 'database' => c[:database],
38
+ 'user' => c[:username],
39
+ 'password' => c[:password],
40
+ 'port' => c[:port],
41
+ 'server' => c[:server],
42
+ }
43
+ end
44
+
25
45
  protected
26
46
 
27
47
  def self.start_watcher
@@ -64,26 +84,8 @@ module Shiba
64
84
  end
65
85
 
66
86
  def self.database_args
67
- cx = ActiveRecord::Base.connection.raw_connection
68
- if cx.respond_to?(:query_options)
69
- # mysql
70
- c = cx.query_options.merge(server: 'mysql')
71
- else
72
- # postgres
73
- c = { host: cx.host, database: cx.db, username: cx.user, password: cx.pass, port: cx.port, server: 'postgres' }
74
- end
75
-
76
- options = {
77
- 'host': c[:host],
78
- 'database': c[:database],
79
- 'user': c[:username],
80
- 'password': c[:password],
81
- 'port': c[:port],
82
- 'server': c[:server]
83
- }
84
-
85
87
  # port can be a Fixnum
86
- options.reject { |k,v| v.nil? || v.respond_to?(:empty?) && v.empty? }.map { |k,v| "--#{k} #{v}" }.join(" ")
88
+ connection_options.reject { |k,v| v.nil? || v.respond_to?(:empty?) && v.empty? }.map { |k,v| "--#{k} #{v}" }.join(" ")
87
89
  end
88
90
 
89
91
  end
@@ -81,7 +81,7 @@ module Shiba
81
81
  end
82
82
  return nil unless explain
83
83
 
84
- if explain.other_paths.any?
84
+ if explain.severity != 'none' && explain.other_paths.any?
85
85
  paths = [explain] + explain.other_paths
86
86
  explain = paths.sort { |a, b| a.cost - b.cost }.first
87
87
  end
@@ -7,19 +7,25 @@ module Shiba
7
7
 
8
8
  # avoiding Rails dependency on the cli tools for now.
9
9
  # yanked from https://github.com/rails/rails/blob/v5.0.5/railties/lib/rails/application/configuration.rb
10
- def self.activerecord_configuration
11
- yaml = Pathname.new("config/database.yml")
10
+ def self.activerecord_configuration(config_path = "config/database.yml")
11
+ yaml = Pathname.new(config_path)
12
12
 
13
13
  config = if yaml && yaml.exist?
14
14
  require "yaml"
15
15
  require "erb"
16
16
  YAML.load(ERB.new(yaml.read).result) || {}
17
- elsif ENV['DATABASE_URL']
18
- # Value from ENV['DATABASE_URL'] is set to default database connection
19
- # by Active Record.
20
- {}
21
17
  end
22
18
 
19
+ env = ENV["RAILS_ENV"] || "test"
20
+ config = config[env]
21
+ adapter = config.delete("adapter")
22
+
23
+ if adapter == "mysql2"
24
+ config["server"] = "mysql"
25
+ else
26
+ config["server"] = adapter
27
+ end
28
+
23
29
  config
24
30
  rescue Psych::SyntaxError => e
25
31
  raise "YAML syntax error occurred while parsing #{yaml.to_s}. " \
@@ -1,5 +1,6 @@
1
1
  require 'mysql2'
2
2
  require 'json'
3
+ require 'shiba/parsers/mysql_select_fields'
3
4
 
4
5
  module Shiba
5
6
  class Connection
@@ -21,9 +22,69 @@ module Shiba
21
22
  @connection.query(sql)
22
23
  end
23
24
 
25
+ def tables
26
+ @connection.query("show tables").map { |r| r.values.first }
27
+ end
28
+
29
+ def each_column_size
30
+ tables.each do |t|
31
+ sql = <<-EOL
32
+ select * from information_schema.columns where table_schema = DATABASE()
33
+ and table_name = '#{t}'
34
+ EOL
35
+ columns = @connection.query(sql)
36
+ col_hash = Hash[columns.map { |c| [c['COLUMN_NAME'], c] }]
37
+ estimate_column_sizes(t, col_hash)
38
+
39
+ col_hash.each do |c, h|
40
+ yield(t, c, h['size'])
41
+ end
42
+ end
43
+ end
44
+
45
+ def estimate_column_sizes(table, hash)
46
+ columns_to_sample = []
47
+ hash.each do |name, row|
48
+ row['size'] = case row['DATA_TYPE']
49
+ when 'tinyint', 'year', 'enum', 'bit'
50
+ 1
51
+ when 'smallint'
52
+ 2
53
+ when 'mediumint', 'date', 'time'
54
+ 3
55
+ when 'int', 'decimal', 'float', 'timestamp'
56
+ 4
57
+ when 'bigint', 'datetime', 'double'
58
+ 8
59
+ else
60
+ columns_to_sample << name
61
+ nil
62
+ end
63
+ end
64
+
65
+ return unless columns_to_sample.any?
66
+
67
+ select_fields = columns_to_sample.map do |c|
68
+ "AVG(LENGTH(`#{c}`)) as `#{c}`"
69
+ end.join(', ')
70
+
71
+ res = @connection.query("select #{select_fields}, count(*) as cnt from ( select * from `#{table}` limit 10000 ) as v").first
72
+ if res['cnt'] == 0
73
+ # muggles, no data. impossible to know actual size of blobs/varchars, safer to err on side of 0
74
+ res.keys.each do |c|
75
+ hash[c] && hash[c]['size'] = 0
76
+ end
77
+ else
78
+ res.each do |k, v|
79
+ hash[k] && hash[k]['size'] = v.to_i
80
+ end
81
+ end
82
+
83
+ hash
84
+ end
85
+
24
86
  def analyze!
25
- @connection.query("show tables").each do |row|
26
- t = row.values.first
87
+ tables.each do |t|
27
88
  @connection.query("analyze table `#{t}`") rescue nil
28
89
  end
29
90
  end
@@ -41,7 +102,15 @@ module Shiba
41
102
 
42
103
  def explain(sql)
43
104
  rows = query("EXPLAIN FORMAT=JSON #{sql}").to_a
44
- JSON.parse(rows.first['EXPLAIN'])
105
+ explain = JSON.parse(rows.first['EXPLAIN'])
106
+ warnings = query("show warnings").to_a
107
+ [explain, parse_select_fields(warnings)]
108
+ end
109
+
110
+ def parse_select_fields(warnings)
111
+ normalized_sql = warnings.detect { |w| w["Code"] == 1003 }["Message"]
112
+
113
+ Parsers::MysqlSelectFields.new(normalized_sql).parse_fields
45
114
  end
46
115
 
47
116
  def mysql?
@@ -81,7 +81,10 @@ module Shiba
81
81
 
82
82
  def explain(sql)
83
83
  rows = query("EXPLAIN (FORMAT JSON) #{sql}").to_a
84
- rows.first["QUERY PLAN"]
84
+ [rows.first["QUERY PLAN"], {}]
85
+ end
86
+
87
+ def each_column_size
85
88
  end
86
89
 
87
90
  def mysql?
@@ -0,0 +1,165 @@
1
+ require 'shiba'
2
+ require 'shiba/activerecord_integration'
3
+ require 'shiba/configure'
4
+ require 'shiba/analyzer'
5
+ require 'shiba/table_stats'
6
+ require 'shiba/reviewer'
7
+
8
+ module Shiba
9
+ # Provides a 'shiba' command to analyze queries from the console.
10
+ # If required in IRB or Pry, the shiba command will automatically be available,
11
+ # as it's injected into those consoles at the bottom of this file.
12
+ #
13
+ # Example:
14
+ # require 'shiba/console'
15
+ #
16
+ # shiba User.all
17
+ # => <shiba results>
18
+ # shiba "select * from users"
19
+ # => <shiba results>
20
+ module Console
21
+
22
+ def shiba(query)
23
+ @command ||= Command.new(self)
24
+ @command.execute(query)
25
+ end
26
+
27
+ class ExplainRecord
28
+
29
+ def initialize(fields)
30
+ @fields = fields
31
+ end
32
+
33
+ def comments
34
+ # renderer expects json / key strings
35
+ json = JSON.parse(JSON.dump(@fields))
36
+ renderer.render(json)
37
+ end
38
+
39
+ def md5
40
+ @fields[:md5]
41
+ end
42
+
43
+ def severity
44
+ @fields[:severity]
45
+ end
46
+
47
+ def sql
48
+ @fields[:sql]
49
+ end
50
+
51
+ def time
52
+ @fields[:cost]
53
+ end
54
+
55
+ def raw_explain
56
+ @fields[:raw_explain]
57
+ end
58
+
59
+ def message
60
+ msg = "\n"
61
+ msg << "Severity: #{severity}\n"
62
+ msg << "----------------------------\n"
63
+ msg << comments
64
+ msg << "\n\n"
65
+ end
66
+
67
+ def help
68
+ "Available methods: #{self.class.public_instance_methods(false)}"
69
+ end
70
+
71
+ def inspect
72
+ "#{to_s}: '#{sql}'. Call the 'help' method on this object for more info."
73
+ end
74
+
75
+ protected
76
+
77
+ def renderer
78
+ @renderer ||= Review::CommentRenderer.new(tags)
79
+ end
80
+
81
+ def tags
82
+ @tags ||= YAML.load_file(Shiba::TEMPLATE_FILE)
83
+ end
84
+
85
+ end
86
+
87
+ class Command
88
+
89
+ def initialize(context)
90
+ @context = context
91
+ end
92
+
93
+ def execute(query)
94
+ if !valid_query?(query)
95
+ msg = "Query does not appear to be a valid relation or select sql string"
96
+ msg << "\n#{usage}"
97
+ puts msg
98
+ return
99
+ end
100
+
101
+ result = explain(query)
102
+ if result == nil
103
+ puts "Unable to analyze query, please check the SQL syntax for typos."
104
+ return
105
+ end
106
+
107
+ record = ExplainRecord.new(result)
108
+ puts record.message
109
+ record
110
+ end
111
+
112
+ private
113
+
114
+ def usage
115
+ "Examples:
116
+ shiba User.all
117
+ shiba \"select * from users\""
118
+ end
119
+
120
+ def valid_query?(query)
121
+ query.respond_to?(:to_sql) ||
122
+ query.respond_to?(:=~) && query =~ /\Aselect/i
123
+ end
124
+
125
+ def explain(query)
126
+ query = query.to_sql if query.respond_to?(:to_sql)
127
+ Shiba.configure(connection_options)
128
+ analyzer = Shiba::Analyzer.new(nil, null, stats, { 'sql' => query })
129
+ result = analyzer.analyze.first
130
+ end
131
+
132
+ def connection_options
133
+ case
134
+ when defined?(ActiveRecord)
135
+ ActiveRecordIntegration.connection_options
136
+ when File.exist?("config/database.yml")
137
+ Shiba::Configure.activerecord_configuration
138
+ when File.exist?("test/database.yml.example")
139
+ Shiba::Configure.activerecord_configuration("test/database.yml.example")
140
+ else
141
+ raise Shiba::Error.new("ActiveRecord is currently required to analyze queries from the console.")
142
+ end
143
+ end
144
+
145
+ def stats
146
+ @stats ||= Shiba::TableStats.new(Shiba.index_config, Shiba.connection, {})
147
+ end
148
+
149
+ def null
150
+ @null ||= File.open(File::NULL, "w")
151
+ end
152
+
153
+ def puts(message)
154
+ out = @context.respond_to?(:puts) ? @context : $stdout
155
+ out.puts(message)
156
+ end
157
+
158
+ end
159
+
160
+ end
161
+ end
162
+
163
+ if defined?(Pry) || defined?(IRB)
164
+ TOPLEVEL_BINDING.eval('self').extend Shiba::Console
165
+ end
data/lib/shiba/explain.rb CHANGED
@@ -12,6 +12,8 @@ module Shiba
12
12
  COST_PER_ROW_SORT = 1.0e-07
13
13
  COST_PER_ROW_RETURNED = 3.0e-05
14
14
 
15
+ COST_PER_KB_RETURNED = 0.0004
16
+
15
17
  include CheckSupport
16
18
  extend CheckSupport::ClassMethods
17
19
 
@@ -26,7 +28,8 @@ module Shiba
26
28
  end
27
29
 
28
30
  @options = options
29
- @explain_json = Shiba.connection.explain(@sql)
31
+
32
+ @explain_json, @select_fields = Shiba.connection.explain(@sql)
30
33
 
31
34
  if Shiba.connection.mysql?
32
35
  @rows = Shiba::Explain::MysqlExplain.new.transform_json(@explain_json['query_block'])
@@ -149,18 +152,30 @@ module Shiba
149
152
  end
150
153
  end
151
154
 
155
+ def select_row_size
156
+ size = 0
157
+ @select_fields.each do |table, fields|
158
+ fields.each do |f|
159
+ size += @stats.get_column_size(table, f) || 0
160
+ end
161
+ end
162
+ size
163
+ end
164
+
152
165
  def check_return_size
153
166
  if @query.limit
154
- return_size = [@query.limit, @result.result_size].min
167
+ result_size = [@query.limit, @result.result_size].min
155
168
  elsif @query.aggregation?
156
- return_size = 1
169
+ result_size = 1
157
170
  else
158
- return_size = @result.result_size
171
+ result_size = @result.result_size
159
172
  end
160
173
 
161
- cost = COST_PER_ROW_RETURNED * return_size
174
+ result_bytes = select_row_size * result_size
175
+ cost = (result_bytes / 1024.0) * COST_PER_KB_RETURNED
176
+
162
177
  @result.cost += cost
163
- @result.messages << { tag: "retsize", result_size: return_size, cost: cost }
178
+ @result.messages << { tag: "retsize", result_size: result_size, result_bytes: result_bytes, cost: cost }
164
179
  end
165
180
 
166
181
  def run_checks!
@@ -191,7 +206,7 @@ module Shiba
191
206
  def other_paths
192
207
  if Shiba.connection.mysql?
193
208
  @rows.map do |r|
194
- next [] unless r['possible_keys'] && r['key'].nil?
209
+ next [] unless r['possible_keys']
195
210
  possible = r['possible_keys'] - [r['key']]
196
211
  possible.map do |p|
197
212
  Explain.new(@query, @stats, force_key: p) rescue nil
data/lib/shiba/fuzzer.rb CHANGED
@@ -35,6 +35,11 @@ module Shiba
35
35
 
36
36
  stats.add_index_column(h['table_name'], h['index_name'], h['column_name'], h['cardinality'], h['non_unique'] == 0)
37
37
  end
38
+
39
+ connection.each_column_size do |table, column, size|
40
+ stats.set_column_size(table, column, size)
41
+ end
42
+
38
43
  stats
39
44
  end
40
45