shiba 0.1.2 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +13 -0
- data/.travis/my.cnf +3 -0
- data/Gemfile.lock +14 -1
- data/README.md +93 -30
- data/Rakefile +9 -1
- data/TODO +25 -7
- data/bin/check +0 -0
- data/bin/dump_stats +38 -0
- data/bin/explain +67 -28
- data/bin/shiba +4 -4
- data/lib/shiba.rb +3 -1
- data/lib/shiba/analyzer.rb +6 -5
- data/lib/shiba/backtrace.rb +56 -0
- data/lib/shiba/checker.rb +103 -0
- data/lib/shiba/configure.rb +28 -8
- data/lib/shiba/diff.rb +119 -0
- data/lib/shiba/explain.rb +149 -49
- data/lib/shiba/fuzzer.rb +77 -0
- data/lib/shiba/index.rb +8 -129
- data/lib/shiba/index_stats.rb +210 -0
- data/lib/shiba/output.rb +24 -18
- data/lib/shiba/output/tags.yaml +34 -13
- data/lib/shiba/query_watcher.rb +3 -46
- data/lib/shiba/railtie.rb +31 -8
- data/lib/shiba/table_stats.rb +34 -0
- data/lib/shiba/version.rb +1 -1
- data/shiba.gemspec +1 -0
- data/shiba.yml.example +4 -0
- data/web/main.css +32 -2
- data/web/results.html.erb +132 -58
- metadata +26 -6
- data/bin/analyze +0 -77
- data/bin/inspect +0 -0
- data/bin/parse +0 -0
- data/bin/watch.rb +0 -19
data/lib/shiba/fuzzer.rb
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
require 'shiba/index_stats'
|
2
|
+
|
3
|
+
module Shiba
  # Builds synthetic ("fuzzed") index statistics for the current database.
  #
  # The real index layout is read from information_schema.statistics, but the
  # table sizes and per-key row estimates are replaced with made-up values so
  # that queries can be analyzed against a small development database as if
  # it held a realistic amount of data.
  class Fuzzer
    # Row count assigned to tables ranked as "large" (many indexes).
    BIG_FUZZ_SIZE = 5_000
    # Row count assigned to every other table.
    SMALL_FUZZ_SIZE = 100

    attr_reader :connection

    # connection - an object responding to #query(sql) that yields one Hash
    #              per result row (column name => value).
    def initialize(connection)
      @connection = connection
      @index_stats = IndexStats.new
    end

    # Loads the schema's indexes and overwrites their statistics with guesses:
    # every table gets a fake row count, unique indexes are assumed to match
    # 1 row per key and non-unique indexes 2 rows per key.
    #
    # Returns the populated IndexStats.
    def fuzz!
      fetch_index!
      table_sizes = guess_table_sizes

      @index_stats.tables.each do |table_name, table|
        # Tables that only carry indexes excluded from the sizing query
        # (fk_rails%) are absent from table_sizes; treat them as small rather
        # than leaving the count nil.
        table.count = table_sizes.fetch(table_name, SMALL_FUZZ_SIZE)

        table.indexes.each_value do |index|
          rows = index.unique ? 1 : 2
          index.columns.each { |column| column.rows_per = rows }
        end
      end

      @index_stats
    end

    private

    # Reads every index column of the current schema into @index_stats.
    def fetch_index!
      records = connection.query("select * from information_schema.statistics where table_schema = DATABASE()")

      records.each do |record|
        # Column names come back upper-cased on some servers; normalize them.
        row = {}
        record.each { |key, value| row[key.to_s.downcase] = value }

        # NON_UNIQUE arrives as an Integer from type-casting drivers and as a
        # String from text-based ones; to_i accepts both.
        is_unique = row['non_unique'].to_i.zero?

        @index_stats.add_index_column(row['table_name'], row['index_name'], row['column_name'],
                                      row['cardinality'].to_i, is_unique)
      end
    end

    # Create fake table sizes based on the table's index count.
    # The more indexes, the bigger the table. Seems to rank tables fairly well.
    #
    # Returns a Hash of table name => fake row count. Empty when the schema
    # has no indexed tables.
    def guess_table_sizes
      index_count_query = "select TABLE_NAME as table_name, count(*) as index_count
        from information_schema.statistics where table_schema = DATABASE()
        and seq_in_index = 1 and index_name not like 'fk_rails%'
        group by table_name order by index_count"

      index_counts = connection.query(index_count_query).to_a
      return {} if index_counts.empty?

      # 90th table percentile based on number of indexes
      # round down so we don't blow up on small tables
      large_table_idx = (index_counts.size * 0.9).floor
      large_table_index_count = index_counts[large_table_idx]["index_count"].to_f

      index_counts.each_with_object({}) do |row, sizes|
        big = row["index_count"].to_f >= large_table_index_count
        sizes[row["table_name"]] = big ? BIG_FUZZ_SIZE : SMALL_FUZZ_SIZE
      end
    end
  end
end
|
data/lib/shiba/index.rb
CHANGED
@@ -1,6 +1,9 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
require 'yaml'
|
2
|
+
require 'pp'
|
3
|
+
require 'shiba/index_stats'
|
3
4
|
|
5
|
+
module Shiba
|
6
|
+
class Index
|
4
7
|
# Given the path to the information_schema.statistics output, returns index statistics keyed by table name.
|
5
8
|
# Examples:
|
6
9
|
# Exploring the schema:
|
@@ -12,140 +15,16 @@ module Shiba
|
|
12
15
|
# => {:table_schema=>"blog_test", :table_name=>"users", :non_unique=>"0", :column_name=>"id", :cardinality=>"2", :is_visible=>"YES", :"expression\n"=>"NULL\n"}
|
13
16
|
#
|
14
17
|
def self.parse(path)
|
18
|
+
stats = IndexStats.new
|
15
19
|
tables = {}
|
16
20
|
records = read(path)
|
17
21
|
headers = records.shift.map { |header| header.downcase }
|
18
22
|
records.each do |r|
|
19
23
|
h = Hash[headers.zip(r)]
|
20
24
|
h["cardinality"] = h["cardinality"].to_i
|
21
|
-
|
22
|
-
table.push(h)
|
23
|
-
end
|
24
|
-
tables
|
25
|
-
end
|
26
|
-
|
27
|
-
# Getting a row count for a table:
|
28
|
-
#
|
29
|
-
# schema_stats = Index.parse("./shiba/schema_stats.tsv")
|
30
|
-
# users_count = Index.count(:users, schema_stats)
|
31
|
-
# => 2
|
32
|
-
def self.count(table, schema)
|
33
|
-
return nil unless schema[table]
|
34
|
-
primary = schema[table].detect { |index| index['index_name'] == "PRIMARY" }
|
35
|
-
if primary.nil?
|
36
|
-
# find the highest cardinality of a unique index, if it exists
|
37
|
-
schema[table].map do |index|
|
38
|
-
if index['non_unique'].to_i == 0
|
39
|
-
index['cardinality']
|
40
|
-
else
|
41
|
-
nil
|
42
|
-
end
|
43
|
-
end.compact.max
|
44
|
-
else
|
45
|
-
primary['cardinality'].to_i
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
|
-
def self.fuzzed?(table, schema)
|
50
|
-
return nil unless schema[table]
|
51
|
-
schema[table].first['fuzzed']
|
52
|
-
end
|
53
|
-
|
54
|
-
def self.estimate_key(table, key, parts, schema)
|
55
|
-
table_count = count(table, schema)
|
56
|
-
return nil unless table_count
|
57
|
-
|
58
|
-
key_stat = schema[table].detect do |i|
|
59
|
-
i["index_name"] == key && i["column_name"] == parts.last
|
60
|
-
end
|
61
|
-
|
62
|
-
return nil unless key_stat
|
63
|
-
|
64
|
-
return 0 if key_stat['cardinality'] == 0
|
65
|
-
table_count / key_stat['cardinality']
|
66
|
-
end
|
67
|
-
|
68
|
-
def self.query(connection)
|
69
|
-
records = connection.query("select * from information_schema.statistics where table_schema = DATABASE()")
|
70
|
-
tables = {}
|
71
|
-
records.each do |h|
|
72
|
-
h.keys.each { |k| h[k.downcase] = h.delete(k) }
|
73
|
-
h["cardinality"] = h["cardinality"].to_i
|
74
|
-
table = tables[h['table_name']] ||= []
|
75
|
-
table.push(h)
|
76
|
-
end
|
77
|
-
tables
|
78
|
-
end
|
79
|
-
|
80
|
-
|
81
|
-
# Up the cardinality on our indexes.
|
82
|
-
# Non uniques have a little less cardinality.
|
83
|
-
def self.fuzz!(stats)
|
84
|
-
db = stats.values.first.first['table_schema']
|
85
|
-
table_sizes = self.guess_table_sizes(db)
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
stats.each do |table,indexes|
|
90
|
-
indexes.each do |idx|
|
91
|
-
idx['cardinality'] = table_sizes[table]
|
92
|
-
|
93
|
-
if idx['non_unique'] == 1
|
94
|
-
idx['cardinality'] = (idx['cardinality'] * 0.7).round
|
95
|
-
end
|
96
|
-
|
97
|
-
idx['fuzzed'] = true
|
98
|
-
end
|
99
|
-
end
|
100
|
-
end
|
101
|
-
|
102
|
-
MINIMUM_TABLE_SIZE = 500
|
103
|
-
|
104
|
-
# Approximate median size of the tables is less than 500.
|
105
|
-
def self.insufficient_stats?(stats)
|
106
|
-
if stats.length == 0
|
107
|
-
return true
|
108
|
-
end
|
109
|
-
|
110
|
-
# Calculate a rough median.
|
111
|
-
primary_keys = stats.map do |_,indexes|
|
112
|
-
indexes.detect { |idx| idx['index_name'] == 'PRIMARY' } || {}
|
25
|
+
stats.add_index_column(h['table_name'], h['index_name'], h['column_name'], h['cardinality'], h['non_unique'] == "0")
|
113
26
|
end
|
114
|
-
|
115
|
-
table_counts = primary_keys.map { |pk| pk['cardinality'].to_i }
|
116
|
-
median = table_counts[table_counts.size/2]
|
117
|
-
|
118
|
-
return median < MINIMUM_TABLE_SIZE
|
119
|
-
end
|
120
|
-
|
121
|
-
STANDARD_FUZZ_SIZE = 5_000
|
122
|
-
|
123
|
-
# Create fake table sizes based on the table's index count.
|
124
|
-
# The more indexes, the bigger the table. Seems to rank tables fairly well.
|
125
|
-
def self.guess_table_sizes(db)
|
126
|
-
db = Shiba.connection.escape(db)
|
127
|
-
index_count_query = "select TABLE_NAME as table_name, count(*) as index_count
|
128
|
-
from information_schema.statistics where table_schema = '#{db}'
|
129
|
-
and seq_in_index = 1 and index_name not like 'fk_rails%'
|
130
|
-
group by table_name order by index_count"
|
131
|
-
|
132
|
-
index_counts = Shiba.connection.query(index_count_query).to_a
|
133
|
-
|
134
|
-
# 80th table percentile based on number of indexes
|
135
|
-
large_table_idx = (index_counts.size * 0.8).round
|
136
|
-
large_table = index_counts[large_table_idx]
|
137
|
-
|
138
|
-
sizes = Hash[index_counts.map(&:values)]
|
139
|
-
|
140
|
-
sizes.each do |table_name, index_count|
|
141
|
-
if index_count == 0
|
142
|
-
index_count = 1
|
143
|
-
end
|
144
|
-
|
145
|
-
sizes[table_name] = STANDARD_FUZZ_SIZE * (index_count / large_table['index_count'].to_f)
|
146
|
-
end
|
147
|
-
|
148
|
-
sizes
|
27
|
+
stats
|
149
28
|
end
|
150
29
|
|
151
30
|
protected
|
data/lib/shiba/index_stats.rb
ADDED
@@ -0,0 +1,210 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
require 'active_support/core_ext/hash/keys'
|
3
|
+
|
4
|
+
module Shiba
  # In-memory model of index statistics: tables, their indexes, and for each
  # index column an estimate of how many rows a single key value matches
  # ("rows_per").
  #
  # Statistics can be loaded from a previously dumped Hash (see #to_yaml) or
  # accumulated row by row with #add_index_column.
  class IndexStats

    # tables - Hash of table name => { 'count' => Integer,
    #          'indexes' => { index name => { 'unique' => Boolean,
    #          'columns' => [{ 'column' => String, 'rows_per' => Integer or "N%" }] } } },
    #          i.e. the structure #to_yaml emits. Defaults to no tables.
    def initialize(tables = {})
      @tables = tables
      build_from_hash!
    end

    # True when at least one table is known.
    def any?
      @tables.any?
    end

    # A table: its name, row count (nil when unknown) and a Hash of
    # index name => Index.
    Table = Struct.new(:name, :count, :indexes) do
      # YAML serialization hook: emits 'count' and 'indexes' as a plain map.
      def encode_with(coder)
        if count.nil?
          # No unique keys, so no row count was recorded. Take the best guess:
          # the largest cardinality seen on any index column. This is assigned
          # before the map is built so the guess is actually emitted, and kept
          # on the table so the columns serialized next can compute ratios.
          cardinalities = indexes.values.flat_map { |index| index.columns.map(&:raw_cardinality) }
          self.count = cardinalities.compact.max
        end

        coder.map = { 'count' => count, 'indexes' => indexes }
        coder.tag = nil
      end

      # Returns the named index, creating it on first use.
      def build_index(index_name, is_unique)
        self.indexes[index_name] ||= Index.new(self, index_name, [], is_unique)
      end

      # Appends a column to the named index.
      #
      # rows_per    - known rows-per-key (Integer or "N%" String), or nil to
      #               derive it from cardinality.
      # cardinality - number of distinct values reported for the column, or nil.
      def add_index_column(index_name, column_name, rows_per, cardinality, is_unique)
        index = build_index(index_name, is_unique)
        index.columns << Column.new(column_name, index, rows_per, cardinality)

        # Derive the row count from unique indexes. For a composite unique
        # index only the last column's cardinality approaches the row count,
        # so keep the largest value seen instead of the first one.
        if is_unique && cardinality
          self.count = [count, cardinality].compact.max
        end
      end
    end

    # An index: owning Table, name, ordered Array of Column, uniqueness flag.
    Index = Struct.new(:table, :name, :columns, :unique) do
      # Appends a column whose rows_per will be derived from cardinality.
      def add_column(column_name, cardinality)
        columns << Column.new(column_name, self, nil, cardinality)
      end

      # YAML serialization hook: emits everything except the table back-reference.
      def encode_with(coder)
        coder.map = { 'name' => name, 'columns' => columns, 'unique' => unique }
        coder.tag = nil
      end
    end

    # One column of an index together with its selectivity estimate.
    class Column
      # column      - column name.
      # index       - owning Index.
      # rows_per    - Integer rows per key, "N%" String (percentage of the
      #               table), or nil when it must be derived.
      # cardinality - distinct-value count, or nil when unknown.
      def initialize(column, index, rows_per, cardinality)
        @column = column
        @index = index
        @rows_per = rows_per
        @cardinality = cardinality
      end

      attr_reader :column

      # Row count of the owning table (nil when unknown).
      def table_count
        @index.table.count
      end

      # Cardinality exactly as it was supplied.
      def raw_cardinality
        @cardinality
      end

      # Estimated number of rows matched by one value of this index column.
      #
      # Returns an Integer, or nil when there is not enough information
      # (unknown table size, or neither rows_per nor cardinality was given).
      # The computed value is memoized.
      def rows_per
        return @rows_per if @rows_per && @rows_per.is_a?(Integer)
        return nil if table_count.nil?

        if @rows_per.nil?
          @rows_per = rows_per_from_cardinality
        elsif @rows_per.is_a?(String)
          # "N%" - a percentage of the table.
          @rows_per = ((@rows_per.to_f / 100.0) * table_count.to_f).round
        end
        @rows_per
      end

      attr_writer :rows_per

      # YAML serialization hook. Emits the column name plus rows_per, written
      # as a percentage ("N%") when a key matches a large share of the table
      # and as an absolute row count otherwise.
      def encode_with(coder)
        coder.map = { 'column' => @column }
        coder.tag = nil

        rows = rows_per
        count = table_count

        if rows.nil? || count.nil?
          # Not enough information for a ratio: pass through whatever is known.
          known = rows.nil? ? @rows_per : rows
          coder.map['rows_per'] = known unless known.nil?
          return
        end

        count = 1 if count == 0
        ratio_per_item = rows / count.to_f

        if ratio_per_item > ratio_threshold(count)
          coder.map['rows_per'] = (ratio_per_item * 100).round.to_s + "%"
        else
          coder.map['rows_per'] = rows
        end
      end

      private

      # Derives rows-per-key from the table size and the column cardinality.
      def rows_per_from_cardinality
        return 1 if table_count == 0
        return nil if @cardinality.nil?
        # A zero cardinality on a non-empty table means the statistics are
        # stale; assume the worst case (every row matches).
        return table_count if @cardinality <= 0

        (table_count.to_f / @cardinality).round
      end

      # Share of the table above which rows_per is shown as a percentage.
      # The bigger the table, the smaller the share needed.
      def ratio_threshold(count)
        if count <= 10
          Float::INFINITY # always show a number
        elsif count <= 1000
          0.1
        elsif count <= 1_000_000
          0.01
        else
          0.001
        end
      end
    end

    # Replaces the raw Hash given to the constructor with Table/Index/Column
    # objects. Missing 'indexes' or 'columns' entries are treated as empty.
    def build_from_hash!
      @tables = @tables.map do |tbl_name, tbl_hash|
        table = Table.new(tbl_name, tbl_hash['count'], {})

        (tbl_hash['indexes'] || {}).each do |idx_name, idx_hash|
          (idx_hash['columns'] || []).each do |col_hash|
            table.add_index_column(idx_name, col_hash['column'], col_hash['rows_per'], nil, idx_hash['unique'])
          end
        end

        [tbl_name, table]
      end.to_h
    end

    attr_reader :tables

    # Row count of the named table, or nil when the table is unknown.
    def table_count(table)
      tbl = @tables[table]
      tbl.count if tbl
    end

    # The named Index of the named table, or nil when either is unknown.
    def fetch_index(table, name)
      tbl = @tables[table]
      return nil unless tbl

      tbl.indexes[name]
    end

    # Returns the named Table, creating an empty one on first use.
    def build_table(name)
      @tables[name] ||= Table.new(name, nil, {})
    end

    # Records one index column as read from the database statistics.
    def add_index_column(table, index_name, column_name, cardinality, is_unique)
      table = build_table(table)
      table.add_index_column(index_name, column_name, nil, cardinality, is_unique)
    end

    # Estimated rows matched when `key` is used up to and including the last
    # entry of `parts` (an Array of column names). Returns nil when the
    # table, index or column is unknown.
    def estimate_key(table_name, key, parts)
      index = fetch_index(table_name, key)
      return nil unless index

      index_part = index.columns.detect { |part| part.column == parts.last }
      return nil unless index_part

      index_part.rows_per
    end

    # Retained for interface compatibility; intentionally does nothing.
    #
    # The previous body was written for the old Hash-based statistics: it
    # called Column#delete and Table#rows, neither of which exists, so it
    # raised NoMethodError for any non-empty statistics and discarded its
    # result otherwise. The number-vs-percentage conversion is performed by
    # Column#encode_with when the statistics are serialized.
    def convert_rows_per_to_output!
      nil
    end

    # Serializes all tables to YAML (see the encode_with hooks above).
    def to_yaml
      @tables.to_yaml
    end

    private

    # Yields (table, column) for every column of every index.
    def each_index_column
      @tables.each do |_name, table|
        table.indexes.each do |_index_name, index|
          index.columns.each do |column|
            yield(table, column)
          end
        end
      end
    end
  end
end
|
data/lib/shiba/output.rb
CHANGED
@@ -1,15 +1,14 @@
|
|
1
1
|
require 'yaml'
|
2
2
|
require 'json'
|
3
3
|
require 'fileutils'
|
4
|
+
require 'tmpdir'
|
4
5
|
require 'erb'
|
5
6
|
|
6
7
|
module Shiba
|
7
8
|
class Output
|
8
|
-
|
9
|
-
|
10
|
-
WEB_PATH = File.dirname(__FILE__) + "/../../web"
|
9
|
+
WEB_PATH = File.join(File.dirname(__FILE__), "..", "..", "web")
|
11
10
|
def self.tags
|
12
|
-
@tags ||= YAML.load_file(File.dirname(__FILE__)
|
11
|
+
@tags ||= YAML.load_file(File.join(File.dirname(__FILE__), "output", "tags.yaml"))
|
13
12
|
end
|
14
13
|
|
15
14
|
def initialize(queries, options = {})
|
@@ -17,10 +16,22 @@ module Shiba
|
|
17
16
|
@options = options
|
18
17
|
end
|
19
18
|
|
19
|
+
def default_filename
|
20
|
+
@default_filename ||= "shiba_results-#{Time.now.to_i}.html"
|
21
|
+
end
|
22
|
+
|
23
|
+
def logdir
|
24
|
+
File.join(Dir.pwd, "log")
|
25
|
+
end
|
26
|
+
|
20
27
|
def output_path
|
21
|
-
|
22
|
-
|
23
|
-
|
28
|
+
return @options['output'] if @options['output']
|
29
|
+
if File.exist?(logdir)
|
30
|
+
FileUtils.mkdir_p(File.join(logdir, "shiba_results"))
|
31
|
+
File.join(Dir.pwd, "log", "shiba_results", default_filename)
|
32
|
+
else
|
33
|
+
File.join(Dir.tmpdir, default_filename)
|
34
|
+
end
|
24
35
|
end
|
25
36
|
|
26
37
|
def js_path
|
@@ -38,28 +49,23 @@ module Shiba
|
|
38
49
|
end
|
39
50
|
|
40
51
|
def make_web!
|
41
|
-
|
42
|
-
|
43
|
-
js = Dir.glob(WEB_PATH + "/dist/*.js").map { |f| File.basename(f) }
|
44
|
-
js.each do |f|
|
45
|
-
system("cp #{WEB_PATH}/dist/#{f} #{js_path}")
|
46
|
-
end
|
52
|
+
js = Dir.glob(File.join(WEB_PATH, "dist", "*.js"))
|
53
|
+
css = Dir.glob(File.join(WEB_PATH, "*.css"))
|
47
54
|
|
48
55
|
data = {
|
49
56
|
js: js,
|
57
|
+
css: css,
|
50
58
|
queries: @queries,
|
51
59
|
tags: self.class.tags,
|
52
60
|
url: remote_url
|
53
61
|
}
|
54
62
|
|
55
|
-
|
56
|
-
|
57
|
-
erb = ERB.new(File.read(WEB_PATH + "/../web/results.html.erb"))
|
58
|
-
File.open(output_path + "/results.html", "w+") do |f|
|
63
|
+
erb = ERB.new(File.read(File.join(WEB_PATH, "..", "web", "results.html.erb")))
|
64
|
+
File.open(output_path, "w+") do |f|
|
59
65
|
f.write(erb.result(binding))
|
60
66
|
end
|
61
67
|
|
62
|
-
|
68
|
+
output_path
|
63
69
|
end
|
64
70
|
end
|
65
71
|
end
|