shiba 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +13 -0
- data/.travis/my.cnf +3 -0
- data/Gemfile.lock +14 -1
- data/README.md +93 -30
- data/Rakefile +9 -1
- data/TODO +25 -7
- data/bin/check +0 -0
- data/bin/dump_stats +38 -0
- data/bin/explain +67 -28
- data/bin/shiba +4 -4
- data/lib/shiba.rb +3 -1
- data/lib/shiba/analyzer.rb +6 -5
- data/lib/shiba/backtrace.rb +56 -0
- data/lib/shiba/checker.rb +103 -0
- data/lib/shiba/configure.rb +28 -8
- data/lib/shiba/diff.rb +119 -0
- data/lib/shiba/explain.rb +149 -49
- data/lib/shiba/fuzzer.rb +77 -0
- data/lib/shiba/index.rb +8 -129
- data/lib/shiba/index_stats.rb +210 -0
- data/lib/shiba/output.rb +24 -18
- data/lib/shiba/output/tags.yaml +34 -13
- data/lib/shiba/query_watcher.rb +3 -46
- data/lib/shiba/railtie.rb +31 -8
- data/lib/shiba/table_stats.rb +34 -0
- data/lib/shiba/version.rb +1 -1
- data/shiba.gemspec +1 -0
- data/shiba.yml.example +4 -0
- data/web/main.css +32 -2
- data/web/results.html.erb +132 -58
- metadata +26 -6
- data/bin/analyze +0 -77
- data/bin/inspect +0 -0
- data/bin/parse +0 -0
- data/bin/watch.rb +0 -19
data/lib/shiba/fuzzer.rb
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
require 'shiba/index_stats'
|
2
|
+
|
3
|
+
module Shiba
  # Produces synthetic ("fuzzed") index statistics for the connected database.
  #
  # Index definitions are read from information_schema.statistics. Table row
  # counts and per-key row estimates are then replaced with made-up values:
  # tables with many indexes are assumed to be big, everything else small.
  class Fuzzer
    # Row count given to tables at or above the 90th percentile by index count.
    BIG_FUZZ_SIZE = 5_000
    # Row count given to every other table.
    SMALL_FUZZ_SIZE = 100

    # connection - object responding to #query(sql) whose result enumerates
    #              one Hash per row.
    def initialize(connection)
      @connection = connection
      @index_stats = IndexStats.new
    end

    attr_reader :connection

    # Loads the index definitions, then overwrites every table's row count
    # with a guessed size and every index column's rows-per-key estimate with
    # 1 (unique index) or 2 (non-unique index).
    #
    # Returns the populated IndexStats.
    def fuzz!
      fetch_index!
      table_sizes = guess_table_sizes

      @index_stats.tables.each do |table_name, table|
        # Tables whose only indexes are excluded from the size query (names
        # like 'fk_rails%') have no guess; treat them as small instead of
        # leaving the row count nil.
        table.count = table_sizes.fetch(table_name, SMALL_FUZZ_SIZE)

        table.indexes.each_value do |index|
          rows_per = index.unique ? 1 : 2
          index.columns.each { |column| column.rows_per = rows_per }
        end
      end

      @index_stats
    end

    private

    # Reads every index column of the current schema into @index_stats.
    def fetch_index!
      records = connection.query("select * from information_schema.statistics where table_schema = DATABASE()")

      records.each do |record|
        # Column names may come back upper-cased; normalise them to lower case.
        row = record.each_with_object({}) { |(key, value), out| out[key.to_s.downcase] = value }

        # NON_UNIQUE may arrive as an Integer or as a String depending on the
        # driver, so compare numerically rather than against the string "0".
        is_unique = row['non_unique'].to_i == 0

        @index_stats.add_index_column(
          row['table_name'], row['index_name'], row['column_name'],
          row['cardinality'].to_i, is_unique
        )
      end
    end

    # Create fake table sizes based on the table's index count.
    # The more indexes, the bigger the table. Seems to rank tables fairly well.
    #
    # Returns a Hash of table name => BIG_FUZZ_SIZE or SMALL_FUZZ_SIZE
    # (empty when the schema has no countable indexes).
    def guess_table_sizes
      index_count_query = "select TABLE_NAME as table_name, count(*) as index_count
        from information_schema.statistics where table_schema = DATABASE()
        and seq_in_index = 1 and index_name not like 'fk_rails%'
        group by table_name order by index_count"

      index_counts = connection.query(index_count_query).to_a
      return {} if index_counts.empty?

      # 90th table percentile based on number of indexes
      # round down so we don't blow up on small tables
      large_table_idx = (index_counts.size * 0.9).floor
      large_table_index_count = index_counts[large_table_idx]["index_count"].to_f

      index_counts.each_with_object({}) do |row, sizes|
        big = row["index_count"].to_f >= large_table_index_count
        sizes[row["table_name"]] = big ? BIG_FUZZ_SIZE : SMALL_FUZZ_SIZE
      end
    end
  end
end
|
data/lib/shiba/index.rb
CHANGED
@@ -1,6 +1,9 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
require 'yaml'
|
2
|
+
require 'pp'
|
3
|
+
require 'shiba/index_stats'
|
3
4
|
|
5
|
+
module Shiba
|
6
|
+
class Index
|
4
7
|
# Given the path to the information_schema.statistics output, returns index statistics keyed by table name.
|
5
8
|
# Examples:
|
6
9
|
# Exploring the schema:
|
@@ -12,140 +15,16 @@ module Shiba
|
|
12
15
|
# => {:table_schema=>"blog_test", :table_name=>"users", :non_unique=>"0", :column_name=>"id", :cardinality=>"2", :is_visible=>"YES", :"expression\n"=>"NULL\n"}
|
13
16
|
#
|
14
17
|
def self.parse(path)
|
18
|
+
stats = IndexStats.new
|
15
19
|
tables = {}
|
16
20
|
records = read(path)
|
17
21
|
headers = records.shift.map { |header| header.downcase }
|
18
22
|
records.each do |r|
|
19
23
|
h = Hash[headers.zip(r)]
|
20
24
|
h["cardinality"] = h["cardinality"].to_i
|
21
|
-
|
22
|
-
table.push(h)
|
23
|
-
end
|
24
|
-
tables
|
25
|
-
end
|
26
|
-
|
27
|
-
# Getting a row count for a table:
|
28
|
-
#
|
29
|
-
# schema_stats = Index.parse("./shiba/schema_stats.tsv")
|
30
|
-
# users_count = Index.count(:users, schema_stats)
|
31
|
-
# => 2
|
32
|
-
def self.count(table, schema)
|
33
|
-
return nil unless schema[table]
|
34
|
-
primary = schema[table].detect { |index| index['index_name'] == "PRIMARY" }
|
35
|
-
if primary.nil?
|
36
|
-
# find the highest cardinality of a unique index, if it exists
|
37
|
-
schema[table].map do |index|
|
38
|
-
if index['non_unique'].to_i == 0
|
39
|
-
index['cardinality']
|
40
|
-
else
|
41
|
-
nil
|
42
|
-
end
|
43
|
-
end.compact.max
|
44
|
-
else
|
45
|
-
primary['cardinality'].to_i
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
|
-
def self.fuzzed?(table, schema)
|
50
|
-
return nil unless schema[table]
|
51
|
-
schema[table].first['fuzzed']
|
52
|
-
end
|
53
|
-
|
54
|
-
def self.estimate_key(table, key, parts, schema)
|
55
|
-
table_count = count(table, schema)
|
56
|
-
return nil unless table_count
|
57
|
-
|
58
|
-
key_stat = schema[table].detect do |i|
|
59
|
-
i["index_name"] == key && i["column_name"] == parts.last
|
60
|
-
end
|
61
|
-
|
62
|
-
return nil unless key_stat
|
63
|
-
|
64
|
-
return 0 if key_stat['cardinality'] == 0
|
65
|
-
table_count / key_stat['cardinality']
|
66
|
-
end
|
67
|
-
|
68
|
-
def self.query(connection)
|
69
|
-
records = connection.query("select * from information_schema.statistics where table_schema = DATABASE()")
|
70
|
-
tables = {}
|
71
|
-
records.each do |h|
|
72
|
-
h.keys.each { |k| h[k.downcase] = h.delete(k) }
|
73
|
-
h["cardinality"] = h["cardinality"].to_i
|
74
|
-
table = tables[h['table_name']] ||= []
|
75
|
-
table.push(h)
|
76
|
-
end
|
77
|
-
tables
|
78
|
-
end
|
79
|
-
|
80
|
-
|
81
|
-
# Up the cardinality on our indexes.
|
82
|
-
# Non uniques have a little less cardinality.
|
83
|
-
def self.fuzz!(stats)
|
84
|
-
db = stats.values.first.first['table_schema']
|
85
|
-
table_sizes = self.guess_table_sizes(db)
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
stats.each do |table,indexes|
|
90
|
-
indexes.each do |idx|
|
91
|
-
idx['cardinality'] = table_sizes[table]
|
92
|
-
|
93
|
-
if idx['non_unique'] == 1
|
94
|
-
idx['cardinality'] = (idx['cardinality'] * 0.7).round
|
95
|
-
end
|
96
|
-
|
97
|
-
idx['fuzzed'] = true
|
98
|
-
end
|
99
|
-
end
|
100
|
-
end
|
101
|
-
|
102
|
-
MINIMUM_TABLE_SIZE = 500
|
103
|
-
|
104
|
-
# Approximate median size of the tables is less than 500.
|
105
|
-
def self.insufficient_stats?(stats)
|
106
|
-
if stats.length == 0
|
107
|
-
return true
|
108
|
-
end
|
109
|
-
|
110
|
-
# Calculate a rough median.
|
111
|
-
primary_keys = stats.map do |_,indexes|
|
112
|
-
indexes.detect { |idx| idx['index_name'] == 'PRIMARY' } || {}
|
25
|
+
stats.add_index_column(h['table_name'], h['index_name'], h['column_name'], h['cardinality'], h['non_unique'] == "0")
|
113
26
|
end
|
114
|
-
|
115
|
-
table_counts = primary_keys.map { |pk| pk['cardinality'].to_i }
|
116
|
-
median = table_counts[table_counts.size/2]
|
117
|
-
|
118
|
-
return median < MINIMUM_TABLE_SIZE
|
119
|
-
end
|
120
|
-
|
121
|
-
STANDARD_FUZZ_SIZE = 5_000
|
122
|
-
|
123
|
-
# Create fake table sizes based on the table's index count.
|
124
|
-
# The more indexes, the bigger the table. Seems to rank tables fairly well.
|
125
|
-
def self.guess_table_sizes(db)
|
126
|
-
db = Shiba.connection.escape(db)
|
127
|
-
index_count_query = "select TABLE_NAME as table_name, count(*) as index_count
|
128
|
-
from information_schema.statistics where table_schema = '#{db}'
|
129
|
-
and seq_in_index = 1 and index_name not like 'fk_rails%'
|
130
|
-
group by table_name order by index_count"
|
131
|
-
|
132
|
-
index_counts = Shiba.connection.query(index_count_query).to_a
|
133
|
-
|
134
|
-
# 80th table percentile based on number of indexes
|
135
|
-
large_table_idx = (index_counts.size * 0.8).round
|
136
|
-
large_table = index_counts[large_table_idx]
|
137
|
-
|
138
|
-
sizes = Hash[index_counts.map(&:values)]
|
139
|
-
|
140
|
-
sizes.each do |table_name, index_count|
|
141
|
-
if index_count == 0
|
142
|
-
index_count = 1
|
143
|
-
end
|
144
|
-
|
145
|
-
sizes[table_name] = STANDARD_FUZZ_SIZE * (index_count / large_table['index_count'].to_f)
|
146
|
-
end
|
147
|
-
|
148
|
-
sizes
|
27
|
+
stats
|
149
28
|
end
|
150
29
|
|
151
30
|
protected
|
@@ -0,0 +1,210 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
require 'active_support/core_ext/hash/keys'
|
3
|
+
|
4
|
+
module Shiba
  # In-memory model of per-table index statistics.
  #
  # Holds a Hash of table name => Table; each Table owns a Hash of
  # index name => Index; each Index owns an ordered list of Column entries.
  # The structure can be built incrementally (#add_index_column), rebuilt from
  # a previously dumped Hash (#initialize), and dumped with #to_yaml.
  class IndexStats

    # tables - Hash of the shape produced by loading #to_yaml output:
    #          { table_name => { 'count' => n,
    #                            'indexes' => { index_name => { 'unique' => bool,
    #                                                           'columns' => [{ 'column' => name, 'rows_per' => v }] } } } }
    def initialize(tables = {})
      @tables = tables
      build_from_hash!
    end

    # True when at least one table has been recorded.
    def any?
      @tables.any?
    end

    # One table: its name, row count (may be nil when unknown) and indexes.
    Table = Struct.new(:name, :count, :indexes) do
      # YAML serialisation hook: emits 'count' and 'indexes' as an untagged map.
      def encode_with(coder)
        if count.nil?
          # uuuugly. No unique keys. we'll take our best guess.
          # The guess has to be made BEFORE the map is built, otherwise the
          # dumped 'count' stays nil.
          self.count = indexes.values.flat_map { |index| index.columns.map(&:raw_cardinality) }.compact.max
        end

        coder.map = to_h.stringify_keys
        coder.map.delete('name')
        coder.tag = nil
      end

      # Returns the Index named index_name, creating it on first use.
      def build_index(index_name, is_unique)
        indexes[index_name] ||= Index.new(self, index_name, [], is_unique)
      end

      # Appends a column to the named index. When the index is unique and the
      # table has no row count yet, the column's cardinality becomes the count.
      def add_index_column(index_name, column_name, rows_per, cardinality, is_unique)
        index = build_index(index_name, is_unique)
        index.columns << Column.new(column_name, index, rows_per, cardinality)

        # set row count from unique index
        self.count = cardinality if is_unique && !count
      end
    end

    # One index: owning table, name, ordered columns and uniqueness flag.
    Index = Struct.new(:table, :name, :columns, :unique) do
      # Appends a column whose rows-per-key is derived lazily from cardinality.
      def add_column(column_name, cardinality)
        # Column.new takes (column, index, rows_per, cardinality).
        columns << Column.new(column_name, self, nil, cardinality)
      end

      # YAML serialisation hook: emits everything but the back-reference to
      # the owning table, as an untagged map.
      def encode_with(coder)
        coder.map = to_h.stringify_keys
        coder.map.delete('table')
        coder.tag = nil
      end
    end

    # One column of an index together with its selectivity estimate.
    class Column
      # column      - column name
      # index       - owning Index
      # rows_per    - Integer rows per key, a percentage String such as "5%",
      #               or nil to derive it from cardinality
      # cardinality - distinct-value count for the column, or nil
      def initialize(column, index, rows_per, cardinality)
        @column = column
        @index = index
        @rows_per = rows_per
        @cardinality = cardinality
      end

      attr_reader :column
      attr_writer :rows_per

      # Row count of the owning table (may be nil).
      def table_count
        @index.table.count
      end

      def raw_cardinality
        @cardinality
      end

      # Estimated number of rows matched by a single key value.
      #
      # An Integer value is returned as is. Otherwise the estimate is resolved
      # against the table's row count and memoised; nil is returned when
      # there is not enough information to resolve it.
      def rows_per
        return @rows_per if @rows_per.is_a?(Integer)

        count = table_count
        return nil if count.nil?

        if @rows_per.nil?
          if count == 0
            @rows_per = 1
          elsif @cardinality.nil?
            # nothing to derive an estimate from
            return nil
          elsif @cardinality.to_f == 0.0
            # no distinct values recorded; treat like an empty table rather
            # than dividing by zero
            @rows_per = 1
          else
            # float division so that rounding is to the nearest row
            @rows_per = (count / @cardinality.to_f).round
          end
        elsif @rows_per.is_a?(String)
          # percentage of the table, e.g. "5%"
          @rows_per = ((@rows_per.to_f / 100.0) * count.to_f).round
        end

        @rows_per
      end

      # YAML serialisation hook: emits 'column' and 'rows_per' as an untagged
      # map. rows_per is written as a percentage String when a single key
      # covers more than a size-dependent share of the table, otherwise as
      # the plain row estimate.
      def encode_with(coder)
        coder.map = { 'column' => @column }
        coder.tag = nil

        count = table_count
        rows = rows_per

        if count.nil? || rows.nil?
          # Not enough data to compute a ratio; keep whatever we have.
          coder.map['rows_per'] = rows.nil? ? @rows_per : rows
          return
        end

        count = 1 if count == 0
        ratio_per_item = rows / count.to_f

        # the bigger the table, the sooner we switch to percentages
        ratio_threshold =
          if count <= 10
            10_000_000 # always show a number
          elsif count <= 1000
            0.1
          elsif count <= 1_000_000
            0.01
          else
            0.001
          end

        coder.map['rows_per'] =
          if ratio_per_item > ratio_threshold
            (ratio_per_item * 100).round.to_s + "%"
          else
            rows
          end
      end
    end

    # Replaces the raw Hash given to #initialize with Table/Index/Column
    # objects.
    def build_from_hash!
      @tables = @tables.map do |tbl_name, tbl_hash|
        table = Table.new(tbl_name, tbl_hash['count'], {})

        (tbl_hash['indexes'] || {}).each do |idx_name, idx_hash|
          (idx_hash['columns'] || []).each do |col_hash|
            table.add_index_column(idx_name, col_hash['column'], col_hash['rows_per'], nil, idx_hash['unique'])
          end
        end

        [tbl_name, table]
      end.to_h
    end

    attr_reader :tables

    # Row count for the named table, or nil when the table is unknown.
    def table_count(table)
      tbl = @tables[table]
      tbl.count if tbl
    end

    # The Index called name on the given table, or nil when either is unknown.
    def fetch_index(table, name)
      tbl = @tables[table]
      return nil unless tbl

      tbl.indexes[name]
    end

    # Returns the Table called name, creating an empty one on first use.
    def build_table(name)
      @tables[name] ||= Table.new(name, nil, {})
    end

    # Records one index column; the table and index are created as needed.
    def add_index_column(table, index_name, column_name, cardinality, is_unique)
      table = build_table(table)
      table.add_index_column(index_name, column_name, nil, cardinality, is_unique)
    end

    # Estimated rows per key for index `key` on `table_name`, looked up by the
    # last entry of `parts` (the used key columns). Returns nil when the
    # table, index or column is unknown.
    def estimate_key(table_name, key, parts)
      index = fetch_index(table_name, key)
      return nil unless index

      index_part = index.columns.find { |part| part.column == parts.last }
      return nil unless index_part

      index_part.rows_per
    end

    # Forces rows_per to 1 for every index column of an empty table.
    #
    # NOTE(review): the previous body treated tables and columns as Hashes
    # (`column.delete`, `column[...] =`, `table.rows`), none of which exist on
    # Table/Column, and discarded the value it computed. The row-vs-percentage
    # formatting is done by Column#encode_with; confirm whether this method is
    # still needed.
    def convert_rows_per_to_output!
      each_index_column do |table, column|
        column.rows_per = 1 if table.count == 0
      end
    end

    def to_yaml
      @tables.to_yaml
    end

    private

    # Yields (table, column) for every column of every index of every table.
    def each_index_column
      @tables.each_value do |table|
        table.indexes.each_value do |index|
          index.columns.each { |column| yield(table, column) }
        end
      end
    end
  end
end
|
data/lib/shiba/output.rb
CHANGED
@@ -1,15 +1,14 @@
|
|
1
1
|
require 'yaml'
|
2
2
|
require 'json'
|
3
3
|
require 'fileutils'
|
4
|
+
require 'tmpdir'
|
4
5
|
require 'erb'
|
5
6
|
|
6
7
|
module Shiba
|
7
8
|
class Output
|
8
|
-
|
9
|
-
|
10
|
-
WEB_PATH = File.dirname(__FILE__) + "/../../web"
|
9
|
+
WEB_PATH = File.join(File.dirname(__FILE__), "..", "..", "web")
|
11
10
|
def self.tags
|
12
|
-
@tags ||= YAML.load_file(File.dirname(__FILE__)
|
11
|
+
@tags ||= YAML.load_file(File.join(File.dirname(__FILE__), "output", "tags.yaml"))
|
13
12
|
end
|
14
13
|
|
15
14
|
def initialize(queries, options = {})
|
@@ -17,10 +16,22 @@ module Shiba
|
|
17
16
|
@options = options
|
18
17
|
end
|
19
18
|
|
19
|
+
def default_filename
|
20
|
+
@default_filename ||= "shiba_results-#{Time.now.to_i}.html"
|
21
|
+
end
|
22
|
+
|
23
|
+
def logdir
|
24
|
+
File.join(Dir.pwd, "log")
|
25
|
+
end
|
26
|
+
|
20
27
|
def output_path
|
21
|
-
|
22
|
-
|
23
|
-
|
28
|
+
return @options['output'] if @options['output']
|
29
|
+
if File.exist?(logdir)
|
30
|
+
FileUtils.mkdir_p(File.join(logdir, "shiba_results"))
|
31
|
+
File.join(Dir.pwd, "log", "shiba_results", default_filename)
|
32
|
+
else
|
33
|
+
File.join(Dir.tmpdir, default_filename)
|
34
|
+
end
|
24
35
|
end
|
25
36
|
|
26
37
|
def js_path
|
@@ -38,28 +49,23 @@ module Shiba
|
|
38
49
|
end
|
39
50
|
|
40
51
|
def make_web!
|
41
|
-
|
42
|
-
|
43
|
-
js = Dir.glob(WEB_PATH + "/dist/*.js").map { |f| File.basename(f) }
|
44
|
-
js.each do |f|
|
45
|
-
system("cp #{WEB_PATH}/dist/#{f} #{js_path}")
|
46
|
-
end
|
52
|
+
js = Dir.glob(File.join(WEB_PATH, "dist", "*.js"))
|
53
|
+
css = Dir.glob(File.join(WEB_PATH, "*.css"))
|
47
54
|
|
48
55
|
data = {
|
49
56
|
js: js,
|
57
|
+
css: css,
|
50
58
|
queries: @queries,
|
51
59
|
tags: self.class.tags,
|
52
60
|
url: remote_url
|
53
61
|
}
|
54
62
|
|
55
|
-
|
56
|
-
|
57
|
-
erb = ERB.new(File.read(WEB_PATH + "/../web/results.html.erb"))
|
58
|
-
File.open(output_path + "/results.html", "w+") do |f|
|
63
|
+
erb = ERB.new(File.read(File.join(WEB_PATH, "..", "web", "results.html.erb")))
|
64
|
+
File.open(output_path, "w+") do |f|
|
59
65
|
f.write(erb.result(binding))
|
60
66
|
end
|
61
67
|
|
62
|
-
|
68
|
+
output_path
|
63
69
|
end
|
64
70
|
end
|
65
71
|
end
|