shiba 0.1.2 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,77 @@
1
+ require 'shiba/index_stats'
2
+
3
module Shiba
  # Produces synthetic ("fuzzed") index statistics for the current database.
  #
  # The index layout is read from information_schema.statistics, but the row
  # counts and per-key row estimates are invented: every table is given one of
  # two fixed sizes depending on how many indexes it has, and every index
  # column is assigned a fixed rows-per-key value.
  class Fuzzer
    # Row count assigned to tables at or above the 90th percentile by index count.
    BIG_FUZZ_SIZE = 5_000
    # Row count assigned to every other table.
    SMALL_FUZZ_SIZE = 100

    attr_reader :connection

    # connection - object responding to #query(sql) and returning an
    #              enumerable of Hash rows keyed by column name.
    def initialize(connection)
      @connection = connection
      @index_stats = IndexStats.new
    end

    # Loads the index layout, then overwrites table counts and per-column
    # rows_per with fuzzed values (1 row per key for unique indexes, 2 for
    # non-unique ones).
    #
    # Returns the populated IndexStats.
    def fuzz!
      fetch_index!
      table_sizes = guess_table_sizes

      @index_stats.tables.each do |table_name, table|
        # Tables absent from the size guess (e.g. only 'fk_rails%' indexes)
        # are left with a nil count, exactly as Hash#[] reports them.
        table.count = table_sizes[table_name]

        table.indexes.each_value do |index|
          rows_per = index.unique ? 1 : 2
          index.columns.each { |column| column.rows_per = rows_per }
        end
      end

      @index_stats
    end

    private

    # Reads every index column of the current schema into @index_stats.
    def fetch_index!
      records = connection.query("select * from information_schema.statistics where table_schema = DATABASE()")

      records.each do |record|
        # Column-name case differs between servers/drivers; normalise into a
        # fresh hash instead of mutating the driver's row in place.
        row = record.each_with_object({}) { |(key, value), normalised| normalised[key.to_s.downcase] = value }

        # NON_UNIQUE may arrive as Integer 0 (type-casting drivers) or as the
        # String "0" (text protocol / TSV); compare on the string form so
        # both are recognised as unique.
        is_unique = row['non_unique'].to_s == '0'

        @index_stats.add_index_column(
          row['table_name'],
          row['index_name'],
          row['column_name'],
          row['cardinality'].to_i,
          is_unique
        )
      end
    end

    # Create fake table sizes based on the table's index count.
    # The more indexes, the bigger the table. Seems to rank tables fairly well.
    #
    # Returns a Hash of table name => fuzzed row count; empty when the schema
    # has no countable indexes.
    def guess_table_sizes
      index_count_query = "select TABLE_NAME as table_name, count(*) as index_count
        from information_schema.statistics where table_schema = DATABASE()
        and seq_in_index = 1 and index_name not like 'fk_rails%'
        group by table_name order by index_count"

      index_counts = connection.query(index_count_query).to_a
      # Nothing to rank: without this guard the percentile lookup below would
      # index into an empty array and call [] on nil.
      return {} if index_counts.empty?

      # 90th table percentile based on number of indexes
      # round down so we don't blow up on small tables
      large_table_idx = (index_counts.size * 0.9).floor
      large_table_index_count = index_counts[large_table_idx]["index_count"].to_f

      index_counts.each_with_object({}) do |row, sizes|
        # Relies on the select list order: table_name first, index_count second.
        table_name, index_count = row.values
        sizes[table_name] = index_count >= large_table_index_count ? BIG_FUZZ_SIZE : SMALL_FUZZ_SIZE
      end
    end
  end
end
data/lib/shiba/index.rb CHANGED
@@ -1,6 +1,9 @@
1
- module Shiba
2
- module Index
1
+ require 'yaml'
2
+ require 'pp'
3
+ require 'shiba/index_stats'
3
4
 
5
+ module Shiba
6
+ class Index
4
7
  # Given the path to the information_schema.statistics output, returns index statistics keyed by table name.
5
8
  # Examples:
6
9
  # Exploring the schema:
@@ -12,140 +15,16 @@ module Shiba
12
15
  # => {:table_schema=>"blog_test", :table_name=>"users", :non_unique=>"0", :column_name=>"id", :cardinality=>"2", :is_visible=>"YES", :"expression\n"=>"NULL\n"}
13
16
  #
14
17
  def self.parse(path)
18
+ stats = IndexStats.new
15
19
  tables = {}
16
20
  records = read(path)
17
21
  headers = records.shift.map { |header| header.downcase }
18
22
  records.each do |r|
19
23
  h = Hash[headers.zip(r)]
20
24
  h["cardinality"] = h["cardinality"].to_i
21
- table = tables[h['table_name']] ||= []
22
- table.push(h)
23
- end
24
- tables
25
- end
26
-
27
- # Getting a row count for a table:
28
- #
29
- # schema_stats = Index.parse("./shiba/schema_stats.tsv")
30
- # users_count = Index.count(:users, schema_stats)
31
- # => 2
32
- def self.count(table, schema)
33
- return nil unless schema[table]
34
- primary = schema[table].detect { |index| index['index_name'] == "PRIMARY" }
35
- if primary.nil?
36
- # find the highest cardinality of a unique index, if it exists
37
- schema[table].map do |index|
38
- if index['non_unique'].to_i == 0
39
- index['cardinality']
40
- else
41
- nil
42
- end
43
- end.compact.max
44
- else
45
- primary['cardinality'].to_i
46
- end
47
- end
48
-
49
- def self.fuzzed?(table, schema)
50
- return nil unless schema[table]
51
- schema[table].first['fuzzed']
52
- end
53
-
54
- def self.estimate_key(table, key, parts, schema)
55
- table_count = count(table, schema)
56
- return nil unless table_count
57
-
58
- key_stat = schema[table].detect do |i|
59
- i["index_name"] == key && i["column_name"] == parts.last
60
- end
61
-
62
- return nil unless key_stat
63
-
64
- return 0 if key_stat['cardinality'] == 0
65
- table_count / key_stat['cardinality']
66
- end
67
-
68
- def self.query(connection)
69
- records = connection.query("select * from information_schema.statistics where table_schema = DATABASE()")
70
- tables = {}
71
- records.each do |h|
72
- h.keys.each { |k| h[k.downcase] = h.delete(k) }
73
- h["cardinality"] = h["cardinality"].to_i
74
- table = tables[h['table_name']] ||= []
75
- table.push(h)
76
- end
77
- tables
78
- end
79
-
80
-
81
- # Up the cardinality on our indexes.
82
- # Non uniques have a little less cardinality.
83
- def self.fuzz!(stats)
84
- db = stats.values.first.first['table_schema']
85
- table_sizes = self.guess_table_sizes(db)
86
-
87
-
88
-
89
- stats.each do |table,indexes|
90
- indexes.each do |idx|
91
- idx['cardinality'] = table_sizes[table]
92
-
93
- if idx['non_unique'] == 1
94
- idx['cardinality'] = (idx['cardinality'] * 0.7).round
95
- end
96
-
97
- idx['fuzzed'] = true
98
- end
99
- end
100
- end
101
-
102
- MINIMUM_TABLE_SIZE = 500
103
-
104
- # Approximate median size of the tables is less than 500.
105
- def self.insufficient_stats?(stats)
106
- if stats.length == 0
107
- return true
108
- end
109
-
110
- # Calculate a rough median.
111
- primary_keys = stats.map do |_,indexes|
112
- indexes.detect { |idx| idx['index_name'] == 'PRIMARY' } || {}
25
+ stats.add_index_column(h['table_name'], h['index_name'], h['column_name'], h['cardinality'], h['non_unique'] == "0")
113
26
  end
114
-
115
- table_counts = primary_keys.map { |pk| pk['cardinality'].to_i }
116
- median = table_counts[table_counts.size/2]
117
-
118
- return median < MINIMUM_TABLE_SIZE
119
- end
120
-
121
- STANDARD_FUZZ_SIZE = 5_000
122
-
123
- # Create fake table sizes based on the table's index count.
124
- # The more indexes, the bigger the table. Seems to rank tables fairly well.
125
- def self.guess_table_sizes(db)
126
- db = Shiba.connection.escape(db)
127
- index_count_query = "select TABLE_NAME as table_name, count(*) as index_count
128
- from information_schema.statistics where table_schema = '#{db}'
129
- and seq_in_index = 1 and index_name not like 'fk_rails%'
130
- group by table_name order by index_count"
131
-
132
- index_counts = Shiba.connection.query(index_count_query).to_a
133
-
134
- # 80th table percentile based on number of indexes
135
- large_table_idx = (index_counts.size * 0.8).round
136
- large_table = index_counts[large_table_idx]
137
-
138
- sizes = Hash[index_counts.map(&:values)]
139
-
140
- sizes.each do |table_name, index_count|
141
- if index_count == 0
142
- index_count = 1
143
- end
144
-
145
- sizes[table_name] = STANDARD_FUZZ_SIZE * (index_count / large_table['index_count'].to_f)
146
- end
147
-
148
- sizes
27
+ stats
149
28
  end
150
29
 
151
30
  protected
@@ -0,0 +1,210 @@
1
+ require 'yaml'
2
+ require 'active_support/core_ext/hash/keys'
3
+
4
module Shiba
  # In-memory model of index statistics: tables -> indexes -> index columns.
  #
  # It can be filled incrementally via #add_index_column (raw cardinalities),
  # or rebuilt from a previously serialized Hash (see #build_from_hash!), and
  # serialized again with #to_yaml.
  class IndexStats
    # tables - Hash in the serialized shape:
    #          { table_name => { 'count' => Integer|nil,
    #                            'indexes' => { index_name => { 'unique' => bool,
    #                                                           'columns' => [ { 'column' => name,
    #                                                                            'rows_per' => Integer | "N%" } ] } } } }
    def initialize(tables = {})
      @tables = tables
      build_from_hash!
    end

    attr_reader :tables

    # True when at least one table is known.
    def any?
      @tables.any?
    end

    # One table: its name, row count (may be nil until known) and a Hash of
    # index name => Index.
    Table = Struct.new(:name, :count, :indexes) do
      # Psych hook: emit an untagged map of the members, minus 'name' (the
      # name is already the key this table is stored under).
      def encode_with(coder)
        if self.count.nil?
          # No unique keys to take a row count from, so take a best guess:
          # the largest raw cardinality of any index column. This must happen
          # BEFORE the map is built, otherwise 'count' is serialized as null.
          self.count = indexes.values.flat_map { |index| index.columns.map(&:raw_cardinality) }.compact.max
        end

        coder.map = to_h.each_with_object({}) { |(member, value), map| map[member.to_s] = value }
        coder.map.delete('name')
        coder.tag = nil
      end

      # Returns the Index called index_name, creating it on first use.
      def build_index(index_name, is_unique)
        self.indexes[index_name] ||= Index.new(self, index_name, [], is_unique)
      end

      # Appends a column to the named index (creating the index if needed).
      # rows_per may be nil (derive from cardinality), an Integer, or "N%".
      def add_index_column(index_name, column_name, rows_per, cardinality, is_unique)
        index = build_index(index_name, is_unique)
        index.columns << Column.new(column_name, index, rows_per, cardinality)

        # set row count from unique index
        self.count = cardinality if is_unique && !self.count
      end
    end

    # One index: owning Table, name, ordered Column list and uniqueness flag.
    Index = Struct.new(:table, :name, :columns, :unique) do
      # Appends a column whose rows_per will be derived from cardinality.
      def add_column(column_name, cardinality)
        # Column.new takes (column, index, rows_per, cardinality).
        columns << Column.new(column_name, self, nil, cardinality)
      end

      # Psych hook: emit an untagged map of the members, minus the 'table'
      # back-reference.
      def encode_with(coder)
        coder.map = to_h.each_with_object({}) { |(member, value), map| map[member.to_s] = value }
        coder.map.delete('table')
        coder.tag = nil
      end
    end

    # One column of an index, with an estimate of how many rows a single key
    # value matches.
    class Column
      # column      - column name
      # index       - owning Index
      # rows_per    - nil, an Integer row count, or a percentage String ("5%")
      # cardinality - raw distinct-value count, or nil when unknown
      def initialize(column, index, rows_per, cardinality)
        @column = column
        @index = index
        @rows_per = rows_per
        @cardinality = cardinality
      end

      attr_reader :column
      attr_writer :rows_per

      # Row count of the owning table (may be nil).
      def table_count
        @index.table.count
      end

      def raw_cardinality
        @cardinality
      end

      # Estimated rows matched per key value. The result is memoized.
      #
      # Returns an Integer, or nil when no explicit Integer was set and the
      # table's row count is unknown.
      def rows_per
        return @rows_per if @rows_per.is_a?(Integer)
        return nil if table_count.nil?

        if @rows_per.nil?
          @rows_per = table_count == 0 ? 1 : rows_per_from_cardinality
        elsif @rows_per.is_a?(String)
          # "N%" of the table; String#to_f ignores the trailing percent sign.
          @rows_per = ((@rows_per.to_f / 100.0) * table_count.to_f).round
        end
        @rows_per
      end

      # Psych hook: emit 'column' plus 'rows_per', the latter as a percentage
      # string when a key matches a large share of the table and as a plain
      # row count otherwise.
      def encode_with(coder)
        coder.map = { 'column' => @column }
        coder.tag = nil

        count = table_count
        resolved = rows_per
        if count.nil? || resolved.nil?
          # Without a table size no ratio can be computed; pass the stored
          # value through unchanged.
          coder.map['rows_per'] = resolved || @rows_per
          return
        end

        count = 1 if count == 0
        ratio_per_item = resolved / count.to_f

        coder.map['rows_per'] =
          if ratio_per_item > ratio_threshold(count)
            "#{(ratio_per_item * 100).round}%"
          else
            resolved
          end
      end

      private

      # Derives rows-per-key from the raw cardinality using float division, so
      # the rounding is real (integer division would already have truncated).
      # A missing or zero cardinality is treated as a single distinct value
      # (worst case: the key matches the whole table) rather than dividing by
      # zero. The result is never below 1.
      def rows_per_from_cardinality
        distinct_values = [@cardinality.to_i, 1].max
        [(table_count.to_f / distinct_values).round, 1].max
      end

      # Share of the table above which rows_per is shown as a percentage.
      # The bigger the table, the smaller the share needed.
      def ratio_threshold(count)
        if count <= 10
          10_000_000 # always show a number
        elsif count <= 1000
          0.1
        elsif count <= 1_000_000
          0.01
        else
          # Also covers tables beyond a billion rows, which previously had no
          # threshold at all.
          0.001
        end
      end
    end

    # Replaces the raw Hash given to the constructor with Table objects.
    # Missing 'indexes' / 'columns' entries are treated as empty.
    def build_from_hash!
      @tables = @tables.map do |tbl_name, tbl_hash|
        table = Table.new(tbl_name, tbl_hash['count'], {})
        (tbl_hash['indexes'] || {}).each do |idx_name, idx_hash|
          (idx_hash['columns'] || []).each do |col_hash|
            table.add_index_column(idx_name, col_hash['column'], col_hash['rows_per'], nil, idx_hash['unique'])
          end
        end
        [tbl_name, table]
      end.to_h
    end

    # Row count of the named table, or nil when the table is unknown.
    def table_count(table)
      tbl = @tables[table]
      tbl && tbl.count
    end

    # The Index called name on table, or nil when either is unknown.
    def fetch_index(table, name)
      tbl = @tables[table]
      return nil unless tbl

      tbl.indexes[name]
    end

    # Returns the Table called name, creating an empty one on first use.
    def build_table(name)
      @tables[name] ||= Table.new(name, nil, {})
    end

    # Records one index column with its raw cardinality.
    def add_index_column(table, index_name, column_name, cardinality, is_unique)
      table = build_table(table)
      table.add_index_column(index_name, column_name, nil, cardinality, is_unique)
    end

    # Estimated rows matched per key for index `key` on `table_name`, taken
    # from the index column named by the last element of parts.
    #
    # Returns nil when the table, index or column is unknown.
    def estimate_key(table_name, key, parts)
      index = fetch_index(table_name, key)
      return nil unless index

      index_part = index.columns.find { |part| part.column == parts.last }
      return nil unless index_part

      index_part.rows_per
    end

    # Forces every column's rows_per to be resolved and memoized.
    #
    # The previous body was a leftover from a Hash-based representation and
    # raised NoMethodError for any column (it called Column#delete and
    # Table#rows, neither of which exists). The row-count-versus-percentage
    # decision is made in Column#encode_with at serialization time.
    def convert_rows_per_to_output!
      each_index_column { |_table, column| column.rows_per }
    end

    def to_yaml
      @tables.to_yaml
    end

    private

    # Yields (table, column) for every column of every index.
    def each_index_column
      @tables.each do |_name, table|
        table.indexes.each do |_index_name, index|
          index.columns.each do |column|
            yield(table, column)
          end
        end
      end
    end
  end
end
data/lib/shiba/output.rb CHANGED
@@ -1,15 +1,14 @@
1
1
  require 'yaml'
2
2
  require 'json'
3
3
  require 'fileutils'
4
+ require 'tmpdir'
4
5
  require 'erb'
5
6
 
6
7
  module Shiba
7
8
  class Output
8
- OUTPUT_PATH = "/tmp/shiba_results"
9
-
10
- WEB_PATH = File.dirname(__FILE__) + "/../../web"
9
+ WEB_PATH = File.join(File.dirname(__FILE__), "..", "..", "web")
11
10
  def self.tags
12
- @tags ||= YAML.load_file(File.dirname(__FILE__) + "/output/tags.yaml")
11
+ @tags ||= YAML.load_file(File.join(File.dirname(__FILE__), "output", "tags.yaml"))
13
12
  end
14
13
 
15
14
  def initialize(queries, options = {})
@@ -17,10 +16,22 @@ module Shiba
17
16
  @options = options
18
17
  end
19
18
 
19
+ def default_filename
20
+ @default_filename ||= "shiba_results-#{Time.now.to_i}.html"
21
+ end
22
+
23
+ def logdir
24
+ File.join(Dir.pwd, "log")
25
+ end
26
+
20
27
  def output_path
21
- path ||= File.join(@options['output'], "shiba_results") if @options['output']
22
- path ||= Dir.pwd + "/log/shiba_results" if File.exist?(Dir.pwd + "/log")
23
- path ||= OUTPUT_PATH
28
+ return @options['output'] if @options['output']
29
+ if File.exist?(logdir)
30
+ FileUtils.mkdir_p(File.join(logdir, "shiba_results"))
31
+ File.join(Dir.pwd, "log", "shiba_results", default_filename)
32
+ else
33
+ File.join(Dir.tmpdir, default_filename)
34
+ end
24
35
  end
25
36
 
26
37
  def js_path
@@ -38,28 +49,23 @@ module Shiba
38
49
  end
39
50
 
40
51
  def make_web!
41
- FileUtils.mkdir_p(js_path)
42
-
43
- js = Dir.glob(WEB_PATH + "/dist/*.js").map { |f| File.basename(f) }
44
- js.each do |f|
45
- system("cp #{WEB_PATH}/dist/#{f} #{js_path}")
46
- end
52
+ js = Dir.glob(File.join(WEB_PATH, "dist", "*.js"))
53
+ css = Dir.glob(File.join(WEB_PATH, "*.css"))
47
54
 
48
55
  data = {
49
56
  js: js,
57
+ css: css,
50
58
  queries: @queries,
51
59
  tags: self.class.tags,
52
60
  url: remote_url
53
61
  }
54
62
 
55
- system("cp #{WEB_PATH}/*.css #{output_path}")
56
-
57
- erb = ERB.new(File.read(WEB_PATH + "/../web/results.html.erb"))
58
- File.open(output_path + "/results.html", "w+") do |f|
63
+ erb = ERB.new(File.read(File.join(WEB_PATH, "..", "web", "results.html.erb")))
64
+ File.open(output_path, "w+") do |f|
59
65
  f.write(erb.result(binding))
60
66
  end
61
67
 
62
- puts "done, results are in " + File.join(output_path, "results.html")
68
+ output_path
63
69
  end
64
70
  end
65
71
  end