shiba 0.1.2 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +13 -0
- data/.travis/my.cnf +3 -0
- data/Gemfile.lock +14 -1
- data/README.md +93 -30
- data/Rakefile +9 -1
- data/TODO +25 -7
- data/bin/check +0 -0
- data/bin/dump_stats +38 -0
- data/bin/explain +67 -28
- data/bin/shiba +4 -4
- data/lib/shiba.rb +3 -1
- data/lib/shiba/analyzer.rb +6 -5
- data/lib/shiba/backtrace.rb +56 -0
- data/lib/shiba/checker.rb +103 -0
- data/lib/shiba/configure.rb +28 -8
- data/lib/shiba/diff.rb +119 -0
- data/lib/shiba/explain.rb +149 -49
- data/lib/shiba/fuzzer.rb +77 -0
- data/lib/shiba/index.rb +8 -129
- data/lib/shiba/index_stats.rb +210 -0
- data/lib/shiba/output.rb +24 -18
- data/lib/shiba/output/tags.yaml +34 -13
- data/lib/shiba/query_watcher.rb +3 -46
- data/lib/shiba/railtie.rb +31 -8
- data/lib/shiba/table_stats.rb +34 -0
- data/lib/shiba/version.rb +1 -1
- data/shiba.gemspec +1 -0
- data/shiba.yml.example +4 -0
- data/web/main.css +32 -2
- data/web/results.html.erb +132 -58
- metadata +26 -6
- data/bin/analyze +0 -77
- data/bin/inspect +0 -0
- data/bin/parse +0 -0
- data/bin/watch.rb +0 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4cd7eeaf515680a21faf4b12533d160db2b86a89
|
4
|
+
data.tar.gz: c44357f6abd8e38ec6c8df9139e81826b39aabcf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 46d865b67f759ed0e694a9c88aaaf4dbbc932c006b2f0ea088a715d55d07d45409d441b0b518ebd10cc7cf9b5eb3dc861437f8473f1eb293c443e54373476f2b
|
7
|
+
data.tar.gz: 6a5bba249b95220a9fbd8bf1f84d4c04a4a8ba74ec500ddc826994f7b2b07c7d23ef67d6e0ad9cbda9aaebc99983eb81eac3a656688bdb612bd4505168e653e4
|
data/.travis.yml
ADDED
data/.travis/my.cnf
ADDED
data/Gemfile.lock
CHANGED
@@ -1,14 +1,27 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
shiba (0.
|
4
|
+
shiba (0.2.0)
|
5
|
+
activesupport
|
5
6
|
|
6
7
|
GEM
|
7
8
|
remote: https://rubygems.org/
|
8
9
|
specs:
|
10
|
+
activesupport (5.2.2)
|
11
|
+
concurrent-ruby (~> 1.0, >= 1.0.2)
|
12
|
+
i18n (>= 0.7, < 2)
|
13
|
+
minitest (~> 5.1)
|
14
|
+
tzinfo (~> 1.1)
|
9
15
|
byebug (10.0.2)
|
16
|
+
concurrent-ruby (1.1.4)
|
17
|
+
i18n (1.5.3)
|
18
|
+
concurrent-ruby (~> 1.0)
|
19
|
+
minitest (5.11.3)
|
10
20
|
mysql2 (0.5.2)
|
11
21
|
rake (10.5.0)
|
22
|
+
thread_safe (0.3.6)
|
23
|
+
tzinfo (1.2.5)
|
24
|
+
thread_safe (~> 0.1)
|
12
25
|
|
13
26
|
PLATFORMS
|
14
27
|
ruby
|
data/README.md
CHANGED
@@ -1,56 +1,119 @@
|
|
1
|
+
[![Build Status](https://travis-ci.com/burrito-brothers/shiba.svg?branch=master)](https://travis-ci.com/burrito-brothers/shiba)
|
2
|
+
|
1
3
|
# Shiba
|
2
4
|
|
3
|
-
Shiba is a tool that helps
|
4
|
-
|
5
|
-
|
6
|
-
|
5
|
+
Shiba is a tool that helps catch poorly performing queries before they cause problems in production, including:
|
6
|
+
|
7
|
+
* Full table scans
|
8
|
+
* Non selective indexes
|
9
|
+
|
10
|
+
By default, it will pretty much only detect queries that miss indexes. As it's fed more information, it warns about advanced problems, such as queries that use indexes but are still very expensive.
|
11
|
+
|
12
|
+
To help find such queries, Shiba monitors test runs for ActiveRecord queries. A warning and report are then generated. Shiba is further capable of only warning on changes that occurred on a particular git branch/pull request to allow for CI integration.
|
7
13
|
|
8
14
|
## Installation
|
9
15
|
|
10
|
-
|
16
|
+
Install using bundler. Note: this gem is not designed to be run in production.
|
11
17
|
|
12
|
-
|
18
|
+
```ruby
|
19
|
+
gem 'shiba', :group => :test, :require => true
|
20
|
+
```
|
13
21
|
|
14
|
-
|
22
|
+
## Usage
|
15
23
|
|
16
24
|
```ruby
|
17
|
-
|
25
|
+
# Install
|
26
|
+
bundle
|
27
|
+
|
28
|
+
# Run some tests to generate a SQL report
|
29
|
+
rake test:functional
|
30
|
+
rails test test/controllers/users_controller_test.rb
|
31
|
+
|
32
|
+
# 1 problematic query detected
|
33
|
+
# Report available at /tmp/shiba-explain.log-1550099512
|
18
34
|
```
|
19
35
|
|
20
|
-
|
36
|
+
## Typical query problems
|
37
|
+
|
38
|
+
Here are some typical query problems Shiba can detect. We'll assume the following schema:
|
21
39
|
|
22
40
|
```ruby
|
23
|
-
|
24
|
-
|
41
|
+
create_table :users do |t|
|
42
|
+
t.string :name
|
43
|
+
t.string :email
|
44
|
+
# add an organization_id column with an index
|
45
|
+
t.references :organization, index: true
|
46
|
+
|
47
|
+
t.timestamps
|
48
|
+
end
|
49
|
+
```
|
25
50
|
|
26
|
-
|
27
|
-
|
51
|
+
#### Full table scans
|
52
|
+
|
53
|
+
The simplest case to detect is a query that doesn't utilize an index. While it isn't a problem to scan small tables, tables often grow large enough that this becomes a serious issue.
|
54
|
+
|
55
|
+
```ruby
|
56
|
+
user = User.where(email: 'squirrel@example.com').limit(1)
|
28
57
|
```
|
29
58
|
|
30
|
-
|
59
|
+
Without an index, the database will read every row in the table until it finds one with an email address that matches. By adding an index, the database can perform a quick lookup for the record.
|
60
|
+
|
61
|
+
#### Non selective indexes
|
62
|
+
|
63
|
+
Another common case is queries that use an index and work fine in the average case, but where the distribution is non-normal. These issues can be hard to track down and often impact large customers.
|
31
64
|
|
65
|
+
```ruby
|
66
|
+
users = User.where(organization_id: 1)
|
67
|
+
users.size
|
68
|
+
# => 75
|
69
|
+
|
70
|
+
users = User.where(organization_id: 42)
|
71
|
+
users.size
|
72
|
+
# => 52,000
|
32
73
|
```
|
33
|
-
# 1. Get shiba.
|
34
|
-
local:$ git clone git@github.com:burrito-brothers/shiba.git
|
35
74
|
|
36
|
-
|
37
|
-
# Shiba *can* work without any further data, but it's really best if you can
|
38
|
-
# dump index statistics from a production database, or a staging database with
|
39
|
-
# that resembles production.
|
75
|
+
Normally a query like this would only become a problem as the app grows in popularity. Fixes include adding `limit` or `find_each`.
|
40
76
|
|
41
|
-
|
42
|
-
production_host:$ mysql -ABe "select * from information_schema.statistics where table_schema = 'DATABASE'" > shiba_schema_stats.tsv
|
43
|
-
local:$ scp production_host:shiba_schema_stats.tsv shiba/
|
77
|
+
With more data, Shiba can help detect this issue when it appears in a pull request.
|
44
78
|
|
45
|
-
|
46
|
-
# set shiba loose on your queries!
|
47
|
-
# If you can't do step #2, just leave off the '-s' option
|
79
|
+
## Going beyond table scans
|
48
80
|
|
49
|
-
|
50
|
-
local:$ bin/analyze.rb -h 127.0.0.1 -d TESTDB -u MYSQLUSER -p MYSQLPASS -s shiba_schema_stats.tsv -f ~/src/MYPROJECT/log/test.log > results.json
|
81
|
+
For smarter analysis, Shiba requires general statistics about production data, such as the number of rows in a table and how unique columns are.
|
51
82
|
|
52
|
-
|
83
|
+
This information can be obtained by running the bin/dump_stats command in production.
|
53
84
|
|
54
|
-
|
85
|
+
```console
|
86
|
+
production$
|
87
|
+
git clone https://github.com/burrito-brothers/shiba.git
|
88
|
+
cd shiba ; bundle
|
89
|
+
bin/dump_stats DATABASE_NAME [MYSQLOPTS] > ~/shiba_index.yml
|
90
|
+
|
91
|
+
local$
|
92
|
+
scp production:~/shiba_index.yml RAILS_PROJECT/config
|
93
|
+
```
|
55
94
|
|
95
|
+
The stats file will look similar to the following:
|
96
|
+
|
97
|
+
```yaml
|
98
|
+
users:
|
99
|
+
count: 10000
|
100
|
+
indexes:
|
101
|
+
PRIMARY:
|
102
|
+
name: PRIMARY
|
103
|
+
columns:
|
104
|
+
- column: id
|
105
|
+
rows_per: 1
|
106
|
+
unique: true
|
107
|
+
index_users_on_login:
|
108
|
+
name: index_users_on_login
|
109
|
+
columns:
|
110
|
+
- column: login
|
111
|
+
rows_per: 1
|
112
|
+
unique: true
|
113
|
+
index_users_on_created_by_id:
|
114
|
+
name: index_users_on_created_by_id
|
115
|
+
columns:
|
116
|
+
- column: created_by_id
|
117
|
+
rows_per: 3
|
118
|
+
unique: false
|
56
119
|
```
|
data/Rakefile
CHANGED
data/TODO
CHANGED
@@ -1,12 +1,30 @@
|
|
1
1
|
===
|
2
|
-
this query is throwing the optimizer for a serious loop;
|
3
|
-
it says it can use an index on `ipb_address` but when we force key it
|
4
|
-
still table scans. Not clear on whether the OR in there is fucking us over or
|
5
|
-
if it's a test-data issue.
|
6
2
|
|
3
|
+
# we screw this up because of the table aliasing
|
4
|
+
SELECT COUNT(*) AS count_all, group_id AS group_id FROM `users` INNER JOIN `groups_users` `users_groups_users_join` ON `users_groups_users_join`.`user_id` = `users`.`id` INNER JOIN `users` `groups_users` ON `groups_users`.`id` = `users_groups_users_join`.`group_id` AND `groups_users`.`type` IN ('Group', 'GroupBuiltin', 'GroupAnonymous', 'GroupNonMember') WHERE `users`.`type` IN ('User', 'AnonymousUser') GROUP BY group_id
|
5
|
+
|
6
|
+
|
7
|
+
# UI
|
8
|
+
|
9
|
+
SQL Query -> where clause
|
10
|
+
throw tags in
|
11
|
+
|
12
|
+
# Add "ignore me" button next to query to show how to block stuff.
|
13
|
+
|
14
|
+
# Fuzzer
|
15
|
+
|
16
|
+
see how feasible a basic-ass cardinality estimator would be
|
17
|
+
|
18
|
+
# Docs
|
19
|
+
|
20
|
+
- main docs (readme.md)
|
21
|
+
- link from call to action about fuzzed data
|
22
|
+
|
23
|
+
# Explain Stuff
|
24
|
+
|
25
|
+
Add index-walk detection
|
26
|
+
Add filesort badge
|
27
|
+
Postgres!
|
7
28
|
|
8
|
-
# SELECT ipb_id,ipb_address,ipb_timestamp,ipb_auto,ipb_anon_only,ipb_create_account,ipb_enable_autoblock,ipb_expiry,ipb_deleted,ipb_block_email,ipb_allow_usertalk,ipb_parent_block_id,ipb_sitewide,comment_ipb_reason.comment_text AS `ipb_reason_text`,comment_ipb_reason.comment_data AS `ipb_reason_data`,comment_ipb_reason.comment_id AS `ipb_reason_cid`,ipb_by,ipb_by_text,NULL AS `ipb_by_actor` FROM `ipblocks` FORCE KEY(`ipb_address`) JOIN `comment` `comment_ipb_reason` ON ((comment_ipb_reason.comment_id = ipb_reason_id)) WHERE ipb_address = '127.0.0.1' OR ((ipb_range_start LIKE '7F00%' ESCAPE '`' ) AND (ipb_range_start <= '7F000001') AND (ipb_range_end >= '7F000001'));
|
9
29
|
|
10
|
-
===
|
11
|
-
need to use format=json and see how much of an index we're using.
|
12
30
|
|
data/bin/check
CHANGED
Binary file
|
data/bin/dump_stats
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
|
3
|
+
case "$1" in
|
4
|
+
-t|--tables)
|
5
|
+
TABLES=$(echo "$2" | tr -d ' ' | tr ',' '\n')
|
6
|
+
shift 2
|
7
|
+
;;
|
8
|
+
esac
|
9
|
+
|
10
|
+
DATABASE=$1
|
11
|
+
if [ -z "$DATABASE" ]
|
12
|
+
then
|
13
|
+
echo "usage: dump_stats -tables [table1,table2...] DATABASE [ ...mysql_args]"
|
14
|
+
exit 1
|
15
|
+
fi
|
16
|
+
|
17
|
+
shift
|
18
|
+
|
19
|
+
#for x in `mysql $* -NABe "show tables from $DATABASE"` ; do
|
20
|
+
# mysql $* -e "ANALYZE TABLE $DATABASE.$x" >/dev/null 2>&1
|
21
|
+
#done
|
22
|
+
|
23
|
+
MYSQL_STATS=`mktemp`
|
24
|
+
mysql $* -ABe "select * from information_schema.statistics where table_schema = '$DATABASE'" > $MYSQL_STATS
|
25
|
+
|
26
|
+
if [ "$TABLES" ]
|
27
|
+
then
|
28
|
+
filtered=`mktemp`
|
29
|
+
head -n 1 $MYSQL_STATS >> $filtered
|
30
|
+
for t in $TABLES
|
31
|
+
do
|
32
|
+
awk -v tbl=$t '{ if ($3 == tbl) print $0 }' $MYSQL_STATS >> $filtered
|
33
|
+
done
|
34
|
+
|
35
|
+
MYSQL_STATS=$filtered
|
36
|
+
fi
|
37
|
+
|
38
|
+
bundle exec ruby -e "require 'shiba/index'; puts Shiba::Index.parse('$MYSQL_STATS').to_yaml"
|
data/bin/explain
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
require 'bundler/setup'
|
4
4
|
require 'shiba'
|
5
5
|
require 'shiba/analyzer'
|
6
|
-
require 'shiba/
|
6
|
+
require 'shiba/table_stats'
|
7
7
|
require 'shiba/configure'
|
8
8
|
require 'shiba/output'
|
9
9
|
|
@@ -15,20 +15,51 @@ parser = Shiba::Configure.make_options_parser(options)
|
|
15
15
|
parser.banner = "Run a list of queries through shiba's analyzer."
|
16
16
|
parser.parse!
|
17
17
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
18
|
+
option_path = Shiba::Configure.mysql_config_path
|
19
|
+
|
20
|
+
if option_path
|
21
|
+
puts "Found config at #{option_path}" if options["verbose"]
|
22
|
+
options['default_file'] ||= option_path
|
23
|
+
end
|
24
|
+
|
25
|
+
option_file = if options['default_file'] && File.exist?(options['default_file'])
|
26
|
+
File.read(options['default_file'])
|
27
|
+
else
|
28
|
+
""
|
29
|
+
end
|
30
|
+
|
31
|
+
if options['json'] && options['html']
|
32
|
+
$stderr.puts "Can only output to json or html, not both"
|
33
|
+
$stderr.puts parser.banner
|
34
|
+
exit 1
|
35
|
+
end
|
36
|
+
|
37
|
+
if option_file && !options['default_group']
|
38
|
+
if option_file.include?("[client]")
|
39
|
+
options['default_group'] = 'client'
|
23
40
|
end
|
41
|
+
if option_file.include?("[mysql]")
|
42
|
+
options['default_group'] = 'mysql'
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
def require_option(parser, name)
|
47
|
+
$stderr.puts "Required: #{name}"
|
48
|
+
$stderr.puts parser.banner
|
49
|
+
exit 1
|
50
|
+
end
|
51
|
+
|
52
|
+
if !options["username"] && !option_file.include?('user')
|
53
|
+
require_option(parser, 'username')
|
54
|
+
end
|
55
|
+
|
56
|
+
if !options["database"] && !option_file.include?('database')
|
57
|
+
require_option(parser, 'database')
|
24
58
|
end
|
25
59
|
|
26
60
|
file = options.delete("file")
|
27
61
|
file = File.open(file, "r") if file
|
28
62
|
|
29
|
-
output = options.delete("explain")
|
30
|
-
output = File.open(output, 'w') if output
|
31
|
-
|
32
63
|
Shiba.configure(options)
|
33
64
|
|
34
65
|
schema_stats_fname = options["stats"]
|
@@ -38,27 +69,35 @@ if schema_stats_fname && !File.exist?(schema_stats_fname)
|
|
38
69
|
exit 1
|
39
70
|
end
|
40
71
|
|
41
|
-
if
|
42
|
-
|
72
|
+
file = $stdin if file.nil?
|
73
|
+
json = options['json']
|
74
|
+
json = File.open('/dev/null', 'w') if json.nil?
|
75
|
+
|
76
|
+
if options["verbose"]
|
77
|
+
$stderr.puts "Reading queries from '#{file.inspect}'..."
|
78
|
+
$stderr.puts "Analyzing SQL to '#{json.inspect}'..."
|
79
|
+
end
|
43
80
|
|
44
|
-
|
45
|
-
|
46
|
-
local_db_stats.each do |table, values|
|
47
|
-
schema_stats[table] = values unless schema_stats[table]
|
48
|
-
end
|
49
|
-
else
|
50
|
-
schema_stats = Shiba::Index.query(Shiba.connection)
|
81
|
+
table_stats = Shiba::TableStats.new(Shiba.index_config, Shiba.connection, {})
|
82
|
+
queries = Shiba::Analyzer.analyze(file, json, table_stats, options)
|
51
83
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
84
|
+
problems = queries.select { |q| q[:cost] && q[:cost] > 100 }
|
85
|
+
|
86
|
+
if problems.any?
|
87
|
+
query_word = problems.size == 1 ? 'query' : 'queries'
|
88
|
+
$stderr.puts "#{problems.size} problematic #{query_word} detected"
|
89
|
+
|
90
|
+
if options['json']
|
91
|
+
exit 3
|
57
92
|
end
|
58
|
-
end
|
59
93
|
|
60
|
-
|
61
|
-
output = $stdout if output.nil?
|
94
|
+
page = Shiba::Output.new(queries, { 'output' => options['html'] }).make_web!
|
62
95
|
|
63
|
-
|
64
|
-
|
96
|
+
if !File.exist?(page)
|
97
|
+
$stderr.puts("Failed to generate #{page}")
|
98
|
+
exit 1
|
99
|
+
end
|
100
|
+
|
101
|
+
$stderr.puts "Report available at #{page}"
|
102
|
+
exit 3
|
103
|
+
end
|
data/bin/shiba
CHANGED
@@ -5,8 +5,8 @@ require 'optionparser'
|
|
5
5
|
APP = File.basename(__FILE__)
|
6
6
|
|
7
7
|
commands = {
|
8
|
-
"
|
9
|
-
"
|
8
|
+
"explain" => "Generate a report from logged SQL queries",
|
9
|
+
"check" => "Check staged files for query problems",
|
10
10
|
}
|
11
11
|
|
12
12
|
global = OptionParser.new do |opts|
|
@@ -14,7 +14,7 @@ global = OptionParser.new do |opts|
|
|
14
14
|
opts.separator ""
|
15
15
|
opts.separator "These are the commands available:"
|
16
16
|
commands.each do |name,info|
|
17
|
-
opts.separator " #{name} #{info
|
17
|
+
opts.separator " #{name} #{info}"
|
18
18
|
end
|
19
19
|
opts.separator ""
|
20
20
|
opts.separator "See #{APP} --help <command> to read about a specific command."
|
@@ -35,6 +35,6 @@ if !commands.key?(command)
|
|
35
35
|
exit 1
|
36
36
|
end
|
37
37
|
|
38
|
-
path =
|
38
|
+
path = File.join(File.dirname(__FILE__), command)
|
39
39
|
|
40
40
|
Kernel.exec(path, *ARGV)
|