shiba 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: e14e046d4bcec0e2025d671461517dabd547bc0a
4
+ data.tar.gz: 1e5b9e2043c8ecb2f2a46d2dc581be196c2daf75
5
+ SHA512:
6
+ metadata.gz: 51f1c7929ab2306a7889d37f11d9fcfc73091c7b9325a70cf17e9f57293e4463a9780ce04702ad1ef8392b3c325c0f9a82a7d0e999779d9a989eaeacbc4c6e3d
7
+ data.tar.gz: 4e7ddb0dd6e3f5e555995e2ab0a9505428770dc335489c7929bdab4b7138ac28b94c9e5b91aad2b7a75d3568c3fe9b80ef4b655274c42b6c1ecdcb52fece5efa
data/.gitignore ADDED
@@ -0,0 +1,10 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+ .*.sw*
10
+ node_modules
@@ -0,0 +1,3 @@
1
+ # Contributor Covenant Code of Conduct
2
+
3
+ We, the burrito brothers, find this humorous and odd.
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
+ source "https://rubygems.org"
2
+
3
+ gem "mysql2"
4
+ gem "byebug"
5
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,24 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ shiba (0.1.0)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ byebug (10.0.2)
10
+ mysql2 (0.5.2)
11
+ rake (10.5.0)
12
+
13
+ PLATFORMS
14
+ ruby
15
+
16
+ DEPENDENCIES
17
+ bundler (~> 2.0)
18
+ byebug
19
+ mysql2
20
+ rake (~> 10.0)
21
+ shiba!
22
+
23
+ BUNDLED WITH
24
+ 2.0.1
data/README.md ADDED
@@ -0,0 +1,53 @@
1
+ # Shiba
2
+
3
+ Shiba is a tool that helps you to understand and write better SQL. Integrate
4
+ the gem into your test suite, give Shiba a bit of data about your indexes, and Shiba
5
+ will let you know the impact of your queries on production, with the goal of catching
6
+ poorly performing queries before they hit production.
7
+
8
+ ## Installation
9
+
10
+ You can run shiba either as a gem in your test suite, or as a standalone utility.
11
+
12
+ ### Gem
13
+
14
+ Add this line to your application's Gemfile:
15
+
16
+ ```ruby
17
+ group :test do
18
+ gem 'shiba'
19
+ end
20
+
21
+ bundle
22
+
23
+ # Run some code using shiba to generate a SQL report
24
+ bundle exec shiba analyze rake test:functional
25
+ ```
26
+
27
+ ### Standalone:
28
+
29
+ ```
30
+ # 1. Get shiba.
31
+ local:$ git clone git@github.com:burrito-brothers/shiba.git
32
+
33
+ # 2. Get production data.
34
+ # Shiba *can* work without any further data, but it's really best if you can
35
+ # dump index statistics from a production database, or a staging database
36
+ # that resembles production.
37
+
38
+ local:$ ssh production_host
39
+ production_host:$ mysql -ABe "select * from information_schema.statistics where table_schema = 'DATABASE'" > shiba_schema_stats.tsv
40
+ local:$ scp production_host:shiba_schema_stats.tsv shiba/
41
+
42
+ # 3. Analyze your queries.
43
+ # set shiba loose on your queries!
44
+ # If you can't do step #2, just leave off the '-s' option
45
+
46
+ local:$ cd shiba
47
+ local:$ bin/analyze.rb -h 127.0.0.1 -d TESTDB -u MYSQLUSER -p MYSQLPASS -s shiba_schema_stats.tsv -f ~/src/MYPROJECT/log/test.log > results.json
48
+
49
+ # analyze the results with `jq`, why not
50
+
51
+ local:$ jq -C -s 'sort_by(.cost) | reverse' results.json | less -R
52
+
53
+ ```
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+ task :default => :spec
data/TODO ADDED
@@ -0,0 +1,12 @@
1
+ ===
2
+ this query is throwing the optimizer for a serious loop;
3
+ it says it can use an index on `ipb_address` but when we force key it
4
+ still table scans. Not clear on whether the OR in there is fucking us over or
5
+ if it's a test-data issue.
6
+
7
+
8
+ # SELECT ipb_id,ipb_address,ipb_timestamp,ipb_auto,ipb_anon_only,ipb_create_account,ipb_enable_autoblock,ipb_expiry,ipb_deleted,ipb_block_email,ipb_allow_usertalk,ipb_parent_block_id,ipb_sitewide,comment_ipb_reason.comment_text AS `ipb_reason_text`,comment_ipb_reason.comment_data AS `ipb_reason_data`,comment_ipb_reason.comment_id AS `ipb_reason_cid`,ipb_by,ipb_by_text,NULL AS `ipb_by_actor` FROM `ipblocks` FORCE KEY(`ipb_address`) JOIN `comment` `comment_ipb_reason` ON ((comment_ipb_reason.comment_id = ipb_reason_id)) WHERE ipb_address = '127.0.0.1' OR ((ipb_range_start LIKE '7F00%' ESCAPE '`' ) AND (ipb_range_start <= '7F000001') AND (ipb_range_end >= '7F000001'));
9
+
10
+ ===
11
+ need to use format=json and see how much of an index we're using.
12
+
data/bin/analyze ADDED
@@ -0,0 +1,116 @@
1
#!/usr/bin/env ruby

# analyze: front-end that (optionally) runs a command under watch.rb to
# capture its SQL into a query log, then hands that log to the sibling
# `explain` script together with the connection options.
#
# Usage:
#   analyze [options] <command ...>     # log SQL from a running command
#   analyze --test --file QUERY_LOG     # analyze an existing query log

require 'bundler/setup'
require 'optionparser'
require 'shiba/configure'
require 'shiba/output'

options = {}
parser = OptionParser.new do |opts|
  opts.banner = "analyze <command>. Creates report of SQL from the running process."

  opts.on("-h","--host HOST", "sql host") do |h|
    options["host"] = h
  end

  opts.on("-d","--database DATABASE", "sql database") do |d|
    options["database"] = d
  end

  opts.on("-u","--user USER", "sql user") do |u|
    options["user"] = u
  end

  opts.on("-p","--password PASSWORD", "sql password") do |p|
    options["password"] = p
  end

  opts.on("-i","--index INDEX", "index of query to inspect") do |i|
    options["index"] = i.to_i
  end

  opts.on("-l", "--limit NUM", "stop after processing NUM queries") do |l|
    options["limit"] = l.to_i
  end

  opts.on("-s","--stats FILES", "location of index statistics tsv file") do |f|
    options["stats"] = f
  end

  opts.on("-f", "--file FILE", "location of file containing queries") do |f|
    options["file"] = f
  end

  opts.on("-o", "--output FILE", "write to file instead of stdout") do |f|
    options["output"] = f
  end

  opts.on("-t", "--test", "analyze queries at --file instead of analyzing a process") do |f|
    options["test"] = true
  end

end

parser.parse!

if options["test"] && !options["file"]
  $stderr.puts "--file <query log> is required for test mode"
  $stderr.puts parser.banner
  exit 1
end

# Automagic configuration goes here: when no database was given on the
# command line, fall back to the 'test' entry of the ActiveRecord
# configuration for any connection option that is still unset.
if !options["database"]
  config = Shiba::Configure.activerecord_configuration

  if tc = config && config['test']
    $stderr.puts "Reading configuration from '#{`pwd`.chomp}/config/database.yml'[:test]."
    options['host']     ||= tc['host']
    options['database'] ||= tc['database']
    options['user']     ||= tc['username']
    options['password'] ||= tc['password']
  end
end

# The trailing XXXXXX is required by GNU mktemp (it rejects templates with
# fewer than three X's, which previously left these options as ""); BSD
# mktemp accepts the same template.
if !options["file"]
  options["file"] = `mktemp /tmp/shiba-query.log-#{Time.now.to_i}-XXXXXX`.chomp
end

if !options["output"]
  options["output"] = `mktemp /tmp/shiba-explain.log-#{Time.now.to_i}-XXXXXX`.chomp
end

# Log process queries. "test" is removed from options here so that it is
# never forwarded to explain below.
if !options.delete("test")
  if ARGV.empty?
    $stderr.puts "The name of a command must be passed in to generate SQL logs."
    $stderr.puts "Example: shiba analyze rake spec"
    $stderr.puts ""
    $stderr.puts "For static analysis, try the --test option."
    exit 1
  end

  path = "#{File.dirname(__FILE__)}/watch.rb"
  watch_args = ARGV + [ "--file", options["file"] ]
  pid = fork do
    Signal.trap("INT") { exit 1 }
    exec(path, *watch_args)
  end

  # Ignore Ctrl-C in the parent while the child runs, so an interrupt stops
  # the watched command but analysis of what was logged still proceeds.
  Signal.trap("INT", "IGNORE")
  Process.wait(pid)
  Signal.trap("INT", "DEFAULT")
end

# Explain
$stderr.puts "Analyzing SQL to '#{options["output"]}'..."
path = "#{File.dirname(__FILE__)}/explain"
# Kernel#system only accepts String arguments; --index and --limit were
# parsed to Integers above, so every value is stringified before the call.
args = options.reject { |_, v| v.nil? }.flat_map { |k, v| [ "--#{k}", v.to_s ] }

$stderr.puts ([path] + args).join(" ")
if !system(path, *args)
  exit 1
end
116
+
data/bin/check ADDED
Binary file
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "shiba"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start(__FILE__)
data/bin/explain ADDED
@@ -0,0 +1,105 @@
1
#!/usr/bin/env ruby

# explain: reads queries from --file (or stdin), analyzes them with
# Shiba::Analyzer against index statistics, writes results to --output
# (or stdout) and then builds the web output via Shiba::Output.

require 'bundler/setup'
require 'shiba'
require 'shiba/analyzer'
require 'shiba/index'
require 'shiba/configure'
require 'shiba/output'

require 'optionparser'

options = {}
parser = OptionParser.new do |opts|
  opts.banner = "Usage: explain -h HOST -d DB -u USER -p PASS [-f QUERY_FILE] [-s STATS_FILE] "

  opts.on("-h", "--host HOST", "sql host") { |host| options["host"] = host }
  opts.on("-d", "--database DATABASE", "sql database") { |db| options["database"] = db }
  opts.on("-u", "--user USER", "sql user") { |user| options["username"] = user }
  opts.on("-p", "--password PASSWORD", "sql password") { |pass| options["password"] = pass }
  opts.on("-i", "--index INDEX", "index of query to inspect") { |idx| options["index"] = idx.to_i }
  opts.on("-l", "--limit NUM", "stop after processing NUM queries") { |num| options["limit"] = num.to_i }
  opts.on("-s", "--stats FILES", "location of index statistics tsv file") { |path| options["stats_file"] = path }
  opts.on("-f", "--file FILE", "location of file containing queries") { |path| options["file"] = path }
  opts.on("-o", "--output FILE", "write to file instead of stdout") { |path| options["output"] = path }
  opts.on("--debug") { options["debug"] = true }
end

parser.parse!

# Bail out on the first required option that was not supplied.
missing_opt = ["database", "username"].find { |name| !options[name] }
if missing_opt
  $stderr.puts "Required: #{missing_opt}"
  $stderr.puts parser.banner
  exit 1
end

# "file" and "output" are consumed here so they are not passed on to
# Shiba.configure; each stays nil when the option was not given.
query_path = options.delete("file")
file = query_path ? File.open(query_path, "r") : nil

output_path = options.delete("output")
output = output_path ? File.open(output_path, 'w') : nil

Shiba.configure(options)

schema_stats_fname = options["stats_file"]

if schema_stats_fname
  unless File.exist?(schema_stats_fname)
    $stderr.puts "No such file: #{schema_stats_fname}"
    exit 1
  end

  schema_stats = Shiba::Index.parse(schema_stats_fname)

  # Tables absent from the stats file fall back to fuzzed stats taken from
  # the locally connected database.
  local_db_stats = Shiba::Index.query(Shiba.connection)
  Shiba::Index.fuzz!(local_db_stats)
  local_db_stats.each { |table, values| schema_stats[table] ||= values }
else
  schema_stats = Shiba::Index.query(Shiba.connection)

  if Shiba::Index.insufficient_stats?(schema_stats)
    $stderr.puts "WARN: insufficient stats available in the #{options["database"]} database, guessing at numbers."
    $stderr.puts "To get better analysis please specify an index statistics file."
    sleep 0.5
    Shiba::Index.fuzz!(schema_stats)
  end
end

file ||= $stdin
output ||= $stdout

queries = Shiba::Analyzer.analyze(file, output, schema_stats, options)
Shiba::Output.new(queries).make_web!
data/bin/fingerprint ADDED
@@ -0,0 +1,10 @@
1
#!/usr/bin/env bash
#
# Runs the prebuilt fingerprint binary that matches the host OS ($OSTYPE).

# run from script path, so the relative ../cmd/builds paths below resolve;
# stop if the directory change fails rather than running from elsewhere
cd "${0%/*}" || exit 1

case "$OSTYPE" in
  linux*)  ./../cmd/builds/fingerprint.linux-amd64 ;;
  darwin*) ./../cmd/builds/fingerprint.darwin-amd64 ;;
  *)
    # Previously `echo ... & exit 1`: the single `&` put the echo in the
    # background instead of sequencing it. Report on stderr, then fail.
    echo "unsupported system $OSTYPE" >&2
    exit 1
    ;;
esac
data/bin/inspect ADDED
Binary file
data/bin/parse ADDED
Binary file
@@ -0,0 +1,165 @@
1
+ require 'bundler/setup'
2
+
3
+ require 'activeresource'
4
+ require 'logger'
5
+ require 'pp'
6
+ require 'mysql2'
7
+
8
+ ActiveResource::Base.logger = Logger.new($stdout)
9
+
10
# Base ActiveResource model for the public Redmine site. Subclasses override
# #enhance to add columns to a sampled row.
class Redmine < ActiveResource::Base
  self.site = 'http://www.redmine.org/'
  self.user = 'osheroff'
  # password is read from ~/.redmine_pass through the shell
  self.password = %x(cat ~/.redmine_pass).chomp

  # Hook called with the extracted row hash and the owning record (if any).
  # The base implementation adds nothing.
  def enhance(hash, owner); end
end
17
+
18
# Redmine attachment resource.
class Attachment < Redmine
  # Copies this record's 'content_url' attribute into the row as
  # 'disk_filename'.
  def enhance(hash, owner)
    hash.store('disk_filename', attributes['content_url'])
  end
end
23
+
24
+ class Comment < Redmine; end
25
+
26
# Redmine changeset resource. Rows get a locally generated sequential id and
# fixed repository_id / scmid values.
class Changeset < Redmine
  # next id to hand out; shared by all Changeset instances
  @@changeset_id = 1

  def enhance(hash, owner)
    assigned_id = @@changeset_id
    @@changeset_id = assigned_id + 1

    hash.update('id' => assigned_id, 'repository_id' => 1)
    hash['scmid'] = 1
  end
end
36
+
37
+ class Issue < Redmine; end
38
+
39
# Redmine journal resource. Rows are tagged as belonging to an Issue, using
# the owning record's id.
class Journal < Redmine
  def enhance(hash, owner)
    hash.store('journalized_type', 'Issue')
    hash.store('journalized_id', owner.id)
  end
end
45
+
46
+ class Watcher < Redmine; end
47
+ class Relation < Redmine; end
48
+ class User < Redmine; end
49
+
50
# Samples records of a model by probing ids, and collects for each table a
# list of row hashes restricted to the columns named in `interested_in`.
#
# `interested_in` maps a table name to the list of column names to keep.
# Models passed in must respond to `find(id, params: ...)`; records must
# respond to `attributes`, `id` and `enhance(hash, owner)`.
class Sampler
  # directory that #output writes one <table>.json file into
  OUTPUT_DIR = "/tmp/redmine"

  def initialize(interested_in)
    @results = {}
    @interested_in = interested_in
  end

  # Builds the row hash for `instance` and appends it to the results for its
  # table (class name, tableized). Array-valued attributes whose key is a
  # known table are extracted recursively with `instance` as their owner.
  #
  # Returns the array of rows collected so far for that table.
  def extract_hash(instance, owner = nil)
    h = {}
    table_name = instance.class.name.tableize
    # A table without an entry in @interested_in keeps only its id, instead
    # of failing on nil.include?.
    attrs = @interested_in[table_name] || []

    instance.attributes.each do |k, v|
      if k == "id" || attrs.include?(k)
        h[k] = v
      elsif attrs.include?("#{k}_id")
        # attribute holds a nested record; store its id under <name>_id
        h["#{k}_id"] = v.id
      elsif v.is_a?(Array) && @interested_in[k]
        v.each { |obj| extract_hash(obj, instance) }
      end
    end

    instance.enhance(h, owner)

    (@results[table_name] ||= []) << h
  end

  # Fetches one record, or returns nil when the lookup raises. Best effort:
  # any StandardError is treated as "no such record".
  def get_instance(model, id, params = {})
    model.find(id, params: params)
  rescue StandardError
    nil
  end

  # Probes ids at start + offset, doubling offset after every hit, and
  # extracts each record found. When a probe misses, up to six further ids
  # past the miss (at distances 1, 4, 9, ... 36) are tried in case of a gap;
  # on a hit the search resumes from there.
  #
  # Returns the last record found, or nil if none was.
  def find_max(model, start, offset, params)
    last_instance = nil
    while instance = get_instance(model, start + offset, params)
      last_instance = instance
      extract_hash(instance)
      offset *= 2
      sleep(0.5)
    end

    1.upto(6) do |i|
      # check for gaps
      probe_id = start + offset + (i**2)
      instance = get_instance(model, probe_id, params)
      next unless instance

      last_instance = instance
      extract_hash(instance)
      return find_max(model, probe_id, offset, params) || last_instance
    end

    last_instance
  end

  # Samples `model` in two passes: exponentially up from id 1, then again
  # starting just past the last id that was found.
  def sample_model(model, params = {})
    # callers may pass an explicit nil when a table has no extra params
    params ||= {}

    # go exponentially up from 1.
    last = find_max(model, 0, 1, params)
    # go up from the last hit value.
    if last
      find_max(model, last.id, 1, params)
    end
  end

  # Writes every collected table to OUTPUT_DIR/<table>.json, one JSON
  # document per line.
  def output
    # create the target directory on first use instead of raising ENOENT
    Dir.mkdir(OUTPUT_DIR) unless Dir.exist?(OUTPUT_DIR)

    @results.each do |table, res|
      File.open("#{OUTPUT_DIR}/#{table}.json", "w+") do |f|
        res.each { |row| f.puts(row.to_json) }
      end
    end
  end
end
124
+
125
# Local database whose index definitions decide which columns get sampled.
client = Mysql2::Client.new(username: 'root', password: '123456', database: 'redmine_test', host: '127.0.0.1')

# Tables to sample: the model to query and, optionally, extra request params.
tables = {
  'attachments' => { model: Attachment },
  'issues' => {
    model: Issue,
    params: { include: ['journals', 'comments', 'changesets', 'watchers'] },
  },
  'users' => { model: User }
}

# Every table name involved: the sampled tables plus any included ones.
all_tables = tables.flat_map do |name, spec|
  included = spec[:params] && spec[:params][:include]
  included ? [name] + included : [name]
end

# table name => unique column names that appear in any index of that table
table_indexes = all_tables.each_with_object({}) do |table, columns_by_table|
  index_rows = client.query("show indexes from #{table}").to_a
  columns_by_table[table] = index_rows.map { |row| row["Column_name"] }.flatten.uniq
end

sampler = Sampler.new(table_indexes)

tables.each_value do |spec|
  sampler.sample_model(spec[:model], spec[:params])
end

sampler.output
162
+
163
+ #sample_model(Issue, %w(project_id status_id category_id priority_id author_id))
164
+
165
+