pgdexter 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 12871a558deb7cd0cb5067476415a34164d5d906
4
+ data.tar.gz: 6a293f4992bacb524a37bd7f7882851d364f6080
5
+ SHA512:
6
+ metadata.gz: 846f1efba33eba3144e0918348068fc34fefe1ef8dfb62671231ccb01a836b2274b6ac6b43737484d43e0057556898b544aa12142df27860a881b4b9fa43b973
7
+ data.tar.gz: 676ad00ea2529681fbfe09e9982a30e49960a03f3dd25733ba206e2fed6bb0708789ec543acb193343b9ec0b31c0ecd12fd4f9d1adbbd9393324acd33322864c
data/.gitignore ADDED
@@ -0,0 +1,9 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "https://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in pgdexter.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2017 Andrew Kane
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,67 @@
1
+ # Dexter
2
+
3
+ An automatic indexer for Postgres
4
+
5
+ ## Installation
6
+
7
+ First, install [HypoPG](https://github.com/dalibo/hypopg) on your database server. This doesn’t require a restart.
8
+
9
+ ```sh
10
+ wget https://github.com/dalibo/hypopg/archive/1.0.0.tar.gz
11
+ tar xf 1.0.0.tar.gz
12
+ cd hypopg-1.0.0
13
+ make
14
+ make install
15
+ ```
16
+
17
+ > Note: If you have issues, make sure `postgresql-server-dev-*` is installed.
18
+
19
+ Enable logging for slow queries in your Postgres config file (`postgresql.conf`).
20
+
21
+ ```ini
22
+ log_min_duration_statement = 10 # ms
23
+ ```
24
+
25
+ And install with:
26
+
27
+ ```sh
28
+ gem install pgdexter
29
+ ```
30
+
31
+ ## How to Use
32
+
33
+ Dexter needs a connection to your database and a log file to process.
34
+
35
+ ```sh
36
+ dexter <database-url> <log-file>
37
+ ```
38
+
39
+ This finds slow queries and generates output like:
40
+
41
+ ```
42
+ SELECT * FROM ratings ORDER BY user_id LIMIT 10
43
+ Starting cost: 3797.99
44
+ Final cost: 0.5
45
+ CREATE INDEX CONCURRENTLY ON ratings (user_id);
46
+ ```
47
+
48
+ To be safe, Dexter does not create indexes unless you pass the `--create` flag.
49
+
50
+ You can also pass a single statement with:
51
+
52
+ ```sh
53
+ dexter <database-url> -s "SELECT * FROM ..."
54
+ ```
55
+
56
+ ## Options
57
+
58
+ - `--min-time` - only consider queries whose total DB time, summed across all logged calls, exceeds the given number of minutes (default: 0)
59
+
60
+ ## Contributing
61
+
62
+ Everyone is encouraged to help improve this project. Here are a few ways you can help:
63
+
64
+ - [Report bugs](https://github.com/ankane/dexter/issues)
65
+ - Fix bugs and [submit pull requests](https://github.com/ankane/dexter/pulls)
66
+ - Write, clarify, or fix documentation
67
+ - Suggest or add new features
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
require "bundler/gem_tasks"
require "rake/testtask"

# `rake test` runs every *_test.rb file under test/, with both test/ and lib/
# added to the load path.
Rake::TestTask.new(:test) do |test_task|
  test_task.libs.push("test", "lib")
  test_task.test_files = FileList["test/**/*_test.rb"]
end

# Running plain `rake` runs the test suite.
task default: :test
data/exe/dexter ADDED
@@ -0,0 +1,8 @@
1
#!/usr/bin/env ruby

# Command-line entry point: hands the raw arguments to Dexter::Client.
require "dexter"

begin
  Dexter::Client.new(ARGV).perform
rescue Interrupt
  # Ctrl-C: stop quietly instead of printing a backtrace
end
@@ -0,0 +1,49 @@
1
module Dexter
  # Reads a Postgres log file and collects the SELECT statements that were
  # logged with a duration, grouped by their pg_query fingerprint.
  class LogParser
    # Captures: 1 = duration in ms, 2 = entry type, 3 = first line of the query.
    # Postgres writes two spaces after "ms" ("duration: 1.5 ms  statement: ..."),
    # so any run of whitespace is accepted there.
    REGEX = /duration: (\d+\.\d+) ms\s+(statement|execute <unnamed>): (.+)/

    # logfile - path of the log file to read
    # options - :min_time is the minimum total time (in minutes) a query must
    #           have consumed to be reported; a missing or nil value means 0
    def initialize(logfile, options = {})
      @logfile = logfile
      @min_time = (options[:min_time] || 0) * 60_000 # convert minutes to ms
    end

    # Returns one query string per fingerprint (the first one seen) for every
    # fingerprint whose accumulated duration is greater than the minimum time.
    #
    # Raises Errno::ENOENT (from File.foreach) when the log file does not exist.
    def queries
      @top_queries = {}

      active_line = nil
      duration = nil

      File.foreach(@logfile) do |line|
        if active_line
          # NOTE(review): a line containing ": " is taken as the start of the
          # next log entry; a continuation line of a multi-line query that
          # contains ": " would end the entry early - confirm this is intended
          if line.include?(": ")
            process_entry(active_line, duration)
            active_line = nil
            duration = nil
          else
            # the first line was chomped, so restore the line break; otherwise
            # a trailing "--" comment would swallow the continuation line
            active_line << "\n" << line.chomp
          end
        end

        if !active_line && (m = REGEX.match(line.chomp))
          duration = m[1].to_f
          active_line = m[3]
        end
      end
      # the last entry has no following line to terminate it
      process_entry(active_line, duration) if active_line

      @top_queries.select { |_, v| v[:total_time] > @min_time }.map { |_, v| v[:query] }
    end

    private

    # Adds one logged execution to the per-fingerprint totals. Entries that do
    # not mention SELECT, or that pg_query cannot parse, are ignored.
    def process_entry(query, duration)
      return unless query =~ /SELECT/i

      fingerprint = fingerprint_for(query)
      return unless fingerprint

      entry = (@top_queries[fingerprint] ||= {calls: 0, total_time: 0, query: query})
      entry[:calls] += 1
      entry[:total_time] += duration
    end

    # Returns the pg_query fingerprint, or nil when the statement cannot be
    # parsed (for example a truncated log entry), so one bad entry does not
    # abort the whole run.
    def fingerprint_for(query)
      PgQuery.fingerprint(query)
    rescue PgQuery::ParseError
      nil
    end
  end
end
@@ -0,0 +1,3 @@
1
module Dexter
  # Gem version; frozen so the shared string cannot be mutated by accident.
  VERSION = "0.1.0".freeze
end
data/lib/dexter.rb ADDED
@@ -0,0 +1,265 @@
1
require "dexter/version"
require "slop"
require "pg"
require "pg_query"
require "json"
require "set"
require "time"
require "uri"
require "dexter/log_parser"
8
+
9
+ module Dexter
10
+ class Client
11
+ attr_reader :arguments, :options
12
+
13
+ def initialize(args)
14
+ @arguments, @options = parse_args(args)
15
+ end
16
+
17
+ def perform
18
+ abort "Missing database url" if arguments.empty?
19
+ abort "Too many arguments" if arguments.size > 2
20
+
21
+ # get queries
22
+ queries = []
23
+ if options[:s]
24
+ queries << options[:s]
25
+ end
26
+ if arguments[1]
27
+ begin
28
+ parser = LogParser.new(arguments[1], min_time: options[:min_time])
29
+ queries.concat(parser.queries)
30
+ rescue Errno::ENOENT
31
+ abort "Log file not found"
32
+ end
33
+ end
34
+
35
+ # narrow down queries and tables
36
+ tables, queries = narrow_queries(queries)
37
+ return if tables.empty?
38
+
39
+ # get ready for hypothetical indexes
40
+ select_all("SET client_min_messages = warning")
41
+ select_all("CREATE EXTENSION IF NOT EXISTS hypopg")
42
+ select_all("SELECT hypopg_reset()")
43
+
44
+ # ensure tables have recently been analyzed
45
+ analyze_tables(tables)
46
+
47
+ # get initial plans
48
+ initial_plans = {}
49
+ queries.each do |query|
50
+ begin
51
+ initial_plans[query] = plan(query)
52
+ rescue PG::Error
53
+ # do nothing
54
+ end
55
+ end
56
+ queries.select! { |q| initial_plans[q] }
57
+
58
+ # get existing indexes
59
+ index_set = Set.new
60
+ indexes(tables).each do |index|
61
+ # TODO make sure btree
62
+ index_set << [index["table"], index["columns"]]
63
+ end
64
+
65
+ # create hypothetical indexes
66
+ candidates = {}
67
+ columns(tables).each do |col|
68
+ unless index_set.include?([col[:table], [col[:column]]])
69
+ candidates[col] = select_all("SELECT * FROM hypopg_create_index('CREATE INDEX ON #{col[:table]} (#{[col[:column]].join(", ")})');").first["indexname"]
70
+ end
71
+ end
72
+
73
+ new_indexes = []
74
+ queries.each do |query|
75
+ starting_cost = initial_plans[query]["Total Cost"]
76
+ plan2 = plan(query)
77
+ cost2 = plan2["Total Cost"]
78
+ best_indexes = []
79
+
80
+ candidates.each do |col, index_name|
81
+ if plan2.inspect.include?(index_name)
82
+ best_indexes << {
83
+ table: col[:table],
84
+ columns: [col[:column]]
85
+ }
86
+ end
87
+ end
88
+
89
+ puts query
90
+ puts "Starting cost: #{starting_cost}"
91
+ puts "Final cost: #{cost2}"
92
+
93
+ # must make it 20% faster
94
+ if cost2 < starting_cost * 0.8
95
+ new_indexes.concat(best_indexes)
96
+ best_indexes.each do |index|
97
+ puts "CREATE INDEX CONCURRENTLY ON #{index[:table]} (#{index[:columns].join(", ")});"
98
+ end
99
+ else
100
+ puts "Nope!"
101
+ end
102
+ puts
103
+ end
104
+
105
+ # create indexes
106
+ if new_indexes.any?
107
+ puts "Indexes to be created:"
108
+ new_indexes.uniq.sort_by(&:to_a).each do |index|
109
+ statement = "CREATE INDEX CONCURRENTLY ON #{index[:table]} (#{index[:columns].join(", ")})"
110
+ puts "#{statement};"
111
+ select_all(statement) if options[:create]
112
+ end
113
+ end
114
+ end
115
+
116
+ def conn
117
+ @conn ||= begin
118
+ uri = URI.parse(arguments[0])
119
+ config = {
120
+ host: uri.host,
121
+ port: uri.port,
122
+ dbname: uri.path.sub(/\A\//, ""),
123
+ user: uri.user,
124
+ password: uri.password,
125
+ connect_timeout: 3
126
+ }.reject { |_, value| value.to_s.empty? }
127
+ PG::Connection.new(config)
128
+ end
129
+ rescue PG::ConnectionBad
130
+ abort "Bad database url"
131
+ end
132
+
133
+ def select_all(query)
134
+ conn.exec(query).to_a
135
+ end
136
+
137
+ def plan(query)
138
+ JSON.parse(select_all("EXPLAIN (FORMAT JSON) #{query}").first["QUERY PLAN"]).first["Plan"]
139
+ end
140
+
141
+ def narrow_queries(queries)
142
+ result = select_all <<-SQL
143
+ SELECT
144
+ table_name
145
+ FROM
146
+ information_schema.tables
147
+ WHERE
148
+ table_catalog = current_database() AND
149
+ table_schema NOT IN ('pg_catalog', 'information_schema')
150
+ SQL
151
+ possible_tables = Set.new(result.map { |r| r["table_name"] })
152
+
153
+ tables = queries.flat_map { |q| PgQuery.parse(q).tables }.uniq.select { |t| possible_tables.include?(t) }
154
+
155
+ [tables, queries.select { |q| PgQuery.parse(q).tables.all? { |t| possible_tables.include?(t) } }]
156
+ end
157
+
158
+ def columns(tables)
159
+ columns = select_all <<-SQL
160
+ SELECT
161
+ table_name,
162
+ column_name
163
+ FROM
164
+ information_schema.columns
165
+ WHERE
166
+ table_schema = 'public' AND
167
+ table_name IN (#{tables.map { |t| quote(t) }.join(", ")})
168
+ SQL
169
+
170
+ columns.map { |v| {table: v["table_name"], column: v["column_name"]} }
171
+ end
172
+
173
+ def indexes(tables)
174
+ select_all(<<-SQL
175
+ SELECT
176
+ schemaname AS schema,
177
+ t.relname AS table,
178
+ ix.relname AS name,
179
+ regexp_replace(pg_get_indexdef(i.indexrelid), '^[^\\(]*\\((.*)\\)$', '\\1') AS columns,
180
+ regexp_replace(pg_get_indexdef(i.indexrelid), '.* USING ([^ ]*) \\(.*', '\\1') AS using,
181
+ indisunique AS unique,
182
+ indisprimary AS primary,
183
+ indisvalid AS valid,
184
+ indexprs::text,
185
+ indpred::text,
186
+ pg_get_indexdef(i.indexrelid) AS definition
187
+ FROM
188
+ pg_index i
189
+ INNER JOIN
190
+ pg_class t ON t.oid = i.indrelid
191
+ INNER JOIN
192
+ pg_class ix ON ix.oid = i.indexrelid
193
+ LEFT JOIN
194
+ pg_stat_user_indexes ui ON ui.indexrelid = i.indexrelid
195
+ WHERE
196
+ t.relname IN (#{tables.map { |t| quote(t) }.join(", ")}) AND
197
+ schemaname IS NOT NULL AND
198
+ indisvalid = 't' AND
199
+ indexprs IS NULL AND
200
+ indpred IS NULL
201
+ ORDER BY
202
+ 1, 2
203
+ SQL
204
+ ).map { |v| v["columns"] = v["columns"].sub(") WHERE (", " WHERE ").split(", ").map { |c| unquote(c) }; v }
205
+ end
206
+
207
+ def unquote(part)
208
+ if part && part.start_with?('"')
209
+ part[1..-2]
210
+ else
211
+ part
212
+ end
213
+ end
214
+
215
+ def analyze_tables(tables)
216
+ analyze_stats = select_all <<-SQL
217
+ SELECT
218
+ schemaname AS schema,
219
+ relname AS table,
220
+ last_analyze,
221
+ last_autoanalyze
222
+ FROM
223
+ pg_stat_user_tables
224
+ WHERE
225
+ relname IN (#{tables.map { |t| quote(t) }.join(", ")})
226
+ SQL
227
+
228
+ last_analyzed = {}
229
+ analyze_stats.each do |stats|
230
+ last_analyzed[stats["table"]] = Time.parse(stats["last_analyze"]) if stats["last_analyze"]
231
+ end
232
+
233
+ tables.each do |table|
234
+ if !last_analyzed[table] || last_analyzed[table] < Time.now - 3600
235
+ puts "Analyzing #{table}"
236
+ select_all("ANALYZE #{table}")
237
+ end
238
+ end
239
+ end
240
+
241
+ def quote(value)
242
+ if value.is_a?(String)
243
+ "'#{quote_string(value)}'"
244
+ else
245
+ value
246
+ end
247
+ end
248
+
249
+ # activerecord
250
+ def quote_string(s)
251
+ s.gsub(/\\/, '\&\&').gsub(/'/, "''")
252
+ end
253
+
254
+ def parse_args(args)
255
+ opts = Slop.parse(args) do |o|
256
+ o.boolean "--create", default: false
257
+ o.string "-s"
258
+ o.float "--min-time", default: 0
259
+ end
260
+ [opts.arguments, opts.to_hash]
261
+ rescue Slop::Error => e
262
+ abort e.message
263
+ end
264
+ end
265
+ end
data/pgdexter.gemspec ADDED
@@ -0,0 +1,28 @@
1
# coding: utf-8
# Make lib/ loadable so the version constant can be read while building.
lib = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require "dexter/version"

Gem::Specification.new do |spec|
  spec.name          = "pgdexter"
  spec.version       = Dexter::VERSION
  spec.authors       = ["Andrew Kane"]
  spec.email         = ["andrew@chartkick.com"]

  spec.summary       = "An automatic indexer for Postgres"
  spec.homepage      = "https://github.com/ankane/dexter"
  # matches the MIT license text shipped in LICENSE.txt
  spec.license       = "MIT"

  # package every tracked file except test, spec, and feature directories
  spec.files         = `git ls-files -z`.split("\x0").reject do |f|
    f.match(%r{^(test|spec|features)/})
  end
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_dependency "slop", ">= 4.2.0"
  spec.add_dependency "pg"
  spec.add_dependency "pg_query"

  spec.add_development_dependency "bundler"
  spec.add_development_dependency "rake"
end
metadata ADDED
@@ -0,0 +1,124 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pgdexter
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Andrew Kane
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2017-06-24 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: slop
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 4.2.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 4.2.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: pg
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: pg_query
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: bundler
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rake
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ description:
84
+ email:
85
+ - andrew@chartkick.com
86
+ executables:
87
+ - dexter
88
+ extensions: []
89
+ extra_rdoc_files: []
90
+ files:
91
+ - ".gitignore"
92
+ - Gemfile
93
+ - LICENSE.txt
94
+ - README.md
95
+ - Rakefile
96
+ - exe/dexter
97
+ - lib/dexter.rb
98
+ - lib/dexter/log_parser.rb
99
+ - lib/dexter/version.rb
100
+ - pgdexter.gemspec
101
+ homepage: https://github.com/ankane/dexter
102
+ licenses: []
103
+ metadata: {}
104
+ post_install_message:
105
+ rdoc_options: []
106
+ require_paths:
107
+ - lib
108
+ required_ruby_version: !ruby/object:Gem::Requirement
109
+ requirements:
110
+ - - ">="
111
+ - !ruby/object:Gem::Version
112
+ version: '0'
113
+ required_rubygems_version: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ requirements: []
119
+ rubyforge_project:
120
+ rubygems_version: 2.6.11
121
+ signing_key:
122
+ specification_version: 4
123
+ summary: An automatic indexer for Postgres
124
+ test_files: []