pgdexter 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 12871a558deb7cd0cb5067476415a34164d5d906
4
+ data.tar.gz: 6a293f4992bacb524a37bd7f7882851d364f6080
5
+ SHA512:
6
+ metadata.gz: 846f1efba33eba3144e0918348068fc34fefe1ef8dfb62671231ccb01a836b2274b6ac6b43737484d43e0057556898b544aa12142df27860a881b4b9fa43b973
7
+ data.tar.gz: 676ad00ea2529681fbfe09e9982a30e49960a03f3dd25733ba206e2fed6bb0708789ec543acb193343b9ec0b31c0ecd12fd4f9d1adbbd9393324acd33322864c
data/.gitignore ADDED
@@ -0,0 +1,9 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "https://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in pgdexter.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2017 Andrew Kane
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,67 @@
1
+ # Dexter
2
+
3
+ An automatic indexer for Postgres
4
+
5
+ ## Installation
6
+
7
+ First, install [HypoPG](https://github.com/dalibo/hypopg) on your database server. This doesn’t require a restart.
8
+
9
+ ```sh
10
+ wget https://github.com/dalibo/hypopg/archive/1.0.0.tar.gz
11
+ tar xf 1.0.0.tar.gz
12
+ cd hypopg-1.0.0
13
+ make
14
+ make install
15
+ ```
16
+
17
+ > Note: If you have issues, make sure `postgresql-server-dev-*` is installed.
18
+
19
+ Enable logging for slow queries.
20
+
21
+ ```ini
22
+ log_min_duration_statement = 10 # ms
23
+ ```
24
+
25
+ And install with:
26
+
27
+ ```sh
28
+ gem install pgdexter
29
+ ```
30
+
31
+ ## How to Use
32
+
33
+ Dexter needs a connection to your database and a log file to process.
34
+
35
+ ```sh
36
+ dexter <database-url> <log-file>
37
+ ```
38
+
39
+ This finds slow queries and generates output like:
40
+
41
+ ```
42
+ SELECT * FROM ratings ORDER BY user_id LIMIT 10
43
+ Starting cost: 3797.99
44
+ Final cost: 0.5
45
+ CREATE INDEX CONCURRENTLY ON ratings (user_id);
46
+ ```
47
+
48
+ To be safe, Dexter does not create indexes unless you pass the `--create` flag.
49
+
50
+ You can also pass a single statement with:
51
+
52
+ ```sh
53
+ dexter <database-url> -s "SELECT * FROM ..."
54
+ ```
55
+
56
+ ## Options
57
+
58
+ - `--min-time` - only consider queries whose total execution time in the log (summed across all runs of the query) exceeds this many minutes (default: 0)
59
+
60
+ ## Contributing
61
+
62
+ Everyone is encouraged to help improve this project. Here are a few ways you can help:
63
+
64
+ - [Report bugs](https://github.com/ankane/dexter/issues)
65
+ - Fix bugs and [submit pull requests](https://github.com/ankane/dexter/pulls)
66
+ - Write, clarify, or fix documentation
67
+ - Suggest or add new features
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
require "bundler/gem_tasks"
require "rake/testtask"

# `rake test` runs every *_test.rb file found under test/, with both the
# test/ and lib/ directories added to the load path.
Rake::TestTask.new(:test) do |test_task|
  test_task.libs.push("test", "lib")
  test_task.test_files = FileList["test/**/*_test.rb"]
end

# A bare `rake` runs the test task.
task default: :test
data/exe/dexter ADDED
@@ -0,0 +1,8 @@
1
#!/usr/bin/env ruby

# Command-line entry point: hands the raw arguments to Dexter::Client and
# runs it.
require "dexter"

begin
  Dexter::Client.new(ARGV).perform
rescue Interrupt
  # Deliberately swallowed so that pressing Ctrl-C ends the script without
  # an error being raised out of it.
end
@@ -0,0 +1,49 @@
1
module Dexter
  # Reads a Postgres log file and collects the SELECT statements recorded in
  # "duration: ... statement: ..." entries, grouped by query fingerprint.
  class LogParser
    # Captures: 1 = duration in ms, 2 = entry label, 3 = first line of the
    # statement. `\s+` tolerates the double space Postgres writes between
    # "ms" and the label as well as a single space.
    REGEX = /duration: (\d+\.\d+) ms\s+(statement|execute <unnamed>): (.+)/

    # logfile - path of the log file to read.
    # options - Hash; :min_time is the minimum total time, in minutes, a
    #           query must have accumulated to be returned. A missing or nil
    #           value is treated as 0.
    def initialize(logfile, options = {})
      @logfile = logfile
      @min_time = (options[:min_time] || 0) * 60_000 # convert minutes to ms
    end

    # Parses the log file and returns an Array of query strings (the first
    # occurrence seen for each fingerprint) whose summed duration is greater
    # than the configured minimum.
    #
    # Raises Errno::ENOENT (from File.foreach) when the file does not exist.
    def queries
      @top_queries = {}

      active_line = nil
      duration = nil

      File.foreach(@logfile) do |line|
        if active_line
          if line.include?(": ")
            # A line containing ": " is treated as the start of the next log
            # entry, so the statement collected so far is complete.
            process_entry(active_line, duration)
            active_line = nil
            duration = nil
          else
            # Continuation of a multi-line statement.
            active_line << line
          end
        end

        # The line that closed the previous entry may itself open a new one.
        if !active_line && (m = REGEX.match(line.chomp))
          duration = m[1].to_f
          active_line = m[3]
        end
      end
      # Flush the entry still being collected when the file ends.
      process_entry(active_line, duration) if active_line

      @top_queries.select { |_, v| v[:total_time] > @min_time }.map { |_, v| v[:query] }
    end

    private

    # Adds one logged statement to the per-fingerprint totals. Statements
    # that do not contain "SELECT", or that PgQuery cannot fingerprint
    # (for example a truncated log entry), are ignored.
    def process_entry(query, duration)
      return unless query =~ /SELECT/i

      fingerprint =
        begin
          PgQuery.fingerprint(query)
        rescue PgQuery::ParseError
          nil
        end
      return unless fingerprint

      entry = (@top_queries[fingerprint] ||= {calls: 0, total_time: 0, query: query})
      entry[:calls] += 1
      entry[:total_time] += duration
    end
  end
end
@@ -0,0 +1,3 @@
1
module Dexter
  # Version string of this release of the gem.
  VERSION = "0.1.0"
end
data/lib/dexter.rb ADDED
@@ -0,0 +1,265 @@
1
require "json"
require "set"
require "time"
require "uri"

require "pg"
require "pg_query"
require "slop"

require "dexter/version"
require "dexter/log_parser"
8
+
9
module Dexter
  # Command-line client. Takes a database URL plus a log file and/or a single
  # statement, tries a hypothetical (HypoPG) single-column index on every
  # column of the tables involved, and reports the indexes that lower a
  # query's planner cost by more than 20%. Indexes are only created when the
  # --create option is set.
  class Client
    attr_reader :arguments, :options

    # args - Array of raw command-line arguments (typically ARGV).
    def initialize(args)
      @arguments, @options = parse_args(args)
    end

    # Runs the whole analysis and prints the results to stdout.
    # Aborts the process when the arguments are invalid, the log file is
    # missing, or the database cannot be reached.
    def perform
      abort "Missing database url" if arguments.empty?
      abort "Too many arguments" if arguments.size > 2

      # narrow down queries and tables
      tables, queries = narrow_queries(collect_queries)
      return if tables.empty?

      # get ready for hypothetical indexes
      select_all("SET client_min_messages = warning")
      select_all("CREATE EXTENSION IF NOT EXISTS hypopg")
      select_all("SELECT hypopg_reset()")

      # ensure tables have recently been analyzed
      analyze_tables(tables)

      # get initial plans; queries the database cannot plan are dropped
      initial_plans = {}
      queries.each do |query|
        begin
          initial_plans[query] = plan(query)
        rescue PG::Error
          # do nothing
        end
      end
      queries = queries.select { |q| initial_plans[q] }

      candidates = create_hypothetical_indexes(tables)

      new_indexes = []
      queries.each do |query|
        starting_cost = initial_plans[query]["Total Cost"]
        plan2 = plan(query)
        cost2 = plan2["Total Cost"]

        # a candidate counts as used when its hypothetical index name shows
        # up anywhere in the new plan
        plan_text = plan2.inspect
        best_indexes = []
        candidates.each do |col, index_name|
          next unless plan_text.include?(index_name)
          best_indexes << {table: col[:table], columns: [col[:column]]}
        end

        puts query
        puts "Starting cost: #{starting_cost}"
        puts "Final cost: #{cost2}"

        # must make it 20% faster
        if cost2 < starting_cost * 0.8
          new_indexes.concat(best_indexes)
          best_indexes.each { |index| puts "#{create_index_statement(index)};" }
        else
          puts "Nope!"
        end
        puts
      end

      # create indexes
      if new_indexes.any?
        puts "Indexes to be created:"
        new_indexes.uniq.sort_by(&:to_a).each do |index|
          statement = create_index_statement(index)
          puts "#{statement};"
          select_all(statement) if options[:create]
        end
      end
    end

    # Lazily opens and memoizes the PG connection described by the database
    # URL in arguments[0]. Aborts with "Bad database url" when the URL cannot
    # be parsed or the connection fails.
    def conn
      @conn ||= begin
        uri = URI.parse(arguments[0])
        config = {
          host: uri.host,
          port: uri.port,
          # path is nil for opaque URIs, hence to_s
          dbname: uri.path.to_s.sub(/\A\//, ""),
          user: uri.user,
          password: uri.password,
          connect_timeout: 3
        }.reject { |_, value| value.to_s.empty? }
        PG::Connection.new(config)
      end
    rescue PG::ConnectionBad, URI::InvalidURIError
      abort "Bad database url"
    end

    # Executes a SQL string and returns the result rows as an Array of Hashes.
    def select_all(query)
      conn.exec(query).to_a
    end

    # Returns the top-level "Plan" node (a Hash) of EXPLAIN (FORMAT JSON) for
    # the given query. The query is only explained, not executed.
    def plan(query)
      JSON.parse(select_all("EXPLAIN (FORMAT JSON) #{query}").first["QUERY PLAN"]).first["Plan"]
    end

    # Returns [tables, queries]:
    #   tables  - unique tables referenced by the queries that exist in the
    #             current database outside the system schemas
    #   queries - the queries whose referenced tables all exist
    # Queries that PgQuery cannot parse are skipped with a warning on stderr.
    def narrow_queries(queries)
      result = select_all <<-SQL
        SELECT
          table_name
        FROM
          information_schema.tables
        WHERE
          table_catalog = current_database() AND
          table_schema NOT IN ('pg_catalog', 'information_schema')
      SQL
      possible_tables = Set.new(result.map { |r| r["table_name"] })

      # parse every query once, keeping [query, tables] pairs
      parsed = []
      queries.each do |q|
        begin
          parsed << [q, PgQuery.parse(q).tables]
        rescue PgQuery::ParseError => e
          warn "Skipping query that could not be parsed (#{e.message}): #{q}"
        end
      end

      tables = parsed.flat_map { |_, ts| ts }.uniq.select { |t| possible_tables.include?(t) }
      usable = parsed.select { |_, ts| ts.all? { |t| possible_tables.include?(t) } }.map { |q, _| q }

      [tables, usable]
    end

    # Returns an Array of {table:, column:} Hashes for every column of the
    # given tables in the public schema.
    # NOTE(review): narrow_queries accepts tables from any non-system schema,
    # but only columns from 'public' are considered here.
    def columns(tables)
      columns = select_all <<-SQL
        SELECT
          table_name,
          column_name
        FROM
          information_schema.columns
        WHERE
          table_schema = 'public' AND
          table_name IN (#{tables.map { |t| quote(t) }.join(", ")})
      SQL

      columns.map { |v| {table: v["table_name"], column: v["column_name"]} }
    end

    # Returns one Hash per valid, non-expression, non-partial index on the
    # given tables. The "columns" value of each row is converted from the
    # index definition text into an Array of unquoted column names.
    def indexes(tables)
      rows = select_all <<-SQL
        SELECT
          schemaname AS schema,
          t.relname AS table,
          ix.relname AS name,
          regexp_replace(pg_get_indexdef(i.indexrelid), '^[^\\(]*\\((.*)\\)$', '\\1') AS columns,
          regexp_replace(pg_get_indexdef(i.indexrelid), '.* USING ([^ ]*) \\(.*', '\\1') AS using,
          indisunique AS unique,
          indisprimary AS primary,
          indisvalid AS valid,
          indexprs::text,
          indpred::text,
          pg_get_indexdef(i.indexrelid) AS definition
        FROM
          pg_index i
        INNER JOIN
          pg_class t ON t.oid = i.indrelid
        INNER JOIN
          pg_class ix ON ix.oid = i.indexrelid
        LEFT JOIN
          pg_stat_user_indexes ui ON ui.indexrelid = i.indexrelid
        WHERE
          t.relname IN (#{tables.map { |t| quote(t) }.join(", ")}) AND
          schemaname IS NOT NULL AND
          indisvalid = 't' AND
          indexprs IS NULL AND
          indpred IS NULL
        ORDER BY
          1, 2
      SQL

      rows.map do |row|
        row["columns"] = row["columns"].sub(") WHERE (", " WHERE ").split(", ").map { |c| unquote(c) }
        row
      end
    end

    # Strips the surrounding double quotes from a quoted identifier; any
    # other value (including nil) is returned unchanged.
    def unquote(part)
      if part && part.start_with?('"')
        part[1..-2]
      else
        part
      end
    end

    # Runs ANALYZE on every given table that has no manual or automatic
    # analyze recorded within the last hour.
    def analyze_tables(tables)
      analyze_stats = select_all <<-SQL
        SELECT
          schemaname AS schema,
          relname AS table,
          last_analyze,
          last_autoanalyze
        FROM
          pg_stat_user_tables
        WHERE
          relname IN (#{tables.map { |t| quote(t) }.join(", ")})
      SQL

      last_analyzed = {}
      analyze_stats.each do |stats|
        # a recent autoanalyze is as good as a manual ANALYZE
        times = [stats["last_analyze"], stats["last_autoanalyze"]].compact.map { |t| Time.parse(t) }
        last_analyzed[stats["table"]] = times.max unless times.empty?
      end

      tables.each do |table|
        if !last_analyzed[table] || last_analyzed[table] < Time.now - 3600
          puts "Analyzing #{table}"
          select_all("ANALYZE #{quote_ident(table)}")
        end
      end
    end

    # Returns a String wrapped in single quotes with its content escaped for
    # use as a SQL literal; non-String values are returned unchanged.
    def quote(value)
      if value.is_a?(String)
        "'#{quote_string(value)}'"
      else
        value
      end
    end

    # Doubles backslashes and single quotes (taken from activerecord).
    def quote_string(s)
      s.gsub(/\\/, '\&\&').gsub(/'/, "''")
    end

    # Parses the command line with Slop. Returns [positional arguments,
    # options Hash]. Aborts with the parser's message on invalid options.
    def parse_args(args)
      opts = Slop.parse(args) do |o|
        o.boolean "--create", default: false
        o.string "-s"
        o.float "--min-time", default: 0
      end
      [opts.arguments, opts.to_hash]
    rescue Slop::Error => e
      abort e.message
    end

    private

    # Gathers the statements to analyze: the -s statement (if given) followed
    # by the queries extracted from the log file (if given).
    def collect_queries
      queries = []
      queries << options[:s] if options[:s]
      if arguments[1]
        begin
          parser = LogParser.new(arguments[1], min_time: options[:min_time])
          queries.concat(parser.queries)
        rescue Errno::ENOENT
          abort "Log file not found"
        end
      end
      queries
    end

    # Creates a hypothetical single-column index for every column that is not
    # already covered by an existing single-column index. Returns a Hash of
    # {table:, column:} => hypothetical index name.
    def create_hypothetical_indexes(tables)
      # get existing indexes
      index_set = Set.new
      indexes(tables).each do |index|
        # TODO make sure btree
        index_set << [index["table"], index["columns"]]
      end

      candidates = {}
      columns(tables).each do |col|
        next if index_set.include?([col[:table], [col[:column]]])
        statement = "CREATE INDEX ON #{quote_ident(col[:table])} (#{quote_ident(col[:column])})"
        # the statement is passed as a SQL string literal, so it is escaped
        candidates[col] = select_all("SELECT * FROM hypopg_create_index(#{quote(statement)})").first["indexname"]
      end
      candidates
    end

    # Builds the CREATE INDEX CONCURRENTLY statement (without a trailing
    # semicolon) for an index Hash of the form {table:, columns:}.
    def create_index_statement(index)
      column_list = index[:columns].map { |c| quote_ident(c) }.join(", ")
      "CREATE INDEX CONCURRENTLY ON #{quote_ident(index[:table])} (#{column_list})"
    end

    # Wraps an identifier in double quotes, doubling embedded double quotes,
    # so mixed-case names and reserved words are valid in generated SQL.
    def quote_ident(value)
      %("#{value.to_s.gsub('"', '""')}")
    end
  end
end
data/pgdexter.gemspec ADDED
@@ -0,0 +1,28 @@
1
# coding: utf-8
# Gem specification for pgdexter. The version is read from
# lib/dexter/version.rb, so lib/ is put on the load path first.
lib = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require "dexter/version"

Gem::Specification.new do |spec|
  spec.name          = "pgdexter"
  spec.version       = Dexter::VERSION
  spec.authors       = ["Andrew Kane"]
  spec.email         = ["andrew@chartkick.com"]

  spec.summary       = "An automatic indexer for Postgres"
  spec.homepage      = "https://github.com/ankane/dexter"
  # matches the MIT License shipped in LICENSE.txt
  spec.license       = "MIT"

  # package every file tracked by git except tests
  spec.files         = `git ls-files -z`.split("\x0").reject do |f|
    f.match(%r{^(test|spec|features)/})
  end
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_dependency "slop", ">= 4.2.0"
  spec.add_dependency "pg"
  spec.add_dependency "pg_query"

  spec.add_development_dependency "bundler"
  spec.add_development_dependency "rake"
end
metadata ADDED
@@ -0,0 +1,124 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pgdexter
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Andrew Kane
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2017-06-24 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: slop
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 4.2.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 4.2.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: pg
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: pg_query
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: bundler
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rake
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ description:
84
+ email:
85
+ - andrew@chartkick.com
86
+ executables:
87
+ - dexter
88
+ extensions: []
89
+ extra_rdoc_files: []
90
+ files:
91
+ - ".gitignore"
92
+ - Gemfile
93
+ - LICENSE.txt
94
+ - README.md
95
+ - Rakefile
96
+ - exe/dexter
97
+ - lib/dexter.rb
98
+ - lib/dexter/log_parser.rb
99
+ - lib/dexter/version.rb
100
+ - pgdexter.gemspec
101
+ homepage: https://github.com/ankane/dexter
102
+ licenses: []
103
+ metadata: {}
104
+ post_install_message:
105
+ rdoc_options: []
106
+ require_paths:
107
+ - lib
108
+ required_ruby_version: !ruby/object:Gem::Requirement
109
+ requirements:
110
+ - - ">="
111
+ - !ruby/object:Gem::Version
112
+ version: '0'
113
+ required_rubygems_version: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ requirements: []
119
+ rubyforge_project:
120
+ rubygems_version: 2.6.11
121
+ signing_key:
122
+ specification_version: 4
123
+ summary: An automatic indexer for Postgres
124
+ test_files: []