pgdexter 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -0
- data/README.md +26 -5
- data/lib/dexter.rb +5 -44
- data/lib/dexter/client.rb +67 -0
- data/lib/dexter/collector.rb +47 -0
- data/lib/dexter/indexer.rb +174 -91
- data/lib/dexter/log_parser.rb +11 -80
- data/lib/dexter/logging.rb +7 -0
- data/lib/dexter/processor.rb +61 -0
- data/lib/dexter/query.rb +15 -0
- data/lib/dexter/version.rb +1 -1
- data/pgdexter.gemspec +2 -1
- metadata +9 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b472c8289c8c878c0ea790a3ec5fdf43e22bfaa1
|
4
|
+
data.tar.gz: 55f96eb8bd0a8739c79fb7c749a8aee68b2a119f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 873d3d0f9471f90c9b90721274623c6c3dcfbcfe88989143a6b66a5ce1345a2187bab75ef4f32f52efd9f34a1e1806524696854348e592d6eb31bec25ca193c8
|
7
|
+
data.tar.gz: a545b2b1a9d2f6650ec04c1769b243863c412fbff3b1e0575ab4cc6c98648119e487a2bc75f0afeeb704895184580ada13169a05edb3e00bc0caca37759183bc
|
data/CHANGELOG.md
ADDED
data/README.md
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
# Dexter
|
2
2
|
|
3
|
-
|
3
|
+
The automatic indexer for Postgres
|
4
|
+
|
5
|
+
[Read about how it works](https://medium.com/@ankane/introducing-dexter-the-automatic-indexer-for-postgres-5f8fa8b28f27)
|
4
6
|
|
5
7
|
## Installation
|
6
8
|
|
@@ -22,7 +24,7 @@ Enable logging for slow queries.
|
|
22
24
|
log_min_duration_statement = 10 # ms
|
23
25
|
```
|
24
26
|
|
25
|
-
And install with:
|
27
|
+
And install the command line tool with:
|
26
28
|
|
27
29
|
```sh
|
28
30
|
gem install pgdexter
|
@@ -50,12 +52,31 @@ This finds slow queries and generates output like:
|
|
50
52
|
2017-06-25T17:53:22+00:00 Processing 12 new query fingerprints
|
51
53
|
```
|
52
54
|
|
53
|
-
To be safe, Dexter will not create indexes unless you pass the `--create` flag.
|
55
|
+
To be safe, Dexter will not create indexes unless you pass the `--create` flag. In this case, you’ll see:
|
56
|
+
|
57
|
+
```log
|
58
|
+
2017-06-25T17:52:22+00:00 Index found: ratings (user_id)
|
59
|
+
2017-06-25T17:52:22+00:00 Creating index: CREATE INDEX CONCURRENTLY ON ratings (user_id)
|
60
|
+
2017-06-25T17:52:37+00:00 Index created: 15243 ms
|
61
|
+
```
|
54
62
|
|
55
63
|
## Options
|
56
64
|
|
57
|
-
|
58
|
-
|
65
|
+
Name | Description | Default
|
66
|
+
--- | --- | ---
|
67
|
+
exclude | prevent specific tables from being indexed | None
|
68
|
+
interval | time to wait between processing queries, in seconds | 60
|
69
|
+
log-level | `debug` gives additional info for suggested indexes<br />`debug2` gives additional info for all processed queries | info
|
70
|
+
log-sql | log SQL statements executed | false
|
71
|
+
min-time | only process queries consuming a min amount of DB time, in minutes | 0
|
72
|
+
|
73
|
+
## Future Work
|
74
|
+
|
75
|
+
[Here are some ideas](https://github.com/ankane/dexter/issues/1)
|
76
|
+
|
77
|
+
## Thanks
|
78
|
+
|
79
|
+
This software wouldn’t be possible without [HypoPG](https://github.com/dalibo/hypopg), which allows you to create hypothetical indexes, and [pg_query](https://github.com/lfittl/pg_query), which allows you to parse and fingerprint queries. A big thanks to Dalibo and Lukas Fittl respectively.
|
59
80
|
|
60
81
|
## Contributing
|
61
82
|
|
data/lib/dexter.rb
CHANGED
@@ -5,49 +5,10 @@ require "pg_query"
|
|
5
5
|
require "time"
|
6
6
|
require "set"
|
7
7
|
require "thread"
|
8
|
+
require "dexter/logging"
|
9
|
+
require "dexter/client"
|
10
|
+
require "dexter/collector"
|
8
11
|
require "dexter/indexer"
|
9
12
|
require "dexter/log_parser"
|
10
|
-
|
11
|
-
|
12
|
-
class Client
|
13
|
-
attr_reader :arguments, :options
|
14
|
-
|
15
|
-
def initialize(args)
|
16
|
-
@arguments, @options = parse_args(args)
|
17
|
-
end
|
18
|
-
|
19
|
-
def perform
|
20
|
-
abort "Missing database url" if arguments.empty?
|
21
|
-
abort "Too many arguments" if arguments.size > 2
|
22
|
-
|
23
|
-
# get queries
|
24
|
-
queries = []
|
25
|
-
if options[:s]
|
26
|
-
queries << options[:s]
|
27
|
-
Indexer.new(self).process_queries(queries)
|
28
|
-
end
|
29
|
-
if arguments[1]
|
30
|
-
begin
|
31
|
-
LogParser.new(arguments[1], self).perform
|
32
|
-
rescue Errno::ENOENT
|
33
|
-
abort "Log file not found"
|
34
|
-
end
|
35
|
-
end
|
36
|
-
if !options[:s] && !arguments[1]
|
37
|
-
LogParser.new(STDIN, self).perform
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
def parse_args(args)
|
42
|
-
opts = Slop.parse(args) do |o|
|
43
|
-
o.boolean "--create", default: false
|
44
|
-
o.string "-s"
|
45
|
-
o.float "--min-time", default: 0
|
46
|
-
o.integer "--interval", default: 60
|
47
|
-
end
|
48
|
-
[opts.arguments, opts.to_hash]
|
49
|
-
rescue Slop::Error => e
|
50
|
-
abort e.message
|
51
|
-
end
|
52
|
-
end
|
53
|
-
end
|
13
|
+
require "dexter/processor"
|
14
|
+
require "dexter/query"
|
@@ -0,0 +1,67 @@
|
|
1
|
+
module Dexter
  # Command line entry point: parses the arguments and dispatches to either a
  # one-off Indexer run (single statement) or a Processor (log file / STDIN).
  class Client
    # Log levels accepted by --log-level (compared case-insensitively).
    LOG_LEVELS = ["info", "debug", "debug2"].freeze

    # Help banner printed above the option list.
    BANNER = "Usage:\n    dexter <database-url> [options]\n\nOptions:".freeze

    attr_reader :arguments, :options

    # args - the raw command line arguments (e.g. ARGV).
    #
    # Parsing happens immediately, so this may print help/version and exit,
    # or abort on invalid input.
    def initialize(args)
      @arguments, @options = parse_args(args)
    end

    # Runs the tool:
    # * with --statement, a single query is handed to the Indexer;
    # * with a second positional argument, that log file is processed;
    # * otherwise log lines are read from STDIN.
    def perform
      # flush output immediately so it interleaves correctly when piped
      STDOUT.sync = true
      STDERR.sync = true

      database_url = arguments[0]

      if options[:statement]
        query = Query.new(options[:statement], fingerprint_for(options[:statement]))
        Indexer.new(database_url, options).process_queries([query])
      elsif arguments[1]
        Processor.new(database_url, arguments[1], options).perform
      else
        Processor.new(database_url, STDIN, options).perform
      end
    end

    # Parses the command line.
    #
    # Returns a two element array: the positional arguments and the options
    # hash. The :log_level option is normalized to lowercase so that the value
    # that passed validation is the same value later compared by consumers.
    #
    # Prints the help text and exits when no database url is given; aborts on
    # too many arguments, an unknown log level, or any Slop parsing error.
    def parse_args(args)
      opts = Slop.parse(args) do |o|
        o.banner = BANNER
        o.boolean "--create", "create indexes", default: false
        o.array "--exclude", "prevent specific tables from being indexed"
        o.integer "--interval", "time to wait between processing queries, in seconds", default: 60
        o.float "--min-time", "only process queries that have consumed a certain amount of DB time, in minutes", default: 0
        o.string "--log-level", "log level", default: "info"
        o.boolean "--log-sql", "log sql", default: false
        o.string "-s", "--statement", "process a single statement"
        o.on "-v", "--version", "print the version" do
          log Dexter::VERSION
          exit
        end
        o.on "-h", "--help", "prints help" do
          log o
          exit
        end
      end

      arguments = opts.arguments

      if arguments.empty?
        log opts
        exit
      end

      abort "Too many arguments" if arguments.size > 2

      parsed = opts.to_hash
      log_level = parsed[:log_level].to_s.downcase
      abort "Unknown log level" unless LOG_LEVELS.include?(log_level)

      # store the normalized level: validation is case-insensitive, but the
      # level is compared against lowercase literals once it leaves this class
      [arguments, parsed.merge(log_level: log_level)]
    rescue Slop::Error => e
      abort e.message
    end

    # Writes a message to standard error.
    def log(message)
      $stderr.puts message
    end

    private

    # Fingerprints a statement, falling back to "unknown" when the statement
    # cannot be fingerprinted (best effort, so any StandardError is tolerated).
    def fingerprint_for(statement)
      PgQuery.fingerprint(statement)
    rescue StandardError
      "unknown"
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
module Dexter
  # Accumulates per-fingerprint statistics for the queries seen in the log and
  # hands out the fingerprints that are new since the last fetch.
  #
  # `add` and `fetch_queries` may be called from different threads, so every
  # access to the shared state goes through @mutex.
  class Collector
    # options - Hash with an optional :min_time entry: the minimum total time
    #           (in minutes) a fingerprint must have consumed before it is
    #           returned by fetch_queries. Defaults to 0 when absent.
    def initialize(options = {})
      @top_queries = {}
      @new_queries = Set.new
      @mutex = Mutex.new
      @min_time = (options[:min_time] || 0) * 60000 # convert minutes to ms
    end

    # Records one execution of a query.
    #
    # query    - the SQL statement text.
    # duration - the execution time in ms.
    #
    # Statements that cannot be fingerprinted are ignored (returns nil).
    def add(query, duration)
      fingerprint =
        begin
          PgQuery.fingerprint(query)
        rescue PgQuery::ParseError
          nil
        end

      return unless fingerprint

      # update the stats under the lock: fetch_queries reads them from
      # another thread, and adding a key during iteration would raise
      @mutex.synchronize do
        stats = (@top_queries[fingerprint] ||= {calls: 0, total_time: 0})
        stats[:calls] += 1
        stats[:total_time] += duration
        stats[:query] = query # keep the most recent statement as the sample
        @new_queries << fingerprint
      end
    end

    # Returns Query objects (sorted by fingerprint) for every fingerprint seen
    # since the previous call whose total time exceeds the minimum, and resets
    # the set of new fingerprints.
    def fetch_queries
      queries = []

      @mutex.synchronize do
        # only the new fingerprints need to be inspected, not the full history
        @new_queries.each do |fingerprint|
          stats = @top_queries[fingerprint]
          queries << Query.new(stats[:query], fingerprint) if stats[:total_time] > @min_time
        end
        @new_queries.clear
      end

      queries.sort_by(&:fingerprint)
    end
  end
end
|
data/lib/dexter/indexer.rb
CHANGED
@@ -1,36 +1,100 @@
|
|
1
1
|
module Dexter
|
2
2
|
class Indexer
|
3
|
-
|
3
|
+
include Logging
|
4
4
|
|
5
|
-
def initialize(
|
6
|
-
@
|
5
|
+
def initialize(database_url, options)
|
6
|
+
@database_url = database_url
|
7
|
+
@create = options[:create]
|
8
|
+
@log_level = options[:log_level]
|
9
|
+
@exclude_tables = options[:exclude]
|
10
|
+
@log_sql = options[:log_sql]
|
7
11
|
|
8
|
-
|
9
|
-
select_all("CREATE EXTENSION IF NOT EXISTS hypopg")
|
12
|
+
create_extension
|
10
13
|
end
|
11
14
|
|
12
15
|
def process_queries(queries)
|
13
|
-
#
|
14
|
-
|
15
|
-
|
16
|
+
# reset hypothetical indexes
|
17
|
+
reset_hypothetical_indexes
|
18
|
+
|
19
|
+
# filter queries from other databases and system tables
|
20
|
+
tables = possible_tables(queries)
|
21
|
+
queries.each do |query|
|
22
|
+
query.missing_tables = !query.tables.all? { |t| tables.include?(t) }
|
23
|
+
end
|
24
|
+
|
25
|
+
# exclude user specified tables
|
26
|
+
# TODO exclude write-heavy tables
|
27
|
+
@exclude_tables.each do |table|
|
28
|
+
tables.delete(table)
|
29
|
+
end
|
30
|
+
|
31
|
+
# analyze tables if needed
|
32
|
+
analyze_tables(tables) if tables.any?
|
33
|
+
|
34
|
+
# get initial costs for queries
|
35
|
+
calculate_initial_cost(queries.reject(&:missing_tables))
|
36
|
+
|
37
|
+
# create hypothetical indexes
|
38
|
+
candidates = tables.any? ? create_hypothetical_indexes(tables) : {}
|
39
|
+
|
40
|
+
# get new costs and see if new indexes were used
|
41
|
+
new_indexes = determine_indexes(queries, candidates)
|
42
|
+
|
43
|
+
# display and create new indexes
|
44
|
+
show_and_create_indexes(new_indexes)
|
45
|
+
end
|
46
|
+
|
47
|
+
private
|
16
48
|
|
17
|
-
|
49
|
+
def create_extension
|
50
|
+
select_all("SET client_min_messages = warning")
|
51
|
+
select_all("CREATE EXTENSION IF NOT EXISTS hypopg")
|
52
|
+
end
|
53
|
+
|
54
|
+
def reset_hypothetical_indexes
|
18
55
|
select_all("SELECT hypopg_reset()")
|
56
|
+
end
|
19
57
|
|
20
|
-
|
21
|
-
|
58
|
+
def analyze_tables(tables)
|
59
|
+
tables = tables.to_a.sort
|
60
|
+
|
61
|
+
analyze_stats = select_all <<-SQL
|
62
|
+
SELECT
|
63
|
+
schemaname AS schema,
|
64
|
+
relname AS table,
|
65
|
+
last_analyze,
|
66
|
+
last_autoanalyze
|
67
|
+
FROM
|
68
|
+
pg_stat_user_tables
|
69
|
+
WHERE
|
70
|
+
relname IN (#{tables.map { |t| quote(t) }.join(", ")})
|
71
|
+
SQL
|
72
|
+
|
73
|
+
last_analyzed = {}
|
74
|
+
analyze_stats.each do |stats|
|
75
|
+
last_analyzed[stats["table"]] = Time.parse(stats["last_analyze"]) if stats["last_analyze"]
|
76
|
+
end
|
22
77
|
|
23
|
-
|
24
|
-
|
78
|
+
tables.each do |table|
|
79
|
+
if !last_analyzed[table] || last_analyzed[table] < Time.now - 3600
|
80
|
+
statement = "ANALYZE #{quote_ident(table)}"
|
81
|
+
log "Running analyze: #{statement}"
|
82
|
+
select_all(statement)
|
83
|
+
end
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
def calculate_initial_cost(queries)
|
25
88
|
queries.each do |query|
|
26
89
|
begin
|
27
|
-
|
90
|
+
query.initial_cost = plan(query.statement)["Total Cost"]
|
28
91
|
rescue PG::Error
|
29
92
|
# do nothing
|
30
93
|
end
|
31
94
|
end
|
32
|
-
|
95
|
+
end
|
33
96
|
|
97
|
+
def create_hypothetical_indexes(tables)
|
34
98
|
# get existing indexes
|
35
99
|
index_set = Set.new
|
36
100
|
indexes(tables).each do |index|
|
@@ -45,60 +109,96 @@ module Dexter
|
|
45
109
|
candidates[col] = select_all("SELECT * FROM hypopg_create_index('CREATE INDEX ON #{col[:table]} (#{[col[:column]].join(", ")})');").first["indexname"]
|
46
110
|
end
|
47
111
|
end
|
112
|
+
candidates
|
113
|
+
end
|
48
114
|
|
49
|
-
|
115
|
+
def determine_indexes(queries, candidates)
|
116
|
+
new_indexes = {}
|
50
117
|
|
51
|
-
new_indexes = []
|
52
118
|
queries.each do |query|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
119
|
+
if query.initial_cost
|
120
|
+
new_plan = plan(query.statement)
|
121
|
+
query.new_cost = new_plan["Total Cost"]
|
122
|
+
cost_savings = query.new_cost < query.initial_cost * 0.5
|
123
|
+
|
124
|
+
query_indexes = []
|
125
|
+
candidates.each do |col, index_name|
|
126
|
+
if new_plan.inspect.include?(index_name)
|
127
|
+
index = {
|
128
|
+
table: col[:table],
|
129
|
+
columns: [col[:column]]
|
130
|
+
}
|
131
|
+
query_indexes << index
|
132
|
+
|
133
|
+
if cost_savings
|
134
|
+
new_indexes[index] ||= index.dup
|
135
|
+
(new_indexes[index][:queries] ||= []) << query
|
136
|
+
end
|
137
|
+
end
|
69
138
|
end
|
70
139
|
end
|
71
140
|
|
72
|
-
|
141
|
+
if @log_level == "debug2"
|
142
|
+
log "Processed #{query.fingerprint}"
|
143
|
+
if query.initial_cost
|
144
|
+
log "Cost: #{query.initial_cost} -> #{query.new_cost}"
|
145
|
+
|
146
|
+
if query_indexes.any?
|
147
|
+
log "Indexes: #{query_indexes.map { |i| "#{i[:table]} (#{i[:columns].join(", ")})" }.join(", ")}"
|
148
|
+
log "Need 50% cost savings to suggest index" unless cost_savings
|
149
|
+
else
|
150
|
+
log "Indexes: None"
|
151
|
+
end
|
152
|
+
elsif query.fingerprint == "unknown"
|
153
|
+
log "Could not parse query"
|
154
|
+
elsif query.tables.empty?
|
155
|
+
log "No tables"
|
156
|
+
elsif query.missing_tables
|
157
|
+
log "Tables not present in current database"
|
158
|
+
else
|
159
|
+
log "Could not run explain"
|
160
|
+
end
|
161
|
+
|
162
|
+
puts
|
163
|
+
puts query.statement
|
164
|
+
puts
|
165
|
+
end
|
73
166
|
end
|
74
167
|
|
75
|
-
new_indexes
|
168
|
+
new_indexes.values.sort_by(&:to_a)
|
169
|
+
end
|
76
170
|
|
77
|
-
|
171
|
+
def show_and_create_indexes(new_indexes)
|
78
172
|
if new_indexes.any?
|
79
173
|
new_indexes.each do |index|
|
80
|
-
index[:queries] = queries_by_index[index]
|
81
|
-
|
82
174
|
log "Index found: #{index[:table]} (#{index[:columns].join(", ")})"
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
175
|
+
|
176
|
+
if @log_level.start_with?("debug")
|
177
|
+
index[:queries].sort_by(&:fingerprint).each do |query|
|
178
|
+
log "Query #{query.fingerprint} (Cost: #{query.initial_cost} -> #{query.new_cost})"
|
179
|
+
puts
|
180
|
+
puts query.statement
|
181
|
+
puts
|
182
|
+
end
|
183
|
+
end
|
90
184
|
end
|
91
185
|
|
92
|
-
|
93
|
-
|
94
|
-
#
|
95
|
-
|
186
|
+
if @create
|
187
|
+
# TODO use advisory locks
|
188
|
+
# 1. create lock
|
189
|
+
# 2. refresh existing index list
|
190
|
+
# 3. create indexes that still don't exist
|
191
|
+
# 4. release lock
|
192
|
+
new_indexes.each do |index|
|
193
|
+
statement = "CREATE INDEX CONCURRENTLY ON #{index[:table]} (#{index[:columns].join(", ")})"
|
96
194
|
log "Creating index: #{statement}"
|
97
195
|
started_at = Time.now
|
98
196
|
select_all(statement)
|
99
197
|
log "Index created: #{((Time.now - started_at) * 1000).to_i} ms"
|
100
198
|
end
|
101
199
|
end
|
200
|
+
else
|
201
|
+
log "No indexes found"
|
102
202
|
end
|
103
203
|
|
104
204
|
new_indexes
|
@@ -106,7 +206,7 @@ module Dexter
|
|
106
206
|
|
107
207
|
def conn
|
108
208
|
@conn ||= begin
|
109
|
-
uri = URI.parse(
|
209
|
+
uri = URI.parse(@database_url)
|
110
210
|
config = {
|
111
211
|
host: uri.host,
|
112
212
|
port: uri.port,
|
@@ -122,14 +222,24 @@ module Dexter
|
|
122
222
|
end
|
123
223
|
|
124
224
|
def select_all(query)
|
125
|
-
|
225
|
+
# use exec_params instead of exec for security
|
226
|
+
#
|
227
|
+
# Unlike PQexec, PQexecParams allows at most one SQL command in the given string.
|
228
|
+
# (There can be semicolons in it, but not more than one nonempty command.)
|
229
|
+
# This is a limitation of the underlying protocol, but has some usefulness
|
230
|
+
# as an extra defense against SQL-injection attacks.
|
231
|
+
# https://www.postgresql.org/docs/current/static/libpq-exec.html
|
232
|
+
query = squish(query)
|
233
|
+
log "SQL: #{query}" if @log_sql
|
234
|
+
conn.exec_params(query, []).to_a
|
126
235
|
end
|
127
236
|
|
128
237
|
def plan(query)
|
129
|
-
|
238
|
+
# strip semi-colons as another measure of defense
|
239
|
+
JSON.parse(select_all("EXPLAIN (FORMAT JSON) #{query.gsub(";", "")}").first["QUERY PLAN"]).first["Plan"]
|
130
240
|
end
|
131
241
|
|
132
|
-
def
|
242
|
+
def database_tables
|
133
243
|
result = select_all <<-SQL
|
134
244
|
SELECT
|
135
245
|
table_name
|
@@ -139,11 +249,11 @@ module Dexter
|
|
139
249
|
table_catalog = current_database() AND
|
140
250
|
table_schema NOT IN ('pg_catalog', 'information_schema')
|
141
251
|
SQL
|
142
|
-
|
143
|
-
|
144
|
-
tables = queries.flat_map { |q| PgQuery.parse(q).tables }.uniq.select { |t| possible_tables.include?(t) }
|
252
|
+
result.map { |r| r["table_name"] }
|
253
|
+
end
|
145
254
|
|
146
|
-
|
255
|
+
def possible_tables(queries)
|
256
|
+
Set.new(queries.flat_map(&:tables).uniq & database_tables)
|
147
257
|
end
|
148
258
|
|
149
259
|
def columns(tables)
|
@@ -168,13 +278,7 @@ module Dexter
|
|
168
278
|
t.relname AS table,
|
169
279
|
ix.relname AS name,
|
170
280
|
regexp_replace(pg_get_indexdef(i.indexrelid), '^[^\\(]*\\((.*)\\)$', '\\1') AS columns,
|
171
|
-
regexp_replace(pg_get_indexdef(i.indexrelid), '.* USING ([^ ]*) \\(.*', '\\1') AS using
|
172
|
-
indisunique AS unique,
|
173
|
-
indisprimary AS primary,
|
174
|
-
indisvalid AS valid,
|
175
|
-
indexprs::text,
|
176
|
-
indpred::text,
|
177
|
-
pg_get_indexdef(i.indexrelid) AS definition
|
281
|
+
regexp_replace(pg_get_indexdef(i.indexrelid), '.* USING ([^ ]*) \\(.*', '\\1') AS using
|
178
282
|
FROM
|
179
283
|
pg_index i
|
180
284
|
INNER JOIN
|
@@ -203,30 +307,8 @@ module Dexter
|
|
203
307
|
end
|
204
308
|
end
|
205
309
|
|
206
|
-
def
|
207
|
-
|
208
|
-
SELECT
|
209
|
-
schemaname AS schema,
|
210
|
-
relname AS table,
|
211
|
-
last_analyze,
|
212
|
-
last_autoanalyze
|
213
|
-
FROM
|
214
|
-
pg_stat_user_tables
|
215
|
-
WHERE
|
216
|
-
relname IN (#{tables.map { |t| quote(t) }.join(", ")})
|
217
|
-
SQL
|
218
|
-
|
219
|
-
last_analyzed = {}
|
220
|
-
analyze_stats.each do |stats|
|
221
|
-
last_analyzed[stats["table"]] = Time.parse(stats["last_analyze"]) if stats["last_analyze"]
|
222
|
-
end
|
223
|
-
|
224
|
-
tables.each do |table|
|
225
|
-
if !last_analyzed[table] || last_analyzed[table] < Time.now - 3600
|
226
|
-
log "Analyzing #{table}"
|
227
|
-
select_all("ANALYZE #{table}")
|
228
|
-
end
|
229
|
-
end
|
310
|
+
def quote_ident(value)
|
311
|
+
conn.quote_ident(value)
|
230
312
|
end
|
231
313
|
|
232
314
|
def quote(value)
|
@@ -237,13 +319,14 @@ module Dexter
|
|
237
319
|
end
|
238
320
|
end
|
239
321
|
|
240
|
-
# activerecord
|
322
|
+
# from activerecord
|
241
323
|
def quote_string(s)
|
242
324
|
s.gsub(/\\/, '\&\&').gsub(/'/, "''")
|
243
325
|
end
|
244
326
|
|
245
|
-
|
246
|
-
|
327
|
+
# from activesupport
|
328
|
+
def squish(str)
|
329
|
+
str.to_s.gsub(/\A[[:space:]]+/, "").gsub(/[[:space:]]+\z/, "").gsub(/[[:space:]]+/, " ")
|
247
330
|
end
|
248
331
|
end
|
249
332
|
end
|
data/lib/dexter/log_parser.rb
CHANGED
@@ -1,32 +1,11 @@
|
|
1
1
|
module Dexter
|
2
2
|
class LogParser
|
3
3
|
REGEX = /duration: (\d+\.\d+) ms (statement|execute <unnamed>): (.+)/
|
4
|
+
LINE_SEPERATOR = ": ".freeze
|
4
5
|
|
5
|
-
def initialize(logfile,
|
6
|
+
def initialize(logfile, collector)
|
6
7
|
@logfile = logfile
|
7
|
-
@
|
8
|
-
@top_queries = {}
|
9
|
-
@indexer = Indexer.new(client)
|
10
|
-
@new_queries = Set.new
|
11
|
-
@new_queries_mutex = Mutex.new
|
12
|
-
@process_queries_mutex = Mutex.new
|
13
|
-
@last_checked_at = {}
|
14
|
-
|
15
|
-
log "Started"
|
16
|
-
|
17
|
-
if @logfile == STDIN
|
18
|
-
Thread.abort_on_exception = true
|
19
|
-
|
20
|
-
@timer_thread = Thread.new do
|
21
|
-
sleep(3) # starting sleep
|
22
|
-
loop do
|
23
|
-
@process_queries_mutex.synchronize do
|
24
|
-
process_queries
|
25
|
-
end
|
26
|
-
sleep(client.options[:interval])
|
27
|
-
end
|
28
|
-
end
|
29
|
-
end
|
8
|
+
@collector = collector
|
30
9
|
end
|
31
10
|
|
32
11
|
def perform
|
@@ -35,10 +14,9 @@ module Dexter
|
|
35
14
|
|
36
15
|
each_line do |line|
|
37
16
|
if active_line
|
38
|
-
if line.include?(
|
17
|
+
if line.include?(LINE_SEPERATOR)
|
39
18
|
process_entry(active_line, duration)
|
40
19
|
active_line = nil
|
41
|
-
duration = nil
|
42
20
|
else
|
43
21
|
active_line << line
|
44
22
|
end
|
@@ -47,15 +25,9 @@ module Dexter
|
|
47
25
|
if !active_line && m = REGEX.match(line.chomp)
|
48
26
|
duration = m[1].to_f
|
49
27
|
active_line = m[3]
|
50
|
-
else
|
51
|
-
# skip
|
52
28
|
end
|
53
29
|
end
|
54
30
|
process_entry(active_line, duration) if active_line
|
55
|
-
|
56
|
-
@process_queries_mutex.synchronize do
|
57
|
-
process_queries
|
58
|
-
end
|
59
31
|
end
|
60
32
|
|
61
33
|
private
|
@@ -66,59 +38,18 @@ module Dexter
|
|
66
38
|
yield line
|
67
39
|
end
|
68
40
|
else
|
69
|
-
File.foreach(@logfile) do |line|
|
70
|
-
yield line
|
71
|
-
end
|
72
|
-
end
|
73
|
-
end
|
74
|
-
|
75
|
-
def process_entry(query, duration)
|
76
|
-
return unless query =~ /SELECT/i
|
77
|
-
fingerprint =
|
78
41
|
begin
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
@top_queries[fingerprint] ||= {calls: 0, total_time: 0}
|
86
|
-
@top_queries[fingerprint][:calls] += 1
|
87
|
-
@top_queries[fingerprint][:total_time] += duration
|
88
|
-
@top_queries[fingerprint][:query] = query
|
89
|
-
@new_queries_mutex.synchronize do
|
90
|
-
@new_queries << fingerprint
|
91
|
-
end
|
92
|
-
end
|
93
|
-
|
94
|
-
def process_queries
|
95
|
-
new_queries = nil
|
96
|
-
|
97
|
-
@new_queries_mutex.synchronize do
|
98
|
-
new_queries = @new_queries.dup
|
99
|
-
@new_queries.clear
|
100
|
-
end
|
101
|
-
|
102
|
-
now = Time.now
|
103
|
-
min_checked_at = now - 3600 # don't recheck for an hour
|
104
|
-
queries = []
|
105
|
-
fingerprints = {}
|
106
|
-
@top_queries.each do |k, v|
|
107
|
-
if new_queries.include?(k) && v[:total_time] > @min_time && (!@last_checked_at[k] || @last_checked_at[k] < min_checked_at)
|
108
|
-
fingerprints[v[:query]] = k
|
109
|
-
queries << v[:query]
|
110
|
-
@last_checked_at[k] = now
|
42
|
+
File.foreach(@logfile) do |line|
|
43
|
+
yield line
|
44
|
+
end
|
45
|
+
rescue Errno::ENOENT
|
46
|
+
abort "Log file not found"
|
111
47
|
end
|
112
48
|
end
|
113
|
-
|
114
|
-
log "Processing #{queries.size} new query fingerprints"
|
115
|
-
if queries.any?
|
116
|
-
@indexer.process_queries(queries)
|
117
|
-
end
|
118
49
|
end
|
119
50
|
|
120
|
-
def
|
121
|
-
|
51
|
+
def process_entry(query, duration)
|
52
|
+
@collector.add(query, duration) if query =~ /SELECT/i
|
122
53
|
end
|
123
54
|
end
|
124
55
|
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
module Dexter
  # Wires a Collector, LogParser and Indexer together: log entries are parsed
  # into the collector, and the collected queries are periodically handed to
  # the indexer.
  class Processor
    include Logging

    # database_url - connection url passed through to the Indexer.
    # logfile      - path of the log file to read, or STDIN to stream.
    # options      - parsed command line options (:min_time and :interval are
    #                read here; the whole hash is passed to the Indexer).
    def initialize(database_url, logfile, options)
      log "Started"

      @logfile = logfile

      @collector = Collector.new(min_time: options[:min_time])
      @log_parser = LogParser.new(logfile, @collector)
      @indexer = Indexer.new(database_url, options)

      @starting_interval = 3
      @interval = options[:interval]

      @mutex = Mutex.new
      @last_checked_at = {}
    end

    # Parses the log and processes what was collected. When reading from
    # STDIN a background timer also processes queries every @interval seconds
    # while the parser is still consuming input.
    def perform
      start_timer if @logfile == STDIN

      @log_parser.perform

      process_queries
    end

    private

    # Spawns the background thread that periodically processes queries; an
    # exception in that thread aborts the whole process.
    def start_timer
      Thread.abort_on_exception = true
      Thread.new do
        sleep(@starting_interval)
        loop do
          process_queries
          sleep(@interval)
        end
      end
    end

    # Serializes processing between the timer thread and the caller of perform.
    def process_queries
      @mutex.synchronize { process_queries_without_lock }
    end

    # Sends newly collected queries to the indexer, skipping fingerprints that
    # were already checked within the last hour.
    def process_queries_without_lock
      now = Time.now
      cutoff = now - 3600 # don't recheck for an hour

      due = @collector.fetch_queries.select do |query|
        checked_at = @last_checked_at[query.fingerprint]
        next false if checked_at && checked_at >= cutoff

        @last_checked_at[query.fingerprint] = now
        true
      end

      log "Processing #{due.size} new query fingerprints"
      @indexer.process_queries(due) if due.any?
    end
  end
end
|
data/lib/dexter/query.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
module Dexter
  # A single SQL statement together with its fingerprint and the cost figures
  # filled in while it is being analyzed.
  class Query
    attr_reader :statement, :fingerprint
    attr_accessor :initial_cost, :new_cost, :missing_tables

    # statement   - the SQL text.
    # fingerprint - the fingerprint identifying this statement.
    def initialize(statement, fingerprint)
      @statement = statement
      @fingerprint = fingerprint
    end

    # Returns the tables referenced by the statement, or an empty array when
    # the statement cannot be parsed. The result is memoized.
    def tables
      @tables ||= parse_tables
    end

    private

    # Best-effort parse: any StandardError raised while parsing yields [].
    # Written as an explicit rescue (rather than a trailing `rescue` modifier)
    # so the scope of what is being rescued is unambiguous.
    def parse_tables
      PgQuery.parse(statement).tables
    rescue StandardError
      []
    end
  end
end
|
data/lib/dexter/version.rb
CHANGED
data/pgdexter.gemspec
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
|
2
3
|
lib = File.expand_path("../lib", __FILE__)
|
3
4
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
5
|
require "dexter/version"
|
@@ -9,7 +10,7 @@ Gem::Specification.new do |spec|
|
|
9
10
|
spec.authors = ["Andrew Kane"]
|
10
11
|
spec.email = ["andrew@chartkick.com"]
|
11
12
|
|
12
|
-
spec.summary = "
|
13
|
+
spec.summary = "The automatic indexer for Postgres"
|
13
14
|
spec.homepage = "https://github.com/ankane/dexter"
|
14
15
|
|
15
16
|
spec.files = `git ls-files -z`.split("\x0").reject do |f|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pgdexter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-06-
|
11
|
+
date: 2017-06-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: slop
|
@@ -89,14 +89,20 @@ extensions: []
|
|
89
89
|
extra_rdoc_files: []
|
90
90
|
files:
|
91
91
|
- ".gitignore"
|
92
|
+
- CHANGELOG.md
|
92
93
|
- Gemfile
|
93
94
|
- LICENSE.txt
|
94
95
|
- README.md
|
95
96
|
- Rakefile
|
96
97
|
- exe/dexter
|
97
98
|
- lib/dexter.rb
|
99
|
+
- lib/dexter/client.rb
|
100
|
+
- lib/dexter/collector.rb
|
98
101
|
- lib/dexter/indexer.rb
|
99
102
|
- lib/dexter/log_parser.rb
|
103
|
+
- lib/dexter/logging.rb
|
104
|
+
- lib/dexter/processor.rb
|
105
|
+
- lib/dexter/query.rb
|
100
106
|
- lib/dexter/version.rb
|
101
107
|
- pgdexter.gemspec
|
102
108
|
homepage: https://github.com/ankane/dexter
|
@@ -121,5 +127,5 @@ rubyforge_project:
|
|
121
127
|
rubygems_version: 2.6.11
|
122
128
|
signing_key:
|
123
129
|
specification_version: 4
|
124
|
-
summary:
|
130
|
+
summary: The automatic indexer for Postgres
|
125
131
|
test_files: []
|