pgdexter 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -0
- data/README.md +26 -5
- data/lib/dexter.rb +5 -44
- data/lib/dexter/client.rb +67 -0
- data/lib/dexter/collector.rb +47 -0
- data/lib/dexter/indexer.rb +174 -91
- data/lib/dexter/log_parser.rb +11 -80
- data/lib/dexter/logging.rb +7 -0
- data/lib/dexter/processor.rb +61 -0
- data/lib/dexter/query.rb +15 -0
- data/lib/dexter/version.rb +1 -1
- data/pgdexter.gemspec +2 -1
- metadata +9 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b472c8289c8c878c0ea790a3ec5fdf43e22bfaa1
|
4
|
+
data.tar.gz: 55f96eb8bd0a8739c79fb7c749a8aee68b2a119f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 873d3d0f9471f90c9b90721274623c6c3dcfbcfe88989143a6b66a5ce1345a2187bab75ef4f32f52efd9f34a1e1806524696854348e592d6eb31bec25ca193c8
|
7
|
+
data.tar.gz: a545b2b1a9d2f6650ec04c1769b243863c412fbff3b1e0575ab4cc6c98648119e487a2bc75f0afeeb704895184580ada13169a05edb3e00bc0caca37759183bc
|
data/CHANGELOG.md
ADDED
data/README.md
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
# Dexter
|
2
2
|
|
3
|
-
|
3
|
+
The automatic indexer for Postgres
|
4
|
+
|
5
|
+
[Read about how it works](https://medium.com/@ankane/introducing-dexter-the-automatic-indexer-for-postgres-5f8fa8b28f27)
|
4
6
|
|
5
7
|
## Installation
|
6
8
|
|
@@ -22,7 +24,7 @@ Enable logging for slow queries.
|
|
22
24
|
log_min_duration_statement = 10 # ms
|
23
25
|
```
|
24
26
|
|
25
|
-
And install with:
|
27
|
+
And install the command line tool with:
|
26
28
|
|
27
29
|
```sh
|
28
30
|
gem install pgdexter
|
@@ -50,12 +52,31 @@ This finds slow queries and generates output like:
|
|
50
52
|
2017-06-25T17:53:22+00:00 Processing 12 new query fingerprints
|
51
53
|
```
|
52
54
|
|
53
|
-
To be safe, Dexter will not create indexes unless you pass the `--create` flag.
|
55
|
+
To be safe, Dexter will not create indexes unless you pass the `--create` flag. When you do pass it, you’ll see:
|
56
|
+
|
57
|
+
```log
|
58
|
+
2017-06-25T17:52:22+00:00 Index found: ratings (user_id)
|
59
|
+
2017-06-25T17:52:22+00:00 Creating index: CREATE INDEX CONCURRENTLY ON ratings (user_id)
|
60
|
+
2017-06-25T17:52:37+00:00 Index created: 15243 ms
|
61
|
+
```
|
54
62
|
|
55
63
|
## Options
|
56
64
|
|
57
|
-
|
58
|
-
|
65
|
+
Name | Description | Default
|
66
|
+
--- | --- | ---
|
67
|
+
exclude | prevent specific tables from being indexed | None
|
68
|
+
interval | time to wait between processing queries, in seconds | 60
|
69
|
+
log-level | `debug` gives additional info for suggested indexes<br />`debug2` gives additional info for all processed queries | info
|
70
|
+
log-sql | log SQL statements executed | false
|
71
|
+
min-time | only process queries consuming a minimum amount of DB time, in minutes | 0
|
72
|
+
|
73
|
+
## Future Work
|
74
|
+
|
75
|
+
[Here are some ideas](https://github.com/ankane/dexter/issues/1)
|
76
|
+
|
77
|
+
## Thanks
|
78
|
+
|
79
|
+
This software wouldn’t be possible without [HypoPG](https://github.com/dalibo/hypopg), which allows you to create hypothetical indexes, and [pg_query](https://github.com/lfittl/pg_query), which allows you to parse and fingerprint queries. A big thanks to Dalibo and Lukas Fittl respectively.
|
59
80
|
|
60
81
|
## Contributing
|
61
82
|
|
data/lib/dexter.rb
CHANGED
@@ -5,49 +5,10 @@ require "pg_query"
|
|
5
5
|
require "time"
|
6
6
|
require "set"
|
7
7
|
require "thread"
|
8
|
+
require "dexter/logging"
|
9
|
+
require "dexter/client"
|
10
|
+
require "dexter/collector"
|
8
11
|
require "dexter/indexer"
|
9
12
|
require "dexter/log_parser"
|
10
|
-
|
11
|
-
|
12
|
-
class Client
|
13
|
-
attr_reader :arguments, :options
|
14
|
-
|
15
|
-
def initialize(args)
|
16
|
-
@arguments, @options = parse_args(args)
|
17
|
-
end
|
18
|
-
|
19
|
-
def perform
|
20
|
-
abort "Missing database url" if arguments.empty?
|
21
|
-
abort "Too many arguments" if arguments.size > 2
|
22
|
-
|
23
|
-
# get queries
|
24
|
-
queries = []
|
25
|
-
if options[:s]
|
26
|
-
queries << options[:s]
|
27
|
-
Indexer.new(self).process_queries(queries)
|
28
|
-
end
|
29
|
-
if arguments[1]
|
30
|
-
begin
|
31
|
-
LogParser.new(arguments[1], self).perform
|
32
|
-
rescue Errno::ENOENT
|
33
|
-
abort "Log file not found"
|
34
|
-
end
|
35
|
-
end
|
36
|
-
if !options[:s] && !arguments[1]
|
37
|
-
LogParser.new(STDIN, self).perform
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
def parse_args(args)
|
42
|
-
opts = Slop.parse(args) do |o|
|
43
|
-
o.boolean "--create", default: false
|
44
|
-
o.string "-s"
|
45
|
-
o.float "--min-time", default: 0
|
46
|
-
o.integer "--interval", default: 60
|
47
|
-
end
|
48
|
-
[opts.arguments, opts.to_hash]
|
49
|
-
rescue Slop::Error => e
|
50
|
-
abort e.message
|
51
|
-
end
|
52
|
-
end
|
53
|
-
end
|
13
|
+
require "dexter/processor"
|
14
|
+
require "dexter/query"
|
@@ -0,0 +1,67 @@
|
|
1
|
+
module Dexter
|
2
|
+
class Client
|
3
|
+
attr_reader :arguments, :options
|
4
|
+
|
5
|
+
def initialize(args)
|
6
|
+
@arguments, @options = parse_args(args)
|
7
|
+
end
|
8
|
+
|
9
|
+
def perform
|
10
|
+
STDOUT.sync = true
|
11
|
+
STDERR.sync = true
|
12
|
+
|
13
|
+
if options[:statement]
|
14
|
+
fingerprint = PgQuery.fingerprint(options[:statement]) rescue "unknown"
|
15
|
+
query = Query.new(options[:statement], fingerprint)
|
16
|
+
Indexer.new(arguments[0], options).process_queries([query])
|
17
|
+
elsif arguments[1]
|
18
|
+
Processor.new(arguments[0], arguments[1], options).perform
|
19
|
+
else
|
20
|
+
Processor.new(arguments[0], STDIN, options).perform
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
def parse_args(args)
|
25
|
+
opts = Slop.parse(args) do |o|
|
26
|
+
o.banner = %(Usage:
|
27
|
+
dexter <database-url> [options]
|
28
|
+
|
29
|
+
Options:)
|
30
|
+
o.boolean "--create", "create indexes", default: false
|
31
|
+
o.array "--exclude", "prevent specific tables from being indexed"
|
32
|
+
o.integer "--interval", "time to wait between processing queries, in seconds", default: 60
|
33
|
+
o.float "--min-time", "only process queries that have consumed a certain amount of DB time, in minutes", default: 0
|
34
|
+
o.string "--log-level", "log level", default: "info"
|
35
|
+
o.boolean "--log-sql", "log sql", default: false
|
36
|
+
o.string "-s", "--statement", "process a single statement"
|
37
|
+
o.on "-v", "--version", "print the version" do
|
38
|
+
log Dexter::VERSION
|
39
|
+
exit
|
40
|
+
end
|
41
|
+
o.on "-h", "--help", "prints help" do
|
42
|
+
log o
|
43
|
+
exit
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
arguments = opts.arguments
|
48
|
+
|
49
|
+
if arguments.empty?
|
50
|
+
log opts
|
51
|
+
exit
|
52
|
+
end
|
53
|
+
|
54
|
+
abort "Too many arguments" if arguments.size > 2
|
55
|
+
|
56
|
+
abort "Unknown log level" unless ["info", "debug", "debug2"].include?(opts.to_hash[:log_level].to_s.downcase)
|
57
|
+
|
58
|
+
[arguments, opts.to_hash]
|
59
|
+
rescue Slop::Error => e
|
60
|
+
abort e.message
|
61
|
+
end
|
62
|
+
|
63
|
+
def log(message)
|
64
|
+
$stderr.puts message
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
module Dexter
|
2
|
+
class Collector
|
3
|
+
def initialize(options = {})
|
4
|
+
@top_queries = {}
|
5
|
+
@new_queries = Set.new
|
6
|
+
@mutex = Mutex.new
|
7
|
+
@min_time = options[:min_time] * 60000 # convert minutes to ms
|
8
|
+
end
|
9
|
+
|
10
|
+
def add(query, duration)
|
11
|
+
fingerprint =
|
12
|
+
begin
|
13
|
+
PgQuery.fingerprint(query)
|
14
|
+
rescue PgQuery::ParseError
|
15
|
+
# do nothing
|
16
|
+
end
|
17
|
+
|
18
|
+
return unless fingerprint
|
19
|
+
|
20
|
+
@top_queries[fingerprint] ||= {calls: 0, total_time: 0}
|
21
|
+
@top_queries[fingerprint][:calls] += 1
|
22
|
+
@top_queries[fingerprint][:total_time] += duration
|
23
|
+
@top_queries[fingerprint][:query] = query
|
24
|
+
@mutex.synchronize do
|
25
|
+
@new_queries << fingerprint
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
def fetch_queries
|
30
|
+
new_queries = nil
|
31
|
+
|
32
|
+
@mutex.synchronize do
|
33
|
+
new_queries = @new_queries.dup
|
34
|
+
@new_queries.clear
|
35
|
+
end
|
36
|
+
|
37
|
+
queries = []
|
38
|
+
@top_queries.each do |k, v|
|
39
|
+
if new_queries.include?(k) && v[:total_time] > @min_time
|
40
|
+
queries << Query.new(v[:query], k)
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
queries.sort_by(&:fingerprint)
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
data/lib/dexter/indexer.rb
CHANGED
@@ -1,36 +1,100 @@
|
|
1
1
|
module Dexter
|
2
2
|
class Indexer
|
3
|
-
|
3
|
+
include Logging
|
4
4
|
|
5
|
-
def initialize(
|
6
|
-
@
|
5
|
+
def initialize(database_url, options)
|
6
|
+
@database_url = database_url
|
7
|
+
@create = options[:create]
|
8
|
+
@log_level = options[:log_level]
|
9
|
+
@exclude_tables = options[:exclude]
|
10
|
+
@log_sql = options[:log_sql]
|
7
11
|
|
8
|
-
|
9
|
-
select_all("CREATE EXTENSION IF NOT EXISTS hypopg")
|
12
|
+
create_extension
|
10
13
|
end
|
11
14
|
|
12
15
|
def process_queries(queries)
|
13
|
-
#
|
14
|
-
|
15
|
-
|
16
|
+
# reset hypothetical indexes
|
17
|
+
reset_hypothetical_indexes
|
18
|
+
|
19
|
+
# filter queries from other databases and system tables
|
20
|
+
tables = possible_tables(queries)
|
21
|
+
queries.each do |query|
|
22
|
+
query.missing_tables = !query.tables.all? { |t| tables.include?(t) }
|
23
|
+
end
|
24
|
+
|
25
|
+
# exclude user specified tables
|
26
|
+
# TODO exclude write-heavy tables
|
27
|
+
@exclude_tables.each do |table|
|
28
|
+
tables.delete(table)
|
29
|
+
end
|
30
|
+
|
31
|
+
# analyze tables if needed
|
32
|
+
analyze_tables(tables) if tables.any?
|
33
|
+
|
34
|
+
# get initial costs for queries
|
35
|
+
calculate_initial_cost(queries.reject(&:missing_tables))
|
36
|
+
|
37
|
+
# create hypothetical indexes
|
38
|
+
candidates = tables.any? ? create_hypothetical_indexes(tables) : {}
|
39
|
+
|
40
|
+
# get new costs and see if new indexes were used
|
41
|
+
new_indexes = determine_indexes(queries, candidates)
|
42
|
+
|
43
|
+
# display and create new indexes
|
44
|
+
show_and_create_indexes(new_indexes)
|
45
|
+
end
|
46
|
+
|
47
|
+
private
|
16
48
|
|
17
|
-
|
49
|
+
def create_extension
|
50
|
+
select_all("SET client_min_messages = warning")
|
51
|
+
select_all("CREATE EXTENSION IF NOT EXISTS hypopg")
|
52
|
+
end
|
53
|
+
|
54
|
+
def reset_hypothetical_indexes
|
18
55
|
select_all("SELECT hypopg_reset()")
|
56
|
+
end
|
19
57
|
|
20
|
-
|
21
|
-
|
58
|
+
def analyze_tables(tables)
|
59
|
+
tables = tables.to_a.sort
|
60
|
+
|
61
|
+
analyze_stats = select_all <<-SQL
|
62
|
+
SELECT
|
63
|
+
schemaname AS schema,
|
64
|
+
relname AS table,
|
65
|
+
last_analyze,
|
66
|
+
last_autoanalyze
|
67
|
+
FROM
|
68
|
+
pg_stat_user_tables
|
69
|
+
WHERE
|
70
|
+
relname IN (#{tables.map { |t| quote(t) }.join(", ")})
|
71
|
+
SQL
|
72
|
+
|
73
|
+
last_analyzed = {}
|
74
|
+
analyze_stats.each do |stats|
|
75
|
+
last_analyzed[stats["table"]] = Time.parse(stats["last_analyze"]) if stats["last_analyze"]
|
76
|
+
end
|
22
77
|
|
23
|
-
|
24
|
-
|
78
|
+
tables.each do |table|
|
79
|
+
if !last_analyzed[table] || last_analyzed[table] < Time.now - 3600
|
80
|
+
statement = "ANALYZE #{quote_ident(table)}"
|
81
|
+
log "Running analyze: #{statement}"
|
82
|
+
select_all(statement)
|
83
|
+
end
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
def calculate_initial_cost(queries)
|
25
88
|
queries.each do |query|
|
26
89
|
begin
|
27
|
-
|
90
|
+
query.initial_cost = plan(query.statement)["Total Cost"]
|
28
91
|
rescue PG::Error
|
29
92
|
# do nothing
|
30
93
|
end
|
31
94
|
end
|
32
|
-
|
95
|
+
end
|
33
96
|
|
97
|
+
def create_hypothetical_indexes(tables)
|
34
98
|
# get existing indexes
|
35
99
|
index_set = Set.new
|
36
100
|
indexes(tables).each do |index|
|
@@ -45,60 +109,96 @@ module Dexter
|
|
45
109
|
candidates[col] = select_all("SELECT * FROM hypopg_create_index('CREATE INDEX ON #{col[:table]} (#{[col[:column]].join(", ")})');").first["indexname"]
|
46
110
|
end
|
47
111
|
end
|
112
|
+
candidates
|
113
|
+
end
|
48
114
|
|
49
|
-
|
115
|
+
def determine_indexes(queries, candidates)
|
116
|
+
new_indexes = {}
|
50
117
|
|
51
|
-
new_indexes = []
|
52
118
|
queries.each do |query|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
119
|
+
if query.initial_cost
|
120
|
+
new_plan = plan(query.statement)
|
121
|
+
query.new_cost = new_plan["Total Cost"]
|
122
|
+
cost_savings = query.new_cost < query.initial_cost * 0.5
|
123
|
+
|
124
|
+
query_indexes = []
|
125
|
+
candidates.each do |col, index_name|
|
126
|
+
if new_plan.inspect.include?(index_name)
|
127
|
+
index = {
|
128
|
+
table: col[:table],
|
129
|
+
columns: [col[:column]]
|
130
|
+
}
|
131
|
+
query_indexes << index
|
132
|
+
|
133
|
+
if cost_savings
|
134
|
+
new_indexes[index] ||= index.dup
|
135
|
+
(new_indexes[index][:queries] ||= []) << query
|
136
|
+
end
|
137
|
+
end
|
69
138
|
end
|
70
139
|
end
|
71
140
|
|
72
|
-
|
141
|
+
if @log_level == "debug2"
|
142
|
+
log "Processed #{query.fingerprint}"
|
143
|
+
if query.initial_cost
|
144
|
+
log "Cost: #{query.initial_cost} -> #{query.new_cost}"
|
145
|
+
|
146
|
+
if query_indexes.any?
|
147
|
+
log "Indexes: #{query_indexes.map { |i| "#{i[:table]} (#{i[:columns].join(", ")})" }.join(", ")}"
|
148
|
+
log "Need 50% cost savings to suggest index" unless cost_savings
|
149
|
+
else
|
150
|
+
log "Indexes: None"
|
151
|
+
end
|
152
|
+
elsif query.fingerprint == "unknown"
|
153
|
+
log "Could not parse query"
|
154
|
+
elsif query.tables.empty?
|
155
|
+
log "No tables"
|
156
|
+
elsif query.missing_tables
|
157
|
+
log "Tables not present in current database"
|
158
|
+
else
|
159
|
+
log "Could not run explain"
|
160
|
+
end
|
161
|
+
|
162
|
+
puts
|
163
|
+
puts query.statement
|
164
|
+
puts
|
165
|
+
end
|
73
166
|
end
|
74
167
|
|
75
|
-
new_indexes
|
168
|
+
new_indexes.values.sort_by(&:to_a)
|
169
|
+
end
|
76
170
|
|
77
|
-
|
171
|
+
def show_and_create_indexes(new_indexes)
|
78
172
|
if new_indexes.any?
|
79
173
|
new_indexes.each do |index|
|
80
|
-
index[:queries] = queries_by_index[index]
|
81
|
-
|
82
174
|
log "Index found: #{index[:table]} (#{index[:columns].join(", ")})"
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
175
|
+
|
176
|
+
if @log_level.start_with?("debug")
|
177
|
+
index[:queries].sort_by(&:fingerprint).each do |query|
|
178
|
+
log "Query #{query.fingerprint} (Cost: #{query.initial_cost} -> #{query.new_cost})"
|
179
|
+
puts
|
180
|
+
puts query.statement
|
181
|
+
puts
|
182
|
+
end
|
183
|
+
end
|
90
184
|
end
|
91
185
|
|
92
|
-
|
93
|
-
|
94
|
-
#
|
95
|
-
|
186
|
+
if @create
|
187
|
+
# TODO use advisory locks
|
188
|
+
# 1. create lock
|
189
|
+
# 2. refresh existing index list
|
190
|
+
# 3. create indexes that still don't exist
|
191
|
+
# 4. release lock
|
192
|
+
new_indexes.each do |index|
|
193
|
+
statement = "CREATE INDEX CONCURRENTLY ON #{index[:table]} (#{index[:columns].join(", ")})"
|
96
194
|
log "Creating index: #{statement}"
|
97
195
|
started_at = Time.now
|
98
196
|
select_all(statement)
|
99
197
|
log "Index created: #{((Time.now - started_at) * 1000).to_i} ms"
|
100
198
|
end
|
101
199
|
end
|
200
|
+
else
|
201
|
+
log "No indexes found"
|
102
202
|
end
|
103
203
|
|
104
204
|
new_indexes
|
@@ -106,7 +206,7 @@ module Dexter
|
|
106
206
|
|
107
207
|
def conn
|
108
208
|
@conn ||= begin
|
109
|
-
uri = URI.parse(
|
209
|
+
uri = URI.parse(@database_url)
|
110
210
|
config = {
|
111
211
|
host: uri.host,
|
112
212
|
port: uri.port,
|
@@ -122,14 +222,24 @@ module Dexter
|
|
122
222
|
end
|
123
223
|
|
124
224
|
def select_all(query)
|
125
|
-
|
225
|
+
# use exec_params instead of exec for security
|
226
|
+
#
|
227
|
+
# Unlike PQexec, PQexecParams allows at most one SQL command in the given string.
|
228
|
+
# (There can be semicolons in it, but not more than one nonempty command.)
|
229
|
+
# This is a limitation of the underlying protocol, but has some usefulness
|
230
|
+
# as an extra defense against SQL-injection attacks.
|
231
|
+
# https://www.postgresql.org/docs/current/static/libpq-exec.html
|
232
|
+
query = squish(query)
|
233
|
+
log "SQL: #{query}" if @log_sql
|
234
|
+
conn.exec_params(query, []).to_a
|
126
235
|
end
|
127
236
|
|
128
237
|
def plan(query)
|
129
|
-
|
238
|
+
# strip semi-colons as another measure of defense
|
239
|
+
JSON.parse(select_all("EXPLAIN (FORMAT JSON) #{query.gsub(";", "")}").first["QUERY PLAN"]).first["Plan"]
|
130
240
|
end
|
131
241
|
|
132
|
-
def
|
242
|
+
def database_tables
|
133
243
|
result = select_all <<-SQL
|
134
244
|
SELECT
|
135
245
|
table_name
|
@@ -139,11 +249,11 @@ module Dexter
|
|
139
249
|
table_catalog = current_database() AND
|
140
250
|
table_schema NOT IN ('pg_catalog', 'information_schema')
|
141
251
|
SQL
|
142
|
-
|
143
|
-
|
144
|
-
tables = queries.flat_map { |q| PgQuery.parse(q).tables }.uniq.select { |t| possible_tables.include?(t) }
|
252
|
+
result.map { |r| r["table_name"] }
|
253
|
+
end
|
145
254
|
|
146
|
-
|
255
|
+
def possible_tables(queries)
|
256
|
+
Set.new(queries.flat_map(&:tables).uniq & database_tables)
|
147
257
|
end
|
148
258
|
|
149
259
|
def columns(tables)
|
@@ -168,13 +278,7 @@ module Dexter
|
|
168
278
|
t.relname AS table,
|
169
279
|
ix.relname AS name,
|
170
280
|
regexp_replace(pg_get_indexdef(i.indexrelid), '^[^\\(]*\\((.*)\\)$', '\\1') AS columns,
|
171
|
-
regexp_replace(pg_get_indexdef(i.indexrelid), '.* USING ([^ ]*) \\(.*', '\\1') AS using
|
172
|
-
indisunique AS unique,
|
173
|
-
indisprimary AS primary,
|
174
|
-
indisvalid AS valid,
|
175
|
-
indexprs::text,
|
176
|
-
indpred::text,
|
177
|
-
pg_get_indexdef(i.indexrelid) AS definition
|
281
|
+
regexp_replace(pg_get_indexdef(i.indexrelid), '.* USING ([^ ]*) \\(.*', '\\1') AS using
|
178
282
|
FROM
|
179
283
|
pg_index i
|
180
284
|
INNER JOIN
|
@@ -203,30 +307,8 @@ module Dexter
|
|
203
307
|
end
|
204
308
|
end
|
205
309
|
|
206
|
-
def
|
207
|
-
|
208
|
-
SELECT
|
209
|
-
schemaname AS schema,
|
210
|
-
relname AS table,
|
211
|
-
last_analyze,
|
212
|
-
last_autoanalyze
|
213
|
-
FROM
|
214
|
-
pg_stat_user_tables
|
215
|
-
WHERE
|
216
|
-
relname IN (#{tables.map { |t| quote(t) }.join(", ")})
|
217
|
-
SQL
|
218
|
-
|
219
|
-
last_analyzed = {}
|
220
|
-
analyze_stats.each do |stats|
|
221
|
-
last_analyzed[stats["table"]] = Time.parse(stats["last_analyze"]) if stats["last_analyze"]
|
222
|
-
end
|
223
|
-
|
224
|
-
tables.each do |table|
|
225
|
-
if !last_analyzed[table] || last_analyzed[table] < Time.now - 3600
|
226
|
-
log "Analyzing #{table}"
|
227
|
-
select_all("ANALYZE #{table}")
|
228
|
-
end
|
229
|
-
end
|
310
|
+
def quote_ident(value)
|
311
|
+
conn.quote_ident(value)
|
230
312
|
end
|
231
313
|
|
232
314
|
def quote(value)
|
@@ -237,13 +319,14 @@ module Dexter
|
|
237
319
|
end
|
238
320
|
end
|
239
321
|
|
240
|
-
# activerecord
|
322
|
+
# from activerecord
|
241
323
|
def quote_string(s)
|
242
324
|
s.gsub(/\\/, '\&\&').gsub(/'/, "''")
|
243
325
|
end
|
244
326
|
|
245
|
-
|
246
|
-
|
327
|
+
# from activesupport
|
328
|
+
def squish(str)
|
329
|
+
str.to_s.gsub(/\A[[:space:]]+/, "").gsub(/[[:space:]]+\z/, "").gsub(/[[:space:]]+/, " ")
|
247
330
|
end
|
248
331
|
end
|
249
332
|
end
|
data/lib/dexter/log_parser.rb
CHANGED
@@ -1,32 +1,11 @@
|
|
1
1
|
module Dexter
|
2
2
|
class LogParser
|
3
3
|
REGEX = /duration: (\d+\.\d+) ms (statement|execute <unnamed>): (.+)/
|
4
|
+
LINE_SEPERATOR = ": ".freeze
|
4
5
|
|
5
|
-
def initialize(logfile,
|
6
|
+
def initialize(logfile, collector)
|
6
7
|
@logfile = logfile
|
7
|
-
@
|
8
|
-
@top_queries = {}
|
9
|
-
@indexer = Indexer.new(client)
|
10
|
-
@new_queries = Set.new
|
11
|
-
@new_queries_mutex = Mutex.new
|
12
|
-
@process_queries_mutex = Mutex.new
|
13
|
-
@last_checked_at = {}
|
14
|
-
|
15
|
-
log "Started"
|
16
|
-
|
17
|
-
if @logfile == STDIN
|
18
|
-
Thread.abort_on_exception = true
|
19
|
-
|
20
|
-
@timer_thread = Thread.new do
|
21
|
-
sleep(3) # starting sleep
|
22
|
-
loop do
|
23
|
-
@process_queries_mutex.synchronize do
|
24
|
-
process_queries
|
25
|
-
end
|
26
|
-
sleep(client.options[:interval])
|
27
|
-
end
|
28
|
-
end
|
29
|
-
end
|
8
|
+
@collector = collector
|
30
9
|
end
|
31
10
|
|
32
11
|
def perform
|
@@ -35,10 +14,9 @@ module Dexter
|
|
35
14
|
|
36
15
|
each_line do |line|
|
37
16
|
if active_line
|
38
|
-
if line.include?(
|
17
|
+
if line.include?(LINE_SEPERATOR)
|
39
18
|
process_entry(active_line, duration)
|
40
19
|
active_line = nil
|
41
|
-
duration = nil
|
42
20
|
else
|
43
21
|
active_line << line
|
44
22
|
end
|
@@ -47,15 +25,9 @@ module Dexter
|
|
47
25
|
if !active_line && m = REGEX.match(line.chomp)
|
48
26
|
duration = m[1].to_f
|
49
27
|
active_line = m[3]
|
50
|
-
else
|
51
|
-
# skip
|
52
28
|
end
|
53
29
|
end
|
54
30
|
process_entry(active_line, duration) if active_line
|
55
|
-
|
56
|
-
@process_queries_mutex.synchronize do
|
57
|
-
process_queries
|
58
|
-
end
|
59
31
|
end
|
60
32
|
|
61
33
|
private
|
@@ -66,59 +38,18 @@ module Dexter
|
|
66
38
|
yield line
|
67
39
|
end
|
68
40
|
else
|
69
|
-
File.foreach(@logfile) do |line|
|
70
|
-
yield line
|
71
|
-
end
|
72
|
-
end
|
73
|
-
end
|
74
|
-
|
75
|
-
def process_entry(query, duration)
|
76
|
-
return unless query =~ /SELECT/i
|
77
|
-
fingerprint =
|
78
41
|
begin
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
@top_queries[fingerprint] ||= {calls: 0, total_time: 0}
|
86
|
-
@top_queries[fingerprint][:calls] += 1
|
87
|
-
@top_queries[fingerprint][:total_time] += duration
|
88
|
-
@top_queries[fingerprint][:query] = query
|
89
|
-
@new_queries_mutex.synchronize do
|
90
|
-
@new_queries << fingerprint
|
91
|
-
end
|
92
|
-
end
|
93
|
-
|
94
|
-
def process_queries
|
95
|
-
new_queries = nil
|
96
|
-
|
97
|
-
@new_queries_mutex.synchronize do
|
98
|
-
new_queries = @new_queries.dup
|
99
|
-
@new_queries.clear
|
100
|
-
end
|
101
|
-
|
102
|
-
now = Time.now
|
103
|
-
min_checked_at = now - 3600 # don't recheck for an hour
|
104
|
-
queries = []
|
105
|
-
fingerprints = {}
|
106
|
-
@top_queries.each do |k, v|
|
107
|
-
if new_queries.include?(k) && v[:total_time] > @min_time && (!@last_checked_at[k] || @last_checked_at[k] < min_checked_at)
|
108
|
-
fingerprints[v[:query]] = k
|
109
|
-
queries << v[:query]
|
110
|
-
@last_checked_at[k] = now
|
42
|
+
File.foreach(@logfile) do |line|
|
43
|
+
yield line
|
44
|
+
end
|
45
|
+
rescue Errno::ENOENT
|
46
|
+
abort "Log file not found"
|
111
47
|
end
|
112
48
|
end
|
113
|
-
|
114
|
-
log "Processing #{queries.size} new query fingerprints"
|
115
|
-
if queries.any?
|
116
|
-
@indexer.process_queries(queries)
|
117
|
-
end
|
118
49
|
end
|
119
50
|
|
120
|
-
def
|
121
|
-
|
51
|
+
def process_entry(query, duration)
|
52
|
+
@collector.add(query, duration) if query =~ /SELECT/i
|
122
53
|
end
|
123
54
|
end
|
124
55
|
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
module Dexter
|
2
|
+
class Processor
|
3
|
+
include Logging
|
4
|
+
|
5
|
+
def initialize(database_url, logfile, options)
|
6
|
+
log "Started"
|
7
|
+
|
8
|
+
@logfile = logfile
|
9
|
+
|
10
|
+
@collector = Collector.new(min_time: options[:min_time])
|
11
|
+
@log_parser = LogParser.new(logfile, @collector)
|
12
|
+
@indexer = Indexer.new(database_url, options)
|
13
|
+
|
14
|
+
@starting_interval = 3
|
15
|
+
@interval = options[:interval]
|
16
|
+
|
17
|
+
@mutex = Mutex.new
|
18
|
+
@last_checked_at = {}
|
19
|
+
end
|
20
|
+
|
21
|
+
def perform
|
22
|
+
if @logfile == STDIN
|
23
|
+
Thread.abort_on_exception = true
|
24
|
+
Thread.new do
|
25
|
+
sleep(@starting_interval)
|
26
|
+
loop do
|
27
|
+
process_queries
|
28
|
+
sleep(@interval)
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
@log_parser.perform
|
34
|
+
|
35
|
+
process_queries
|
36
|
+
end
|
37
|
+
|
38
|
+
private
|
39
|
+
|
40
|
+
def process_queries
|
41
|
+
@mutex.synchronize do
|
42
|
+
process_queries_without_lock
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
def process_queries_without_lock
|
47
|
+
now = Time.now
|
48
|
+
min_checked_at = now - 3600 # don't recheck for an hour
|
49
|
+
queries = []
|
50
|
+
@collector.fetch_queries.each do |query|
|
51
|
+
if !@last_checked_at[query.fingerprint] || @last_checked_at[query.fingerprint] < min_checked_at
|
52
|
+
queries << query
|
53
|
+
@last_checked_at[query.fingerprint] = now
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
log "Processing #{queries.size} new query fingerprints"
|
58
|
+
@indexer.process_queries(queries) if queries.any?
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
data/lib/dexter/query.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
module Dexter
|
2
|
+
class Query
|
3
|
+
attr_reader :statement, :fingerprint
|
4
|
+
attr_accessor :initial_cost, :new_cost, :missing_tables
|
5
|
+
|
6
|
+
def initialize(statement, fingerprint)
|
7
|
+
@statement = statement
|
8
|
+
@fingerprint = fingerprint
|
9
|
+
end
|
10
|
+
|
11
|
+
def tables
|
12
|
+
@tables ||= PgQuery.parse(statement).tables rescue []
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
data/lib/dexter/version.rb
CHANGED
data/pgdexter.gemspec
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
|
2
3
|
lib = File.expand_path("../lib", __FILE__)
|
3
4
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
5
|
require "dexter/version"
|
@@ -9,7 +10,7 @@ Gem::Specification.new do |spec|
|
|
9
10
|
spec.authors = ["Andrew Kane"]
|
10
11
|
spec.email = ["andrew@chartkick.com"]
|
11
12
|
|
12
|
-
spec.summary = "
|
13
|
+
spec.summary = "The automatic indexer for Postgres"
|
13
14
|
spec.homepage = "https://github.com/ankane/dexter"
|
14
15
|
|
15
16
|
spec.files = `git ls-files -z`.split("\x0").reject do |f|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pgdexter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-06-
|
11
|
+
date: 2017-06-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: slop
|
@@ -89,14 +89,20 @@ extensions: []
|
|
89
89
|
extra_rdoc_files: []
|
90
90
|
files:
|
91
91
|
- ".gitignore"
|
92
|
+
- CHANGELOG.md
|
92
93
|
- Gemfile
|
93
94
|
- LICENSE.txt
|
94
95
|
- README.md
|
95
96
|
- Rakefile
|
96
97
|
- exe/dexter
|
97
98
|
- lib/dexter.rb
|
99
|
+
- lib/dexter/client.rb
|
100
|
+
- lib/dexter/collector.rb
|
98
101
|
- lib/dexter/indexer.rb
|
99
102
|
- lib/dexter/log_parser.rb
|
103
|
+
- lib/dexter/logging.rb
|
104
|
+
- lib/dexter/processor.rb
|
105
|
+
- lib/dexter/query.rb
|
100
106
|
- lib/dexter/version.rb
|
101
107
|
- pgdexter.gemspec
|
102
108
|
homepage: https://github.com/ankane/dexter
|
@@ -121,5 +127,5 @@ rubyforge_project:
|
|
121
127
|
rubygems_version: 2.6.11
|
122
128
|
signing_key:
|
123
129
|
specification_version: 4
|
124
|
-
summary:
|
130
|
+
summary: The automatic indexer for Postgres
|
125
131
|
test_files: []
|