pghero 1.2.2 → 1.2.3
- checksums.yaml +4 -4
- data/.travis.yml +11 -0
- data/CHANGELOG.md +6 -0
- data/README.md +1 -1
- data/app/controllers/pg_hero/home_controller.rb +15 -4
- data/app/views/layouts/pg_hero/application.html.erb +4 -4
- data/app/views/pg_hero/home/explain.html.erb +1 -1
- data/app/views/pg_hero/home/index_usage.html.erb +6 -1
- data/app/views/pg_hero/home/maintenance.html.erb +6 -1
- data/app/views/pg_hero/home/space.html.erb +5 -3
- data/lib/pghero.rb +35 -1243
- data/lib/pghero/connection.rb +5 -0
- data/lib/pghero/database.rb +12 -3
- data/lib/pghero/methods/basic.rb +104 -0
- data/lib/pghero/methods/connections.rb +49 -0
- data/lib/pghero/methods/databases.rb +39 -0
- data/lib/pghero/methods/explain.rb +29 -0
- data/lib/pghero/methods/indexes.rb +154 -0
- data/lib/pghero/methods/kill.rb +27 -0
- data/lib/pghero/methods/maintenance.rb +61 -0
- data/lib/pghero/methods/queries.rb +73 -0
- data/lib/pghero/methods/query_stats.rb +188 -0
- data/lib/pghero/methods/replica.rb +22 -0
- data/lib/pghero/methods/space.rb +30 -0
- data/lib/pghero/methods/suggested_indexes.rb +322 -0
- data/lib/pghero/methods/system.rb +70 -0
- data/lib/pghero/methods/tables.rb +68 -0
- data/lib/pghero/methods/users.rb +85 -0
- data/lib/pghero/query_stats.rb +7 -0
- data/lib/pghero/version.rb +1 -1
- data/lib/{pghero/tasks.rb → tasks/pghero.rake} +0 -2
- data/test/suggested_indexes_test.rb +3 -2
- data/test/test_helper.rb +1 -1
- metadata +22 -10
- data/test/gemfiles/activerecord31.gemfile +0 -6
- data/test/gemfiles/activerecord32.gemfile +0 -6
- data/test/gemfiles/activerecord40.gemfile +0 -6
data/lib/pghero/methods/maintenance.rb
@@ -0,0 +1,61 @@
module PgHero
  module Methods
    module Maintenance
      # http://www.postgresql.org/docs/9.1/static/routine-vacuuming.html#VACUUM-FOR-WRAPAROUND
      # "the system will shut down and refuse to start any new transactions
      # once there are fewer than 1 million transactions left until wraparound"
      # warn when 10,000,000 transactions left
      def transaction_id_danger(options = {})
        threshold = options[:threshold] || 10000000
        select_all <<-SQL
          SELECT
            c.oid::regclass::text AS table,
            2146483648 - GREATEST(AGE(c.relfrozenxid), AGE(t.relfrozenxid)) AS transactions_before_shutdown
          FROM
            pg_class c
          LEFT JOIN
            pg_class t ON c.reltoastrelid = t.oid
          WHERE
            c.relkind = 'r'
            AND (2146483648 - GREATEST(AGE(c.relfrozenxid), AGE(t.relfrozenxid))) < #{threshold}
          ORDER BY
            2, 1
        SQL
      end

      def autovacuum_danger
        select_all <<-SQL
          SELECT
            c.oid::regclass::text as table,
            (SELECT setting FROM pg_settings WHERE name = 'autovacuum_freeze_max_age')::int -
              GREATEST(AGE(c.relfrozenxid), AGE(t.relfrozenxid)) AS transactions_before_autovacuum
          FROM
            pg_class c
          LEFT JOIN
            pg_class t ON c.reltoastrelid = t.oid
          WHERE
            c.relkind = 'r'
            AND (SELECT setting FROM pg_settings WHERE name = 'autovacuum_freeze_max_age')::int - GREATEST(AGE(c.relfrozenxid), AGE(t.relfrozenxid)) < 2000000
          ORDER BY
            transactions_before_autovacuum
        SQL
      end

      def maintenance_info
        select_all <<-SQL
          SELECT
            schemaname AS schema,
            relname AS table,
            last_vacuum,
            last_autovacuum,
            last_analyze,
            last_autoanalyze
          FROM
            pg_stat_user_tables
          ORDER BY
            1, 2
        SQL
      end
    end
  end
end
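A minimal usage sketch (not from the diff) for the new maintenance helpers, assuming lib/pghero.rb mixes Methods::Maintenance into the PgHero module so the methods are callable at the module level, as the gem's other helpers are; rows come back as hashes keyed by the SQL column aliases.

# Tables within 10,000,000 transactions of the wraparound shutdown point
# (pass threshold: to widen or narrow the warning window).
PgHero.transaction_id_danger.each do |row|
  puts "#{row["table"]}: #{row["transactions_before_shutdown"]} transactions before shutdown"
end

# Tables within 2,000,000 transactions of a forced anti-wraparound autovacuum.
PgHero.autovacuum_danger

# Last manual/auto vacuum and analyze times, per table.
PgHero.maintenance_info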
data/lib/pghero/methods/queries.rb
@@ -0,0 +1,73 @@
module PgHero
  module Methods
    module Queries
      def running_queries
        select_all <<-SQL
          SELECT
            pid,
            state,
            application_name AS source,
            age(now(), xact_start) AS duration,
            waiting,
            query,
            xact_start AS started_at
          FROM
            pg_stat_activity
          WHERE
            query <> '<insufficient privilege>'
            AND state <> 'idle'
            AND pid <> pg_backend_pid()
          ORDER BY
            query_start DESC
        SQL
      end

      def long_running_queries
        select_all <<-SQL
          SELECT
            pid,
            state,
            application_name AS source,
            age(now(), xact_start) AS duration,
            waiting,
            query,
            xact_start AS started_at
          FROM
            pg_stat_activity
          WHERE
            query <> '<insufficient privilege>'
            AND state <> 'idle'
            AND pid <> pg_backend_pid()
            AND now() - query_start > interval '#{long_running_query_sec.to_i} seconds'
          ORDER BY
            query_start DESC
        SQL
      end

      def slow_queries(options = {})
        query_stats = options[:query_stats] || self.query_stats(options.except(:query_stats))
        query_stats.select { |q| q["calls"].to_i >= slow_query_calls.to_i && q["average_time"].to_i >= slow_query_ms.to_i }
      end

      def locks
        select_all <<-SQL
          SELECT DISTINCT ON (pid)
            pg_stat_activity.pid,
            pg_stat_activity.query,
            age(now(), pg_stat_activity.query_start) AS age
          FROM
            pg_stat_activity
          INNER JOIN
            pg_locks ON pg_locks.pid = pg_stat_activity.pid
          WHERE
            pg_stat_activity.query <> '<insufficient privilege>'
            AND pg_locks.mode = 'ExclusiveLock'
            AND pg_stat_activity.pid <> pg_backend_pid()
          ORDER BY
            pid,
            query_start
        SQL
      end
    end
  end
end
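A usage sketch (not from the diff) for the activity helpers above, again assuming module-level access via PgHero; slow_queries filters pg_stat_statements data, so query stats must be enabled for it.

# Non-idle queries currently executing, newest first.
PgHero.running_queries.each do |q|
  puts "#{q["pid"]} (#{q["duration"]}): #{q["query"]}"
end

# Same view, limited to queries older than the configured long_running_query_sec.
PgHero.long_running_queries

# Statements holding exclusive locks.
PgHero.locks

# Slow queries come from query stats; pass precomputed stats to avoid a second fetch.
stats = PgHero.query_stats
PgHero.slow_queries(query_stats: stats)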
data/lib/pghero/methods/query_stats.rb
@@ -0,0 +1,188 @@
module PgHero
  module Methods
    module QueryStats
      def query_stats(options = {})
        current_query_stats = (options[:historical] && options[:end_at] && options[:end_at] < Time.now ? [] : current_query_stats(options)).index_by { |q| q["query"] }
        historical_query_stats = (options[:historical] ? historical_query_stats(options) : []).index_by { |q| q["query"] }
        current_query_stats.default = {}
        historical_query_stats.default = {}

        query_stats = []
        (current_query_stats.keys + historical_query_stats.keys).uniq.each do |query|
          value = {
            "query" => query,
            "total_minutes" => current_query_stats[query]["total_minutes"].to_f + historical_query_stats[query]["total_minutes"].to_f,
            "calls" => current_query_stats[query]["calls"].to_i + historical_query_stats[query]["calls"].to_i
          }
          value["average_time"] = value["total_minutes"] * 1000 * 60 / value["calls"]
          value["total_percent"] = value["total_minutes"] * 100.0 / (current_query_stats[query]["all_queries_total_minutes"].to_f + historical_query_stats[query]["all_queries_total_minutes"].to_f)
          query_stats << value
        end
        sort = options[:sort] || "total_minutes"
        query_stats = query_stats.sort_by { |q| -q[sort] }.first(100)
        if options[:min_average_time]
          query_stats.reject! { |q| q["average_time"].to_f < options[:min_average_time] }
        end
        if options[:min_calls]
          query_stats.reject! { |q| q["calls"].to_i < options[:min_calls] }
        end
        query_stats
      end

      def query_stats_available?
        select_all("SELECT COUNT(*) AS count FROM pg_available_extensions WHERE name = 'pg_stat_statements'").first["count"].to_i > 0
      end

      def query_stats_enabled?
        select_all("SELECT COUNT(*) AS count FROM pg_extension WHERE extname = 'pg_stat_statements'").first["count"].to_i > 0 && query_stats_readable?
      end

      def query_stats_readable?
        select_all("SELECT has_table_privilege(current_user, 'pg_stat_statements', 'SELECT')").first["has_table_privilege"] == "t"
      rescue ActiveRecord::StatementInvalid
        false
      end

      def enable_query_stats
        execute("CREATE EXTENSION pg_stat_statements")
      end

      def disable_query_stats
        execute("DROP EXTENSION IF EXISTS pg_stat_statements")
        true
      end

      def reset_query_stats
        if query_stats_enabled?
          execute("SELECT pg_stat_statements_reset()")
          true
        else
          false
        end
      end

      def capture_query_stats
        config["databases"].keys.each do |database|
          with(database) do
            now = Time.now
            query_stats = self.query_stats(limit: 1000000)
            if query_stats.any? && reset_query_stats
              values =
                query_stats.map do |qs|
                  [
                    database,
                    qs["query"],
                    qs["total_minutes"].to_f * 60 * 1000,
                    qs["calls"],
                    now
                  ].map { |v| quote(v) }.join(",")
                end.map { |v| "(#{v})" }.join(",")

              stats_connection.execute("INSERT INTO pghero_query_stats (database, query, total_time, calls, captured_at) VALUES #{values}")
            end
          end
        end
      end

      # http://stackoverflow.com/questions/20582500/how-to-check-if-a-table-exists-in-a-given-schema
      def historical_query_stats_enabled?
        # TODO use schema from config
        stats_connection.select_all(squish <<-SQL
          SELECT EXISTS (
            SELECT
              1
            FROM
              pg_catalog.pg_class c
            INNER JOIN
              pg_catalog.pg_namespace n ON n.oid = c.relnamespace
            WHERE
              n.nspname = 'public'
              AND c.relname = 'pghero_query_stats'
              AND c.relkind = 'r'
          )
        SQL
        ).to_a.first["exists"] == "t"
      end

      def stats_connection
        ::PgHero::QueryStats.connection
      end

      private

      # http://www.craigkerstiens.com/2013/01/10/more-on-postgres-performance/
      def current_query_stats(options = {})
        if query_stats_enabled?
          limit = options[:limit] || 100
          sort = options[:sort] || "total_minutes"
          select_all <<-SQL
            WITH query_stats AS (
              SELECT
                query,
                (total_time / 1000 / 60) as total_minutes,
                (total_time / calls) as average_time,
                calls
              FROM
                pg_stat_statements
              INNER JOIN
                pg_database ON pg_database.oid = pg_stat_statements.dbid
              WHERE
                pg_database.datname = current_database()
            )
            SELECT
              query,
              total_minutes,
              average_time,
              calls,
              total_minutes * 100.0 / (SELECT SUM(total_minutes) FROM query_stats) AS total_percent,
              (SELECT SUM(total_minutes) FROM query_stats) AS all_queries_total_minutes
            FROM
              query_stats
            ORDER BY
              #{quote_table_name(sort)} DESC
            LIMIT #{limit.to_i}
          SQL
        else
          []
        end
      end

      def historical_query_stats(options = {})
        if historical_query_stats_enabled?
          sort = options[:sort] || "total_minutes"
          stats_connection.select_all squish <<-SQL
            WITH query_stats AS (
              SELECT
                query,
                (SUM(total_time) / 1000 / 60) as total_minutes,
                (SUM(total_time) / SUM(calls)) as average_time,
                SUM(calls) as calls
              FROM
                pghero_query_stats
              WHERE
                database = #{quote(current_database)}
                #{options[:start_at] ? "AND captured_at >= #{quote(options[:start_at])}" : ""}
                #{options[:end_at] ? "AND captured_at <= #{quote(options[:end_at])}" : ""}
              GROUP BY
                query
            )
            SELECT
              query,
              total_minutes,
              average_time,
              calls,
              total_minutes * 100.0 / (SELECT SUM(total_minutes) FROM query_stats) AS total_percent,
              (SELECT SUM(total_minutes) FROM query_stats) AS all_queries_total_minutes
            FROM
              query_stats
            ORDER BY
              #{quote_table_name(sort)} DESC
            LIMIT 100
          SQL
        else
          []
        end
      end
    end
  end
end
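A usage sketch (not from the diff) for the query-stats module, assuming module-level access via PgHero. It also assumes the pg_stat_statements extension is loadable on the server and, for the historical/capture paths, that the pghero_query_stats table and a "databases" entry in the gem's config exist, as the code above expects.

# One-time setup (requires pg_stat_statements to be available on the server).
PgHero.enable_query_stats if PgHero.query_stats_available? && !PgHero.query_stats_enabled?

# Live stats from pg_stat_statements, merged with captured history when historical: true.
PgHero.query_stats(historical: true, start_at: 24.hours.ago, min_calls: 100, sort: "total_minutes")

# Snapshot current stats into pghero_query_stats and reset the counters;
# intended to run on a schedule (e.g. via cron).
PgHero.capture_query_stats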
data/lib/pghero/methods/replica.rb
@@ -0,0 +1,22 @@
module PgHero
  module Methods
    module Replica
      def replica?
        select_all("SELECT setting FROM pg_settings WHERE name = 'hot_standby'").first["setting"] == "on"
      end

      # http://www.postgresql.org/message-id/CADKbJJWz9M0swPT3oqe8f9+tfD4-F54uE6Xtkh4nERpVsQnjnw@mail.gmail.com
      def replication_lag
        select_all(<<-SQL
          SELECT
            CASE
              WHEN pg_last_xlog_receive_location() = pg_last_xlog_replay_location() THEN 0
              ELSE EXTRACT (EPOCH FROM NOW() - pg_last_xact_replay_timestamp())
            END
            AS replication_lag
        SQL
        ).first["replication_lag"].to_f
      end
    end
  end
end
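A usage sketch (not from the diff), assuming module-level access via PgHero; note the pg_last_xlog_* functions used above are the PostgreSQL 9.x names.

# On a hot standby, report how many seconds the replica is behind.
if PgHero.replica?
  puts "Replication lag: #{PgHero.replication_lag.round(1)}s"
end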
data/lib/pghero/methods/space.rb
@@ -0,0 +1,30 @@
module PgHero
  module Methods
    module Space
      def database_size
        select_all("SELECT pg_size_pretty(pg_database_size(current_database()))").first["pg_size_pretty"]
      end

      def relation_sizes
        select_all <<-SQL
          SELECT
            n.nspname AS schema,
            c.relname AS name,
            CASE WHEN c.relkind = 'r' THEN 'table' ELSE 'index' END AS type,
            pg_size_pretty(pg_table_size(c.oid)) AS size
          FROM
            pg_class c
          LEFT JOIN
            pg_namespace n ON (n.oid = c.relnamespace)
          WHERE
            n.nspname NOT IN ('pg_catalog', 'information_schema')
            AND n.nspname !~ '^pg_toast'
            AND c.relkind IN ('r', 'i')
          ORDER BY
            pg_table_size(c.oid) DESC,
            name ASC
        SQL
      end
    end
  end
end
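A usage sketch (not from the diff) for the space helpers, assuming module-level access via PgHero; sizes are returned pre-formatted by pg_size_pretty.

# Pretty-printed size of the current database, e.g. "1724 MB".
puts PgHero.database_size

# Ten largest tables and indexes.
PgHero.relation_sizes.first(10).each do |r|
  puts "#{r["schema"]}.#{r["name"]} (#{r["type"]}): #{r["size"]}"
end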
data/lib/pghero/methods/suggested_indexes.rb
@@ -0,0 +1,322 @@
module PgHero
  module Methods
    module SuggestedIndexes
      def suggested_indexes_enabled?
        defined?(PgQuery) && query_stats_enabled?
      end

      # TODO clean this mess
      def suggested_indexes_by_query(options = {})
        best_indexes = {}

        if suggested_indexes_enabled?
          # get most time-consuming queries
          queries = options[:queries] || (options[:query_stats] || query_stats(historical: true, start_at: 24.hours.ago)).map { |qs| qs["query"] }

          # get best indexes for queries
          best_indexes = best_index_helper(queries)

          if best_indexes.any?
            existing_columns = Hash.new { |hash, key| hash[key] = Hash.new { |hash2, key2| hash2[key2] = [] } }
            indexes = self.indexes
            indexes.group_by { |g| g["using"] }.each do |group, inds|
              inds.each do |i|
                existing_columns[group][i["table"]] << i["columns"]
              end
            end
            indexes_by_table = indexes.group_by { |i| i["table"] }

            best_indexes.each do |_query, best_index|
              if best_index[:found]
                index = best_index[:index]
                best_index[:table_indexes] = indexes_by_table[index[:table]].to_a
                covering_index = existing_columns[index[:using] || "btree"][index[:table]].find { |e| index_covers?(e, index[:columns]) }
                if covering_index
                  best_index[:covering_index] = covering_index
                  best_index[:explanation] = "Covered by index on (#{covering_index.join(", ")})"
                end
              end
            end
          end
        end

        best_indexes
      end

      def suggested_indexes(options = {})
        indexes = []

        (options[:suggested_indexes_by_query] || suggested_indexes_by_query(options)).select { |_s, i| i[:found] && !i[:covering_index] }.group_by { |_s, i| i[:index] }.each do |index, group|
          details = {}
          group.map(&:second).each do |g|
            details = details.except(:index).deep_merge(g)
          end
          indexes << index.merge(queries: group.map(&:first), details: details)
        end

        indexes.sort_by { |i| [i[:table], i[:columns]] }
      end

      def autoindex(options = {})
        suggested_indexes.each do |index|
          p index
          if options[:create]
            connection.execute("CREATE INDEX CONCURRENTLY ON #{quote_table_name(index[:table])} (#{index[:columns].map { |c| quote_table_name(c) }.join(",")})")
          end
        end
      end

      def autoindex_all(options = {})
        config["databases"].keys.each do |database|
          with(database) do
            puts "Autoindexing #{database}..."
            autoindex(options)
          end
        end
      end

      def best_index(statement, _options = {})
        best_index_helper([statement])[statement]
      end

      private

      def best_index_helper(statements)
        indexes = {}

        # see if this is a query we understand and can use
        parts = {}
        statements.each do |statement|
          parts[statement] = best_index_structure(statement)
        end

        # get stats about columns for relevant tables
        tables = parts.values.map { |t| t[:table] }.uniq
        # TODO get schema from query structure, then try search path
        schema = connection_model.connection_config[:schema] || "public"
        if tables.any?
          row_stats = Hash[table_stats(table: tables, schema: schema).map { |i| [i["table"], i["reltuples"]] }]
          col_stats = column_stats(table: tables, schema: schema).group_by { |i| i["table"] }
        end

        # find best index based on query structure and column stats
        parts.each do |statement, structure|
          index = {found: false}

          if structure[:error]
            index[:explanation] = structure[:error]
          elsif structure[:table].start_with?("pg_")
            index[:explanation] = "System table"
          else
            index[:structure] = structure

            table = structure[:table]
            where = structure[:where].uniq
            sort = structure[:sort]

            total_rows = row_stats[table].to_i
            index[:rows] = total_rows

            ranks = Hash[col_stats[table].to_a.map { |r| [r["column"], r] }]
            columns = (where + sort).map { |c| c[:column] }.uniq

            if columns.any?
              if columns.all? { |c| ranks[c] }
                first_desc = sort.index { |c| c[:direction] == "desc" }
                sort = sort.first(first_desc + 1) if first_desc
                where = where.sort_by { |c| [row_estimates(ranks[c[:column]], total_rows, total_rows, c[:op]), c[:column]] } + sort

                index[:row_estimates] = Hash[where.map { |c| ["#{c[:column]} (#{c[:op] || "sort"})", row_estimates(ranks[c[:column]], total_rows, total_rows, c[:op]).round] }]

                # no index needed if less than 500 rows
                if total_rows >= 500

                  if ["~~", "~~*"].include?(where.first[:op])
                    index[:found] = true
                    index[:row_progression] = [total_rows, index[:row_estimates].values.first]
                    index[:index] = {table: table, columns: ["#{where.first[:column]} gist_trgm_ops"], using: "gist"}
                  else
                    # if most values are unique, no need to index others
                    rows_left = total_rows
                    final_where = []
                    prev_rows_left = [rows_left]
                    where.reject { |c| ["~~", "~~*"].include?(c[:op]) }.each do |c|
                      next if final_where.include?(c[:column])
                      final_where << c[:column]
                      rows_left = row_estimates(ranks[c[:column]], total_rows, rows_left, c[:op])
                      prev_rows_left << rows_left
                      if rows_left < 50 || final_where.size >= 2 || [">", ">=", "<", "<=", "~~", "~~*", "BETWEEN"].include?(c[:op])
                        break
                      end
                    end

                    index[:row_progression] = prev_rows_left.map(&:round)

                    # if the last indexes don't give us much, don't include
                    prev_rows_left.reverse!
                    (prev_rows_left.size - 1).times do |i|
                      if prev_rows_left[i] > prev_rows_left[i + 1] * 0.3
                        final_where.pop
                      else
                        break
                      end
                    end

                    if final_where.any?
                      index[:found] = true
                      index[:index] = {table: table, columns: final_where}
                    end
                  end
                else
                  index[:explanation] = "No index needed if less than 500 rows"
                end
              else
                index[:explanation] = "Stats not found"
              end
            else
              index[:explanation] = "No columns to index"
            end
          end

          indexes[statement] = index
        end

        indexes
      end

      def best_index_structure(statement)
        begin
          tree = PgQuery.parse(statement).parsetree
        rescue PgQuery::ParseError
          return {error: "Parse error"}
        end
        return {error: "Unknown structure"} unless tree.size == 1

        tree = tree.first
        table = parse_table(tree) rescue nil
        unless table
          error =
            case tree.keys.first
            when "INSERT INTO"
              "INSERT statement"
            when "SET"
              "SET statement"
            when "SELECT"
              if (tree["SELECT"]["fromClause"].first["JOINEXPR"] rescue false)
                "JOIN not supported yet"
              end
            end
          return {error: error || "Unknown structure"}
        end

        select = tree["SELECT"] || tree["DELETE FROM"] || tree["UPDATE"]
        where = (select["whereClause"] ? parse_where(select["whereClause"]) : []) rescue nil
        return {error: "Unknown structure"} unless where

        sort = (select["sortClause"] ? parse_sort(select["sortClause"]) : []) rescue []

        {table: table, where: where, sort: sort}
      end

      def index_covers?(indexed_columns, columns)
        indexed_columns.first(columns.size) == columns
      end

      # TODO better row estimation
      # http://www.postgresql.org/docs/current/static/row-estimation-examples.html
      def row_estimates(stats, total_rows, rows_left, op)
        case op
        when "null"
          rows_left * stats["null_frac"].to_f
        when "not_null"
          rows_left * (1 - stats["null_frac"].to_f)
        else
          rows_left *= (1 - stats["null_frac"].to_f)
          ret =
            if stats["n_distinct"].to_f == 0
              0
            elsif stats["n_distinct"].to_f < 0
              if total_rows > 0
                (-1 / stats["n_distinct"].to_f) * (rows_left / total_rows.to_f)
              else
                0
              end
            else
              rows_left / stats["n_distinct"].to_f
            end

          case op
          when ">", ">=", "<", "<=", "~~", "~~*", "BETWEEN"
            (rows_left + ret) / 10.0 # TODO better approximation
          when "<>"
            rows_left - ret
          else
            ret
          end
        end
      end

      def parse_table(tree)
        case tree.keys.first
        when "SELECT"
          tree["SELECT"]["fromClause"].first["RANGEVAR"]["relname"]
        when "DELETE FROM"
          tree["DELETE FROM"]["relation"]["RANGEVAR"]["relname"]
        when "UPDATE"
          tree["UPDATE"]["relation"]["RANGEVAR"]["relname"]
        end
      end

      # TODO capture values
      def parse_where(tree)
        aexpr = tree["AEXPR"] || tree[nil]

        if tree["BOOLEXPR"]
          if tree["BOOLEXPR"]["boolop"] == 0
            tree["BOOLEXPR"]["args"].flat_map { |v| parse_where(v) }
          end
        elsif tree["AEXPR AND"]
          left = parse_where(tree["AEXPR AND"]["lexpr"])
          right = parse_where(tree["AEXPR AND"]["rexpr"])
          left + right if left && right
        elsif aexpr && ["=", "<>", ">", ">=", "<", "<=", "~~", "~~*", "BETWEEN"].include?(aexpr["name"].first)
          [{column: aexpr["lexpr"]["COLUMNREF"]["fields"].last, op: aexpr["name"].first}]
        elsif tree["AEXPR IN"] && ["=", "<>"].include?(tree["AEXPR IN"]["name"].first)
          [{column: tree["AEXPR IN"]["lexpr"]["COLUMNREF"]["fields"].last, op: tree["AEXPR IN"]["name"].first}]
        elsif tree["NULLTEST"]
          op = tree["NULLTEST"]["nulltesttype"] == 1 ? "not_null" : "null"
          [{column: tree["NULLTEST"]["arg"]["COLUMNREF"]["fields"].last, op: op}]
        end
      end

      def parse_sort(sort_clause)
        sort_clause.map do |v|
          {
            column: v["SORTBY"]["node"]["COLUMNREF"]["fields"].last,
            direction: v["SORTBY"]["sortby_dir"] == 2 ? "desc" : "asc"
          }
        end
      end

      def column_stats(options = {})
        schema = options[:schema]
        tables = options[:table] ? Array(options[:table]) : nil
        select_all <<-SQL
          SELECT
            schemaname AS schema,
            tablename AS table,
            attname AS column,
            null_frac,
            n_distinct
          FROM
            pg_stats
          WHERE
            #{tables ? "tablename IN (#{tables.map { |t| quote(t) }.join(", ")})" : "1 = 1"}
            AND schemaname = #{quote(schema)}
          ORDER BY
            1, 2, 3
        SQL
      end
    end
  end
end
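A usage sketch (not from the diff) for the suggested-indexes module, assuming module-level access via PgHero, the pg_query gem (for PgQuery), and enabled query stats; the users/email/created_at query is purely hypothetical.

# Requires the pg_query gem (PgQuery) and enabled query stats.
if PgHero.suggested_indexes_enabled?
  # Best index for a single statement (hypothetical query shown);
  # returns a hash like {found: true, index: {table: ..., columns: [...]}, ...} when stats support it.
  PgHero.best_index("SELECT * FROM users WHERE email = $1 ORDER BY created_at DESC")

  # Suggestions aggregated across the most time-consuming recent queries.
  PgHero.suggested_indexes.each do |index|
    puts "#{index[:table]}: (#{index[:columns].join(", ")})"
  end

  # Print the suggestions; pass create: true to actually build them concurrently.
  PgHero.autoindex(create: false)
end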