pghero 1.2.2 → 1.2.3

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of pghero might be problematic. Click here for more details.

Files changed (37) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +11 -0
  3. data/CHANGELOG.md +6 -0
  4. data/README.md +1 -1
  5. data/app/controllers/pg_hero/home_controller.rb +15 -4
  6. data/app/views/layouts/pg_hero/application.html.erb +4 -4
  7. data/app/views/pg_hero/home/explain.html.erb +1 -1
  8. data/app/views/pg_hero/home/index_usage.html.erb +6 -1
  9. data/app/views/pg_hero/home/maintenance.html.erb +6 -1
  10. data/app/views/pg_hero/home/space.html.erb +5 -3
  11. data/lib/pghero.rb +35 -1243
  12. data/lib/pghero/connection.rb +5 -0
  13. data/lib/pghero/database.rb +12 -3
  14. data/lib/pghero/methods/basic.rb +104 -0
  15. data/lib/pghero/methods/connections.rb +49 -0
  16. data/lib/pghero/methods/databases.rb +39 -0
  17. data/lib/pghero/methods/explain.rb +29 -0
  18. data/lib/pghero/methods/indexes.rb +154 -0
  19. data/lib/pghero/methods/kill.rb +27 -0
  20. data/lib/pghero/methods/maintenance.rb +61 -0
  21. data/lib/pghero/methods/queries.rb +73 -0
  22. data/lib/pghero/methods/query_stats.rb +188 -0
  23. data/lib/pghero/methods/replica.rb +22 -0
  24. data/lib/pghero/methods/space.rb +30 -0
  25. data/lib/pghero/methods/suggested_indexes.rb +322 -0
  26. data/lib/pghero/methods/system.rb +70 -0
  27. data/lib/pghero/methods/tables.rb +68 -0
  28. data/lib/pghero/methods/users.rb +85 -0
  29. data/lib/pghero/query_stats.rb +7 -0
  30. data/lib/pghero/version.rb +1 -1
  31. data/lib/{pghero/tasks.rb → tasks/pghero.rake} +0 -2
  32. data/test/suggested_indexes_test.rb +3 -2
  33. data/test/test_helper.rb +1 -1
  34. metadata +22 -10
  35. data/test/gemfiles/activerecord31.gemfile +0 -6
  36. data/test/gemfiles/activerecord32.gemfile +0 -6
  37. data/test/gemfiles/activerecord40.gemfile +0 -6
@@ -0,0 +1,61 @@
1
module PgHero
  module Methods
    # Vacuum-related health checks. Every method here delegates to
    # `select_all`, which is defined outside this module, and returns
    # whatever `select_all` returns for the given SQL.
    module Maintenance
      # http://www.postgresql.org/docs/9.1/static/routine-vacuuming.html#VACUUM-FOR-WRAPAROUND
      # "the system will shut down and refuse to start any new transactions
      # once there are fewer than 1 million transactions left until wraparound"
      # warn when 10,000,000 transactions left
      #
      # Lists tables (relkind 'r') whose remaining transaction budget is below
      # the threshold. The age used is the larger of the table's own
      # relfrozenxid age and that of its TOAST relation.
      #
      # options[:threshold] - warn below this many remaining transactions
      #                       (default 10,000,000)
      def transaction_id_danger(options = {})
        # The value is interpolated into the SQL text, so force it to an
        # integer, as the other interpolated numbers in this library are.
        threshold = (options[:threshold] || 10000000).to_i
        # 2146483648 = 2^31 minus the 1 million transaction margin quoted above
        select_all <<-SQL
          SELECT
            c.oid::regclass::text AS table,
            2146483648 - GREATEST(AGE(c.relfrozenxid), AGE(t.relfrozenxid)) AS transactions_before_shutdown
          FROM
            pg_class c
          LEFT JOIN
            pg_class t ON c.reltoastrelid = t.oid
          WHERE
            c.relkind = 'r'
            AND (2146483648 - GREATEST(AGE(c.relfrozenxid), AGE(t.relfrozenxid))) < #{threshold}
          ORDER BY
            2, 1
        SQL
      end

      # Lists tables that are within 2,000,000 transactions of the
      # autovacuum_freeze_max_age setting, closest first.
      def autovacuum_danger
        select_all <<-SQL
          SELECT
            c.oid::regclass::text as table,
            (SELECT setting FROM pg_settings WHERE name = 'autovacuum_freeze_max_age')::int -
            GREATEST(AGE(c.relfrozenxid), AGE(t.relfrozenxid)) AS transactions_before_autovacuum
          FROM
            pg_class c
          LEFT JOIN
            pg_class t ON c.reltoastrelid = t.oid
          WHERE
            c.relkind = 'r'
            AND (SELECT setting FROM pg_settings WHERE name = 'autovacuum_freeze_max_age')::int - GREATEST(AGE(c.relfrozenxid), AGE(t.relfrozenxid)) < 2000000
          ORDER BY
            transactions_before_autovacuum
        SQL
      end

      # Last manual/automatic vacuum and analyze timestamps for every row of
      # pg_stat_user_tables, ordered by schema and table name.
      def maintenance_info
        select_all <<-SQL
          SELECT
            schemaname AS schema,
            relname AS table,
            last_vacuum,
            last_autovacuum,
            last_analyze,
            last_autoanalyze
          FROM
            pg_stat_user_tables
          ORDER BY
            1, 2
        SQL
      end
    end
  end
end
@@ -0,0 +1,73 @@
1
module PgHero
  module Methods
    # Inspection of live activity (pg_stat_activity / pg_locks) plus a filter
    # over query statistics. SQL is executed through `select_all`, which is
    # defined outside this module.
    module Queries
      # All non-idle backends other than the current one, most recently
      # started query first. Rows whose query text is hidden
      # ('<insufficient privilege>') are excluded.
      def running_queries
        select_all <<-SQL
          SELECT
            pid,
            state,
            application_name AS source,
            age(now(), xact_start) AS duration,
            waiting,
            query,
            xact_start AS started_at
          FROM
            pg_stat_activity
          WHERE
            query <> '<insufficient privilege>'
            AND state <> 'idle'
            AND pid <> pg_backend_pid()
          ORDER BY
            query_start DESC
        SQL
      end

      # Same as running_queries, restricted to queries that started more than
      # `long_running_query_sec` seconds ago.
      def long_running_queries
        select_all <<-SQL
          SELECT
            pid,
            state,
            application_name AS source,
            age(now(), xact_start) AS duration,
            waiting,
            query,
            xact_start AS started_at
          FROM
            pg_stat_activity
          WHERE
            query <> '<insufficient privilege>'
            AND state <> 'idle'
            AND pid <> pg_backend_pid()
            AND now() - query_start > interval '#{long_running_query_sec.to_i} seconds'
          ORDER BY
            query_start DESC
        SQL
      end

      # Query stats rows with at least `slow_query_calls` calls and an average
      # time of at least `slow_query_ms`.
      #
      # options[:query_stats] - precomputed rows to filter; when absent the
      #                         remaining options are passed to query_stats
      def slow_queries(options = {})
        query_stats = options[:query_stats] || self.query_stats(options.except(:query_stats))
        min_calls = slow_query_calls.to_i
        # Compare durations as floats: truncating both sides to integers let
        # e.g. a 20.2 ms average pass a 20.5 ms threshold.
        min_average_time = slow_query_ms.to_f
        query_stats.select { |q| q["calls"].to_i >= min_calls && q["average_time"].to_f >= min_average_time }
      end

      # One row per backend (other than the current one) that holds a lock in
      # mode 'ExclusiveLock', with its query text and the query's age.
      def locks
        select_all <<-SQL
          SELECT DISTINCT ON (pid)
            pg_stat_activity.pid,
            pg_stat_activity.query,
            age(now(), pg_stat_activity.query_start) AS age
          FROM
            pg_stat_activity
          INNER JOIN
            pg_locks ON pg_locks.pid = pg_stat_activity.pid
          WHERE
            pg_stat_activity.query <> '<insufficient privilege>'
            AND pg_locks.mode = 'ExclusiveLock'
            AND pg_stat_activity.pid <> pg_backend_pid()
          ORDER BY
            pid,
            query_start
        SQL
      end
    end
  end
end
@@ -0,0 +1,188 @@
1
module PgHero
  module Methods
    # Access to pg_stat_statements data ("current" stats) and to rows
    # previously captured into the pghero_query_stats table ("historical"
    # stats). Helpers such as select_all, execute, quote, quote_table_name,
    # squish, with, config and current_database are defined outside this
    # module.
    module QueryStats
      # Combined current + historical statistics, one hash per distinct query
      # text with the keys "query", "total_minutes", "calls", "average_time"
      # (milliseconds per call) and "total_percent".
      #
      # options[:historical]       - include captured stats
      # options[:start_at/:end_at] - time window for captured stats; current
      #                              stats are skipped when :historical is set
      #                              and :end_at is in the past
      # options[:sort]             - result key to sort by, descending
      #                              (default "total_minutes")
      # options[:limit]            - maximum rows kept after sorting
      #                              (default 100)
      # options[:min_average_time] - drop rows below this average time
      # options[:min_calls]        - drop rows with fewer calls
      def query_stats(options = {})
        skip_current = options[:historical] && options[:end_at] && options[:end_at] < Time.now
        current_rows = skip_current ? [] : current_query_stats(options)
        historical_rows = options[:historical] ? historical_query_stats(options) : []

        current_by_query = index_stats_by_query(current_rows)
        historical_by_query = index_stats_by_query(historical_rows)

        # Every row of a source carries that source's grand total, so read it
        # once per source. Using one shared denominator keeps the percentages
        # consistent even for queries present in only one of the two sources.
        all_queries_total_minutes = [current_rows, historical_rows].inject(0.0) do |sum, rows|
          first_row = rows.first
          sum + (first_row ? first_row["all_queries_total_minutes"].to_f : 0.0)
        end

        missing = {}
        query_stats = (current_by_query.keys + historical_by_query.keys).uniq.map do |query|
          current = current_by_query.fetch(query, missing)
          historical = historical_by_query.fetch(query, missing)
          total_minutes = current["total_minutes"].to_f + historical["total_minutes"].to_f
          calls = current["calls"].to_i + historical["calls"].to_i
          {
            "query" => query,
            "total_minutes" => total_minutes,
            "calls" => calls,
            # guard the divisions so NaN/Infinity never reach the sort below
            "average_time" => calls > 0 ? total_minutes * 1000 * 60 / calls : 0.0,
            "total_percent" => all_queries_total_minutes > 0 ? total_minutes * 100.0 / all_queries_total_minutes : 0.0
          }
        end

        sort = options[:sort] || "total_minutes"
        # Honor :limit here too; capture_query_stats relies on it to persist
        # more than the default 100 rows before resetting the live stats.
        limit = (options[:limit] || 100).to_i
        query_stats = query_stats.sort_by { |q| -q[sort] }.first(limit)
        if options[:min_average_time]
          query_stats.reject! { |q| q["average_time"].to_f < options[:min_average_time] }
        end
        if options[:min_calls]
          query_stats.reject! { |q| q["calls"].to_i < options[:min_calls] }
        end
        query_stats
      end

      # True when pg_stat_statements is listed in pg_available_extensions.
      def query_stats_available?
        select_all("SELECT COUNT(*) AS count FROM pg_available_extensions WHERE name = 'pg_stat_statements'").first["count"].to_i > 0
      end

      # True when the extension is present in pg_extension and readable by
      # the current user.
      def query_stats_enabled?
        select_all("SELECT COUNT(*) AS count FROM pg_extension WHERE extname = 'pg_stat_statements'").first["count"].to_i > 0 && query_stats_readable?
      end

      # True when the current user has SELECT on pg_stat_statements; false
      # when the check itself raises ActiveRecord::StatementInvalid.
      def query_stats_readable?
        select_all("SELECT has_table_privilege(current_user, 'pg_stat_statements', 'SELECT')").first["has_table_privilege"] == "t"
      rescue ActiveRecord::StatementInvalid
        false
      end

      # Runs CREATE EXTENSION pg_stat_statements.
      def enable_query_stats
        execute("CREATE EXTENSION pg_stat_statements")
      end

      # Drops the extension if present; always returns true.
      def disable_query_stats
        execute("DROP EXTENSION IF EXISTS pg_stat_statements")
        true
      end

      # Resets pg_stat_statements counters. Returns true when the reset was
      # issued, false when query stats are not enabled.
      def reset_query_stats
        if query_stats_enabled?
          execute("SELECT pg_stat_statements_reset()")
          true
        else
          false
        end
      end

      # For each configured database: reads the current stats, resets the
      # live counters, and inserts the rows that were read into
      # pghero_query_stats through the stats connection.
      def capture_query_stats
        config["databases"].keys.each do |database|
          with(database) do
            now = Time.now
            query_stats = self.query_stats(limit: 1000000)
            if query_stats.any? && reset_query_stats
              values =
                query_stats.map do |qs|
                  [
                    database,
                    qs["query"],
                    qs["total_minutes"].to_f * 60 * 1000,
                    qs["calls"],
                    now
                  ].map { |v| quote(v) }.join(",")
                end.map { |v| "(#{v})" }.join(",")

              stats_connection.execute("INSERT INTO pghero_query_stats (database, query, total_time, calls, captured_at) VALUES #{values}")
            end
          end
        end
      end

      # http://stackoverflow.com/questions/20582500/how-to-check-if-a-table-exists-in-a-given-schema
      #
      # True when a table named pghero_query_stats exists in the public
      # schema of the stats connection's database.
      def historical_query_stats_enabled?
        # TODO use schema from config
        stats_connection.select_all(squish <<-SQL
          SELECT EXISTS (
            SELECT
              1
            FROM
              pg_catalog.pg_class c
            INNER JOIN
              pg_catalog.pg_namespace n ON n.oid = c.relnamespace
            WHERE
              n.nspname = 'public'
              AND c.relname = 'pghero_query_stats'
              AND c.relkind = 'r'
          )
        SQL
        ).to_a.first["exists"] == "t"
      end

      # Connection used to read and write captured stats.
      def stats_connection
        ::PgHero::QueryStats.connection
      end

      private

      # Maps each row's "query" text to the row; a later row with the same
      # text replaces an earlier one.
      def index_stats_by_query(rows)
        rows.each_with_object({}) { |row, by_query| by_query[row["query"]] = row }
      end

      # http://www.craigkerstiens.com/2013/01/10/more-on-postgres-performance/
      #
      # Rows from pg_stat_statements for the current database, or [] when
      # query stats are not enabled. Honors options[:limit] (default 100)
      # and options[:sort] (default "total_minutes").
      def current_query_stats(options = {})
        if query_stats_enabled?
          limit = options[:limit] || 100
          sort = options[:sort] || "total_minutes"
          select_all <<-SQL
            WITH query_stats AS (
              SELECT
                query,
                (total_time / 1000 / 60) as total_minutes,
                (total_time / calls) as average_time,
                calls
              FROM
                pg_stat_statements
              INNER JOIN
                pg_database ON pg_database.oid = pg_stat_statements.dbid
              WHERE
                pg_database.datname = current_database()
            )
            SELECT
              query,
              total_minutes,
              average_time,
              calls,
              total_minutes * 100.0 / (SELECT SUM(total_minutes) FROM query_stats) AS total_percent,
              (SELECT SUM(total_minutes) FROM query_stats) AS all_queries_total_minutes
            FROM
              query_stats
            ORDER BY
              #{quote_table_name(sort)} DESC
            LIMIT #{limit.to_i}
          SQL
        else
          []
        end
      end

      # Captured rows for the current database aggregated per query text,
      # optionally bounded by options[:start_at] / options[:end_at]; [] when
      # the capture table does not exist. At most 100 rows are returned.
      def historical_query_stats(options = {})
        if historical_query_stats_enabled?
          sort = options[:sort] || "total_minutes"
          stats_connection.select_all squish <<-SQL
            WITH query_stats AS (
              SELECT
                query,
                (SUM(total_time) / 1000 / 60) as total_minutes,
                (SUM(total_time) / SUM(calls)) as average_time,
                SUM(calls) as calls
              FROM
                pghero_query_stats
              WHERE
                database = #{quote(current_database)}
                #{options[:start_at] ? "AND captured_at >= #{quote(options[:start_at])}" : ""}
                #{options[:end_at] ? "AND captured_at <= #{quote(options[:end_at])}" : ""}
              GROUP BY
                query
            )
            SELECT
              query,
              total_minutes,
              average_time,
              calls,
              total_minutes * 100.0 / (SELECT SUM(total_minutes) FROM query_stats) AS total_percent,
              (SELECT SUM(total_minutes) FROM query_stats) AS all_queries_total_minutes
            FROM
              query_stats
            ORDER BY
              #{quote_table_name(sort)} DESC
            LIMIT 100
          SQL
        else
          []
        end
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
module PgHero
  module Methods
    # Replication status helpers. SQL is executed through `select_all`,
    # which is defined outside this module.
    module Replica
      # True when the server reports that it is in recovery.
      #
      # The hot_standby setting is not a reliable signal: it only controls
      # whether a server accepts queries while in recovery and may be "on" on
      # a primary as well, so ask the server directly.
      def replica?
        in_recovery = select_all("SELECT pg_is_in_recovery() AS in_recovery").first["in_recovery"]
        # accept both the textual and the type-cast boolean representation
        [true, "t"].include?(in_recovery)
      end

      # http://www.postgresql.org/message-id/CADKbJJWz9M0swPT3oqe8f9+tfD4-F54uE6Xtkh4nERpVsQnjnw@mail.gmail.com
      #
      # Replication lag in seconds as a Float: 0 when the received and
      # replayed xlog locations are equal, otherwise the time elapsed since
      # the last replayed transaction. A NULL result converts to 0.0.
      def replication_lag
        select_all(<<-SQL
          SELECT
            CASE
              WHEN pg_last_xlog_receive_location() = pg_last_xlog_replay_location() THEN 0
              ELSE EXTRACT (EPOCH FROM NOW() - pg_last_xact_replay_timestamp())
            END
          AS replication_lag
        SQL
        ).first["replication_lag"].to_f
      end
    end
  end
end
@@ -0,0 +1,30 @@
1
module PgHero
  module Methods
    # Disk usage reports. SQL is executed through `select_all`, which is
    # defined outside this module.
    module Space
      # Human-readable size of the current database, as formatted by
      # pg_size_pretty.
      def database_size
        size_row = select_all("SELECT pg_size_pretty(pg_database_size(current_database()))").first
        size_row["pg_size_pretty"]
      end

      # Schema, name, type ('table' or 'index') and pretty-printed size of
      # every table and index outside pg_catalog, information_schema and the
      # pg_toast schemas, largest first and then by name.
      def relation_sizes
        sizes_sql = <<-SQL
          SELECT
            n.nspname AS schema,
            c.relname AS name,
            CASE WHEN c.relkind = 'r' THEN 'table' ELSE 'index' END AS type,
            pg_size_pretty(pg_table_size(c.oid)) AS size
          FROM
            pg_class c
          LEFT JOIN
            pg_namespace n ON (n.oid = c.relnamespace)
          WHERE
            n.nspname NOT IN ('pg_catalog', 'information_schema')
            AND n.nspname !~ '^pg_toast'
            AND c.relkind IN ('r', 'i')
          ORDER BY
            pg_table_size(c.oid) DESC,
            name ASC
        SQL
        select_all(sizes_sql)
      end
    end
  end
end
@@ -0,0 +1,322 @@
1
module PgHero
  module Methods
    # Index suggestions derived from parsed query text (PgQuery) and column
    # statistics (pg_stats). Helpers such as query_stats, indexes,
    # table_stats, select_all, quote, quote_table_name, connection,
    # connection_model, with and config are defined outside this module.
    module SuggestedIndexes
      # Truthy when the PgQuery constant is defined and query stats are
      # enabled.
      def suggested_indexes_enabled?
        defined?(PgQuery) && query_stats_enabled?
      end

      # TODO clean this mess
      #
      # Maps each query text to an analysis hash (see best_index_helper).
      # Suggestions already satisfied by an existing index of the same access
      # method get :covering_index and an :explanation. Returns {} when
      # suggestions are not enabled.
      #
      # options[:queries]     - query texts to analyze
      # options[:query_stats] - stats rows whose "query" values are analyzed;
      #                         defaults to the last 24 hours of query_stats
      def suggested_indexes_by_query(options = {})
        best_indexes = {}

        if suggested_indexes_enabled?
          # get most time-consuming queries
          queries = options[:queries] || (options[:query_stats] || query_stats(historical: true, start_at: 24.hours.ago)).map { |qs| qs["query"] }

          # get best indexes for queries
          best_indexes = best_index_helper(queries)

          if best_indexes.any?
            # access method => table => list of column lists of existing indexes
            existing_columns = Hash.new { |hash, key| hash[key] = Hash.new { |hash2, key2| hash2[key2] = [] } }
            indexes = self.indexes
            indexes.group_by { |g| g["using"] }.each do |group, inds|
              inds.each do |i|
                existing_columns[group][i["table"]] << i["columns"]
              end
            end
            indexes_by_table = indexes.group_by { |i| i["table"] }

            best_indexes.each do |_query, best_index|
              if best_index[:found]
                index = best_index[:index]
                best_index[:table_indexes] = indexes_by_table[index[:table]].to_a
                covering_index = existing_columns[index[:using] || "btree"][index[:table]].find { |e| index_covers?(e, index[:columns]) }
                if covering_index
                  best_index[:covering_index] = covering_index
                  best_index[:explanation] = "Covered by index on (#{covering_index.join(", ")})"
                end
              end
            end
          end
        end

        best_indexes
      end

      # Distinct suggested indexes that are not covered by an existing index,
      # each merged with the queries that led to it (:queries) and the merged
      # per-query analysis (:details), sorted by table and columns.
      def suggested_indexes(options = {})
        indexes = []

        (options[:suggested_indexes_by_query] || suggested_indexes_by_query(options)).select { |_s, i| i[:found] && !i[:covering_index] }.group_by { |_s, i| i[:index] }.each do |index, group|
          details = {}
          group.map(&:second).each do |g|
            details = details.except(:index).deep_merge(g)
          end
          indexes << index.merge(queries: group.map(&:first), details: details)
        end

        indexes.sort_by { |i| [i[:table], i[:columns]] }
      end

      # Prints every suggested index and, when options[:create] is set,
      # creates it with CREATE INDEX CONCURRENTLY.
      #
      # NOTE(review): index[:using] is not consulted here, and the gist
      # suggestion built in best_index_helper puts "<column> gist_trgm_ops"
      # in :columns, which is then quoted as a single identifier -
      # presumably producing an invalid statement; confirm before relying on
      # :create for LIKE/ILIKE suggestions.
      def autoindex(options = {})
        suggested_indexes.each do |index|
          p index
          if options[:create]
            connection.execute("CREATE INDEX CONCURRENTLY ON #{quote_table_name(index[:table])} (#{index[:columns].map { |c| quote_table_name(c) }.join(",")})")
          end
        end
      end

      # Runs autoindex against every configured database.
      def autoindex_all(options = {})
        config["databases"].keys.each do |database|
          with(database) do
            puts "Autoindexing #{database}..."
            autoindex(options)
          end
        end
      end

      # Analysis hash for a single statement (see best_index_helper).
      def best_index(statement, _options = {})
        best_index_helper([statement])[statement]
      end

      private

      # Maps each statement to an analysis hash. :found tells whether an
      # index is suggested; when it is, :index holds {table:, columns:} (plus
      # using: "gist" for LIKE/ILIKE). Otherwise :explanation usually says
      # why not. :structure, :rows, :row_estimates and :row_progression are
      # filled in as far as the analysis got.
      def best_index_helper(statements)
        indexes = {}

        # see if this is a query we understand and can use
        parts = {}
        statements.each do |statement|
          parts[statement] = best_index_structure(statement)
        end

        # get stats about columns for relevant tables
        # (structures that failed to parse carry no table, hence the compact)
        tables = parts.values.map { |t| t[:table] }.compact.uniq
        # TODO get schema from query structure, then try search path
        schema = connection_model.connection_config[:schema] || "public"
        if tables.any?
          row_stats = Hash[table_stats(table: tables, schema: schema).map { |i| [i["table"], i["reltuples"]] }]
          col_stats = column_stats(table: tables, schema: schema).group_by { |i| i["table"] }
        end

        # find best index based on query structure and column stats
        parts.each do |statement, structure|
          index = {found: false}

          if structure[:error]
            index[:explanation] = structure[:error]
          elsif structure[:table].start_with?("pg_")
            index[:explanation] = "System table"
          else
            index[:structure] = structure

            table = structure[:table]
            where = structure[:where].uniq
            sort = structure[:sort]

            total_rows = row_stats[table].to_i
            index[:rows] = total_rows

            ranks = Hash[col_stats[table].to_a.map { |r| [r["column"], r] }]
            columns = (where + sort).map { |c| c[:column] }.uniq

            if columns.any?
              if columns.all? { |c| ranks[c] }
                # keep sort columns only up to and including the first DESC one
                first_desc = sort.index { |c| c[:direction] == "desc" }
                sort = sort.first(first_desc + 1) if first_desc
                # most selective conditions first, then the sort columns
                where = where.sort_by { |c| [row_estimates(ranks[c[:column]], total_rows, total_rows, c[:op]), c[:column]] } + sort

                index[:row_estimates] = Hash[where.map { |c| ["#{c[:column]} (#{c[:op] || "sort"})", row_estimates(ranks[c[:column]], total_rows, total_rows, c[:op]).round] }]

                # no index needed if less than 500 rows
                if total_rows >= 500

                  if ["~~", "~~*"].include?(where.first[:op])
                    index[:found] = true
                    index[:row_progression] = [total_rows, index[:row_estimates].values.first]
                    index[:index] = {table: table, columns: ["#{where.first[:column]} gist_trgm_ops"], using: "gist"}
                  else
                    # if most values are unique, no need to index others
                    rows_left = total_rows
                    final_where = []
                    prev_rows_left = [rows_left]
                    where.reject { |c| ["~~", "~~*"].include?(c[:op]) }.each do |c|
                      next if final_where.include?(c[:column])
                      final_where << c[:column]
                      rows_left = row_estimates(ranks[c[:column]], total_rows, rows_left, c[:op])
                      prev_rows_left << rows_left
                      if rows_left < 50 || final_where.size >= 2 || [">", ">=", "<", "<=", "~~", "~~*", "BETWEEN"].include?(c[:op])
                        break
                      end
                    end

                    index[:row_progression] = prev_rows_left.map(&:round)

                    # if the last indexes don't give us much, don't include
                    prev_rows_left.reverse!
                    (prev_rows_left.size - 1).times do |i|
                      if prev_rows_left[i] > prev_rows_left[i + 1] * 0.3
                        final_where.pop
                      else
                        break
                      end
                    end

                    if final_where.any?
                      index[:found] = true
                      index[:index] = {table: table, columns: final_where}
                    end
                  end
                else
                  index[:explanation] = "No index needed if less than 500 rows"
                end
              else
                index[:explanation] = "Stats not found"
              end
            else
              index[:explanation] = "No columns to index"
            end
          end

          indexes[statement] = index
        end

        indexes
      end

      # Parses a statement into {table:, where:, sort:}, or {error: "..."}
      # when it cannot be parsed or has a shape this code does not handle.
      def best_index_structure(statement)
        begin
          tree = PgQuery.parse(statement).parsetree
        rescue PgQuery::ParseError
          return {error: "Parse error"}
        end
        return {error: "Unknown structure"} unless tree.size == 1

        tree = tree.first
        table = parse_table(tree) rescue nil
        unless table
          error =
            case tree.keys.first
            when "INSERT INTO"
              "INSERT statement"
            when "SET"
              "SET statement"
            when "SELECT"
              if (tree["SELECT"]["fromClause"].first["JOINEXPR"] rescue false)
                "JOIN not supported yet"
              end
            end
          return {error: error || "Unknown structure"}
        end

        select = tree["SELECT"] || tree["DELETE FROM"] || tree["UPDATE"]
        # nil (from an unsupported condition or a failed lookup) means the
        # WHERE clause cannot be used
        where = (select["whereClause"] ? parse_where(select["whereClause"]) : []) rescue nil
        return {error: "Unknown structure"} unless where

        sort = (select["sortClause"] ? parse_sort(select["sortClause"]) : []) rescue []

        {table: table, where: where, sort: sort}
      end

      # True when `columns` is a leading prefix of `indexed_columns`.
      def index_covers?(indexed_columns, columns)
        indexed_columns.first(columns.size) == columns
      end

      # TODO better row estimation
      # http://www.postgresql.org/docs/current/static/row-estimation-examples.html
      #
      # Estimated number of rows remaining after applying `op` on a column.
      #
      # stats      - column stats row with "null_frac" and "n_distinct"
      # total_rows - rows in the table
      # rows_left  - rows remaining before this condition
      # op         - operator string, "null", "not_null", or nil for a sort
      def row_estimates(stats, total_rows, rows_left, op)
        case op
        when "null"
          rows_left * stats["null_frac"].to_f
        when "not_null"
          rows_left * (1 - stats["null_frac"].to_f)
        else
          rows_left *= (1 - stats["null_frac"].to_f)
          ret =
            if stats["n_distinct"].to_f == 0
              0
            elsif stats["n_distinct"].to_f < 0
              # negative n_distinct is treated as a fraction of the row count
              if total_rows > 0
                (-1 / stats["n_distinct"].to_f) * (rows_left / total_rows.to_f)
              else
                0
              end
            else
              rows_left / stats["n_distinct"].to_f
            end

          case op
          when ">", ">=", "<", "<=", "~~", "~~*", "BETWEEN"
            (rows_left + ret) / 10.0 # TODO better approximation
          when "<>"
            rows_left - ret
          else
            ret
          end
        end
      end

      # Table name targeted by a SELECT, DELETE or UPDATE parse tree; nil for
      # any other statement type.
      def parse_table(tree)
        case tree.keys.first
        when "SELECT"
          tree["SELECT"]["fromClause"].first["RANGEVAR"]["relname"]
        when "DELETE FROM"
          tree["DELETE FROM"]["relation"]["RANGEVAR"]["relname"]
        when "UPDATE"
          tree["UPDATE"]["relation"]["RANGEVAR"]["relname"]
        end
      end

      # TODO capture values
      #
      # Flattens a WHERE parse tree into [{column:, op:}, ...]. Returns nil
      # when any part of the condition is not supported (for example OR), so
      # callers can reject the whole clause.
      def parse_where(tree)
        aexpr = tree["AEXPR"] || tree[nil]

        if tree["BOOLEXPR"]
          if tree["BOOLEXPR"]["boolop"] == 0
            parts = tree["BOOLEXPR"]["args"].map { |v| parse_where(v) }
            # one unsupported child makes the whole conjunction unsupported;
            # never leak a nil element into the condition list
            parts.flatten(1) if parts.all?
          end
        elsif tree["AEXPR AND"]
          left = parse_where(tree["AEXPR AND"]["lexpr"])
          right = parse_where(tree["AEXPR AND"]["rexpr"])
          left + right if left && right
        elsif aexpr && ["=", "<>", ">", ">=", "<", "<=", "~~", "~~*", "BETWEEN"].include?(aexpr["name"].first)
          [{column: aexpr["lexpr"]["COLUMNREF"]["fields"].last, op: aexpr["name"].first}]
        elsif tree["AEXPR IN"] && ["=", "<>"].include?(tree["AEXPR IN"]["name"].first)
          [{column: tree["AEXPR IN"]["lexpr"]["COLUMNREF"]["fields"].last, op: tree["AEXPR IN"]["name"].first}]
        elsif tree["NULLTEST"]
          op = tree["NULLTEST"]["nulltesttype"] == 1 ? "not_null" : "null"
          [{column: tree["NULLTEST"]["arg"]["COLUMNREF"]["fields"].last, op: op}]
        end
      end

      # Converts a sort clause into [{column:, direction: "asc"|"desc"}, ...].
      def parse_sort(sort_clause)
        sort_clause.map do |v|
          {
            column: v["SORTBY"]["node"]["COLUMNREF"]["fields"].last,
            direction: v["SORTBY"]["sortby_dir"] == 2 ? "desc" : "asc"
          }
        end
      end

      # null_frac and n_distinct from pg_stats for the given schema,
      # optionally restricted to options[:table] (one name or a list).
      def column_stats(options = {})
        schema = options[:schema]
        tables = options[:table] ? Array(options[:table]) : nil
        select_all <<-SQL
          SELECT
            schemaname AS schema,
            tablename AS table,
            attname AS column,
            null_frac,
            n_distinct
          FROM
            pg_stats
          WHERE
            #{tables ? "tablename IN (#{tables.map { |t| quote(t) }.join(", ")})" : "1 = 1"}
            AND schemaname = #{quote(schema)}
          ORDER BY
            1, 2, 3
        SQL
      end
    end
  end
end