pghero_fork 2.7.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (75) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +391 -0
  3. data/CONTRIBUTING.md +42 -0
  4. data/LICENSE.txt +22 -0
  5. data/README.md +3 -0
  6. data/app/assets/images/pghero/favicon.png +0 -0
  7. data/app/assets/javascripts/pghero/Chart.bundle.js +20755 -0
  8. data/app/assets/javascripts/pghero/application.js +158 -0
  9. data/app/assets/javascripts/pghero/chartkick.js +2436 -0
  10. data/app/assets/javascripts/pghero/highlight.pack.js +2 -0
  11. data/app/assets/javascripts/pghero/jquery.js +10872 -0
  12. data/app/assets/javascripts/pghero/nouislider.js +2672 -0
  13. data/app/assets/stylesheets/pghero/application.css +514 -0
  14. data/app/assets/stylesheets/pghero/arduino-light.css +86 -0
  15. data/app/assets/stylesheets/pghero/nouislider.css +310 -0
  16. data/app/controllers/pg_hero/home_controller.rb +449 -0
  17. data/app/helpers/pg_hero/home_helper.rb +30 -0
  18. data/app/views/layouts/pg_hero/application.html.erb +68 -0
  19. data/app/views/pg_hero/home/_connections_table.html.erb +16 -0
  20. data/app/views/pg_hero/home/_live_queries_table.html.erb +51 -0
  21. data/app/views/pg_hero/home/_queries_table.html.erb +72 -0
  22. data/app/views/pg_hero/home/_query_stats_slider.html.erb +16 -0
  23. data/app/views/pg_hero/home/_suggested_index.html.erb +18 -0
  24. data/app/views/pg_hero/home/connections.html.erb +32 -0
  25. data/app/views/pg_hero/home/explain.html.erb +27 -0
  26. data/app/views/pg_hero/home/index.html.erb +518 -0
  27. data/app/views/pg_hero/home/index_bloat.html.erb +72 -0
  28. data/app/views/pg_hero/home/live_queries.html.erb +11 -0
  29. data/app/views/pg_hero/home/maintenance.html.erb +55 -0
  30. data/app/views/pg_hero/home/queries.html.erb +33 -0
  31. data/app/views/pg_hero/home/relation_space.html.erb +14 -0
  32. data/app/views/pg_hero/home/show_query.html.erb +106 -0
  33. data/app/views/pg_hero/home/space.html.erb +83 -0
  34. data/app/views/pg_hero/home/system.html.erb +34 -0
  35. data/app/views/pg_hero/home/tune.html.erb +53 -0
  36. data/config/routes.rb +32 -0
  37. data/lib/generators/pghero/config_generator.rb +13 -0
  38. data/lib/generators/pghero/query_stats_generator.rb +18 -0
  39. data/lib/generators/pghero/space_stats_generator.rb +18 -0
  40. data/lib/generators/pghero/templates/config.yml.tt +46 -0
  41. data/lib/generators/pghero/templates/query_stats.rb.tt +15 -0
  42. data/lib/generators/pghero/templates/space_stats.rb.tt +13 -0
  43. data/lib/pghero.rb +246 -0
  44. data/lib/pghero/connection.rb +5 -0
  45. data/lib/pghero/database.rb +175 -0
  46. data/lib/pghero/engine.rb +16 -0
  47. data/lib/pghero/methods/basic.rb +160 -0
  48. data/lib/pghero/methods/connections.rb +77 -0
  49. data/lib/pghero/methods/constraints.rb +30 -0
  50. data/lib/pghero/methods/explain.rb +29 -0
  51. data/lib/pghero/methods/indexes.rb +332 -0
  52. data/lib/pghero/methods/kill.rb +28 -0
  53. data/lib/pghero/methods/maintenance.rb +93 -0
  54. data/lib/pghero/methods/queries.rb +75 -0
  55. data/lib/pghero/methods/query_stats.rb +349 -0
  56. data/lib/pghero/methods/replication.rb +74 -0
  57. data/lib/pghero/methods/sequences.rb +124 -0
  58. data/lib/pghero/methods/settings.rb +37 -0
  59. data/lib/pghero/methods/space.rb +141 -0
  60. data/lib/pghero/methods/suggested_indexes.rb +329 -0
  61. data/lib/pghero/methods/system.rb +287 -0
  62. data/lib/pghero/methods/tables.rb +68 -0
  63. data/lib/pghero/methods/users.rb +87 -0
  64. data/lib/pghero/query_stats.rb +5 -0
  65. data/lib/pghero/space_stats.rb +5 -0
  66. data/lib/pghero/stats.rb +6 -0
  67. data/lib/pghero/version.rb +3 -0
  68. data/lib/tasks/pghero.rake +27 -0
  69. data/licenses/LICENSE-chart.js.txt +9 -0
  70. data/licenses/LICENSE-chartkick.js.txt +22 -0
  71. data/licenses/LICENSE-highlight.js.txt +29 -0
  72. data/licenses/LICENSE-jquery.txt +20 -0
  73. data/licenses/LICENSE-moment.txt +22 -0
  74. data/licenses/LICENSE-nouislider.txt +21 -0
  75. metadata +130 -0
@@ -0,0 +1,74 @@
1
module PgHero
  module Methods
    # Helpers that query the connected PostgreSQL server about replication.
    # Relies on select_one, select_all and server_version_num being provided
    # by the including object.
    module Replication
      # Returns the result of pg_is_in_recovery().
      # The answer is stored in @replica after the first call; defined? is
      # used (instead of ||=) so that a false answer is remembered as well.
      def replica?
        return @replica if defined?(@replica)

        @replica = select_one("SELECT pg_is_in_recovery()")
      end

      # https://www.postgresql.org/message-id/CADKbJJWz9M0swPT3oqe8f9+tfD4-F54uE6Xtkh4nERpVsQnjnw@mail.gmail.com
      #
      # Returns the replication lag in seconds as computed by the query below:
      # 0 when the server is not in recovery or has replayed everything it
      # received, otherwise the time since the last replayed transaction.
      # Returns nil when the server reports the feature as unsupported.
      def replication_lag
        with_feature_support(:replication_lag) do
          # the WAL position functions were renamed (xlog -> wal) in version 10
          wal_naming = server_version_num >= 100000
          caught_up =
            if wal_naming
              "pg_last_wal_receive_lsn() = pg_last_wal_replay_lsn()"
            else
              "pg_last_xlog_receive_location() = pg_last_xlog_replay_location()"
            end

          select_one <<-SQL
            SELECT
              CASE
                WHEN NOT pg_is_in_recovery() OR #{caught_up} THEN 0
                ELSE EXTRACT (EPOCH FROM NOW() - pg_last_xact_replay_timestamp())
              END
            AS replication_lag
          SQL
        end
      end

      # Lists rows of pg_replication_slots (slot_name, database, active).
      # Returns [] without querying for servers older than 9.4, and [] when
      # the server reports the feature as unsupported.
      def replication_slots
        return [] if server_version_num < 90400

        with_feature_support(:replication_slots, []) do
          select_all <<-SQL
            SELECT
              slot_name,
              database,
              active
            FROM pg_replication_slots
          SQL
        end
      end

      # True when pg_stat_replication has at least one row; false when the
      # server reports the feature as unsupported.
      def replicating?
        with_feature_support(:replicating?, false) do
          rows = select_all("SELECT state FROM pg_stat_replication")
          rows.any?
        end
      end

      private

      # Per-object cache of features known to be unsupported
      # (cache_key => false).
      def feature_support
        @feature_support ||= {}
      end

      # Runs the block and returns its value. If the block raises an
      # ActiveRecord::StatementInvalid whose message starts with
      # "PG::FeatureNotSupported:", the failure is remembered under cache_key
      # and +default+ is returned; later calls with the same key return
      # +default+ without running the block. Any other StatementInvalid is
      # re-raised.
      def with_feature_support(cache_key, default = nil)
        # cache feature support to minimize errors in logs
        return default if feature_support[cache_key] == false

        yield
      rescue ActiveRecord::StatementInvalid => e
        raise unless e.message.start_with?("PG::FeatureNotSupported:")

        feature_support[cache_key] = false
        default
      end
    end
  end
end
@@ -0,0 +1,124 @@
1
module PgHero
  module Methods
    # Inspects columns whose default is a nextval(...) expression and reports
    # how close their sequences are to the column type's maximum.
    # Relies on select_all, quote_ident and unquote from the including object.
    module Sequences
      # Returns one hash per column that has a nextval(...) default, with
      # :table_schema, :table, :column, :column_type, :max_value, :schema,
      # :sequence, :readable and (for readable sequences) :last_value.
      # Columns whose default could not be parsed keep :default_value and have
      # a nil :sequence. The result is sorted by sequence name.
      def sequences
        # get columns with default values
        # use pg_get_expr to get correct default value
        # it's what information_schema.columns uses
        # also, exclude temporary tables to prevent error
        # when accessing across sessions
        sequences = select_all <<-SQL
          SELECT
            n.nspname AS table_schema,
            c.relname AS table,
            attname AS column,
            format_type(a.atttypid, a.atttypmod) AS column_type,
            pg_get_expr(d.adbin, d.adrelid) AS default_value
          FROM
            pg_catalog.pg_attribute a
          INNER JOIN
            pg_catalog.pg_class c ON c.oid = a.attrelid
          INNER JOIN
            pg_catalog.pg_namespace n ON n.oid = c.relnamespace
          INNER JOIN
            pg_catalog.pg_attrdef d ON (a.attrelid, a.attnum) = (d.adrelid, d.adnum)
          WHERE
            NOT a.attisdropped
            AND a.attnum > 0
            AND pg_get_expr(d.adbin, d.adrelid) LIKE 'nextval%'
            AND n.nspname NOT LIKE 'pg\\_temp\\_%'
        SQL

        # parse out sequence
        sequences.each do |column|
          column[:max_value] = sequence_column_max_value(column[:column_type])

          column[:schema], column[:sequence] = parse_default_value(column[:default_value])
          column.delete(:default_value) if column[:sequence]
        end

        add_sequence_attributes(sequences)

        # read last_value for many sequences per round trip; rows come back in
        # the same order as the UNION ALL branches, so zip pairs them up
        sequences.select { |s| s[:readable] }.each_slice(1024) do |slice|
          sql = slice.map { |s| "SELECT last_value FROM #{quote_ident(s[:schema])}.#{quote_ident(s[:sequence])}" }.join(" UNION ALL ")

          select_all(sql).zip(slice) do |row, seq|
            seq[:last_value] = row[:last_value]
          end
        end

        # to_s: unparsed defaults leave :sequence nil, and nil cannot be
        # compared with String during sorting
        sequences.sort_by { |s| s[:sequence].to_s }
      end

      # Returns the sequences whose last_value / max_value ratio is above
      # +threshold+, ordered by remaining headroom (smallest first).
      # Sequences without a :last_value are ignored.
      def sequence_danger(threshold: 0.9, sequences: nil)
        sequences ||= self.sequences
        sequences.select { |s| s[:last_value] && s[:last_value] / s[:max_value].to_f > threshold }.sort_by { |s| s[:max_value] - s[:last_value] }
      end

      private

      # Largest value the column type can hold. Anything that is not
      # smallint or integer is treated as bigint.
      def sequence_column_max_value(column_type)
        case column_type
        when "smallint"
          32767
        when "integer"
          2147483647
        else
          9223372036854775807
        end
      end

      # can parse
      # nextval('id_seq'::regclass)
      # nextval(('id_seq'::text)::regclass)
      #
      # Returns [schema, sequence] (schema may be nil), or [] when the
      # expression has another shape.
      def parse_default_value(default_value)
        m = /^nextval\('(.+)'\:\:regclass\)$/.match(default_value)
        m = /^nextval\(\('(.+)'\:\:text\)\:\:regclass\)$/.match(default_value) unless m
        if m
          unquote_ident(m[1])
        else
          []
        end
      end

      # Splits "schema.sequence" or "sequence" into [schema, sequence],
      # passing each part through unquote. schema is nil when absent.
      def unquote_ident(value)
        schema, seq = value.split(".")
        unless seq
          seq = schema
          schema = nil
        end
        [unquote(schema), unquote(seq)]
      end

      # adds readable attribute to all sequences
      # also adds schema if missing
      def add_sequence_attributes(sequences)
        # fetch data
        sequence_attributes = select_all <<-SQL
          SELECT
            n.nspname AS schema,
            c.relname AS sequence,
            has_sequence_privilege(c.oid, 'SELECT') AS readable
          FROM
            pg_class c
          INNER JOIN
            pg_catalog.pg_namespace n ON n.oid = c.relnamespace
          WHERE
            c.relkind = 'S'
            AND n.nspname NOT IN ('pg_catalog', 'information_schema')
        SQL

        # first populate missing schemas
        missing_schema = sequences.select { |s| s[:schema].nil? && s[:sequence] }
        if missing_schema.any?
          sequence_schemas = sequence_attributes.group_by { |s| s[:sequence] }

          missing_schema.each do |sequence|
            schemas = sequence_schemas[sequence[:sequence]] || []

            if schemas.size == 1
              sequence[:schema] = schemas[0][:schema]
            end
            # otherwise, do nothing, will be marked as unreadable
            # TODO better message for multiple schemas
          end
        end

        # then populate attributes
        readable = Hash[sequence_attributes.map { |s| [[s[:schema], s[:sequence]], s[:readable]] }]
        sequences.each do |sequence|
          sequence[:readable] = readable[[sequence[:schema], sequence[:sequence]]] || false
        end
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
module PgHero
  module Methods
    # Reads selected server configuration values with SHOW.
    # Relies on select_one and server_version_num from the including object.
    module Settings
      # Returns a hash of general tuning settings (name => SHOW result).
      # Servers at 9.5 or newer report min_wal_size / max_wal_size; older
      # servers report checkpoint_segments in the same position.
      def settings
        wal_sizing =
          if server_version_num >= 90500
            %i(min_wal_size max_wal_size)
          else
            %i(checkpoint_segments)
          end

        leading = %i(max_connections shared_buffers effective_cache_size work_mem maintenance_work_mem)
        trailing = %i(checkpoint_completion_target wal_buffers default_statistics_target)

        fetch_settings(leading + wal_sizing + trailing)
      end

      # Returns a hash of autovacuum-related settings (name => SHOW result).
      def autovacuum_settings
        fetch_settings(
          %i(
            autovacuum autovacuum_max_workers autovacuum_vacuum_cost_limit
            autovacuum_vacuum_scale_factor autovacuum_analyze_scale_factor
          )
        )
      end

      # Returns a hash holding vacuum_cost_limit (name => SHOW result).
      def vacuum_settings
        fetch_settings(%i(vacuum_cost_limit))
      end

      private

      # Issues one SHOW per name and collects the answers, keyed by name in
      # the order given.
      def fetch_settings(names)
        names.each_with_object({}) do |name, found|
          found[name] = select_one("SHOW #{name}")
        end
      end
    end
  end
end
@@ -0,0 +1,141 @@
1
module PgHero
  module Methods
    # Disk usage reporting plus capture/cleanup of historical space stats in
    # the pghero_space_stats table. Relies on select_one, select_all_size,
    # select_all_stats, insert_stats, quote, id and table_exists? from the
    # including object.
    module Space
      # Size of the current database, formatted by PgHero.pretty_size.
      def database_size
        bytes = select_one("SELECT pg_database_size(current_database())")
        PgHero.pretty_size(bytes)
      end

      # Tables and indexes outside the system and toast schemas with their
      # pg_table_size, largest first (ties ordered by relation name).
      def relation_sizes
        sql = <<-SQL
          SELECT
            n.nspname AS schema,
            c.relname AS relation,
            CASE WHEN c.relkind = 'r' THEN 'table' ELSE 'index' END AS type,
            pg_table_size(c.oid) AS size_bytes
          FROM
            pg_class c
          LEFT JOIN
            pg_namespace n ON n.oid = c.relnamespace
          WHERE
            n.nspname NOT IN ('pg_catalog', 'information_schema')
            AND n.nspname !~ '^pg_toast'
            AND c.relkind IN ('r', 'i')
          ORDER BY
            pg_table_size(c.oid) DESC,
            2 ASC
        SQL
        select_all_size(sql)
      end

      # Tables outside the system and toast schemas with their
      # pg_total_relation_size, largest first (ties ordered by table name).
      def table_sizes
        sql = <<-SQL
          SELECT
            n.nspname AS schema,
            c.relname AS table,
            pg_total_relation_size(c.oid) AS size_bytes
          FROM
            pg_class c
          LEFT JOIN
            pg_namespace n ON n.oid = c.relnamespace
          WHERE
            n.nspname NOT IN ('pg_catalog', 'information_schema')
            AND n.nspname !~ '^pg_toast'
            AND c.relkind = 'r'
          ORDER BY
            pg_total_relation_size(c.oid) DESC,
            2 ASC
        SQL
        select_all_size(sql)
      end

      # For every relation with captured stats in the last +days+ days,
      # returns :schema, :relation and, when the relation still appears in
      # +relation_sizes+, :growth_bytes (current size minus the earliest
      # captured size in the window).
      # Raises NotEnabled when the stats table is missing.
      def space_growth(days: 7, relation_sizes: nil)
        raise NotEnabled, "Space stats not enabled" unless space_stats_enabled?

        relation_sizes ||= self.relation_sizes
        current_bytes = relation_sizes.each_with_object({}) do |r, lookup|
          lookup[[r[:schema], r[:relation]]] = r[:size_bytes]
        end
        start_at = days.days.ago

        stats = select_all_stats <<-SQL
          WITH t AS (
            SELECT
              schema,
              relation,
              array_agg(size ORDER BY captured_at) AS sizes
            FROM
              pghero_space_stats
            WHERE
              database = #{quote(id)}
              AND captured_at >= #{quote(start_at)}
            GROUP BY
              1, 2
          )
          SELECT
            schema,
            relation,
            sizes[1] AS size_bytes
          FROM
            t
          ORDER BY
            1, 2
        SQL

        stats.each do |row|
          now_bytes = current_bytes[[row[:schema], row[:relation]]]
          row[:growth_bytes] = now_bytes - row[:size_bytes] if now_bytes
          # the starting size is only needed to compute the growth
          row.delete(:size_bytes)
        end
        stats
      end

      # Captured sizes of one relation over the last 30 days, oldest first,
      # followed by a final point holding the current size at Time.now.
      # Raises NotEnabled when the stats table is missing.
      def relation_space_stats(relation, schema: "public")
        raise NotEnabled, "Space stats not enabled" unless space_stats_enabled?

        current_bytes = relation_sizes.each_with_object({}) do |r, lookup|
          lookup[[r[:schema], r[:relation]]] = r[:size_bytes]
        end
        start_at = 30.days.ago

        stats = select_all_stats <<-SQL
          SELECT
            captured_at,
            size AS size_bytes
          FROM
            pghero_space_stats
          WHERE
            database = #{quote(id)}
            AND captured_at >= #{quote(start_at)}
            AND schema = #{quote(schema)}
            AND relation = #{quote(relation)}
          ORDER BY
            1 ASC
        SQL

        latest = {
          captured_at: Time.now,
          size_bytes: current_bytes[[schema, relation]].to_i
        }
        stats << latest
      end

      # Stores the current size of every relation in pghero_space_stats,
      # all rows sharing one timestamp. Does nothing when there are no
      # relations.
      def capture_space_stats
        now = Time.now
        columns = %w(database schema relation size captured_at)
        values = relation_sizes.map do |rs|
          [id, rs[:schema], rs[:relation], rs[:size_bytes].to_i, now]
        end
        insert_stats("pghero_space_stats", columns, values) if values.any?
      end

      # Deletes this database's captured stats older than 90 days.
      def clean_space_stats
        scope = PgHero::SpaceStats.where(database: id)
        scope.where("captured_at < ?", 90.days.ago).delete_all
      end

      # Whether the pghero_space_stats table exists.
      def space_stats_enabled?
        table_exists?("pghero_space_stats")
      end
    end
  end
end
@@ -0,0 +1,329 @@
1
module PgHero
  module Methods
    # Suggests indexes by parsing recorded queries with PgQuery and combining
    # the referenced columns with planner statistics from pg_stats.
    # Relies on query_stats, indexes, index_covers?, table_stats, quote,
    # quote_table_name, select_all, connection and connection_model from the
    # including object.
    module SuggestedIndexes
      # Truthy when PgQuery (0.9.0 or newer) is loaded and query stats are
      # enabled.
      def suggested_indexes_enabled?
        defined?(PgQuery) && Gem::Version.new(PgQuery::VERSION) >= Gem::Version.new("0.9.0") && query_stats_enabled?
      end

      # TODO clean this mess
      #
      # Returns a hash of query => best-index details (see best_index_helper).
      # Entries with a suggestion additionally get :table_indexes and, when an
      # existing index already covers the suggestion, :covering_index and an
      # :explanation.
      # Raises NotEnabled when suggested indexes are not enabled.
      def suggested_indexes_by_query(queries: nil, query_stats: nil, indexes: nil)
        raise NotEnabled, "Suggested indexes not enabled" unless suggested_indexes_enabled?

        # get most time-consuming queries
        queries ||= (query_stats || self.query_stats(historical: true, start_at: 24.hours.ago)).map { |qs| qs[:query] }

        # get best indexes for queries
        best_indexes = best_index_helper(queries)

        if best_indexes.any?
          # index type => table => list of column lists
          existing_columns = Hash.new { |hash, key| hash[key] = Hash.new { |hash2, key2| hash2[key2] = [] } }
          indexes ||= self.indexes
          indexes.group_by { |g| g[:using] }.each do |group, inds|
            inds.each do |i|
              existing_columns[group][i[:table]] << i[:columns]
            end
          end
          indexes_by_table = indexes.group_by { |i| i[:table] }

          best_indexes.each do |_query, best_index|
            next unless best_index[:found]

            index = best_index[:index]
            best_index[:table_indexes] = indexes_by_table[index[:table]].to_a

            # indexes of same type
            candidates = existing_columns[index[:using] || "btree"][index[:table]]

            if best_index[:structure][:sort].empty?
              # gist indexes without an opclass
              # (opclass is part of column name, so columns won't match if opclass present)
              candidates += existing_columns["gist"][index[:table]]

              # hash indexes work for equality
              candidates += existing_columns["hash"][index[:table]] if best_index[:structure][:where].all? { |v| v[:op] == "=" }

              # brin indexes work for all
              candidates += existing_columns["brin"][index[:table]]
            end

            covering_index = candidates.find { |e| index_covers?(e.map { |v| v.sub(/ inet_ops\z/, "") }, index[:columns]) }
            if covering_index
              best_index[:covering_index] = covering_index
              best_index[:explanation] = "Covered by index on (#{covering_index.join(", ")})"
            end
          end
        end

        best_indexes
      end

      # Returns the distinct suggested indexes that are not already covered,
      # each merged with :queries (the queries it would help) and :details,
      # sorted by table and columns. Remaining options are passed on to
      # suggested_indexes_by_query.
      def suggested_indexes(suggested_indexes_by_query: nil, **options)
        indexes = []

        (suggested_indexes_by_query || self.suggested_indexes_by_query(**options)).select { |_s, i| i[:found] && !i[:covering_index] }.group_by { |_s, i| i[:index] }.each do |index, group|
          details = {}
          group.map(&:second).each do |g|
            details = details.except(:index).deep_merge(g)
          end
          indexes << index.merge(queries: group.map(&:first), details: details)
        end

        indexes.sort_by { |i| [i[:table], i[:columns]] }
      end

      # Prints every suggested index and, with create: true, creates it with
      # CREATE INDEX CONCURRENTLY.
      def autoindex(create: false)
        suggested_indexes.each do |index|
          p index
          next unless create

          # non-btree suggestions carry their access method in :using and
          # append the operator class to the column ("col gist_trgm_ops");
          # both must reach the DDL unquoted
          using = index[:using] ? " USING #{index[:using]}" : ""
          columns = index[:columns].map do |column|
            name, opclass = index[:using] ? column.split(" ", 2) : [column, nil]
            [quote_table_name(name), opclass].compact.join(" ")
          end

          connection.execute("CREATE INDEX CONCURRENTLY ON #{quote_table_name(index[:table])}#{using} (#{columns.join(",")})")
        end
      end

      # Best-index details for a single statement (see best_index_helper).
      def best_index(statement)
        best_index_helper([statement])[statement]
      end

      private

      # Returns a hash of statement => details. details[:found] tells whether
      # an index is suggested; when it is, details[:index] holds :table,
      # :columns and (for trigram suggestions) :using. When it is not,
      # details[:explanation] may say why.
      def best_index_helper(statements)
        indexes = {}

        # see if this is a query we understand and can use
        parts = {}
        statements.each do |statement|
          parts[statement] = best_index_structure(statement)
        end

        # get stats about columns for relevant tables
        # (structures that failed to parse have no table, hence compact)
        tables = parts.values.map { |t| t[:table] }.compact.uniq
        # TODO get schema from query structure, then try search path
        schema = PgHero.connection_config(connection_model)[:schema] || "public"
        row_stats = {}
        col_stats = {}
        if tables.any?
          row_stats = Hash[table_stats(table: tables, schema: schema).map { |i| [i[:table], i[:estimated_rows]] }]
          col_stats = column_stats(table: tables, schema: schema).group_by { |i| i[:table] }
        end

        # find best index based on query structure and column stats
        parts.each do |statement, structure|
          index = {found: false}

          if structure[:error]
            index[:explanation] = structure[:error]
          elsif structure[:table].start_with?("pg_")
            index[:explanation] = "System table"
          else
            index[:structure] = structure

            table = structure[:table]
            where = structure[:where].uniq
            sort = structure[:sort]

            total_rows = row_stats[table].to_i
            index[:rows] = total_rows

            ranks = Hash[col_stats[table].to_a.map { |r| [r[:column], r] }]
            columns = (where + sort).map { |c| c[:column] }.uniq

            if columns.any?
              if columns.all? { |c| ranks[c] }
                # only keep sort columns up to and including the first DESC
                first_desc = sort.index { |c| c[:direction] == "desc" }
                sort = sort.first(first_desc + 1) if first_desc
                # most selective conditions first, then the sort columns
                where = where.sort_by { |c| [row_estimates(ranks[c[:column]], total_rows, total_rows, c[:op]), c[:column]] } + sort

                index[:row_estimates] = Hash[where.map { |c| ["#{c[:column]} (#{c[:op] || "sort"})", row_estimates(ranks[c[:column]], total_rows, total_rows, c[:op]).round] }]

                # no index needed if less than 500 rows
                if total_rows >= 500

                  if ["~~", "~~*"].include?(where.first[:op])
                    index[:found] = true
                    index[:row_progression] = [total_rows, index[:row_estimates].values.first]
                    index[:index] = {table: table, columns: ["#{where.first[:column]} gist_trgm_ops"], using: "gist"}
                  else
                    # if most values are unique, no need to index others
                    rows_left = total_rows
                    final_where = []
                    prev_rows_left = [rows_left]
                    where.reject { |c| ["~~", "~~*"].include?(c[:op]) }.each do |c|
                      next if final_where.include?(c[:column])
                      final_where << c[:column]
                      rows_left = row_estimates(ranks[c[:column]], total_rows, rows_left, c[:op])
                      prev_rows_left << rows_left
                      if rows_left < 50 || final_where.size >= 2 || [">", ">=", "<", "<=", "~~", "~~*", "BETWEEN"].include?(c[:op])
                        break
                      end
                    end

                    index[:row_progression] = prev_rows_left.map(&:round)

                    # if the last indexes don't give us much, don't include
                    prev_rows_left.reverse!
                    (prev_rows_left.size - 1).times do |i|
                      if prev_rows_left[i] > prev_rows_left[i + 1] * 0.3
                        final_where.pop
                      else
                        break
                      end
                    end

                    if final_where.any?
                      index[:found] = true
                      index[:index] = {table: table, columns: final_where}
                    end
                  end
                else
                  index[:explanation] = "No index needed if less than 500 rows"
                end
              else
                index[:explanation] = "Stats not found"
              end
            else
              index[:explanation] = "No columns to index"
            end
          end

          indexes[statement] = index
        end

        indexes
      end

      # Parses a statement into {table:, where:, sort:}, or {error: "..."}
      # when it is too long, cannot be parsed or has an unsupported shape.
      def best_index_structure(statement)
        return {error: "Too large"} if statement.to_s.length > 10000

        begin
          tree = PgQuery.parse(statement).tree
        rescue PgQuery::ParseError
          return {error: "Parse error"}
        end
        return {error: "Unknown structure"} unless tree.size == 1

        tree = tree.first

        # pg_query 1.0.0
        tree = tree["RawStmt"]["stmt"] if tree["RawStmt"]

        table = parse_table(tree) rescue nil
        unless table
          error =
            case tree.keys.first
            when "InsertStmt"
              "INSERT statement"
            when "VariableSetStmt"
              "SET statement"
            when "SelectStmt"
              if (tree["SelectStmt"]["fromClause"].first["JoinExpr"] rescue false)
                "JOIN not supported yet"
              end
            end
          return {error: error || "Unknown structure"}
        end

        select = tree.values.first
        # a where clause we cannot interpret makes the whole statement unusable
        where = (select["whereClause"] ? parse_where(select["whereClause"]) : []) rescue nil
        return {error: "Unknown structure"} unless where

        # an unreadable sort clause is simply ignored
        sort = (select["sortClause"] ? parse_sort(select["sortClause"]) : []) rescue []

        {table: table, where: where, sort: sort}
      end

      # TODO better row estimation
      # https://www.postgresql.org/docs/current/static/row-estimation-examples.html
      #
      # Estimates how many of +rows_left+ rows remain after applying +op+ to
      # a column with the given pg_stats row (:null_frac, :n_distinct).
      # +total_rows+ is the table's estimated row count.
      def row_estimates(stats, total_rows, rows_left, op)
        case op
        when "null"
          rows_left * stats[:null_frac].to_f
        when "not_null"
          rows_left * (1 - stats[:null_frac].to_f)
        else
          rows_left *= (1 - stats[:null_frac].to_f)
          ret =
            if stats[:n_distinct].to_f == 0
              0
            elsif stats[:n_distinct].to_f < 0
              # the negative branch scales rows-per-value by the remaining share of rows
              if total_rows > 0
                (-1 / stats[:n_distinct].to_f) * (rows_left / total_rows.to_f)
              else
                0
              end
            else
              rows_left / stats[:n_distinct].to_f
            end

          case op
          when ">", ">=", "<", "<=", "~~", "~~*", "BETWEEN"
            (rows_left + ret) / 10.0 # TODO better approximation
          when "<>"
            rows_left - ret
          else
            ret
          end
        end
      end

      # Table name targeted by a SELECT, DELETE or UPDATE parse tree;
      # nil for other statement types.
      def parse_table(tree)
        case tree.keys.first
        when "SelectStmt"
          tree["SelectStmt"]["fromClause"].first["RangeVar"]["relname"]
        when "DeleteStmt"
          tree["DeleteStmt"]["relation"]["RangeVar"]["relname"]
        when "UpdateStmt"
          tree["UpdateStmt"]["relation"]["RangeVar"]["relname"]
        end
      end

      # TODO capture values
      #
      # Flattens a where clause into [{column:, op:}, ...]. BoolExpr nodes
      # with boolop 0 are descended into; any other BoolExpr and unsupported
      # expressions raise "Not Implemented".
      def parse_where(tree)
        aexpr = tree["A_Expr"]

        if tree["BoolExpr"]
          if tree["BoolExpr"]["boolop"] == 0
            tree["BoolExpr"]["args"].flat_map { |v| parse_where(v) }
          else
            raise "Not Implemented"
          end
        elsif aexpr && ["=", "<>", ">", ">=", "<", "<=", "~~", "~~*", "BETWEEN"].include?(aexpr["name"].first["String"]["str"])
          [{column: aexpr["lexpr"]["ColumnRef"]["fields"].last["String"]["str"], op: aexpr["name"].first["String"]["str"]}]
        elsif tree["NullTest"]
          op = tree["NullTest"]["nulltesttype"] == 1 ? "not_null" : "null"
          [{column: tree["NullTest"]["arg"]["ColumnRef"]["fields"].last["String"]["str"], op: op}]
        else
          raise "Not Implemented"
        end
      end

      # Maps a sort clause to [{column:, direction: "asc" | "desc"}, ...];
      # sortby_dir 2 is reported as "desc", everything else as "asc".
      def parse_sort(sort_clause)
        sort_clause.map do |v|
          {
            column: v["SortBy"]["node"]["ColumnRef"]["fields"].last["String"]["str"],
            direction: v["SortBy"]["sortby_dir"] == 2 ? "desc" : "asc"
          }
        end
      end

      # Rows from pg_stats (:schema, :table, :column, :null_frac,
      # :n_distinct) for the given schema, optionally limited to one or more
      # tables.
      def column_stats(schema: nil, table: nil)
        select_all <<-SQL
          SELECT
            schemaname AS schema,
            tablename AS table,
            attname AS column,
            null_frac,
            n_distinct
          FROM
            pg_stats
          WHERE
            schemaname = #{quote(schema)}
            #{table ? "AND tablename IN (#{Array(table).map { |t| quote(t) }.join(", ")})" : ""}
          ORDER BY
            1, 2, 3
        SQL
      end
    end
  end
end