pghero_fork 2.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +391 -0
  3. data/CONTRIBUTING.md +42 -0
  4. data/LICENSE.txt +22 -0
  5. data/README.md +3 -0
  6. data/app/assets/images/pghero/favicon.png +0 -0
  7. data/app/assets/javascripts/pghero/Chart.bundle.js +20755 -0
  8. data/app/assets/javascripts/pghero/application.js +158 -0
  9. data/app/assets/javascripts/pghero/chartkick.js +2436 -0
  10. data/app/assets/javascripts/pghero/highlight.pack.js +2 -0
  11. data/app/assets/javascripts/pghero/jquery.js +10872 -0
  12. data/app/assets/javascripts/pghero/nouislider.js +2672 -0
  13. data/app/assets/stylesheets/pghero/application.css +514 -0
  14. data/app/assets/stylesheets/pghero/arduino-light.css +86 -0
  15. data/app/assets/stylesheets/pghero/nouislider.css +310 -0
  16. data/app/controllers/pg_hero/home_controller.rb +449 -0
  17. data/app/helpers/pg_hero/home_helper.rb +30 -0
  18. data/app/views/layouts/pg_hero/application.html.erb +68 -0
  19. data/app/views/pg_hero/home/_connections_table.html.erb +16 -0
  20. data/app/views/pg_hero/home/_live_queries_table.html.erb +51 -0
  21. data/app/views/pg_hero/home/_queries_table.html.erb +72 -0
  22. data/app/views/pg_hero/home/_query_stats_slider.html.erb +16 -0
  23. data/app/views/pg_hero/home/_suggested_index.html.erb +18 -0
  24. data/app/views/pg_hero/home/connections.html.erb +32 -0
  25. data/app/views/pg_hero/home/explain.html.erb +27 -0
  26. data/app/views/pg_hero/home/index.html.erb +518 -0
  27. data/app/views/pg_hero/home/index_bloat.html.erb +72 -0
  28. data/app/views/pg_hero/home/live_queries.html.erb +11 -0
  29. data/app/views/pg_hero/home/maintenance.html.erb +55 -0
  30. data/app/views/pg_hero/home/queries.html.erb +33 -0
  31. data/app/views/pg_hero/home/relation_space.html.erb +14 -0
  32. data/app/views/pg_hero/home/show_query.html.erb +106 -0
  33. data/app/views/pg_hero/home/space.html.erb +83 -0
  34. data/app/views/pg_hero/home/system.html.erb +34 -0
  35. data/app/views/pg_hero/home/tune.html.erb +53 -0
  36. data/config/routes.rb +32 -0
  37. data/lib/generators/pghero/config_generator.rb +13 -0
  38. data/lib/generators/pghero/query_stats_generator.rb +18 -0
  39. data/lib/generators/pghero/space_stats_generator.rb +18 -0
  40. data/lib/generators/pghero/templates/config.yml.tt +46 -0
  41. data/lib/generators/pghero/templates/query_stats.rb.tt +15 -0
  42. data/lib/generators/pghero/templates/space_stats.rb.tt +13 -0
  43. data/lib/pghero.rb +246 -0
  44. data/lib/pghero/connection.rb +5 -0
  45. data/lib/pghero/database.rb +175 -0
  46. data/lib/pghero/engine.rb +16 -0
  47. data/lib/pghero/methods/basic.rb +160 -0
  48. data/lib/pghero/methods/connections.rb +77 -0
  49. data/lib/pghero/methods/constraints.rb +30 -0
  50. data/lib/pghero/methods/explain.rb +29 -0
  51. data/lib/pghero/methods/indexes.rb +332 -0
  52. data/lib/pghero/methods/kill.rb +28 -0
  53. data/lib/pghero/methods/maintenance.rb +93 -0
  54. data/lib/pghero/methods/queries.rb +75 -0
  55. data/lib/pghero/methods/query_stats.rb +349 -0
  56. data/lib/pghero/methods/replication.rb +74 -0
  57. data/lib/pghero/methods/sequences.rb +124 -0
  58. data/lib/pghero/methods/settings.rb +37 -0
  59. data/lib/pghero/methods/space.rb +141 -0
  60. data/lib/pghero/methods/suggested_indexes.rb +329 -0
  61. data/lib/pghero/methods/system.rb +287 -0
  62. data/lib/pghero/methods/tables.rb +68 -0
  63. data/lib/pghero/methods/users.rb +87 -0
  64. data/lib/pghero/query_stats.rb +5 -0
  65. data/lib/pghero/space_stats.rb +5 -0
  66. data/lib/pghero/stats.rb +6 -0
  67. data/lib/pghero/version.rb +3 -0
  68. data/lib/tasks/pghero.rake +27 -0
  69. data/licenses/LICENSE-chart.js.txt +9 -0
  70. data/licenses/LICENSE-chartkick.js.txt +22 -0
  71. data/licenses/LICENSE-highlight.js.txt +29 -0
  72. data/licenses/LICENSE-jquery.txt +20 -0
  73. data/licenses/LICENSE-moment.txt +22 -0
  74. data/licenses/LICENSE-nouislider.txt +21 -0
  75. metadata +130 -0
@@ -0,0 +1,74 @@
1
module PgHero
  module Methods
    # Replication helpers: replica detection, replay lag, replication slots
    # and whether anything is currently listed in pg_stat_replication.
    module Replication
      # Result of pg_is_in_recovery() for this connection.
      # The first answer (even a falsy one) is memoized in @replica.
      def replica?
        return @replica if defined?(@replica)

        @replica = select_one("SELECT pg_is_in_recovery()")
      end

      # https://www.postgresql.org/message-id/CADKbJJWz9M0swPT3oqe8f9+tfD4-F54uE6Xtkh4nERpVsQnjnw@mail.gmail.com
      #
      # Replay lag in seconds: 0 when the server is not in recovery or when
      # the received and replayed positions are equal, otherwise the time
      # since the last replayed transaction. Returns nil when the server
      # reports the feature as unsupported.
      def replication_lag
        with_feature_support(:replication_lag) do
          # the position functions have different names from version 100000 on
          receive_fn, replay_fn =
            if server_version_num >= 100000
              %w(pg_last_wal_receive_lsn pg_last_wal_replay_lsn)
            else
              %w(pg_last_xlog_receive_location pg_last_xlog_replay_location)
            end
          lag_condition = "#{receive_fn}() = #{replay_fn}()"

          select_one <<-SQL
            SELECT
              CASE
                WHEN NOT pg_is_in_recovery() OR #{lag_condition} THEN 0
                ELSE EXTRACT (EPOCH FROM NOW() - pg_last_xact_replay_timestamp())
              END
            AS replication_lag
          SQL
        end
      end

      # Rows of pg_replication_slots (slot_name, database, active).
      # Returns [] for server versions below 90400 and when the server
      # reports the feature as unsupported.
      def replication_slots
        return [] unless server_version_num >= 90400

        with_feature_support(:replication_slots, []) do
          select_all <<-SQL
            SELECT
              slot_name,
              database,
              active
            FROM pg_replication_slots
          SQL
        end
      end

      # true when pg_stat_replication has at least one row; false when it is
      # empty or the server reports the feature as unsupported.
      def replicating?
        with_feature_support(:replicating?, false) do
          rows = select_all("SELECT state FROM pg_stat_replication")
          rows.any?
        end
      end

      private

      # Per-instance cache of feature keys known to be unsupported.
      def feature_support
        @feature_support ||= {}
      end

      # Runs the block unless cache_key is already known to be unsupported.
      # A StatementInvalid whose message starts with
      # "PG::FeatureNotSupported:" marks the key as unsupported and yields
      # +default+; any other StatementInvalid is re-raised.
      def with_feature_support(cache_key, default = nil)
        # cache feature support to minimize errors in logs
        return default if feature_support[cache_key] == false

        yield
      rescue ActiveRecord::StatementInvalid => e
        raise unless e.message.start_with?("PG::FeatureNotSupported:")

        feature_support[cache_key] = false
        default
      end
    end
  end
end
@@ -0,0 +1,124 @@
1
module PgHero
  module Methods
    # Inspection of sequences that back column defaults (nextval(...)).
    module Sequences
      # Returns one hash per column whose default starts with "nextval",
      # sorted by sequence name. Each hash carries :table_schema, :table,
      # :column, :column_type, :max_value, :schema, :sequence, :readable and,
      # for readable sequences, :last_value. When the default expression
      # cannot be parsed, :sequence/:schema are nil and :default_value is
      # kept so the caller can still see the raw expression.
      def sequences
        # get columns with default values
        # use pg_get_expr to get correct default value
        # it's what information_schema.columns uses
        # also, exclude temporary tables to prevent error
        # when accessing across sessions
        seqs = select_all <<-SQL
          SELECT
            n.nspname AS table_schema,
            c.relname AS table,
            attname AS column,
            format_type(a.atttypid, a.atttypmod) AS column_type,
            pg_get_expr(d.adbin, d.adrelid) AS default_value
          FROM
            pg_catalog.pg_attribute a
          INNER JOIN
            pg_catalog.pg_class c ON c.oid = a.attrelid
          INNER JOIN
            pg_catalog.pg_namespace n ON n.oid = c.relnamespace
          INNER JOIN
            pg_catalog.pg_attrdef d ON (a.attrelid, a.attnum) = (d.adrelid, d.adnum)
          WHERE
            NOT a.attisdropped
            AND a.attnum > 0
            AND pg_get_expr(d.adbin, d.adrelid) LIKE 'nextval%'
            AND n.nspname NOT LIKE 'pg\\_temp\\_%'
        SQL

        # parse out sequence
        seqs.each do |column|
          # the column type (not the sequence) is what overflows first;
          # anything that is not smallint/integer is treated as bigint
          column[:max_value] =
            case column[:column_type]
            when "smallint" then 32767
            when "integer" then 2147483647
            else 9223372036854775807
            end

          column[:schema], column[:sequence] = parse_default_value(column[:default_value])
          column.delete(:default_value) if column[:sequence]
        end

        add_sequence_attributes(seqs)

        # read last_value in batches; rows are paired with the slice by position
        seqs.select { |s| s[:readable] }.each_slice(1024) do |slice|
          sql = slice.map { |s| "SELECT last_value FROM #{quote_ident(s[:schema])}.#{quote_ident(s[:sequence])}" }.join(" UNION ALL ")

          select_all(sql).zip(slice) do |row, seq|
            seq[:last_value] = row[:last_value]
          end
        end

        # to_s: entries whose default could not be parsed have a nil
        # sequence, and nil cannot be compared with String
        seqs.sort_by { |s| s[:sequence].to_s }
      end

      # Sequences whose last_value / max_value ratio exceeds +threshold+,
      # ordered by remaining headroom (smallest first). Entries without a
      # last_value are skipped. Pass +sequences+ to reuse a previous result.
      def sequence_danger(threshold: 0.9, sequences: nil)
        sequences ||= self.sequences
        sequences.select { |s| s[:last_value] && s[:last_value] / s[:max_value].to_f > threshold }.sort_by { |s| s[:max_value] - s[:last_value] }
      end

      private

      # can parse
      # nextval('id_seq'::regclass)
      # nextval(('id_seq'::text)::regclass)
      #
      # Returns [schema, sequence] (schema may be nil), or [] when the
      # expression has neither of the shapes above.
      def parse_default_value(default_value)
        m = /^nextval\('(.+)'\:\:regclass\)$/.match(default_value)
        m = /^nextval\(\('(.+)'\:\:text\)\:\:regclass\)$/.match(default_value) unless m
        if m
          unquote_ident(m[1])
        else
          []
        end
      end

      # Splits an optionally schema-qualified name into [schema, sequence],
      # passing each part through unquote. Dots inside double-quoted parts
      # ("my.schema".seq) do not count as separators.
      def unquote_ident(value)
        schema, seq = value.scan(/"(?:[^"]|"")*"|[^.]+/)
        unless seq
          seq = schema
          schema = nil
        end
        [unquote(schema), unquote(seq)]
      end

      # adds readable attribute to all sequences
      # also adds schema if missing
      def add_sequence_attributes(sequences)
        # fetch data
        sequence_attributes = select_all <<-SQL
          SELECT
            n.nspname AS schema,
            c.relname AS sequence,
            has_sequence_privilege(c.oid, 'SELECT') AS readable
          FROM
            pg_class c
          INNER JOIN
            pg_catalog.pg_namespace n ON n.oid = c.relnamespace
          WHERE
            c.relkind = 'S'
            AND n.nspname NOT IN ('pg_catalog', 'information_schema')
        SQL

        # first populate missing schemas
        missing_schema = sequences.select { |s| s[:schema].nil? && s[:sequence] }
        if missing_schema.any?
          sequence_schemas = sequence_attributes.group_by { |s| s[:sequence] }

          missing_schema.each do |sequence|
            schemas = sequence_schemas[sequence[:sequence]] || []

            # only unambiguous names get a schema
            if schemas.size == 1
              sequence[:schema] = schemas[0][:schema]
            end
            # otherwise, do nothing, will be marked as unreadable
            # TODO better message for multiple schemas
          end
        end

        # then populate attributes
        readable = Hash[sequence_attributes.map { |s| [[s[:schema], s[:sequence]], s[:readable]] }]
        sequences.each do |sequence|
          sequence[:readable] = readable[[sequence[:schema], sequence[:sequence]]] || false
        end
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
module PgHero
  module Methods
    # Readers for selected server configuration parameters (via SHOW).
    module Settings
      # Hash of general tuning parameters, keyed by symbol. From server
      # version 90500 on, min_wal_size/max_wal_size are read instead of
      # checkpoint_segments.
      def settings
        wal_names =
          if server_version_num >= 90500
            %i(min_wal_size max_wal_size)
          else
            %i(checkpoint_segments)
          end

        leading = %i(max_connections shared_buffers effective_cache_size work_mem maintenance_work_mem)
        trailing = %i(checkpoint_completion_target wal_buffers default_statistics_target)

        fetch_settings(leading + wal_names + trailing)
      end

      # Hash of the autovacuum-related parameters.
      def autovacuum_settings
        fetch_settings %i(autovacuum autovacuum_max_workers autovacuum_vacuum_cost_limit autovacuum_vacuum_scale_factor autovacuum_analyze_scale_factor)
      end

      # Hash with the manual vacuum cost limit.
      def vacuum_settings
        fetch_settings %i(vacuum_cost_limit)
      end

      private

      # Runs one SHOW per name and returns {name => value} in the given order.
      def fetch_settings(names)
        names.each_with_object({}) do |name, result|
          result[name] = select_one("SHOW #{name}")
        end
      end
    end
  end
end
@@ -0,0 +1,141 @@
1
module PgHero
  module Methods
    # Disk-usage reporting: live sizes plus the history stored in the
    # pghero_space_stats table.
    module Space
      # Size of the current database, formatted by PgHero.pretty_size.
      def database_size
        bytes = select_one("SELECT pg_database_size(current_database())")
        PgHero.pretty_size(bytes)
      end

      # Tables and indexes outside the system and toast schemas with their
      # pg_table_size, largest first.
      def relation_sizes
        select_all_size <<-SQL
          SELECT
            n.nspname AS schema,
            c.relname AS relation,
            CASE WHEN c.relkind = 'r' THEN 'table' ELSE 'index' END AS type,
            pg_table_size(c.oid) AS size_bytes
          FROM
            pg_class c
          LEFT JOIN
            pg_namespace n ON n.oid = c.relnamespace
          WHERE
            n.nspname NOT IN ('pg_catalog', 'information_schema')
            AND n.nspname !~ '^pg_toast'
            AND c.relkind IN ('r', 'i')
          ORDER BY
            pg_table_size(c.oid) DESC,
            2 ASC
        SQL
      end

      # Tables outside the system and toast schemas with their
      # pg_total_relation_size, largest first.
      def table_sizes
        select_all_size <<-SQL
          SELECT
            n.nspname AS schema,
            c.relname AS table,
            pg_total_relation_size(c.oid) AS size_bytes
          FROM
            pg_class c
          LEFT JOIN
            pg_namespace n ON n.oid = c.relnamespace
          WHERE
            n.nspname NOT IN ('pg_catalog', 'information_schema')
            AND n.nspname !~ '^pg_toast'
            AND c.relkind = 'r'
          ORDER BY
            pg_total_relation_size(c.oid) DESC,
            2 ASC
        SQL
      end

      # Growth per relation since the earliest capture within the last
      # +days+ days. Each row has :schema and :relation; :growth_bytes is
      # only set for relations that still appear in +relation_sizes+.
      # Raises NotEnabled when the stats table does not exist.
      def space_growth(days: 7, relation_sizes: nil)
        raise NotEnabled, "Space stats not enabled" unless space_stats_enabled?

        relation_sizes ||= self.relation_sizes
        current = relation_sizes.each_with_object({}) do |r, memo|
          memo[[r[:schema], r[:relation]]] = r[:size_bytes]
        end
        start_at = days.days.ago

        stats = select_all_stats <<-SQL
          WITH t AS (
            SELECT
              schema,
              relation,
              array_agg(size ORDER BY captured_at) AS sizes
            FROM
              pghero_space_stats
            WHERE
              database = #{quote(id)}
              AND captured_at >= #{quote(start_at)}
            GROUP BY
              1, 2
          )
          SELECT
            schema,
            relation,
            sizes[1] AS size_bytes
          FROM
            t
          ORDER BY
            1, 2
        SQL

        stats.each do |row|
          key = [row[:schema], row[:relation]]
          row[:growth_bytes] = current[key] - row[:size_bytes] if current[key]
          # the historical size was only needed to compute the delta
          row.delete(:size_bytes)
        end
        stats
      end

      # Captured sizes of one relation over the last 30 days, oldest first,
      # followed by one extra point for the current size (0 when the
      # relation is not found in relation_sizes).
      # Raises NotEnabled when the stats table does not exist.
      def relation_space_stats(relation, schema: "public")
        raise NotEnabled, "Space stats not enabled" unless space_stats_enabled?

        current = relation_sizes.each_with_object({}) do |r, memo|
          memo[[r[:schema], r[:relation]]] = r[:size_bytes]
        end
        start_at = 30.days.ago

        stats = select_all_stats <<-SQL
          SELECT
            captured_at,
            size AS size_bytes
          FROM
            pghero_space_stats
          WHERE
            database = #{quote(id)}
            AND captured_at >= #{quote(start_at)}
            AND schema = #{quote(schema)}
            AND relation = #{quote(relation)}
          ORDER BY
            1 ASC
        SQL

        stats << {
          captured_at: Time.now,
          size_bytes: current[[schema, relation]].to_i
        }
      end

      # Stores the current size of every relation under a single timestamp.
      # Nothing is inserted when there are no relations.
      def capture_space_stats
        now = Time.now
        columns = %w(database schema relation size captured_at)
        values = relation_sizes.map do |rs|
          [id, rs[:schema], rs[:relation], rs[:size_bytes].to_i, now]
        end
        insert_stats("pghero_space_stats", columns, values) if values.any?
      end

      # Deletes this database's captures older than 90 days.
      def clean_space_stats
        PgHero::SpaceStats.where(database: id).where("captured_at < ?", 90.days.ago).delete_all
      end

      # Whether the pghero_space_stats table exists.
      def space_stats_enabled?
        table_exists?("pghero_space_stats")
      end
    end
  end
end
@@ -0,0 +1,329 @@
1
module PgHero
  module Methods
    # Heuristic index suggestions derived from parsed query text and
    # pg_stats column statistics.
    module SuggestedIndexes
      # Truthy when PgQuery (>= 0.9.0) is loaded and query stats are enabled.
      def suggested_indexes_enabled?
        defined?(PgQuery) && Gem::Version.new(PgQuery::VERSION) >= Gem::Version.new("0.9.0") && query_stats_enabled?
      end

      # TODO clean this mess
      #
      # Returns {statement => suggestion}. A suggestion always has :found;
      # found suggestions additionally get :table_indexes and, when an
      # existing index already covers the suggested columns,
      # :covering_index plus an :explanation.
      # Defaults: queries come from +query_stats+ (or the last 24 hours of
      # historical stats), existing indexes from self.indexes.
      # Raises NotEnabled when suggested indexes are not enabled.
      def suggested_indexes_by_query(queries: nil, query_stats: nil, indexes: nil)
        raise NotEnabled, "Suggested indexes not enabled" unless suggested_indexes_enabled?

        # get most time-consuming queries
        queries ||= (query_stats || self.query_stats(historical: true, start_at: 24.hours.ago)).map { |qs| qs[:query] }

        # get best indexes for queries
        best_indexes = best_index_helper(queries)
        return best_indexes unless best_indexes.any?

        # existing index column lists, keyed by access method and then table
        existing_columns = Hash.new { |hash, key| hash[key] = Hash.new { |hash2, key2| hash2[key2] = [] } }
        indexes ||= self.indexes
        indexes.group_by { |g| g[:using] }.each do |group, inds|
          inds.each do |i|
            existing_columns[group][i[:table]] << i[:columns]
          end
        end
        indexes_by_table = indexes.group_by { |i| i[:table] }

        best_indexes.each do |_query, best_index|
          next unless best_index[:found]

          index = best_index[:index]
          best_index[:table_indexes] = indexes_by_table[index[:table]].to_a

          # indexes of same type
          # (kept in its own local so the +indexes+ argument is not clobbered)
          candidates = existing_columns[index[:using] || "btree"][index[:table]]

          if best_index[:structure][:sort].empty?
            # gist indexes without an opclass
            # (opclass is part of column name, so columns won't match if opclass present)
            candidates += existing_columns["gist"][index[:table]]

            # hash indexes work for equality
            candidates += existing_columns["hash"][index[:table]] if best_index[:structure][:where].all? { |v| v[:op] == "=" }

            # brin indexes work for all
            candidates += existing_columns["brin"][index[:table]]
          end

          covering_index = candidates.find { |e| index_covers?(e.map { |v| v.sub(/ inet_ops\z/, "") }, index[:columns]) }
          if covering_index
            best_index[:covering_index] = covering_index
            best_index[:explanation] = "Covered by index on (#{covering_index.join(", ")})"
          end
        end

        best_indexes
      end

      # Distinct suggested indexes that are found and not already covered,
      # sorted by table and columns. Each entry is the index hash merged
      # with :queries (the statements it helps) and :details (the merged
      # per-query suggestion data). Extra options are forwarded to
      # suggested_indexes_by_query.
      def suggested_indexes(suggested_indexes_by_query: nil, **options)
        indexes = []

        (suggested_indexes_by_query || self.suggested_indexes_by_query(**options)).select { |_s, i| i[:found] && !i[:covering_index] }.group_by { |_s, i| i[:index] }.each do |index, group|
          details = {}
          group.map(&:second).each do |g|
            details = details.except(:index).deep_merge(g)
          end
          indexes << index.merge(queries: group.map(&:first), details: details)
        end

        indexes.sort_by { |i| [i[:table], i[:columns]] }
      end

      # Prints every suggested index and, with create: true, creates it
      # concurrently. Returns the list of suggested indexes.
      def autoindex(create: false)
        suggested_indexes.each do |index|
          p index
          if create
            connection.execute(create_index_statement(index))
          end
        end
      end

      # Suggestion for a single statement (see best_index_helper).
      def best_index(statement)
        best_index_helper([statement])[statement]
      end

      private

      # Builds the CREATE INDEX statement for a suggestion. Honors the
      # access method in index[:using] and keeps a trailing
      # " gist_trgm_ops" operator class outside the quoted column name, so
      # the trigram suggestions produced by best_index_helper are valid DDL.
      def create_index_statement(index)
        using = index[:using] ? " USING #{index[:using]}" : ""
        columns = index[:columns].map do |column|
          name = column.sub(/ gist_trgm_ops\z/, "")
          opclass = column[name.length..-1] # "" or " gist_trgm_ops"
          "#{quote_table_name(name)}#{opclass}"
        end
        "CREATE INDEX CONCURRENTLY ON #{quote_table_name(index[:table])}#{using} (#{columns.join(",")})"
      end

      # Returns {statement => suggestion} for the given statements. Every
      # suggestion has :found; the other keys (:explanation, :structure,
      # :rows, :row_estimates, :row_progression, :index) depend on how far
      # the analysis got.
      def best_index_helper(statements)
        indexes = {}

        # see if this is a query we understand and can use
        parts = {}
        statements.each do |statement|
          parts[statement] = best_index_structure(statement)
        end

        # get stats about columns for relevant tables
        # (compact: structures that failed to parse have no table)
        tables = parts.values.map { |t| t[:table] }.compact.uniq
        # TODO get schema from query structure, then try search path
        schema = PgHero.connection_config(connection_model)[:schema] || "public"
        if tables.any?
          row_stats = Hash[table_stats(table: tables, schema: schema).map { |i| [i[:table], i[:estimated_rows]] }]
          col_stats = column_stats(table: tables, schema: schema).group_by { |i| i[:table] }
        end

        # find best index based on query structure and column stats
        parts.each do |statement, structure|
          index = {found: false}

          if structure[:error]
            index[:explanation] = structure[:error]
          elsif structure[:table].start_with?("pg_")
            index[:explanation] = "System table"
          else
            index[:structure] = structure

            table = structure[:table]
            where = structure[:where].uniq
            sort = structure[:sort]

            total_rows = row_stats[table].to_i
            index[:rows] = total_rows

            ranks = Hash[col_stats[table].to_a.map { |r| [r[:column], r] }]
            columns = (where + sort).map { |c| c[:column] }.uniq

            if columns.any?
              if columns.all? { |c| ranks[c] }
                # sort columns after the first descending one are ignored
                first_desc = sort.index { |c| c[:direction] == "desc" }
                sort = sort.first(first_desc + 1) if first_desc
                # most selective conditions first, then the sort columns
                where = where.sort_by { |c| [row_estimates(ranks[c[:column]], total_rows, total_rows, c[:op]), c[:column]] } + sort

                index[:row_estimates] = Hash[where.map { |c| ["#{c[:column]} (#{c[:op] || "sort"})", row_estimates(ranks[c[:column]], total_rows, total_rows, c[:op]).round] }]

                # no index needed if less than 500 rows
                if total_rows >= 500

                  if ["~~", "~~*"].include?(where.first[:op])
                    # LIKE / ILIKE as the leading condition: suggest a trigram gist index
                    index[:found] = true
                    index[:row_progression] = [total_rows, index[:row_estimates].values.first]
                    index[:index] = {table: table, columns: ["#{where.first[:column]} gist_trgm_ops"], using: "gist"}
                  else
                    # if most values are unique, no need to index others
                    rows_left = total_rows
                    final_where = []
                    prev_rows_left = [rows_left]
                    where.reject { |c| ["~~", "~~*"].include?(c[:op]) }.each do |c|
                      next if final_where.include?(c[:column])
                      final_where << c[:column]
                      rows_left = row_estimates(ranks[c[:column]], total_rows, rows_left, c[:op])
                      prev_rows_left << rows_left
                      # stop at few rows, two columns, or a range-style operator
                      if rows_left < 50 || final_where.size >= 2 || [">", ">=", "<", "<=", "~~", "~~*", "BETWEEN"].include?(c[:op])
                        break
                      end
                    end

                    index[:row_progression] = prev_rows_left.map(&:round)

                    # if the last indexes don't give us much, don't include
                    prev_rows_left.reverse!
                    (prev_rows_left.size - 1).times do |i|
                      if prev_rows_left[i] > prev_rows_left[i + 1] * 0.3
                        final_where.pop
                      else
                        break
                      end
                    end

                    if final_where.any?
                      index[:found] = true
                      index[:index] = {table: table, columns: final_where}
                    end
                  end
                else
                  index[:explanation] = "No index needed if less than 500 rows"
                end
              else
                index[:explanation] = "Stats not found"
              end
            else
              index[:explanation] = "No columns to index"
            end
          end

          indexes[statement] = index
        end

        indexes
      end

      # Parses one statement into {table:, where:, sort:}, or {error: ...}
      # when it is too long (over 10000 characters), fails to parse, or has
      # a shape the helpers below do not handle.
      def best_index_structure(statement)
        return {error: "Too large"} if statement.to_s.length > 10000

        begin
          tree = PgQuery.parse(statement).tree
        rescue PgQuery::ParseError
          return {error: "Parse error"}
        end
        return {error: "Unknown structure"} unless tree.size == 1

        tree = tree.first

        # pg_query 1.0.0
        tree = tree["RawStmt"]["stmt"] if tree["RawStmt"]

        # any error while walking the tree counts as "table not recognized"
        table = parse_table(tree) rescue nil
        unless table
          error =
            case tree.keys.first
            when "InsertStmt"
              "INSERT statement"
            when "VariableSetStmt"
              "SET statement"
            when "SelectStmt"
              if (tree["SelectStmt"]["fromClause"].first["JoinExpr"] rescue false)
                "JOIN not supported yet"
              end
            end
          return {error: error || "Unknown structure"}
        end

        select = tree.values.first
        # an unsupported WHERE clause makes the whole statement unusable
        where = (select["whereClause"] ? parse_where(select["whereClause"]) : []) rescue nil
        return {error: "Unknown structure"} unless where

        # an unsupported ORDER BY is ignored instead
        sort = (select["sortClause"] ? parse_sort(select["sortClause"]) : []) rescue []

        {table: table, where: where, sort: sort}
      end

      # TODO better row estimation
      # https://www.postgresql.org/docs/current/static/row-estimation-examples.html
      #
      # Rough number of rows remaining after applying +op+ on a column with
      # the given stats (:null_frac, :n_distinct) to +rows_left+ rows.
      def row_estimates(stats, total_rows, rows_left, op)
        case op
        when "null"
          rows_left * stats[:null_frac].to_f
        when "not_null"
          rows_left * (1 - stats[:null_frac].to_f)
        else
          rows_left *= (1 - stats[:null_frac].to_f)
          # rows matching a single value
          ret =
            if stats[:n_distinct].to_f == 0
              0
            elsif stats[:n_distinct].to_f < 0
              # negative n_distinct is handled relative to total_rows
              if total_rows > 0
                (-1 / stats[:n_distinct].to_f) * (rows_left / total_rows.to_f)
              else
                0
              end
            else
              rows_left / stats[:n_distinct].to_f
            end

          case op
          when ">", ">=", "<", "<=", "~~", "~~*", "BETWEEN"
            (rows_left + ret) / 10.0 # TODO better approximation
          when "<>"
            rows_left - ret
          else
            ret
          end
        end
      end

      # Table name for SELECT / DELETE / UPDATE trees; nil for other
      # statement types. Raises when the tree lacks the expected nodes
      # (the caller rescues that).
      def parse_table(tree)
        case tree.keys.first
        when "SelectStmt"
          tree["SelectStmt"]["fromClause"].first["RangeVar"]["relname"]
        when "DeleteStmt"
          tree["DeleteStmt"]["relation"]["RangeVar"]["relname"]
        when "UpdateStmt"
          tree["UpdateStmt"]["relation"]["RangeVar"]["relname"]
        end
      end

      # TODO capture values
      #
      # Flattens a WHERE tree into [{column:, op:}]. Handles BoolExpr nodes
      # with boolop 0 (presumably AND -- confirm against pg_query), simple
      # comparison operators, and null tests; raises "Not Implemented" for
      # everything else.
      def parse_where(tree)
        aexpr = tree["A_Expr"]

        if tree["BoolExpr"]
          if tree["BoolExpr"]["boolop"] == 0
            tree["BoolExpr"]["args"].flat_map { |v| parse_where(v) }
          else
            raise "Not Implemented"
          end
        elsif aexpr && ["=", "<>", ">", ">=", "<", "<=", "~~", "~~*", "BETWEEN"].include?(aexpr["name"].first["String"]["str"])
          [{column: aexpr["lexpr"]["ColumnRef"]["fields"].last["String"]["str"], op: aexpr["name"].first["String"]["str"]}]
        elsif tree["NullTest"]
          op = tree["NullTest"]["nulltesttype"] == 1 ? "not_null" : "null"
          [{column: tree["NullTest"]["arg"]["ColumnRef"]["fields"].last["String"]["str"], op: op}]
        else
          raise "Not Implemented"
        end
      end

      # Converts a sort clause into [{column:, direction:}]; sortby_dir 2 is
      # treated as "desc", anything else as "asc".
      def parse_sort(sort_clause)
        sort_clause.map do |v|
          {
            column: v["SortBy"]["node"]["ColumnRef"]["fields"].last["String"]["str"],
            direction: v["SortBy"]["sortby_dir"] == 2 ? "desc" : "asc"
          }
        end
      end

      # null_frac and n_distinct per column from pg_stats for the given
      # schema, optionally restricted to one table or a list of tables.
      def column_stats(schema: nil, table: nil)
        select_all <<-SQL
          SELECT
            schemaname AS schema,
            tablename AS table,
            attname AS column,
            null_frac,
            n_distinct
          FROM
            pg_stats
          WHERE
            schemaname = #{quote(schema)}
            #{table ? "AND tablename IN (#{Array(table).map { |t| quote(t) }.join(", ")})" : ""}
          ORDER BY
            1, 2, 3
        SQL
      end
    end
  end
end