pgdexter 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9f136be602609b6abb8f11ef02fa7cf18a8db33a
4
- data.tar.gz: c58324b44d32b867df2a5a127a70b4d713e2dfa2
3
+ metadata.gz: d08be58d7de049df2c11f33211c0ada165f4a363
4
+ data.tar.gz: ce17e302b12f3b7babe38de9618b76bab982b9d9
5
5
  SHA512:
6
- metadata.gz: 8f39e8a9794a937fe79a637f46e76ee71eba53e9bf3b60a734ad6af270a9236216d5182440586bde0762fd05d48be11c02bb3e620039ec02e2acce1633b81d36
7
- data.tar.gz: b870ca6f00a6bc2f6d76c49d6805d0910d4053457ca5c11ccb666de863406fcd07acf37fdf7f23fb4d89f0df6795724fe74fca601738aa47f6832edbafa03a86
6
+ metadata.gz: 7e0974f0f8c39ec8039749a75a3fb61f7613c6931db15f191eba2dda2a794cf57e8875f6aff4465474a9c608727deef08fd7a5833e16ba96e2d1d5a02e3e7f7b
7
+ data.tar.gz: 73668e8c819b4334888bec707db491453ab2cc03a39e189e451738bb7606b8fe329912e2520d6e2b6b7aeb9634a8e2200b267887257c26428bf453fde74cc54e
@@ -1,3 +1,7 @@
1
+ ## 0.1.4
2
+
3
+ - Added support for multicolumn indexes
4
+
1
5
  ## 0.1.3
2
6
 
3
7
  - Fixed error with non-lowercase columns
data/README.md CHANGED
@@ -13,12 +13,12 @@ wget https://github.com/dalibo/hypopg/archive/1.0.0.tar.gz
13
13
  tar xf 1.0.0.tar.gz
14
14
  cd hypopg-1.0.0
15
15
  make
16
- make install
16
+ make install # may need sudo
17
17
  ```
18
18
 
19
19
  > Note: If you have issues, make sure `postgresql-server-dev-*` is installed.
20
20
 
21
- Enable logging for slow queries.
21
+ Enable logging for slow queries in your Postgres config file.
22
22
 
23
23
  ```ini
24
24
  log_min_duration_statement = 10 # ms
@@ -30,6 +30,8 @@ And install the command line tool with:
30
30
  gem install pgdexter
31
31
  ```
32
32
 
33
+ The command line tool is also available as a [Linux package](guides/Linux.md).
34
+
33
35
  ## How to Use
34
36
 
35
37
  Dexter needs a connection to your database and a log file to process.
@@ -60,13 +62,21 @@ To be safe, Dexter will not create indexes unless you pass the `--create` flag.
60
62
  2017-06-25T17:52:37+00:00 Index created: 15243 ms
61
63
  ```
62
64
 
65
+ ## Single Statement Mode
66
+
67
+ You can also pass a single statement with:
68
+
69
+ ```sh
70
+ dexter <database-url> -s "SELECT * FROM ..."
71
+ ```
72
+
63
73
  ## Options
64
74
 
65
75
  Name | Description | Default
66
76
  --- | --- | ---
67
77
  exclude | prevent specific tables from being indexed | None
68
78
  interval | time to wait between processing queries, in seconds | 60
69
- log-level | `debug` gives additional info for suggested indexes<br />`debug2` gives additional info for all processed queries | info
79
+ log-level | `debug` gives additional info for suggested indexes<br />`debug2` gives additional info for processed queries | info
70
80
  log-sql | log SQL statements executed | false
71
81
  min-time | only process queries consuming a min amount of DB time, in minutes | 0
72
82
 
@@ -0,0 +1,59 @@
1
+ # Linux Packages
2
+
3
+ Distributions
4
+
5
+ - [Ubuntu 16.04 (Xenial)](#ubuntu-1604-xenial)
6
+ - [Ubuntu 14.04 (Trusty)](#ubuntu-1404-trusty)
7
+ - [Debian 8 (Jessie)](#debian-8-jessie)
8
+ - [CentOS / RHEL 7](#centos--rhel-7)
9
+ - [SUSE Linux Enterprise Server 12](#suse-linux-enterprise-server-12)
10
+
11
+ ### Ubuntu 16.04 (Xenial)
12
+
13
+ ```sh
14
+ wget -qO - https://deb.packager.io/key | sudo apt-key add -
15
+ echo "deb https://deb.packager.io/gh/pghero/dexter xenial master" | sudo tee /etc/apt/sources.list.d/dexter.list
16
+ sudo apt-get update
17
+ sudo apt-get -y install dexter
18
+ ```
19
+
20
+ ### Ubuntu 14.04 (Trusty)
21
+
22
+ ```sh
23
+ wget -qO - https://deb.packager.io/key | sudo apt-key add -
24
+ echo "deb https://deb.packager.io/gh/pghero/dexter trusty master" | sudo tee /etc/apt/sources.list.d/dexter.list
25
+ sudo apt-get update
26
+ sudo apt-get install dexter
27
+ ```
28
+
29
+ ### Debian 8 (Jessie)
30
+
31
+ ```sh
32
+ wget -qO - https://deb.packager.io/key | sudo apt-key add -
33
+ echo "deb https://deb.packager.io/gh/pghero/dexter jessie master" | sudo tee /etc/apt/sources.list.d/dexter.list
34
+ sudo apt-get update
35
+ sudo apt-get install dexter
36
+ ```
37
+
38
+ ### CentOS / RHEL 7
39
+
40
+ ```sh
41
+ sudo rpm --import https://rpm.packager.io/key
42
+ echo "[dexter]
43
+ name=Repository for pghero/dexter application.
44
+ baseurl=https://rpm.packager.io/gh/pghero/dexter/centos7/master
45
+ enabled=1" | sudo tee /etc/yum.repos.d/dexter.repo
46
+ sudo yum install dexter
47
+ ```
48
+
49
+ ### SUSE Linux Enterprise Server 12
50
+
51
+ ```sh
52
+ sudo rpm --import https://rpm.packager.io/key
53
+ sudo zypper addrepo "https://rpm.packager.io/gh/pghero/dexter/sles12/master" "dexter"
54
+ sudo zypper install dexter
55
+ ```
56
+
57
+ ## Credits
58
+
59
+ :heart: Made possible by [Packager](https://packager.io/)
@@ -31,6 +31,7 @@ Options:)
31
31
  o.array "--exclude", "prevent specific tables from being indexed"
32
32
  o.integer "--interval", "time to wait between processing queries, in seconds", default: 60
33
33
  o.float "--min-time", "only process queries that have consumed a certain amount of DB time, in minutes", default: 0
34
+ o.boolean "--log-explain", "log explain", default: false, help: false
34
35
  o.string "--log-level", "log level", default: "info"
35
36
  o.boolean "--log-sql", "log sql", default: false
36
37
  o.string "-s", "--statement", "process a single statement"
@@ -8,6 +8,7 @@ module Dexter
8
8
  @log_level = options[:log_level]
9
9
  @exclude_tables = options[:exclude]
10
10
  @log_sql = options[:log_sql]
11
+ @log_explain = options[:log_explain]
11
12
 
12
13
  create_extension
13
14
  end
@@ -31,13 +32,10 @@ module Dexter
31
32
  # analyze tables if needed
32
33
  analyze_tables(tables) if tables.any?
33
34
 
34
- # get initial costs for queries
35
- calculate_initial_cost(queries.reject(&:missing_tables))
36
-
37
- # create hypothetical indexes
38
- candidates = tables.any? ? create_hypothetical_indexes(tables) : {}
35
+ # create hypothetical indexes and explain queries
36
+ candidates = tables.any? ? create_hypothetical_indexes(queries.reject(&:missing_tables), tables) : {}
39
37
 
40
- # get new costs and see if new indexes were used
38
+ # see if new indexes were used and meet bar
41
39
  new_indexes = determine_indexes(queries, candidates)
42
40
 
43
41
  # display and create new indexes
@@ -47,18 +45,18 @@ module Dexter
47
45
  private
48
46
 
49
47
  def create_extension
50
- select_all("SET client_min_messages = warning")
51
- select_all("CREATE EXTENSION IF NOT EXISTS hypopg")
48
+ execute("SET client_min_messages = warning")
49
+ execute("CREATE EXTENSION IF NOT EXISTS hypopg")
52
50
  end
53
51
 
54
52
  def reset_hypothetical_indexes
55
- select_all("SELECT hypopg_reset()")
53
+ execute("SELECT hypopg_reset()")
56
54
  end
57
55
 
58
56
  def analyze_tables(tables)
59
57
  tables = tables.to_a.sort
60
58
 
61
- analyze_stats = select_all <<-SQL
59
+ analyze_stats = execute <<-SQL
62
60
  SELECT
63
61
  schemaname AS schema,
64
62
  relname AS table,
@@ -79,22 +77,33 @@ module Dexter
79
77
  if !last_analyzed[table] || last_analyzed[table] < Time.now - 3600
80
78
  statement = "ANALYZE #{quote_ident(table)}"
81
79
  log "Running analyze: #{statement}"
82
- select_all(statement)
80
+ execute(statement)
83
81
  end
84
82
  end
85
83
  end
86
84
 
87
- def calculate_initial_cost(queries)
85
+ def calculate_plan(queries)
88
86
  queries.each do |query|
89
87
  begin
90
- query.initial_cost = plan(query.statement)["Total Cost"]
88
+ query.plans << plan(query.statement)
89
+ if @log_explain
90
+ log "Explaining query"
91
+ puts
92
+ # Pass format to prevent ANALYZE
93
+ puts execute("EXPLAIN (FORMAT TEXT) #{safe_statement(query.statement)}").map { |r| r["QUERY PLAN"] }.join("\n")
94
+ puts
95
+ end
91
96
  rescue PG::Error
92
97
  # do nothing
93
98
  end
94
99
  end
95
100
  end
96
101
 
97
- def create_hypothetical_indexes(tables)
102
+ def create_hypothetical_indexes(queries, tables)
103
+ # get initial costs for queries
104
+ calculate_plan(queries)
105
+ explainable_queries = queries.select(&:explainable?)
106
+
98
107
  # get existing indexes
99
108
  index_set = Set.new
100
109
  indexes(tables).each do |index|
@@ -104,13 +113,20 @@ module Dexter
104
113
 
105
114
  # create hypothetical indexes
106
115
  candidates = {}
107
- columns(tables).each do |col|
108
- unless index_set.include?([col[:table], [col[:column]]])
109
- unless ["json", "jsonb"].include?(col[:type])
110
- candidates[col] = select_all("SELECT * FROM hypopg_create_index('CREATE INDEX ON #{quote_ident(col[:table])} (#{[col[:column]].map { |c| quote_ident(c) }.join(", ")})')").first["indexname"]
111
- end
112
- end
113
- end
116
+ columns_by_table = columns(tables).group_by { |c| c[:table] }
117
+
118
+ # create single column indexes
119
+ create_hypothetical_indexes_helper(columns_by_table, 1, index_set, candidates)
120
+
121
+ # get next round of costs
122
+ calculate_plan(explainable_queries)
123
+
124
+ # create multicolumn indexes
125
+ create_hypothetical_indexes_helper(columns_by_table, 2, index_set, candidates)
126
+
127
+ # get next round of costs
128
+ calculate_plan(explainable_queries)
129
+
114
130
  candidates
115
131
  end
116
132
 
@@ -118,17 +134,23 @@ module Dexter
118
134
  new_indexes = {}
119
135
 
120
136
  queries.each do |query|
121
- if query.initial_cost
122
- new_plan = plan(query.statement)
123
- query.new_cost = new_plan["Total Cost"]
124
- cost_savings = query.new_cost < query.initial_cost * 0.5
137
+ if query.explainable?
138
+ new_cost, new_cost2 = query.costs[1..2]
139
+
140
+ cost_savings = new_cost < query.initial_cost * 0.5
141
+ # set high bar for multicolumn indexes
142
+ cost_savings2 = new_cost > 100 && new_cost2 < new_cost * 0.5
143
+
144
+ query.new_cost = cost_savings2 ? new_cost2 : new_cost
125
145
 
126
146
  query_indexes = []
127
- candidates.each do |col, index_name|
128
- if new_plan.inspect.include?(index_name)
147
+ candidates.each do |col_set, index_name|
148
+ key = cost_savings2 ? 2 : 1
149
+
150
+ if query.plans[key].inspect.include?(index_name)
129
151
  index = {
130
- table: col[:table],
131
- columns: [col[:column]]
152
+ table: col_set[0][:table],
153
+ columns: col_set.map { |c| c[:column] }
132
154
  }
133
155
  query_indexes << index
134
156
 
@@ -142,12 +164,12 @@ module Dexter
142
164
 
143
165
  if @log_level == "debug2"
144
166
  log "Processed #{query.fingerprint}"
145
- if query.initial_cost
167
+ if query.explainable?
146
168
  log "Cost: #{query.initial_cost} -> #{query.new_cost}"
147
169
 
148
170
  if query_indexes.any?
149
171
  log "Indexes: #{query_indexes.map { |i| "#{i[:table]} (#{i[:columns].join(", ")})" }.join(", ")}"
150
- log "Need 50% cost savings to suggest index" unless cost_savings
172
+ log "Need 50% cost savings to suggest index" unless cost_savings || cost_savings2
151
173
  else
152
174
  log "Indexes: None"
153
175
  end
@@ -195,7 +217,7 @@ module Dexter
195
217
  statement = "CREATE INDEX CONCURRENTLY ON #{quote_ident(index[:table])} (#{index[:columns].map { |c| quote_ident(c) }.join(", ")})"
196
218
  log "Creating index: #{statement}"
197
219
  started_at = Time.now
198
- select_all(statement)
220
+ execute(statement)
199
221
  log "Index created: #{((Time.now - started_at) * 1000).to_i} ms"
200
222
  end
201
223
  end
@@ -223,7 +245,7 @@ module Dexter
223
245
  abort "Bad database url"
224
246
  end
225
247
 
226
- def select_all(query)
248
+ def execute(query)
227
249
  # use exec_params instead of exec for security
228
250
  #
229
251
  # Unlike PQexec, PQexecParams allows at most one SQL command in the given string.
@@ -238,11 +260,23 @@ module Dexter
238
260
 
239
261
  def plan(query)
240
262
  # strip semi-colons as another measure of defense
241
- JSON.parse(select_all("EXPLAIN (FORMAT JSON) #{query.gsub(";", "")}").first["QUERY PLAN"]).first["Plan"]
263
+ JSON.parse(execute("EXPLAIN (FORMAT JSON) #{safe_statement(query)}").first["QUERY PLAN"]).first["Plan"]
264
+ end
265
+
266
+ # TODO for multicolumn indexes, use ordering
267
+ def create_hypothetical_indexes_helper(columns_by_table, n, index_set, candidates)
268
+ columns_by_table.each do |table, cols|
269
+ # no reason to use btree index for json columns
270
+ cols.reject { |c| ["json", "jsonb"].include?(c[:type]) }.permutation(n) do |col_set|
271
+ if !index_set.include?([table, col_set.map { |col| col[:column] }])
272
+ candidates[col_set] = execute("SELECT * FROM hypopg_create_index('CREATE INDEX ON #{quote_ident(table)} (#{col_set.map { |c| quote_ident(c[:column]) }.join(", ")})')").first["indexname"]
273
+ end
274
+ end
275
+ end
242
276
  end
243
277
 
244
278
  def database_tables
245
- result = select_all <<-SQL
279
+ result = execute <<-SQL
246
280
  SELECT
247
281
  table_name
248
282
  FROM
@@ -259,7 +293,7 @@ module Dexter
259
293
  end
260
294
 
261
295
  def columns(tables)
262
- columns = select_all <<-SQL
296
+ columns = execute <<-SQL
263
297
  SELECT
264
298
  table_name,
265
299
  column_name,
@@ -269,13 +303,15 @@ module Dexter
269
303
  WHERE
270
304
  table_schema = 'public' AND
271
305
  table_name IN (#{tables.map { |t| quote(t) }.join(", ")})
306
+ ORDER BY
307
+ 1, 2
272
308
  SQL
273
309
 
274
310
  columns.map { |v| {table: v["table_name"], column: v["column_name"], type: v["data_type"]} }
275
311
  end
276
312
 
277
313
  def indexes(tables)
278
- select_all(<<-SQL
314
+ execute(<<-SQL
279
315
  SELECT
280
316
  schemaname AS schema,
281
317
  t.relname AS table,
@@ -331,5 +367,9 @@ module Dexter
331
367
  def squish(str)
332
368
  str.to_s.gsub(/\A[[:space:]]+/, "").gsub(/[[:space:]]+\z/, "").gsub(/[[:space:]]+/, " ")
333
369
  end
370
+
371
+ def safe_statement(statement)
372
+ statement.gsub(";", "")
373
+ end
334
374
  end
335
375
  end
@@ -1,15 +1,28 @@
1
1
  module Dexter
2
2
  class Query
3
- attr_reader :statement, :fingerprint
4
- attr_accessor :initial_cost, :new_cost, :missing_tables
3
+ attr_reader :statement, :fingerprint, :plans
4
+ attr_accessor :missing_tables, :new_cost
5
5
 
6
6
  def initialize(statement, fingerprint)
7
7
  @statement = statement
8
8
  @fingerprint = fingerprint
9
+ @plans = []
9
10
  end
10
11
 
11
12
  def tables
12
13
  @tables ||= PgQuery.parse(statement).tables rescue []
13
14
  end
15
+
16
+ def explainable?
17
+ plans.any?
18
+ end
19
+
20
+ def costs
21
+ plans.map { |plan| plan["Total Cost"] }
22
+ end
23
+
24
+ def initial_cost
25
+ costs[0]
26
+ end
14
27
  end
15
28
  end
@@ -1,3 +1,3 @@
1
1
  module Dexter
2
- VERSION = "0.1.3"
2
+ VERSION = "0.1.4"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pgdexter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-07-01 00:00:00.000000000 Z
11
+ date: 2017-07-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: slop
@@ -109,6 +109,7 @@ files:
109
109
  - README.md
110
110
  - Rakefile
111
111
  - exe/dexter
112
+ - guides/Linux.md
112
113
  - lib/dexter.rb
113
114
  - lib/dexter/client.rb
114
115
  - lib/dexter/collector.rb