pgdexter 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: fbc105f249fc27a10fcd47fa16ba9d732f0cd933
4
- data.tar.gz: 20929a8fd1d268194b9bc2c7bab71130a2eabc43
3
+ metadata.gz: 496685bbacfac7387182be3ebda3515a492723cc
4
+ data.tar.gz: b99548fccab8337b9ff5765a6182472abba9bb06
5
5
  SHA512:
6
- metadata.gz: b01d8e32db41bdf7fa0dd2412b4b490318d8574fbfba5ed0357f1aa9f3e8dcc2e9a0fbef9ee80376678c184f8840a5bf5e52fef5b325cdbffb2b31106d5c2838
7
- data.tar.gz: 2e9808dba0e161eb184ddc180ee915f0c49097225b2539a11916eaf5dd32e9bedce8ef07763391f758d6f4627258c90e6af4f7c158b473323d398050e64a9f75
6
+ metadata.gz: 3f9b30e7355c26cf7084b8deee9735206da484cd9f4725a5c1f751db5808dc32d66954341c5be3ba46cd783fa2e360e2b9c32ab0667aa2372213ebca695b29ff
7
+ data.tar.gz: 41346b8ed9906e24c68847c26bc9ffe54176eb0e74dc771138ec266ebccfca3e0d18ebd37223b73f1f2b8a9f512320502e4ad9597982bc253846881ea7e93e8d
data/.travis.yml ADDED
@@ -0,0 +1,18 @@
1
+ language: ruby
2
+ rvm: 2.4.1
3
+ cache: bundler
4
+ script: bundle exec rake test
5
+ addons:
6
+ postgresql: "9.6"
7
+ before_script:
8
+ - sudo apt-get install postgresql-server-dev-9.6
9
+ - wget https://github.com/dalibo/hypopg/archive/1.0.0.tar.gz
10
+ - tar xf 1.0.0.tar.gz
11
+ - cd hypopg-1.0.0
12
+ - make
13
+ - sudo make install
14
+ - psql -c 'create database dexter_test;' -U postgres
15
+ notifications:
16
+ email:
17
+ on_success: never
18
+ on_failure: change
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.1.6
2
+
3
+ - Significant performance improvements
4
+ - Added `--include` option
5
+
1
6
  ## 0.1.5
2
7
 
3
8
  - Added support for non-`SELECT` queries
data/README.md CHANGED
@@ -4,13 +4,14 @@ The automatic indexer for Postgres
4
4
 
5
5
  [Read about how it works](https://medium.com/@ankane/introducing-dexter-the-automatic-indexer-for-postgres-5f8fa8b28f27)
6
6
 
7
+ [![Build Status](https://travis-ci.org/ankane/dexter.svg?branch=master)](https://travis-ci.org/ankane/dexter)
8
+
7
9
  ## Installation
8
10
 
9
11
  First, install [HypoPG](https://github.com/dalibo/hypopg) on your database server. This doesn’t require a restart.
10
12
 
11
13
  ```sh
12
- wget https://github.com/dalibo/hypopg/archive/1.0.0.tar.gz
13
- tar xf 1.0.0.tar.gz
14
+ curl -L https://github.com/dalibo/hypopg/archive/1.0.0.tar.gz | tar xz
14
15
  cd hypopg-1.0.0
15
16
  make
16
17
  make install # may need sudo
data/lib/dexter/client.rb CHANGED
@@ -30,6 +30,7 @@ module Dexter
30
30
  Options:)
31
31
  o.boolean "--create", "create indexes", default: false
32
32
  o.array "--exclude", "prevent specific tables from being indexed"
33
+ o.string "--include", "only include specific tables"
33
34
  o.integer "--interval", "time to wait between processing queries, in seconds", default: 60
34
35
  o.float "--min-time", "only process queries that have consumed a certain amount of DB time, in minutes", default: 0
35
36
  o.boolean "--pg-stat-statements", "use pg_stat_statements", default: false, help: false
@@ -7,11 +7,13 @@ module Dexter
7
7
  @create = options[:create]
8
8
  @log_level = options[:log_level]
9
9
  @exclude_tables = options[:exclude]
10
+ @include_tables = Array(options[:include].split(",")) if options[:include]
10
11
  @log_sql = options[:log_sql]
11
12
  @log_explain = options[:log_explain]
12
13
  @min_time = options[:min_time] || 0
13
14
 
14
15
  create_extension unless extension_exists?
16
+ execute("SET lock_timeout = '5s'")
15
17
  end
16
18
 
17
19
  def process_stat_statements
@@ -30,6 +32,10 @@ module Dexter
30
32
  query.missing_tables = !query.tables.all? { |t| tables.include?(t) }
31
33
  end
32
34
 
35
+ if @include_tables
36
+ tables = Set.new(tables.to_a & @include_tables)
37
+ end
38
+
33
39
  # exclude user specified tables
34
40
  # TODO exclude write-heavy tables
35
41
  @exclude_tables.each do |table|
@@ -43,7 +49,7 @@ module Dexter
43
49
  candidates = tables.any? ? create_hypothetical_indexes(queries.reject(&:missing_tables), tables) : {}
44
50
 
45
51
  # see if new indexes were used and meet bar
46
- new_indexes = determine_indexes(queries, candidates)
52
+ new_indexes = determine_indexes(queries, candidates, tables)
47
53
 
48
54
  # display and create new indexes
49
55
  show_and_create_indexes(new_indexes)
@@ -55,6 +61,8 @@ module Dexter
55
61
  execute("SET client_min_messages = warning")
56
62
  begin
57
63
  execute("CREATE EXTENSION IF NOT EXISTS hypopg")
64
+ rescue PG::UndefinedFile
65
+ abort "Install HypoPG first: https://github.com/ankane/dexter#installation"
58
66
  rescue PG::InsufficientPrivilege
59
67
  abort "Use a superuser to run: CREATE EXTENSION hypopg"
60
68
  end
@@ -115,41 +123,85 @@ module Dexter
115
123
  end
116
124
 
117
125
  def create_hypothetical_indexes(queries, tables)
126
+ candidates = {}
127
+
118
128
  # get initial costs for queries
119
129
  calculate_plan(queries)
120
- explainable_queries = queries.select(&:explainable?)
130
+ explainable_queries = queries.select { |q| q.explainable? && q.high_cost? }
121
131
 
122
- # get existing indexes
123
- index_set = Set.new
124
- indexes(tables).each do |index|
125
- # TODO make sure btree
126
- index_set << [index["table"], index["columns"]]
127
- end
132
+ # filter tables for performance
133
+ tables = Set.new(explainable_queries.flat_map(&:tables))
128
134
 
129
- # create hypothetical indexes
130
- candidates = {}
131
- columns_by_table = columns(tables).group_by { |c| c[:table] }
135
+ if tables.any?
136
+ # get existing indexes
137
+ index_set = Set.new
138
+ indexes(tables).each do |index|
139
+ # TODO make sure btree
140
+ index_set << [index["table"], index["columns"]]
141
+ end
142
+
143
+ # since trying every combination of multi-column indexes is expensive,
144
+ # try to parse out columns
145
+ possible_columns = Set.new
146
+ explainable_queries.each do |query|
147
+ find_columns(query.tree).each do |col|
148
+ last_col = col["fields"].last
149
+ if last_col["String"]
150
+ possible_columns << last_col["String"]["str"]
151
+ end
152
+ end
153
+ end
154
+
155
+ # create hypothetical indexes
156
+ columns_by_table = columns(tables).select { |c| possible_columns.include?(c[:column]) }.group_by { |c| c[:table] }
132
157
 
133
- # create single column indexes
134
- create_hypothetical_indexes_helper(columns_by_table, 1, index_set, candidates)
158
+ # create single column indexes
159
+ create_hypothetical_indexes_helper(columns_by_table, 1, index_set, candidates)
135
160
 
136
- # get next round of costs
137
- calculate_plan(explainable_queries)
161
+ # get next round of costs
162
+ calculate_plan(explainable_queries)
138
163
 
139
- # create multicolumn indexes
140
- create_hypothetical_indexes_helper(columns_by_table, 2, index_set, candidates)
164
+ # create multicolumn indexes
165
+ create_hypothetical_indexes_helper(columns_by_table, 2, index_set, candidates)
141
166
 
142
- # get next round of costs
143
- calculate_plan(explainable_queries)
167
+ # get next round of costs
168
+ calculate_plan(explainable_queries)
169
+ end
144
170
 
145
171
  candidates
146
172
  end
147
173
 
148
- def determine_indexes(queries, candidates)
174
+ def find_columns(plan)
175
+ find_by_key(plan, "ColumnRef")
176
+ end
177
+
178
+ def find_indexes(plan)
179
+ find_by_key(plan, "Index Name")
180
+ end
181
+
182
+ def find_by_key(plan, key)
183
+ indexes = []
184
+ case plan
185
+ when Hash
186
+ plan.each do |k, v|
187
+ if k == key
188
+ indexes << v
189
+ else
190
+ indexes.concat(find_by_key(v, key))
191
+ end
192
+ end
193
+ when Array
194
+ indexes.concat(plan.flat_map { |v| find_by_key(v, key) })
195
+ end
196
+ indexes
197
+ end
198
+
199
+ def determine_indexes(queries, candidates, tables)
149
200
  new_indexes = {}
201
+ index_name_to_columns = candidates.invert
150
202
 
151
203
  queries.each do |query|
152
- if query.explainable?
204
+ if query.explainable? && query.high_cost?
153
205
  new_cost, new_cost2 = query.costs[1..2]
154
206
 
155
207
  cost_savings = new_cost < query.initial_cost * 0.5
@@ -159,10 +211,13 @@ module Dexter
159
211
  query.new_cost = cost_savings2 ? new_cost2 : new_cost
160
212
 
161
213
  query_indexes = []
162
- candidates.each do |col_set, index_name|
163
- key = cost_savings2 ? 2 : 1
214
+ key = cost_savings2 ? 2 : 1
215
+ indexes = find_indexes(query.plans[key]).uniq.sort
216
+
217
+ indexes.each do |index_name|
218
+ col_set = index_name_to_columns[index_name]
164
219
 
165
- if query.plans[key].inspect.include?(index_name)
220
+ if col_set
166
221
  index = {
167
222
  table: col_set[0][:table],
168
223
  columns: col_set.map { |c| c[:column] }
@@ -179,7 +234,11 @@ module Dexter
179
234
 
180
235
  if @log_level == "debug2"
181
236
  log "Processed #{query.fingerprint}"
182
- if query.explainable?
237
+ if tables.empty?
238
+ log "No candidate tables for indexes"
239
+ elsif query.explainable? && !query.high_cost?
240
+ log "Low initial cost: #{query.initial_cost}"
241
+ elsif query.explainable?
183
242
  log "Cost: #{query.initial_cost} -> #{query.new_cost}"
184
243
 
185
244
  if query_indexes.any?
@@ -233,8 +292,12 @@ module Dexter
233
292
  statement = "CREATE INDEX CONCURRENTLY ON #{quote_ident(index[:table])} (#{index[:columns].map { |c| quote_ident(c) }.join(", ")})"
234
293
  log "Creating index: #{statement}"
235
294
  started_at = Time.now
236
- execute(statement)
237
- log "Index created: #{((Time.now - started_at) * 1000).to_i} ms"
295
+ begin
296
+ execute(statement)
297
+ log "Index created: #{((Time.now - started_at) * 1000).to_i} ms"
298
+ rescue PG::LockNotAvailable => e
299
+ log "Could not acquire lock: #{index[:table]}"
300
+ end
238
301
  end
239
302
  end
240
303
  end
@@ -6,6 +6,8 @@ module Dexter
6
6
  def initialize(logfile, collector)
7
7
  @logfile = logfile
8
8
  @collector = collector
9
+
10
+ abort "Log file not found" unless File.exist?(logfile)
9
11
  end
10
12
 
11
13
  def perform
@@ -38,12 +40,8 @@ module Dexter
38
40
  yield line
39
41
  end
40
42
  else
41
- begin
42
- File.foreach(@logfile) do |line|
43
- yield line
44
- end
45
- rescue Errno::ENOENT
46
- abort "Log file not found"
43
+ File.foreach(@logfile) do |line|
44
+ yield line
47
45
  end
48
46
  end
49
47
  end
@@ -3,8 +3,6 @@ module Dexter
3
3
  include Logging
4
4
 
5
5
  def initialize(database_url, logfile, options)
6
- log "Started"
7
-
8
6
  @logfile = logfile
9
7
 
10
8
  @collector = Collector.new(min_time: options[:min_time])
@@ -16,6 +14,8 @@ module Dexter
16
14
 
17
15
  @mutex = Mutex.new
18
16
  @last_checked_at = {}
17
+
18
+ log "Started"
19
19
  end
20
20
 
21
21
  def perform
@@ -24,7 +24,11 @@ module Dexter
24
24
  Thread.new do
25
25
  sleep(@starting_interval)
26
26
  loop do
27
- process_queries
27
+ begin
28
+ process_queries
29
+ rescue PG::ServerError => e
30
+ log "ERROR: #{e.class.name}: #{e.message}"
31
+ end
28
32
  sleep(@interval)
29
33
  end
30
34
  end
data/lib/dexter/query.rb CHANGED
@@ -13,7 +13,11 @@ module Dexter
13
13
  end
14
14
 
15
15
  def tables
16
- @tables ||= PgQuery.parse(statement).tables rescue []
16
+ @tables ||= parse ? parse.tables : []
17
+ end
18
+
19
+ def tree
20
+ parse.tree
17
21
  end
18
22
 
19
23
  def explainable?
@@ -27,5 +31,18 @@ module Dexter
27
31
  def initial_cost
28
32
  costs[0]
29
33
  end
34
+
35
+ def high_cost?
36
+ initial_cost && initial_cost >= 100
37
+ end
38
+
39
+ private
40
+
41
+ def parse
42
+ unless defined?(@parse)
43
+ @parse = PgQuery.parse(statement) rescue nil
44
+ end
45
+ @parse
46
+ end
30
47
  end
31
48
  end
@@ -1,3 +1,3 @@
1
1
  module Dexter
2
- VERSION = "0.1.5"
2
+ VERSION = "0.1.6"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pgdexter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-08-15 00:00:00.000000000 Z
11
+ date: 2017-08-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: slop
@@ -103,6 +103,7 @@ extensions: []
103
103
  extra_rdoc_files: []
104
104
  files:
105
105
  - ".gitignore"
106
+ - ".travis.yml"
106
107
  - CHANGELOG.md
107
108
  - Gemfile
108
109
  - LICENSE.txt