pgdexter 0.2.0 → 0.2.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c513808e2cdd9690c477c30548979fad122575a4
4
- data.tar.gz: 6f4c605a0b68baa0275a523155b7be4ea9bc4d56
3
+ metadata.gz: 739c75ffdf977b9bbe8c29584da2c762f70fe527
4
+ data.tar.gz: 5059b53b96e5208146d3fffc44dcf485f40269e6
5
5
  SHA512:
6
- metadata.gz: 3b38a53e96516a485394f3ec594ee0418c1a7ab94fe8726b3228cc5443843dd7f7ba2bd1fe4bb0cc5c4dbd877bd9b9b036d5bfd5b4cda275dc7176dbdf9da82c
7
- data.tar.gz: cbe64d0cf9a40b96bf2644a80519bb837d235d6956ca9b61b24533867130958c95932ffbf4d69672218a10628c32b87a1f1e87f86a4968577f1629fc871ef440
6
+ metadata.gz: c9f071adbd8d2abe21dc454a709ddc2f0b7f9165473eac2a7de29ec4494e6cc17823d30fb000214d4d9f8d7c2cb327bef1a18e90418129868bf9c4011ad3b27c
7
+ data.tar.gz: e235bd08981cd3a0a2a75e266045bac412252bbab0a57623894f98e98442eabd0d8aa2923c8641dffbaed083aa1f3d026631f20983c411112502575052977676
@@ -1,3 +1,8 @@
1
+ ## 0.2.1
2
+
3
+ - Fixed bad suggestions
4
+ - Improved debugging output
5
+
1
6
  ## 0.2.0
2
7
 
3
8
  - Added same connection options as `psql`
@@ -8,12 +8,12 @@ Linux
8
8
 
9
9
  ```sh
10
10
  sudo sh -c 'echo "deb http://apt.postgresql.org/pub/repos/apt/ $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list'
11
- sudo apt-get install wget ca-certificates
11
+ sudo apt-get install -y wget ca-certificates
12
12
  wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo apt-key add -
13
13
  sudo apt-get update
14
- sudo apt-get install postgresql-9.6 postgresql-server-dev-9.6
14
+ sudo apt-get install -y postgresql-9.6 postgresql-server-dev-9.6
15
15
  sudo -u postgres createuser $(whoami) -s
16
- sudo apt-get install ruby2.2 ruby2.2-dev
16
+ sudo apt-get install -y ruby2.2 ruby2.2-dev
17
17
  ```
18
18
 
19
19
  Mac
@@ -83,7 +83,7 @@ heroku logs -p postgres > postgresql.log
83
83
  We recommend creating a new instance from a snapshot for the dump to avoid affecting customers.
84
84
 
85
85
  ```sh
86
- pg_dump -v -j 8 -Fd -f /tmp/newout.dir <connection-string>
86
+ pg_dump -v -j 8 -Fd -f /tmp/newout.dir <connection-options>
87
87
  ```
88
88
 
89
89
  Then shutdown the dump instance. Restore with:
@@ -39,6 +39,7 @@ Options:)
39
39
  o.string "--log-level", "log level", default: "info"
40
40
  o.boolean "--log-sql", "log sql", default: false
41
41
  o.string "-s", "--statement", "process a single statement"
42
+ # separator must go here to show up correctly - slop bug?
42
43
  o.separator ""
43
44
  o.separator "Connection options:"
44
45
  o.on "-v", "--version", "print the version" do
@@ -37,7 +37,10 @@ module Dexter
37
37
  queries = []
38
38
  @top_queries.each do |k, v|
39
39
  if new_queries.include?(k) && v[:total_time] > @min_time
40
- queries << Query.new(v[:query], k)
40
+ query = Query.new(v[:query], k)
41
+ query.total_time = v[:total_time]
42
+ query.calls = v[:calls]
43
+ queries << query
41
44
  end
42
45
  end
43
46
 
@@ -52,7 +52,7 @@ module Dexter
52
52
  new_indexes = determine_indexes(queries, candidates, tables)
53
53
 
54
54
  # display and create new indexes
55
- show_and_create_indexes(new_indexes)
55
+ show_and_create_indexes(new_indexes, queries, tables)
56
56
  end
57
57
 
58
58
  private
@@ -133,13 +133,6 @@ module Dexter
133
133
  tables = Set.new(explainable_queries.flat_map(&:tables))
134
134
 
135
135
  if tables.any?
136
- # get existing indexes
137
- index_set = Set.new
138
- indexes(tables).each do |index|
139
- # TODO make sure btree
140
- index_set << [index["table"], index["columns"]]
141
- end
142
-
143
136
  # since every set of multi-column indexes are expensive
144
137
  # try to parse out columns
145
138
  possible_columns = Set.new
@@ -156,13 +149,13 @@ module Dexter
156
149
  columns_by_table = columns(tables).select { |c| possible_columns.include?(c[:column]) }.group_by { |c| c[:table] }
157
150
 
158
151
  # create single column indexes
159
- create_hypothetical_indexes_helper(columns_by_table, 1, index_set, candidates)
152
+ create_hypothetical_indexes_helper(columns_by_table, 1, candidates)
160
153
 
161
154
  # get next round of costs
162
155
  calculate_plan(explainable_queries)
163
156
 
164
157
  # create multicolumn indexes
165
- create_hypothetical_indexes_helper(columns_by_table, 2, index_set, candidates)
158
+ create_hypothetical_indexes_helper(columns_by_table, 2, candidates)
166
159
 
167
160
  # get next round of costs
168
161
  calculate_plan(explainable_queries)
@@ -196,88 +189,153 @@ module Dexter
196
189
  indexes
197
190
  end
198
191
 
192
+ def hypo_indexes_from_plan(index_name_to_columns, plan, index_set)
193
+ query_indexes = []
194
+
195
+ find_indexes(plan).uniq.sort.each do |index_name|
196
+ col_set = index_name_to_columns[index_name]
197
+
198
+ if col_set
199
+ index = {
200
+ table: col_set[0][:table],
201
+ columns: col_set.map { |c| c[:column] }
202
+ }
203
+
204
+ unless index_set.include?([index[:table], index[:columns]])
205
+ query_indexes << index
206
+ end
207
+ end
208
+ end
209
+
210
+ query_indexes
211
+ end
212
+
199
213
  def determine_indexes(queries, candidates, tables)
200
214
  new_indexes = {}
201
215
  index_name_to_columns = candidates.invert
202
216
 
217
+ # filter out existing indexes
218
+ # this must happen at end of process
219
+ # since sometimes hypothetical indexes
220
+ # can give lower cost than actual indexes
221
+ index_set = Set.new
222
+ if tables.any?
223
+ indexes(tables).each do |index|
224
+ if index["using"] == "btree"
225
+ # don't add indexes that are already covered
226
+ index_set << [index["table"], index["columns"].first(1)]
227
+ index_set << [index["table"], index["columns"].first(2)]
228
+ end
229
+ end
230
+ end
231
+
203
232
  queries.each do |query|
204
233
  if query.explainable? && query.high_cost?
205
234
  new_cost, new_cost2 = query.costs[1..2]
206
235
 
207
236
  cost_savings = new_cost < query.initial_cost * 0.5
237
+
208
238
  # set high bar for multicolumn indexes
209
239
  cost_savings2 = new_cost > 100 && new_cost2 < new_cost * 0.5
210
240
 
211
- query.new_cost = cost_savings2 ? new_cost2 : new_cost
212
-
213
- query_indexes = []
214
241
  key = cost_savings2 ? 2 : 1
215
- indexes = find_indexes(query.plans[key]).uniq.sort
242
+ query_indexes = hypo_indexes_from_plan(index_name_to_columns, query.plans[key], index_set)
216
243
 
217
- indexes.each do |index_name|
218
- col_set = index_name_to_columns[index_name]
244
+ # likely a bad suggestion, so try single column
245
+ if cost_savings2 && query_indexes.size > 1
246
+ query_indexes = hypo_indexes_from_plan(index_name_to_columns, query.plans[1], index_set)
247
+ cost_savings2 = false
248
+ end
219
249
 
220
- if col_set
221
- index = {
222
- table: col_set[0][:table],
223
- columns: col_set.map { |c| c[:column] }
224
- }
225
- query_indexes << index
250
+ # TODO if multiple indexes are found (for either single or multicolumn)
251
+ # determine the impact of each individually
252
+ # for now, be conservative and don't suggest if more than one index
253
+ suggest_index = (cost_savings || cost_savings2) && query_indexes.size == 1
226
254
 
227
- if cost_savings
228
- new_indexes[index] ||= index.dup
229
- (new_indexes[index][:queries] ||= []) << query
230
- end
255
+ if suggest_index
256
+ query_indexes.each do |index|
257
+ new_indexes[index] ||= index.dup
258
+ (new_indexes[index][:queries] ||= []) << query
231
259
  end
232
260
  end
233
- end
234
261
 
235
- if @log_level == "debug2"
236
- log "Processed #{query.fingerprint}"
237
- if tables.empty?
238
- log "No candidate tables for indexes"
239
- elsif query.explainable? && !query.high_cost?
240
- log "Low initial cost: #{query.initial_cost}"
241
- elsif query.explainable?
242
- log "Cost: #{query.initial_cost} -> #{query.new_cost}"
243
-
244
- if query_indexes.any?
245
- log "Indexes: #{query_indexes.map { |i| "#{i[:table]} (#{i[:columns].join(", ")})" }.join(", ")}"
246
- log "Need 50% cost savings to suggest index" unless cost_savings || cost_savings2
262
+ query.indexes = query_indexes
263
+ query.suggest_index = suggest_index
264
+ query.new_cost =
265
+ if suggest_index
266
+ cost_savings2 ? new_cost2 : new_cost
247
267
  else
248
- log "Indexes: None"
268
+ query.initial_cost
249
269
  end
250
- elsif query.fingerprint == "unknown"
251
- log "Could not parse query"
252
- elsif query.tables.empty?
253
- log "No tables"
254
- elsif query.missing_tables
255
- log "Tables not present in current database"
256
- else
257
- log "Could not run explain"
270
+
271
+ # TODO optimize
272
+ if @log_level.start_with?("debug")
273
+ query.pass1_indexes = hypo_indexes_from_plan(index_name_to_columns, query.plans[1], index_set)
274
+ query.pass2_indexes = hypo_indexes_from_plan(index_name_to_columns, query.plans[2], index_set)
258
275
  end
276
+ end
277
+ end
259
278
 
260
- puts
261
- puts query.statement
262
- puts
279
+ # filter out covered indexes
280
+ covered = Set.new
281
+ new_indexes.values.each do |index|
282
+ if index[:columns].size > 1
283
+ covered << [index[:table], index[:columns].first(1)]
263
284
  end
264
285
  end
265
286
 
266
- new_indexes.values.sort_by(&:to_a)
287
+ new_indexes.values.reject { |i| covered.include?([i[:table], i[:columns]]) }.sort_by(&:to_a)
267
288
  end
268
289
 
269
- def show_and_create_indexes(new_indexes)
290
+ def log_indexes(indexes)
291
+ if indexes.any?
292
+ indexes.map { |i| "#{i[:table]} (#{i[:columns].join(", ")})" }.join(", ")
293
+ else
294
+ "None"
295
+ end
296
+ end
297
+
298
+ def show_and_create_indexes(new_indexes, queries, tables)
270
299
  if new_indexes.any?
271
300
  new_indexes.each do |index|
272
301
  log "Index found: #{index[:table]} (#{index[:columns].join(", ")})"
302
+ end
273
303
 
274
- if @log_level.start_with?("debug")
275
- index[:queries].sort_by(&:fingerprint).each do |query|
276
- log "Query #{query.fingerprint} (Cost: #{query.initial_cost} -> #{query.new_cost})"
277
- puts
278
- puts query.statement
279
- puts
304
+ if @log_level.start_with?("debug")
305
+ index_queries = new_indexes.flat_map { |i| i[:queries].sort_by(&:fingerprint) }
306
+ if @log_level == "debug2"
307
+ fingerprints = Set.new(index_queries.map(&:fingerprint))
308
+ index_queries.concat(queries.reject { |q| fingerprints.include?(q.fingerprint) }.sort_by(&:fingerprint))
309
+ end
310
+ index_queries.each do |query|
311
+ log "-" * 80
312
+ log "Query #{query.fingerprint}"
313
+ log "Total time: #{(query.total_time / 60000.0).round(1)} min, avg time: #{(query.total_time / query.calls.to_f).round} ms, calls: #{query.calls}" if query.total_time
314
+ if tables.empty?
315
+ log "No candidate tables for indexes"
316
+ elsif query.explainable? && !query.high_cost?
317
+ log "Low initial cost: #{query.initial_cost}"
318
+ elsif query.explainable?
319
+ query_indexes = query.indexes || []
320
+ log "Start: #{query.costs[0]}"
321
+ log "Pass1: #{query.costs[1]} : #{log_indexes(query.pass1_indexes || [])}"
322
+ log "Pass2: #{query.costs[2]} : #{log_indexes(query.pass2_indexes || [])}"
323
+ log "Final: #{query.new_cost} : #{log_indexes(query_indexes)}"
324
+ if query_indexes.any? && !query.suggest_index
325
+ log "Need 50% cost savings to suggest index"
326
+ end
327
+ elsif query.fingerprint == "unknown"
328
+ log "Could not parse query"
329
+ elsif query.tables.empty?
330
+ log "No tables"
331
+ elsif query.missing_tables
332
+ log "Tables not present in current database"
333
+ else
334
+ log "Could not run explain"
280
335
  end
336
+ log
337
+ log query.statement
338
+ log
281
339
  end
282
340
  end
283
341
 
@@ -347,13 +405,11 @@ module Dexter
347
405
  end
348
406
 
349
407
  # TODO for multicolumn indexes, use ordering
350
- def create_hypothetical_indexes_helper(columns_by_table, n, index_set, candidates)
408
+ def create_hypothetical_indexes_helper(columns_by_table, n, candidates)
351
409
  columns_by_table.each do |table, cols|
352
410
  # no reason to use btree index for json columns
353
411
  cols.reject { |c| ["json", "jsonb"].include?(c[:type]) }.permutation(n) do |col_set|
354
- if !index_set.include?([table, col_set.map { |col| col[:column] }])
355
- candidates[col_set] = execute("SELECT * FROM hypopg_create_index('CREATE INDEX ON #{quote_ident(table)} (#{col_set.map { |c| quote_ident(c[:column]) }.join(", ")})')").first["indexname"]
356
- end
412
+ candidates[col_set] = execute("SELECT * FROM hypopg_create_index('CREATE INDEX ON #{quote_ident(table)} (#{col_set.map { |c| quote_ident(c[:column]) }.join(", ")})')").first["indexname"]
357
413
  end
358
414
  end
359
415
  end
@@ -12,7 +12,7 @@ module Dexter
12
12
  active_line = nil
13
13
  duration = nil
14
14
 
15
- each_line do |line|
15
+ @logfile.each_line do |line|
16
16
  if active_line
17
17
  if line.include?(LINE_SEPERATOR)
18
18
  process_entry(active_line, duration)
@@ -32,12 +32,6 @@ module Dexter
32
32
 
33
33
  private
34
34
 
35
- def each_line
36
- @logfile.each_line do |line|
37
- yield line
38
- end
39
- end
40
-
41
35
  def process_entry(query, duration)
42
36
  @collector.add(query, duration)
43
37
  end
@@ -1,7 +1,7 @@
1
1
  module Dexter
2
2
  module Logging
3
- def log(message)
4
- puts "#{Time.now.iso8601} #{message}" unless $log_level == "error"
3
+ def log(message = "")
4
+ puts message unless $log_level == "error"
5
5
  end
6
6
  end
7
7
  end
@@ -1,7 +1,7 @@
1
1
  module Dexter
2
2
  class Query
3
3
  attr_reader :statement, :fingerprint, :plans
4
- attr_accessor :missing_tables, :new_cost
4
+ attr_accessor :missing_tables, :new_cost, :total_time, :calls, :indexes, :suggest_index, :pass1_indexes, :pass2_indexes
5
5
 
6
6
  def initialize(statement, fingerprint = nil)
7
7
  @statement = statement
@@ -1,3 +1,3 @@
1
1
  module Dexter
2
- VERSION = "0.2.0"
2
+ VERSION = "0.2.1"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pgdexter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-08-28 00:00:00.000000000 Z
11
+ date: 2017-09-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: slop