pgdexter 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c513808e2cdd9690c477c30548979fad122575a4
4
- data.tar.gz: 6f4c605a0b68baa0275a523155b7be4ea9bc4d56
3
+ metadata.gz: 739c75ffdf977b9bbe8c29584da2c762f70fe527
4
+ data.tar.gz: 5059b53b96e5208146d3fffc44dcf485f40269e6
5
5
  SHA512:
6
- metadata.gz: 3b38a53e96516a485394f3ec594ee0418c1a7ab94fe8726b3228cc5443843dd7f7ba2bd1fe4bb0cc5c4dbd877bd9b9b036d5bfd5b4cda275dc7176dbdf9da82c
7
- data.tar.gz: cbe64d0cf9a40b96bf2644a80519bb837d235d6956ca9b61b24533867130958c95932ffbf4d69672218a10628c32b87a1f1e87f86a4968577f1629fc871ef440
6
+ metadata.gz: c9f071adbd8d2abe21dc454a709ddc2f0b7f9165473eac2a7de29ec4494e6cc17823d30fb000214d4d9f8d7c2cb327bef1a18e90418129868bf9c4011ad3b27c
7
+ data.tar.gz: e235bd08981cd3a0a2a75e266045bac412252bbab0a57623894f98e98442eabd0d8aa2923c8641dffbaed083aa1f3d026631f20983c411112502575052977676
@@ -1,3 +1,8 @@
1
+ ## 0.2.1
2
+
3
+ - Fixed bad suggestions
4
+ - Improved debugging output
5
+
1
6
  ## 0.2.0
2
7
 
3
8
  - Added same connection options as `psql`
@@ -8,12 +8,12 @@ Linux
8
8
 
9
9
  ```sh
10
10
  sudo sh -c 'echo "deb http://apt.postgresql.org/pub/repos/apt/ $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list'
11
- sudo apt-get install wget ca-certificates
11
+ sudo apt-get install -y wget ca-certificates
12
12
  wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo apt-key add -
13
13
  sudo apt-get update
14
- sudo apt-get install postgresql-9.6 postgresql-server-dev-9.6
14
+ sudo apt-get install -y postgresql-9.6 postgresql-server-dev-9.6
15
15
  sudo -u postgres createuser $(whoami) -s
16
- sudo apt-get install ruby2.2 ruby2.2-dev
16
+ sudo apt-get install -y ruby2.2 ruby2.2-dev
17
17
  ```
18
18
 
19
19
  Mac
@@ -83,7 +83,7 @@ heroku logs -p postgres > postgresql.log
83
83
  We recommend creating a new instance from a snapshot for the dump to avoid affecting customers.
84
84
 
85
85
  ```sh
86
- pg_dump -v -j 8 -Fd -f /tmp/newout.dir <connection-string>
86
+ pg_dump -v -j 8 -Fd -f /tmp/newout.dir <connection-options>
87
87
  ```
88
88
 
89
89
  Then shut down the dump instance. Restore with:
@@ -39,6 +39,7 @@ Options:)
39
39
  o.string "--log-level", "log level", default: "info"
40
40
  o.boolean "--log-sql", "log sql", default: false
41
41
  o.string "-s", "--statement", "process a single statement"
42
+ # separator must go here to show up correctly - slop bug?
42
43
  o.separator ""
43
44
  o.separator "Connection options:"
44
45
  o.on "-v", "--version", "print the version" do
@@ -37,7 +37,10 @@ module Dexter
37
37
  queries = []
38
38
  @top_queries.each do |k, v|
39
39
  if new_queries.include?(k) && v[:total_time] > @min_time
40
- queries << Query.new(v[:query], k)
40
+ query = Query.new(v[:query], k)
41
+ query.total_time = v[:total_time]
42
+ query.calls = v[:calls]
43
+ queries << query
41
44
  end
42
45
  end
43
46
 
@@ -52,7 +52,7 @@ module Dexter
52
52
  new_indexes = determine_indexes(queries, candidates, tables)
53
53
 
54
54
  # display and create new indexes
55
- show_and_create_indexes(new_indexes)
55
+ show_and_create_indexes(new_indexes, queries, tables)
56
56
  end
57
57
 
58
58
  private
@@ -133,13 +133,6 @@ module Dexter
133
133
  tables = Set.new(explainable_queries.flat_map(&:tables))
134
134
 
135
135
  if tables.any?
136
- # get existing indexes
137
- index_set = Set.new
138
- indexes(tables).each do |index|
139
- # TODO make sure btree
140
- index_set << [index["table"], index["columns"]]
141
- end
142
-
143
136
  # since every set of multi-column indexes are expensive
144
137
  # try to parse out columns
145
138
  possible_columns = Set.new
@@ -156,13 +149,13 @@ module Dexter
156
149
  columns_by_table = columns(tables).select { |c| possible_columns.include?(c[:column]) }.group_by { |c| c[:table] }
157
150
 
158
151
  # create single column indexes
159
- create_hypothetical_indexes_helper(columns_by_table, 1, index_set, candidates)
152
+ create_hypothetical_indexes_helper(columns_by_table, 1, candidates)
160
153
 
161
154
  # get next round of costs
162
155
  calculate_plan(explainable_queries)
163
156
 
164
157
  # create multicolumn indexes
165
- create_hypothetical_indexes_helper(columns_by_table, 2, index_set, candidates)
158
+ create_hypothetical_indexes_helper(columns_by_table, 2, candidates)
166
159
 
167
160
  # get next round of costs
168
161
  calculate_plan(explainable_queries)
@@ -196,88 +189,153 @@ module Dexter
196
189
  indexes
197
190
  end
198
191
 
192
+ def hypo_indexes_from_plan(index_name_to_columns, plan, index_set)
193
+ query_indexes = []
194
+
195
+ find_indexes(plan).uniq.sort.each do |index_name|
196
+ col_set = index_name_to_columns[index_name]
197
+
198
+ if col_set
199
+ index = {
200
+ table: col_set[0][:table],
201
+ columns: col_set.map { |c| c[:column] }
202
+ }
203
+
204
+ unless index_set.include?([index[:table], index[:columns]])
205
+ query_indexes << index
206
+ end
207
+ end
208
+ end
209
+
210
+ query_indexes
211
+ end
212
+
199
213
  def determine_indexes(queries, candidates, tables)
200
214
  new_indexes = {}
201
215
  index_name_to_columns = candidates.invert
202
216
 
217
+ # filter out existing indexes
218
+ # this must happen at end of process
219
+ # since sometimes hypothetical indexes
220
+ # can give lower cost than actual indexes
221
+ index_set = Set.new
222
+ if tables.any?
223
+ indexes(tables).each do |index|
224
+ if index["using"] == "btree"
225
+ # don't add indexes that are already covered
226
+ index_set << [index["table"], index["columns"].first(1)]
227
+ index_set << [index["table"], index["columns"].first(2)]
228
+ end
229
+ end
230
+ end
231
+
203
232
  queries.each do |query|
204
233
  if query.explainable? && query.high_cost?
205
234
  new_cost, new_cost2 = query.costs[1..2]
206
235
 
207
236
  cost_savings = new_cost < query.initial_cost * 0.5
237
+
208
238
  # set high bar for multicolumn indexes
209
239
  cost_savings2 = new_cost > 100 && new_cost2 < new_cost * 0.5
210
240
 
211
- query.new_cost = cost_savings2 ? new_cost2 : new_cost
212
-
213
- query_indexes = []
214
241
  key = cost_savings2 ? 2 : 1
215
- indexes = find_indexes(query.plans[key]).uniq.sort
242
+ query_indexes = hypo_indexes_from_plan(index_name_to_columns, query.plans[key], index_set)
216
243
 
217
- indexes.each do |index_name|
218
- col_set = index_name_to_columns[index_name]
244
+ # likely a bad suggestion, so try single column
245
+ if cost_savings2 && query_indexes.size > 1
246
+ query_indexes = hypo_indexes_from_plan(index_name_to_columns, query.plans[1], index_set)
247
+ cost_savings2 = false
248
+ end
219
249
 
220
- if col_set
221
- index = {
222
- table: col_set[0][:table],
223
- columns: col_set.map { |c| c[:column] }
224
- }
225
- query_indexes << index
250
+ # TODO if multiple indexes are found (for either single or multicolumn)
251
+ # determine the impact of each individually
252
+ # for now, be conservative and don't suggest if more than one index
253
+ suggest_index = (cost_savings || cost_savings2) && query_indexes.size == 1
226
254
 
227
- if cost_savings
228
- new_indexes[index] ||= index.dup
229
- (new_indexes[index][:queries] ||= []) << query
230
- end
255
+ if suggest_index
256
+ query_indexes.each do |index|
257
+ new_indexes[index] ||= index.dup
258
+ (new_indexes[index][:queries] ||= []) << query
231
259
  end
232
260
  end
233
- end
234
261
 
235
- if @log_level == "debug2"
236
- log "Processed #{query.fingerprint}"
237
- if tables.empty?
238
- log "No candidate tables for indexes"
239
- elsif query.explainable? && !query.high_cost?
240
- log "Low initial cost: #{query.initial_cost}"
241
- elsif query.explainable?
242
- log "Cost: #{query.initial_cost} -> #{query.new_cost}"
243
-
244
- if query_indexes.any?
245
- log "Indexes: #{query_indexes.map { |i| "#{i[:table]} (#{i[:columns].join(", ")})" }.join(", ")}"
246
- log "Need 50% cost savings to suggest index" unless cost_savings || cost_savings2
262
+ query.indexes = query_indexes
263
+ query.suggest_index = suggest_index
264
+ query.new_cost =
265
+ if suggest_index
266
+ cost_savings2 ? new_cost2 : new_cost
247
267
  else
248
- log "Indexes: None"
268
+ query.initial_cost
249
269
  end
250
- elsif query.fingerprint == "unknown"
251
- log "Could not parse query"
252
- elsif query.tables.empty?
253
- log "No tables"
254
- elsif query.missing_tables
255
- log "Tables not present in current database"
256
- else
257
- log "Could not run explain"
270
+
271
+ # TODO optimize
272
+ if @log_level.start_with?("debug")
273
+ query.pass1_indexes = hypo_indexes_from_plan(index_name_to_columns, query.plans[1], index_set)
274
+ query.pass2_indexes = hypo_indexes_from_plan(index_name_to_columns, query.plans[2], index_set)
258
275
  end
276
+ end
277
+ end
259
278
 
260
- puts
261
- puts query.statement
262
- puts
279
+ # filter out covered indexes
280
+ covered = Set.new
281
+ new_indexes.values.each do |index|
282
+ if index[:columns].size > 1
283
+ covered << [index[:table], index[:columns].first(1)]
263
284
  end
264
285
  end
265
286
 
266
- new_indexes.values.sort_by(&:to_a)
287
+ new_indexes.values.reject { |i| covered.include?([i[:table], i[:columns]]) }.sort_by(&:to_a)
267
288
  end
268
289
 
269
- def show_and_create_indexes(new_indexes)
290
+ def log_indexes(indexes)
291
+ if indexes.any?
292
+ indexes.map { |i| "#{i[:table]} (#{i[:columns].join(", ")})" }.join(", ")
293
+ else
294
+ "None"
295
+ end
296
+ end
297
+
298
+ def show_and_create_indexes(new_indexes, queries, tables)
270
299
  if new_indexes.any?
271
300
  new_indexes.each do |index|
272
301
  log "Index found: #{index[:table]} (#{index[:columns].join(", ")})"
302
+ end
273
303
 
274
- if @log_level.start_with?("debug")
275
- index[:queries].sort_by(&:fingerprint).each do |query|
276
- log "Query #{query.fingerprint} (Cost: #{query.initial_cost} -> #{query.new_cost})"
277
- puts
278
- puts query.statement
279
- puts
304
+ if @log_level.start_with?("debug")
305
+ index_queries = new_indexes.flat_map { |i| i[:queries].sort_by(&:fingerprint) }
306
+ if @log_level == "debug2"
307
+ fingerprints = Set.new(index_queries.map(&:fingerprint))
308
+ index_queries.concat(queries.reject { |q| fingerprints.include?(q.fingerprint) }.sort_by(&:fingerprint))
309
+ end
310
+ index_queries.each do |query|
311
+ log "-" * 80
312
+ log "Query #{query.fingerprint}"
313
+ log "Total time: #{(query.total_time / 60000.0).round(1)} min, avg time: #{(query.total_time / query.calls.to_f).round} ms, calls: #{query.calls}" if query.total_time
314
+ if tables.empty?
315
+ log "No candidate tables for indexes"
316
+ elsif query.explainable? && !query.high_cost?
317
+ log "Low initial cost: #{query.initial_cost}"
318
+ elsif query.explainable?
319
+ query_indexes = query.indexes || []
320
+ log "Start: #{query.costs[0]}"
321
+ log "Pass1: #{query.costs[1]} : #{log_indexes(query.pass1_indexes || [])}"
322
+ log "Pass2: #{query.costs[2]} : #{log_indexes(query.pass2_indexes || [])}"
323
+ log "Final: #{query.new_cost} : #{log_indexes(query_indexes)}"
324
+ if query_indexes.any? && !query.suggest_index
325
+ log "Need 50% cost savings to suggest index"
326
+ end
327
+ elsif query.fingerprint == "unknown"
328
+ log "Could not parse query"
329
+ elsif query.tables.empty?
330
+ log "No tables"
331
+ elsif query.missing_tables
332
+ log "Tables not present in current database"
333
+ else
334
+ log "Could not run explain"
280
335
  end
336
+ log
337
+ log query.statement
338
+ log
281
339
  end
282
340
  end
283
341
 
@@ -347,13 +405,11 @@ module Dexter
347
405
  end
348
406
 
349
407
  # TODO for multicolumn indexes, use ordering
350
- def create_hypothetical_indexes_helper(columns_by_table, n, index_set, candidates)
408
+ def create_hypothetical_indexes_helper(columns_by_table, n, candidates)
351
409
  columns_by_table.each do |table, cols|
352
410
  # no reason to use btree index for json columns
353
411
  cols.reject { |c| ["json", "jsonb"].include?(c[:type]) }.permutation(n) do |col_set|
354
- if !index_set.include?([table, col_set.map { |col| col[:column] }])
355
- candidates[col_set] = execute("SELECT * FROM hypopg_create_index('CREATE INDEX ON #{quote_ident(table)} (#{col_set.map { |c| quote_ident(c[:column]) }.join(", ")})')").first["indexname"]
356
- end
412
+ candidates[col_set] = execute("SELECT * FROM hypopg_create_index('CREATE INDEX ON #{quote_ident(table)} (#{col_set.map { |c| quote_ident(c[:column]) }.join(", ")})')").first["indexname"]
357
413
  end
358
414
  end
359
415
  end
@@ -12,7 +12,7 @@ module Dexter
12
12
  active_line = nil
13
13
  duration = nil
14
14
 
15
- each_line do |line|
15
+ @logfile.each_line do |line|
16
16
  if active_line
17
17
  if line.include?(LINE_SEPERATOR)
18
18
  process_entry(active_line, duration)
@@ -32,12 +32,6 @@ module Dexter
32
32
 
33
33
  private
34
34
 
35
- def each_line
36
- @logfile.each_line do |line|
37
- yield line
38
- end
39
- end
40
-
41
35
  def process_entry(query, duration)
42
36
  @collector.add(query, duration)
43
37
  end
@@ -1,7 +1,7 @@
1
1
  module Dexter
2
2
  module Logging
3
- def log(message)
4
- puts "#{Time.now.iso8601} #{message}" unless $log_level == "error"
3
+ def log(message = "")
4
+ puts message unless $log_level == "error"
5
5
  end
6
6
  end
7
7
  end
@@ -1,7 +1,7 @@
1
1
  module Dexter
2
2
  class Query
3
3
  attr_reader :statement, :fingerprint, :plans
4
- attr_accessor :missing_tables, :new_cost
4
+ attr_accessor :missing_tables, :new_cost, :total_time, :calls, :indexes, :suggest_index, :pass1_indexes, :pass2_indexes
5
5
 
6
6
  def initialize(statement, fingerprint = nil)
7
7
  @statement = statement
@@ -1,3 +1,3 @@
1
1
  module Dexter
2
- VERSION = "0.2.0"
2
+ VERSION = "0.2.1"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pgdexter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-08-28 00:00:00.000000000 Z
11
+ date: 2017-09-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: slop