pgdexter 0.3.2 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 714767248afe28ad9e354ebb6485b34a8d6aadd0
4
- data.tar.gz: 545eaaea1df0312049f4cecff3fe9b03b80a6cd9
3
+ metadata.gz: 14bc122b136301535793c29b19336a2d0614f640
4
+ data.tar.gz: 624c8c6ae5aabd8a5e2150887aa83efe6e67f9b4
5
5
  SHA512:
6
- metadata.gz: '096684887c5d5a48a2df74a2dae6cf70bc2b5e98dd5dc6d7d6d8b583bfb76193ce86166972d9553f89452f16629a8576a30f2627fd8d0b16a9672ce76a316835'
7
- data.tar.gz: d128734845414672a3f470f138c6a422dc2b24024a66866e50524fe3bb3e949bd4a899305a93f71fb14af3b56820f66e20a00d5ba5788cd0c88bd0afaa064e32
6
+ metadata.gz: 44224c687d8590c0441d587b432fde3b85cef77728980059d497ade992cd953e8f8445747de61c984d559f55d74461ac284c6c8423c0f1f8b08e439e361c141d
7
+ data.tar.gz: 42ec40ec7527dfc3093853ec013c5b30bacbaee5cbe8d27a1530649bfbdc16c639d9d93d65e72b062d53b33bff1d0a1bec67e971d283f5d5a2e6fea7d884fccd
data/.travis.yml CHANGED
@@ -6,9 +6,8 @@ addons:
6
6
  postgresql: "9.6"
7
7
  before_script:
8
8
  - sudo apt-get install postgresql-server-dev-9.6
9
- - wget https://github.com/dalibo/hypopg/archive/1.0.0.tar.gz
10
- - tar xf 1.0.0.tar.gz
11
- - cd hypopg-1.0.0
9
+ - git clone https://github.com/dalibo/hypopg.git
10
+ - cd hypopg
12
11
  - make
13
12
  - sudo make install
14
13
  - psql -c 'create database dexter_test;' -U postgres
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ ## 0.3.3
2
+
3
+ - Added support for views and materialized views
4
+ - Improved handling of cases where multiple indexes are found for a query
5
+ - Added `--min-cost-savings-pct` option
6
+
1
7
  ## 0.3.2
2
8
 
3
9
  - Fixed parsing issue with named prepared statements
data/README.md CHANGED
@@ -110,6 +110,8 @@ or use the [pg_stat_statements](https://www.postgresql.org/docs/current/static/p
110
110
  dexter <connection-options> --pg-stat-statements
111
111
  ```
112
112
 
113
+ > Note: Logs are highly preferred over pg_stat_statements, as pg_stat_statements often doesn’t store enough information to optimize queries.
114
+
113
115
  ### Collection Options
114
116
 
115
117
  To prevent one-off queries from being indexed, specify a minimum number of calls before a query is considered for indexing
data/guides/Linux.md CHANGED
@@ -4,6 +4,7 @@ Distributions
4
4
 
5
5
  - [Ubuntu 16.04 (Xenial)](#ubuntu-1604-xenial)
6
6
  - [Ubuntu 14.04 (Trusty)](#ubuntu-1404-trusty)
7
+ - [Debian 9 (Stretch)](#debian-9-stretch)
7
8
  - [Debian 8 (Jessie)](#debian-8-jesse)
8
9
  - [CentOS / RHEL 7](#centos--rhel-7)
9
10
  - [SUSE Linux Enterprise Server 12](#suse-linux-enterprise-server-12)
@@ -28,6 +29,16 @@ sudo apt-get update
28
29
  sudo apt-get install dexter
29
30
  ```
30
31
 
32
+ ### Debian 9 (Stretch)
33
+
34
+ ```sh
35
+ wget -qO- https://dl.packager.io/srv/pghero/dexter/key | sudo apt-key add -
36
+ sudo wget -O /etc/apt/sources.list.d/dexter.list \
37
+ https://dl.packager.io/srv/pghero/dexter/master/installer/debian/9.repo
38
+ sudo apt-get update
39
+ sudo apt-get install dexter
40
+ ```
41
+
31
42
  ### Debian 8 (Jesse)
32
43
 
33
44
  ```sh
data/lib/dexter/client.rb CHANGED
@@ -43,6 +43,7 @@ Options:)
43
43
  o.boolean "--log-sql", "log sql", default: false
44
44
  o.float "--min-calls", "only process queries that have been called a certain number of times", default: 0
45
45
  o.float "--min-time", "only process queries that have consumed a certain amount of DB time, in minutes", default: 0
46
+ o.integer "--min-cost-savings-pct", default: 50, help: false
46
47
  o.boolean "--pg-stat-statements", "use pg_stat_statements", default: false, help: false
47
48
  o.string "-s", "--statement", "process a single statement"
48
49
  # separator must go here to show up correctly - slop bug?
@@ -12,6 +12,7 @@ module Dexter
12
12
  @min_time = options[:min_time] || 0
13
13
  @min_calls = options[:min_calls] || 0
14
14
  @analyze = options[:analyze]
15
+ @min_cost_savings_pct = options[:min_cost_savings_pct].to_i
15
16
  @options = options
16
17
 
17
18
  create_extension unless extension_exists?
@@ -28,7 +29,7 @@ module Dexter
28
29
  # reset hypothetical indexes
29
30
  reset_hypothetical_indexes
30
31
 
31
- tables = Set.new(database_tables)
32
+ tables = Set.new(database_tables + materialized_views)
32
33
 
33
34
  # map tables without schema to schema
34
35
  no_schema_tables = {}
@@ -37,11 +38,28 @@ module Dexter
37
38
  no_schema_tables[group] = t2.sort_by { |t| [search_path_index[t.split(".")[0]] || 1000000, t] }[0]
38
39
  end
39
40
 
41
+ # add tables from views
42
+ view_tables = database_view_tables
43
+ view_tables.each do |v, vt|
44
+ view_tables[v] = vt.map { |t| no_schema_tables[t] || t }
45
+ end
46
+
47
+ # fully resolve tables
48
+ # make sure no views in result
49
+ view_tables.each do |v, vt|
50
+ view_tables[v] = vt.flat_map { |t| view_tables[t] || [t] }.uniq
51
+ end
52
+
40
53
  # filter queries from other databases and system tables
41
54
  queries.each do |query|
42
55
  # add schema to table if needed
43
56
  query.tables = query.tables.map { |t| no_schema_tables[t] || t }
44
57
 
58
+ # substitute view tables
59
+ new_tables = query.tables.flat_map { |t| view_tables[t] || [t] }.uniq
60
+ query.tables_from_views = new_tables - query.tables
61
+ query.tables = new_tables
62
+
45
63
  # check for missing tables
46
64
  query.missing_tables = !query.tables.all? { |t| tables.include?(t) }
47
65
  end
@@ -166,6 +184,7 @@ module Dexter
166
184
 
167
185
  # filter tables for performance
168
186
  tables = Set.new(explainable_queries.flat_map(&:tables))
187
+ tables_from_views = Set.new(explainable_queries.flat_map(&:tables_from_views))
169
188
 
170
189
  if tables.any?
171
190
  # since every set of multi-column indexes are expensive
@@ -182,7 +201,8 @@ module Dexter
182
201
  end
183
202
 
184
203
  # create hypothetical indexes
185
- columns_by_table = columns(tables).select { |c| possible_columns.include?(c[:column]) }.group_by { |c| c[:table] }
204
+ # use all columns in tables from views
205
+ columns_by_table = columns(tables).select { |c| possible_columns.include?(c[:column]) || tables_from_views.include?(c[:table]) }.group_by { |c| c[:table] }
186
206
 
187
207
  # create single column indexes
188
208
  create_hypothetical_indexes_helper(columns_by_table, 1, candidates)
@@ -265,14 +285,16 @@ module Dexter
265
285
  end
266
286
  end
267
287
 
288
+ savings_ratio = (1 - @min_cost_savings_pct / 100.0)
289
+
268
290
  queries.each do |query|
269
291
  if query.explainable? && query.high_cost?
270
292
  new_cost, new_cost2 = query.costs[1..2]
271
293
 
272
- cost_savings = new_cost < query.initial_cost * 0.5
294
+ cost_savings = new_cost < query.initial_cost * savings_ratio
273
295
 
274
296
  # set high bar for multicolumn indexes
275
- cost_savings2 = new_cost > 100 && new_cost2 < new_cost * 0.5
297
+ cost_savings2 = new_cost > 100 && new_cost2 < new_cost * savings_ratio
276
298
 
277
299
  key = cost_savings2 ? 2 : 1
278
300
  query_indexes = hypo_indexes_from_plan(index_name_to_columns, query.plans[key], index_set)
@@ -283,10 +305,55 @@ module Dexter
283
305
  cost_savings2 = false
284
306
  end
285
307
 
286
- # TODO if multiple indexes are found (for either single or multicolumn)
308
+ suggest_index = cost_savings || cost_savings2
309
+
310
+ cost_savings3 = false
311
+ new_cost3 = nil
312
+
313
+ # if multiple indexes are found (for either single or multicolumn)
287
314
  # determine the impact of each individually
288
- # for now, be conservative and don't suggest if more than one index
289
- suggest_index = (cost_savings || cost_savings2) && query_indexes.size == 1
315
+ # there may be a better single index that we're not considering
316
+ # that didn't get picked up by pass1 or pass2
317
+ # TODO clean this up
318
+ # TODO suggest more than one index from this if savings are there
319
+ if suggest_index && query_indexes.size > 1
320
+ winning_index = nil
321
+ winning_cost = nil
322
+ winning_plan = nil
323
+
324
+ query_indexes.each do |query_index|
325
+ reset_hypothetical_indexes
326
+ create_hypothetical_index(query_index[:table], query_index[:columns].map { |v| {column: v} })
327
+ plan3 = plan(query.statement)
328
+ cost3 = plan3["Total Cost"]
329
+
330
+ if !winning_cost || cost3 < winning_cost
331
+ winning_cost = cost3
332
+ winning_index = query_index
333
+ winning_plan = plan3
334
+ end
335
+ end
336
+
337
+ query.plans << winning_plan
338
+
339
+ # duplicated from above
340
+ # TODO DRY
341
+ use_winning =
342
+ if cost_savings2
343
+ new_cost > 100 && winning_cost < new_cost * savings_ratio
344
+ else
345
+ winning_cost < query.initial_cost * savings_ratio
346
+ end
347
+
348
+ if use_winning
349
+ query_indexes = [winning_index]
350
+ cost_savings3 = true
351
+ new_cost3 = winning_cost
352
+ query.pass3_indexes = query_indexes
353
+ else
354
+ suggest_index = false
355
+ end
356
+ end
290
357
 
291
358
  if suggest_index
292
359
  query_indexes.each do |index|
@@ -299,7 +366,7 @@ module Dexter
299
366
  query.suggest_index = suggest_index
300
367
  query.new_cost =
301
368
  if suggest_index
302
- cost_savings2 ? new_cost2 : new_cost
369
+ cost_savings3 ? new_cost3 : (cost_savings2 ? new_cost2 : new_cost)
303
370
  else
304
371
  query.initial_cost
305
372
  end
@@ -368,9 +435,12 @@ module Dexter
368
435
  log "Start: #{query.costs[0]}"
369
436
  log "Pass1: #{query.costs[1]} : #{log_indexes(query.pass1_indexes || [])}"
370
437
  log "Pass2: #{query.costs[2]} : #{log_indexes(query.pass2_indexes || [])}"
438
+ if query.costs[3]
439
+ log "Pass3: #{query.costs[3]} : #{log_indexes(query.pass3_indexes || [])}"
440
+ end
371
441
  log "Final: #{query.new_cost} : #{log_indexes(query.suggest_index ? query_indexes : [])}"
372
442
  if query_indexes.size == 1 && !query.suggest_index
373
- log "Need 50% cost savings to suggest index"
443
+ log "Need #{@min_cost_savings_pct}% cost savings to suggest index"
374
444
  end
375
445
  else
376
446
  log "Could not run explain"
@@ -449,11 +519,15 @@ module Dexter
449
519
  columns_by_table.each do |table, cols|
450
520
  # no reason to use btree index for json columns
451
521
  cols.reject { |c| ["json", "jsonb"].include?(c[:type]) }.permutation(n) do |col_set|
452
- candidates[col_set] = execute("SELECT * FROM hypopg_create_index('CREATE INDEX ON #{quote_ident(table)} (#{col_set.map { |c| quote_ident(c[:column]) }.join(", ")})')").first["indexname"]
522
+ candidates[col_set] = create_hypothetical_index(table, col_set)
453
523
  end
454
524
  end
455
525
  end
456
526
 
527
+ def create_hypothetical_index(table, col_set)
528
+ execute("SELECT * FROM hypopg_create_index('CREATE INDEX ON #{quote_ident(table)} (#{col_set.map { |c| quote_ident(c[:column]) }.join(", ")})')").first["indexname"]
529
+ end
530
+
457
531
  def database_tables
458
532
  result = execute <<-SQL
459
533
  SELECT
@@ -466,6 +540,43 @@ module Dexter
466
540
  result.map { |r| r["table_name"] }
467
541
  end
468
542
 
543
+ def materialized_views
544
+ if server_version_num >= 90300
545
+ result = execute <<-SQL
546
+ SELECT
547
+ schemaname || '.' || matviewname AS table_name
548
+ FROM
549
+ pg_matviews
550
+ SQL
551
+ result.map { |r| r["table_name"] }
552
+ else
553
+ []
554
+ end
555
+ end
556
+
557
+ def server_version_num
558
+ execute("SHOW server_version_num").first["server_version_num"].to_i
559
+ end
560
+
561
+ def database_view_tables
562
+ result = execute <<-SQL
563
+ SELECT
564
+ schemaname || '.' || viewname AS table_name,
565
+ definition
566
+ FROM
567
+ pg_views
568
+ WHERE
569
+ schemaname NOT IN ('information_schema', 'pg_catalog')
570
+ SQL
571
+
572
+ view_tables = {}
573
+ result.each do |row|
574
+ view_tables[row["table_name"]] = PgQuery.parse(row["definition"]).tables
575
+ end
576
+
577
+ view_tables
578
+ end
579
+
469
580
  def stat_statements
470
581
  result = execute <<-SQL
471
582
  SELECT
@@ -515,13 +626,15 @@ module Dexter
515
626
  def columns(tables)
516
627
  columns = execute <<-SQL
517
628
  SELECT
518
- table_schema || '.' || table_name AS table_name,
519
- column_name,
520
- data_type
521
- FROM
522
- information_schema.columns
523
- WHERE
524
- table_schema || '.' || table_name IN (#{tables.map { |t| quote(t) }.join(", ")})
629
+ s.nspname || '.' || t.relname AS table_name,
630
+ a.attname AS column_name,
631
+ pg_catalog.format_type(a.atttypid, a.atttypmod) AS data_type
632
+ FROM pg_attribute a
633
+ JOIN pg_class t on a.attrelid = t.oid
634
+ JOIN pg_namespace s on t.relnamespace = s.oid
635
+ WHERE a.attnum > 0
636
+ AND NOT a.attisdropped
637
+ AND s.nspname || '.' || t.relname IN (#{tables.map { |t| quote(t) }.join(", ")})
525
638
  ORDER BY
526
639
  1, 2
527
640
  SQL
data/lib/dexter/query.rb CHANGED
@@ -2,7 +2,7 @@ module Dexter
2
2
  class Query
3
3
  attr_reader :statement, :fingerprint, :plans
4
4
  attr_writer :tables
5
- attr_accessor :missing_tables, :new_cost, :total_time, :calls, :indexes, :suggest_index, :pass1_indexes, :pass2_indexes, :candidate_tables
5
+ attr_accessor :missing_tables, :new_cost, :total_time, :calls, :indexes, :suggest_index, :pass1_indexes, :pass2_indexes, :pass3_indexes, :candidate_tables, :tables_from_views
6
6
 
7
7
  def initialize(statement, fingerprint = nil)
8
8
  @statement = statement
@@ -11,6 +11,7 @@ module Dexter
11
11
  end
12
12
  @fingerprint = fingerprint
13
13
  @plans = []
14
+ @tables_from_views = []
14
15
  end
15
16
 
16
17
  def tables
@@ -1,3 +1,3 @@
1
1
  module Dexter
2
- VERSION = "0.3.2"
2
+ VERSION = "0.3.3"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pgdexter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.3.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-01-05 00:00:00.000000000 Z
11
+ date: 2018-02-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: slop