pgdexter 0.5.6 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,25 @@
1
+ module Dexter
2
+ class CsvLogParser < LogParser
3
+ FIRST_LINE_REGEX = /\A.+/
4
+
5
+ def perform(collector)
6
+ CSV.new(@logfile.to_io).each do |row|
7
+ message = row[13]
8
+ detail = row[14]
9
+
10
+ if (m = REGEX.match(message))
11
+ # replace first line with match
12
+ # needed for multiline queries
13
+ active_line = message.sub(FIRST_LINE_REGEX, m[3])
14
+
15
+ add_parameters(active_line, detail) if detail
16
+ collector.add(active_line, m[1].to_f)
17
+ end
18
+ end
19
+ rescue CSV::MalformedCSVError => e
20
+ raise Error, "ERROR: #{e.message}"
21
+ ensure
22
+ @logfile.close
23
+ end
24
+ end
25
+ end
@@ -2,7 +2,7 @@ module Dexter
2
2
  class JsonLogParser < LogParser
3
3
  FIRST_LINE_REGEX = /\A.+/
4
4
 
5
- def perform
5
+ def perform(collector)
6
6
  @logfile.each_line do |line|
7
7
  row = JSON.parse(line.chomp)
8
8
  if (m = REGEX.match(row["message"]))
@@ -11,11 +11,13 @@ module Dexter
11
11
  active_line = row["message"].sub(FIRST_LINE_REGEX, m[3])
12
12
 
13
13
  add_parameters(active_line, row["detail"]) if row["detail"]
14
- process_entry(active_line, m[1].to_f)
14
+ collector.add(active_line, m[1].to_f)
15
15
  end
16
16
  end
17
17
  rescue JSON::ParserError => e
18
- raise Dexter::Abort, "ERROR: #{e.message}"
18
+ raise Error, "ERROR: #{e.message}"
19
+ ensure
20
+ @logfile.close
19
21
  end
20
22
  end
21
23
  end
@@ -1,20 +1,13 @@
1
1
  module Dexter
2
2
  class LogParser
3
- include Logging
4
-
5
3
  REGEX = /duration: (\d+\.\d+) ms (statement|execute [^:]+): (.+)/
6
4
 
7
- def initialize(logfile, collector)
5
+ def initialize(logfile)
8
6
  @logfile = logfile
9
- @collector = collector
10
7
  end
11
8
 
12
9
  private
13
10
 
14
- def process_entry(query, duration)
15
- @collector.add(query, duration)
16
- end
17
-
18
11
  def add_parameters(active_line, details)
19
12
  if details.start_with?("parameters: ")
20
13
  params = Hash[details[12..-1].split(", ").map { |s| s.split(" = ", 2) }]
@@ -1,9 +1,10 @@
1
1
  module Dexter
2
2
  class SqlLogParser < LogParser
3
- def perform
3
+ def perform(collector)
4
4
  # TODO support streaming
5
5
  @logfile.read.split(";").each do |statement|
6
- process_entry(statement, 0)
6
+ statement = statement.strip
7
+ collector.add(statement, 0) unless statement.empty?
7
8
  end
8
9
  end
9
10
  end
@@ -3,20 +3,16 @@ module Dexter
3
3
  LINE_SEPERATOR = ": ".freeze
4
4
  DETAIL_LINE = "DETAIL: ".freeze
5
5
 
6
- def perform
7
- process_stderr(@logfile.each_line)
8
- end
9
-
10
- def process_stderr(rows)
6
+ def perform(collector)
11
7
  active_line = nil
12
8
  duration = nil
13
9
 
14
- rows.each do |line|
10
+ @logfile.each_line do |line|
15
11
  if active_line
16
12
  if line.include?(DETAIL_LINE)
17
13
  add_parameters(active_line, line.chomp.split(DETAIL_LINE)[1])
18
14
  elsif line.include?(LINE_SEPERATOR)
19
- process_entry(active_line, duration)
15
+ collector.add(active_line, duration)
20
16
  active_line = nil
21
17
  else
22
18
  active_line << line
@@ -28,7 +24,7 @@ module Dexter
28
24
  active_line = m[3]
29
25
  end
30
26
  end
31
- process_entry(active_line, duration) if active_line
27
+ collector.add(active_line, duration) if active_line
32
28
  end
33
29
  end
34
30
  end
@@ -2,36 +2,22 @@ module Dexter
2
2
  class Processor
3
3
  include Logging
4
4
 
5
- def initialize(logfile, options)
6
- @logfile = logfile
7
-
8
- @collector = Collector.new(min_time: options[:min_time], min_calls: options[:min_calls])
9
- @indexer = Indexer.new(options)
10
-
11
- @log_parser =
12
- if @logfile == :pg_stat_activity
13
- PgStatActivityParser.new(@indexer, @collector)
14
- elsif options[:input_format] == "csv"
15
- CsvLogParser.new(logfile, @collector)
16
- elsif options[:input_format] == "json"
17
- JsonLogParser.new(logfile, @collector)
18
- elsif options[:input_format] == "sql"
19
- SqlLogParser.new(logfile, @collector)
20
- else
21
- StderrLogParser.new(logfile, @collector)
22
- end
5
+ def initialize(source, collector, indexer, interval:)
6
+ @source = source
7
+ @collector = collector
8
+ @indexer = indexer
23
9
 
24
10
  @starting_interval = 3
25
- @interval = options[:interval]
11
+ @interval = interval
26
12
 
27
13
  @mutex = Mutex.new
28
14
  @last_checked_at = {}
29
15
 
30
- log "Started"
16
+ log "Started" if !@source.is_a?(PgStatStatementsSource) && !@source.is_a?(StatementSource)
31
17
  end
32
18
 
33
19
  def perform
34
- if [STDIN].include?(@logfile)
20
+ if @source.is_a?(LogSource) && @source.stdin?
35
21
  Thread.abort_on_exception = true
36
22
  Thread.new do
37
23
  sleep(@starting_interval)
@@ -47,9 +33,9 @@ module Dexter
47
33
  end
48
34
 
49
35
  begin
50
- @log_parser.perform
36
+ @source.perform(@collector)
51
37
  rescue Errno::ENOENT => e
52
- raise Dexter::Abort, "ERROR: #{e.message}"
38
+ raise Error, "ERROR: #{e.message}"
53
39
  end
54
40
 
55
41
  process_queries
@@ -74,7 +60,7 @@ module Dexter
74
60
  end
75
61
  end
76
62
 
77
- log "Processing #{queries.size} new query fingerprints"
63
+ log "Processing #{queries.size} new query fingerprints" unless @source.is_a?(StatementSource)
78
64
  @indexer.process_queries(queries) if queries.any?
79
65
  end
80
66
  end
data/lib/dexter/query.rb CHANGED
@@ -1,36 +1,32 @@
1
1
  module Dexter
2
2
  class Query
3
- attr_reader :statement, :fingerprint, :plans
4
- attr_writer :tables
5
- attr_accessor :missing_tables, :new_cost, :total_time, :calls, :indexes, :suggest_index, :pass1_indexes, :pass2_indexes, :pass3_indexes, :candidate_tables, :tables_from_views, :candidates
3
+ attr_reader :statement, :fingerprint, :total_time, :calls, :plans
4
+ attr_accessor :tables, :missing_tables, :new_cost, :indexes, :suggest_index, :pass1_indexes, :pass2_indexes, :pass3_indexes, :candidate_tables, :tables_from_views, :index_mapping, :columns, :candidate_columns
6
5
 
7
- def initialize(statement, fingerprint = nil)
6
+ def initialize(statement, fingerprint = nil, total_time: nil, calls: nil)
8
7
  @statement = statement
9
- unless fingerprint
10
- fingerprint = PgQuery.fingerprint(statement) rescue "unknown"
11
- end
12
8
  @fingerprint = fingerprint
9
+ @total_time = total_time
10
+ @calls = calls
13
11
  @plans = []
14
12
  @tables_from_views = []
13
+ @candidate_tables = []
14
+ @columns = []
15
+ @candidate_columns = []
15
16
  end
16
17
 
17
- def tables
18
- @tables ||= begin
19
- parse ? parse.tables : []
20
- rescue => e
21
- # possible pg_query bug
22
- $stderr.puts "Error extracting tables. Please report to https://github.com/ankane/dexter/issues"
23
- $stderr.puts "#{e.class.name}: #{e.message}"
24
- $stderr.puts statement
25
- []
18
+ def parser_result
19
+ unless defined?(@parser_result)
20
+ @parser_result = PgQuery.parse(statement) rescue nil
26
21
  end
22
+ @parser_result
27
23
  end
28
24
 
29
25
  def tree
30
- parse.tree
26
+ parser_result.tree
31
27
  end
32
28
 
33
- def explainable?
29
+ def fully_analyzed?
34
30
  plans.size >= 3
35
31
  end
36
32
 
@@ -41,18 +37,5 @@ module Dexter
41
37
  def initial_cost
42
38
  costs[0]
43
39
  end
44
-
45
- def high_cost?
46
- initial_cost && initial_cost >= 100
47
- end
48
-
49
- private
50
-
51
- def parse
52
- unless defined?(@parse)
53
- @parse = PgQuery.parse(statement) rescue nil
54
- end
55
- @parse
56
- end
57
40
  end
58
41
  end
@@ -0,0 +1,26 @@
1
+ module Dexter
2
+ class LogSource
3
+ def initialize(logfile, input_format)
4
+ @log_parser =
5
+ case input_format
6
+ when "csv"
7
+ CsvLogParser.new(logfile)
8
+ when "json"
9
+ JsonLogParser.new(logfile)
10
+ when "sql"
11
+ SqlLogParser.new(logfile)
12
+ else
13
+ StderrLogParser.new(logfile)
14
+ end
15
+ @stdin = logfile == STDIN
16
+ end
17
+
18
+ def perform(collector)
19
+ @log_parser.perform(collector)
20
+ end
21
+
22
+ def stdin?
23
+ @stdin
24
+ end
25
+ end
26
+ end
@@ -1,6 +1,10 @@
1
1
  module Dexter
2
- class PgStatActivityParser < LogParser
3
- def perform
2
+ class PgStatActivitySource
3
+ def initialize(connection)
4
+ @connection = connection
5
+ end
6
+
7
+ def perform(collector)
4
8
  previous_queries = {}
5
9
 
6
10
  10.times do
@@ -11,7 +15,7 @@ module Dexter
11
15
  if row["state"] == "active"
12
16
  active_queries[row["id"]] = row
13
17
  else
14
- process_entry(row["query"], row["duration_ms"].to_f)
18
+ collector.add(row["query"], row["duration_ms"].to_f)
15
19
  processed_queries[row["id"]] = true
16
20
  end
17
21
  end
@@ -19,13 +23,13 @@ module Dexter
19
23
  # store queries after they complete
20
24
  previous_queries.each do |id, row|
21
25
  if !active_queries[id] && !processed_queries[id]
22
- process_entry(row["query"], row["duration_ms"].to_f)
26
+ collector.add(row["query"], row["duration_ms"].to_f)
23
27
  end
24
28
  end
25
29
 
26
30
  previous_queries = active_queries
27
31
 
28
- sleep(0.1)
32
+ sleep($dexter_test ? 0 : 0.1)
29
33
  end
30
34
  end
31
35
 
@@ -44,7 +48,7 @@ module Dexter
44
48
  ORDER BY
45
49
  1
46
50
  SQL
47
- @logfile.send(:execute, sql)
51
+ @connection.execute(sql)
48
52
  end
49
53
  end
50
54
  end
@@ -0,0 +1,34 @@
1
+ module Dexter
2
+ class PgStatStatementsSource
3
+ def initialize(connection)
4
+ @connection = connection
5
+ end
6
+
7
+ def perform(collector)
8
+ stat_statements.each do |row|
9
+ collector.add(row["query"], row["duration_ms"].to_f, row["calls"].to_i)
10
+ end
11
+ end
12
+
13
+ # could group, sum, and filter min_time/min_calls in SQL, but keep simple for now
14
+ def stat_statements
15
+ sql = <<~SQL
16
+ SELECT
17
+ query,
18
+ total_plan_time + total_exec_time AS duration_ms,
19
+ calls
20
+ FROM
21
+ pg_stat_statements
22
+ INNER JOIN
23
+ pg_database ON pg_database.oid = pg_stat_statements.dbid
24
+ WHERE
25
+ datname = current_database()
26
+ ORDER BY
27
+ 1
28
+ SQL
29
+ @connection.execute(sql)
30
+ rescue PG::UndefinedTable => e
31
+ raise Error, e.message
32
+ end
33
+ end
34
+ end
@@ -0,0 +1,11 @@
1
+ module Dexter
2
+ class StatementSource
3
+ def initialize(statement)
4
+ @statement = statement
5
+ end
6
+
7
+ def perform(collector)
8
+ collector.add(@statement, 0, 0, true)
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,120 @@
1
+ module Dexter
2
+ class TableResolver
3
+ include Logging
4
+
5
+ def initialize(connection, queries, log_level:)
6
+ @connection = connection
7
+ @queries = queries
8
+ @log_level = log_level
9
+ end
10
+
11
+ def perform
12
+ tables = Set.new(database_tables + materialized_views)
13
+ no_schema_tables = self.no_schema_tables(tables)
14
+ view_tables = self.view_tables(no_schema_tables)
15
+
16
+ @queries.each do |query|
17
+ # add schema to table if needed
18
+ query_tables = self.tables(query).map { |t| no_schema_tables[t] || t }
19
+
20
+ # substitute view tables
21
+ query.tables = query_tables.flat_map { |t| view_tables[t] || [t] }.uniq
22
+ query.tables_from_views = query.tables - query_tables
23
+
24
+ query.missing_tables = !query.tables.all? { |t| tables.include?(t) }
25
+ end
26
+ end
27
+
28
+ private
29
+
30
+ def tables(query)
31
+ query.parser_result&.tables || []
32
+ rescue => e
33
+ # possible pg_query bug
34
+ $stderr.puts "Error extracting tables. Please report to https://github.com/ankane/dexter/issues"
35
+ $stderr.puts "#{e.class.name}: #{e.message}"
36
+ $stderr.puts query.statement
37
+ []
38
+ end
39
+
40
+ def no_schema_tables(tables)
41
+ search_path_index = Hash[search_path.map.with_index.to_a]
42
+ tables.group_by { |t| t.split(".")[-1] }.to_h do |group, t2|
43
+ [group, t2.sort_by { |t| [search_path_index[t.split(".")[0]] || 1000000, t] }[0]]
44
+ end
45
+ end
46
+
47
+ def view_tables(no_schema_tables)
48
+ # add tables from views
49
+ view_tables = database_view_tables
50
+ view_tables.each do |v, vt|
51
+ view_tables[v] = vt.map { |t| no_schema_tables[t] || t }
52
+ end
53
+
54
+ # fully resolve tables
55
+ # make sure no views in result
56
+ view_tables.each do |v, vt|
57
+ view_tables[v] = vt.flat_map { |t| view_tables[t] || [t] }.uniq
58
+ end
59
+
60
+ view_tables
61
+ end
62
+
63
+ def execute(...)
64
+ @connection.execute(...)
65
+ end
66
+
67
+ def search_path
68
+ execute("SELECT current_schemas(true)")[0]["current_schemas"][1..-2].split(",")
69
+ end
70
+
71
+ def database_tables
72
+ result = execute <<~SQL
73
+ SELECT
74
+ table_schema || '.' || table_name AS table_name
75
+ FROM
76
+ information_schema.tables
77
+ WHERE
78
+ table_catalog = current_database()
79
+ AND table_type IN ('BASE TABLE', 'VIEW')
80
+ SQL
81
+ result.map { |r| r["table_name"] }
82
+ end
83
+
84
+ def materialized_views
85
+ result = execute <<~SQL
86
+ SELECT
87
+ schemaname || '.' || matviewname AS table_name
88
+ FROM
89
+ pg_matviews
90
+ SQL
91
+ result.map { |r| r["table_name"] }
92
+ end
93
+
94
+ def views
95
+ execute <<~SQL
96
+ SELECT
97
+ schemaname || '.' || viewname AS table_name,
98
+ definition
99
+ FROM
100
+ pg_views
101
+ WHERE
102
+ schemaname NOT IN ('information_schema', 'pg_catalog')
103
+ SQL
104
+ end
105
+
106
+ def database_view_tables
107
+ view_tables = {}
108
+ views.each do |row|
109
+ begin
110
+ view_tables[row["table_name"]] = PgQuery.parse(row["definition"]).tables
111
+ rescue PgQuery::ParseError
112
+ if @log_level.start_with?("debug")
113
+ log colorize("ERROR: Cannot parse view definition: #{row["table_name"]}", :red)
114
+ end
115
+ end
116
+ end
117
+ view_tables
118
+ end
119
+ end
120
+ end
@@ -1,3 +1,3 @@
1
1
  module Dexter
2
- VERSION = "0.5.6"
2
+ VERSION = "0.6.0"
3
3
  end
data/lib/dexter.rb CHANGED
@@ -13,19 +13,27 @@ require "time"
13
13
  require_relative "dexter/logging"
14
14
  require_relative "dexter/client"
15
15
  require_relative "dexter/collector"
16
+ require_relative "dexter/column_resolver"
17
+ require_relative "dexter/connection"
16
18
  require_relative "dexter/indexer"
17
19
  require_relative "dexter/processor"
18
20
  require_relative "dexter/query"
21
+ require_relative "dexter/table_resolver"
19
22
  require_relative "dexter/version"
20
23
 
21
24
  # parsers
22
- require_relative "dexter/log_parser"
23
- require_relative "dexter/csv_log_parser"
24
- require_relative "dexter/json_log_parser"
25
- require_relative "dexter/pg_stat_activity_parser"
26
- require_relative "dexter/sql_log_parser"
27
- require_relative "dexter/stderr_log_parser"
25
+ require_relative "dexter/parsers/log_parser"
26
+ require_relative "dexter/parsers/csv_log_parser"
27
+ require_relative "dexter/parsers/json_log_parser"
28
+ require_relative "dexter/parsers/sql_log_parser"
29
+ require_relative "dexter/parsers/stderr_log_parser"
30
+
31
+ # sources
32
+ require_relative "dexter/sources/log_source"
33
+ require_relative "dexter/sources/pg_stat_activity_source"
34
+ require_relative "dexter/sources/pg_stat_statements_source"
35
+ require_relative "dexter/sources/statement_source"
28
36
 
29
37
  module Dexter
30
- class Abort < StandardError; end
38
+ class Error < StandardError; end
31
39
  end
data/lib/pgdexter.rb ADDED
@@ -0,0 +1 @@
1
+ require_relative "dexter"
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pgdexter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.6
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  bindir: exe
9
9
  cert_chain: []
10
- date: 2025-02-01 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: csv
@@ -29,14 +29,14 @@ dependencies:
29
29
  requirements:
30
30
  - - ">="
31
31
  - !ruby/object:Gem::Version
32
- version: 0.18.2
32
+ version: '1'
33
33
  type: :runtime
34
34
  prerelease: false
35
35
  version_requirements: !ruby/object:Gem::Requirement
36
36
  requirements:
37
37
  - - ">="
38
38
  - !ruby/object:Gem::Version
39
- version: 0.18.2
39
+ version: '1'
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: pg_query
42
42
  requirement: !ruby/object:Gem::Requirement
@@ -78,17 +78,24 @@ files:
78
78
  - lib/dexter.rb
79
79
  - lib/dexter/client.rb
80
80
  - lib/dexter/collector.rb
81
- - lib/dexter/csv_log_parser.rb
81
+ - lib/dexter/column_resolver.rb
82
+ - lib/dexter/connection.rb
82
83
  - lib/dexter/indexer.rb
83
- - lib/dexter/json_log_parser.rb
84
- - lib/dexter/log_parser.rb
85
84
  - lib/dexter/logging.rb
86
- - lib/dexter/pg_stat_activity_parser.rb
85
+ - lib/dexter/parsers/csv_log_parser.rb
86
+ - lib/dexter/parsers/json_log_parser.rb
87
+ - lib/dexter/parsers/log_parser.rb
88
+ - lib/dexter/parsers/sql_log_parser.rb
89
+ - lib/dexter/parsers/stderr_log_parser.rb
87
90
  - lib/dexter/processor.rb
88
91
  - lib/dexter/query.rb
89
- - lib/dexter/sql_log_parser.rb
90
- - lib/dexter/stderr_log_parser.rb
92
+ - lib/dexter/sources/log_source.rb
93
+ - lib/dexter/sources/pg_stat_activity_source.rb
94
+ - lib/dexter/sources/pg_stat_statements_source.rb
95
+ - lib/dexter/sources/statement_source.rb
96
+ - lib/dexter/table_resolver.rb
91
97
  - lib/dexter/version.rb
98
+ - lib/pgdexter.rb
92
99
  homepage: https://github.com/ankane/dexter
93
100
  licenses:
94
101
  - MIT
@@ -100,14 +107,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
100
107
  requirements:
101
108
  - - ">="
102
109
  - !ruby/object:Gem::Version
103
- version: '2.7'
110
+ version: '3'
104
111
  required_rubygems_version: !ruby/object:Gem::Requirement
105
112
  requirements:
106
113
  - - ">="
107
114
  - !ruby/object:Gem::Version
108
115
  version: '0'
109
116
  requirements: []
110
- rubygems_version: 3.6.2
117
+ rubygems_version: 3.6.7
111
118
  specification_version: 4
112
119
  summary: The automatic indexer for Postgres
113
120
  test_files: []
@@ -1,24 +0,0 @@
1
- module Dexter
2
- class CsvLogParser < LogParser
3
- FIRST_LINE_REGEX = /\A.+/
4
-
5
- def perform
6
- CSV.new(@logfile.to_io).each do |row|
7
- process_csv_row(row[13], row[14])
8
- end
9
- rescue CSV::MalformedCSVError => e
10
- raise Dexter::Abort, "ERROR: #{e.message}"
11
- end
12
-
13
- def process_csv_row(message, detail)
14
- if (m = REGEX.match(message))
15
- # replace first line with match
16
- # needed for multiline queries
17
- active_line = message.sub(FIRST_LINE_REGEX, m[3])
18
-
19
- add_parameters(active_line, detail) if detail
20
- process_entry(active_line, m[1].to_f)
21
- end
22
- end
23
- end
24
- end