pgdexter 0.5.6 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/LICENSE.txt +1 -1
- data/README.md +4 -14
- data/lib/dexter/client.rb +45 -28
- data/lib/dexter/collector.rb +11 -19
- data/lib/dexter/column_resolver.rb +74 -0
- data/lib/dexter/connection.rb +92 -0
- data/lib/dexter/indexer.rb +189 -377
- data/lib/dexter/parsers/csv_log_parser.rb +25 -0
- data/lib/dexter/{json_log_parser.rb → parsers/json_log_parser.rb} +5 -3
- data/lib/dexter/{log_parser.rb → parsers/log_parser.rb} +1 -8
- data/lib/dexter/{sql_log_parser.rb → parsers/sql_log_parser.rb} +3 -2
- data/lib/dexter/{stderr_log_parser.rb → parsers/stderr_log_parser.rb} +4 -8
- data/lib/dexter/processor.rb +10 -24
- data/lib/dexter/query.rb +14 -31
- data/lib/dexter/sources/log_source.rb +26 -0
- data/lib/dexter/{pg_stat_activity_parser.rb → sources/pg_stat_activity_source.rb} +10 -6
- data/lib/dexter/sources/pg_stat_statements_source.rb +34 -0
- data/lib/dexter/sources/statement_source.rb +11 -0
- data/lib/dexter/table_resolver.rb +120 -0
- data/lib/dexter/version.rb +1 -1
- data/lib/dexter.rb +15 -7
- data/lib/pgdexter.rb +1 -0
- metadata +19 -12
- data/lib/dexter/csv_log_parser.rb +0 -24
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 234a20833730445c80e03b5d789f174f9f50e9fb443614938912aaa1f74a7bd8
|
4
|
+
data.tar.gz: b286dd2c8735f17cd943c71405a2863699d4540487f5f1c730f5c19e6337da79
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6464fe3a0d24567a1759272ca8ab82b6462c88c5221f10a7784a0fdf41234268896b0368fbd20e442ce8f7a307d917c6779f484006c1c4f7971be07a8ba26de1
|
7
|
+
data.tar.gz: e3b0e7abc058f8f250e431d8b332e761a83bbc737b782449804a663f251dd7f33dc3213cc247dfb6be06b636c1d166af54803c13d7541dd9a8479baecaa86e8c
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
## 0.6.0 (2025-06-01)
|
2
|
+
|
3
|
+
- Added Linux packages for Ubuntu 24.04 and Debian 12
|
4
|
+
- Fixed error with hypothetical index limit
|
5
|
+
- Dropped support for Linux package for Ubuntu 20.04
|
6
|
+
- Dropped support for Ruby < 3
|
7
|
+
- Dropped support for Postgres < 13
|
8
|
+
|
1
9
|
## 0.5.6 (2025-02-01)
|
2
10
|
|
3
11
|
- Updated pg_query
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -91,7 +91,7 @@ Dexter can collect queries from a number of sources.
|
|
91
91
|
|
92
92
|
- [Query stats](#query-stats)
|
93
93
|
- [Live queries](#live-queries)
|
94
|
-
- [Log files](#log-
|
94
|
+
- [Log files](#log-files)
|
95
95
|
- [SQL files](#sql-files)
|
96
96
|
|
97
97
|
### Query Stats
|
@@ -138,7 +138,7 @@ For real-time indexing, pipe your logfile:
|
|
138
138
|
tail -F -n +1 postgresql.log | dexter <connection-options> --stdin
|
139
139
|
```
|
140
140
|
|
141
|
-
And pass `--input-format
|
141
|
+
And pass `--input-format csv` or `--input-format json` if needed.
|
142
142
|
|
143
143
|
### SQL Files
|
144
144
|
|
@@ -232,10 +232,10 @@ If compilation fails with `fatal error: postgres.h: No such file or directory`,
|
|
232
232
|
For Ubuntu and Debian, use:
|
233
233
|
|
234
234
|
```sh
|
235
|
-
sudo apt-get install postgresql-server-dev-
|
235
|
+
sudo apt-get install postgresql-server-dev-16
|
236
236
|
```
|
237
237
|
|
238
|
-
Note: Replace `
|
238
|
+
Note: Replace `16` with your Postgres server version
|
239
239
|
|
240
240
|
## Additional Installation Methods
|
241
241
|
|
@@ -282,16 +282,6 @@ gem install specific_install
|
|
282
282
|
gem specific_install https://github.com/ankane/dexter.git
|
283
283
|
```
|
284
284
|
|
285
|
-
## Upgrade Notes
|
286
|
-
|
287
|
-
### 0.5.0
|
288
|
-
|
289
|
-
The `--stdin` option is now required to read queries from stdin.
|
290
|
-
|
291
|
-
```sh
|
292
|
-
tail -F -n +1 postgresql.log | dexter <connection-options> --stdin
|
293
|
-
```
|
294
|
-
|
295
285
|
## Thanks
|
296
286
|
|
297
287
|
This software wouldn’t be possible without [HypoPG](https://github.com/HypoPG/hypopg), which allows you to create hypothetical indexes, and [pg_query](https://github.com/lfittl/pg_query), which allows you to parse and fingerprint queries. A big thanks to Dalibo and Lukas Fittl respectively. Also, thanks to YugabyteDB for [this article](https://dev.to/yugabyte/explain-from-pgstatstatements-normalized-queries-how-to-always-get-the-generic-plan-in--5cfi) on how to explain normalized queries.
|
data/lib/dexter/client.rb
CHANGED
@@ -6,8 +6,8 @@ module Dexter
|
|
6
6
|
attr_reader :arguments, :options
|
7
7
|
|
8
8
|
def self.start
|
9
|
-
|
10
|
-
rescue
|
9
|
+
Client.new(ARGV).perform
|
10
|
+
rescue Error => e
|
11
11
|
abort colorize(e.message.strip, :red)
|
12
12
|
end
|
13
13
|
|
@@ -19,33 +19,44 @@ module Dexter
|
|
19
19
|
STDOUT.sync = true
|
20
20
|
STDERR.sync = true
|
21
21
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
22
|
+
connection = Connection.new(**options.slice(:dbname, :host, :port, :username, :log_sql))
|
23
|
+
connection.setup(options[:enable_hypopg])
|
24
|
+
|
25
|
+
source =
|
26
|
+
if options[:statement]
|
27
|
+
# TODO raise error for --interval, --min-calls, --min-time
|
28
|
+
StatementSource.new(options[:statement])
|
29
|
+
elsif options[:pg_stat_statements]
|
30
|
+
# TODO support streaming option
|
31
|
+
PgStatStatementsSource.new(connection)
|
32
|
+
elsif options[:pg_stat_activity]
|
33
|
+
PgStatActivitySource.new(connection)
|
34
|
+
elsif arguments.any?
|
35
|
+
ARGV.replace(arguments)
|
36
|
+
if !options[:input_format]
|
37
|
+
ext = ARGV.map { |v| File.extname(v) }.uniq
|
38
|
+
options[:input_format] = ext.first[1..-1] if ext.size == 1
|
39
|
+
end
|
40
|
+
LogSource.new(ARGF, options[:input_format])
|
41
|
+
elsif options[:stdin]
|
42
|
+
LogSource.new(STDIN, options[:input_format])
|
43
|
+
else
|
44
|
+
raise Error, "Specify a source of queries: --pg-stat-statements, --pg-stat-activity, --stdin, or a path"
|
35
45
|
end
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
46
|
+
|
47
|
+
collector = Collector.new(**options.slice(:min_time, :min_calls))
|
48
|
+
|
49
|
+
indexer = Indexer.new(connection: connection, **options)
|
50
|
+
|
51
|
+
Processor.new(source, collector, indexer, **options.slice(:interval)).perform
|
42
52
|
end
|
43
53
|
|
44
54
|
def parse_args(args)
|
45
55
|
opts = Slop.parse(args) do |o|
|
46
|
-
o.banner =
|
47
|
-
|
48
|
-
|
56
|
+
o.banner = <<~BANNER
|
57
|
+
Usage:
|
58
|
+
dexter [options]
|
59
|
+
BANNER
|
49
60
|
|
50
61
|
o.separator "Input options:"
|
51
62
|
o.string "--input-format", "input format"
|
@@ -64,8 +75,7 @@ module Dexter
|
|
64
75
|
|
65
76
|
o.separator "Processing options:"
|
66
77
|
o.integer "--interval", "time to wait between processing queries, in seconds", default: 60
|
67
|
-
|
68
|
-
o.float "--min-calls", "only process queries that have been called a certain number of times", default: 0
|
78
|
+
o.integer "--min-calls", "only process queries that have been called a certain number of times", default: 0
|
69
79
|
o.float "--min-time", "only process queries that have consumed a certain amount of DB time, in minutes", default: 0
|
70
80
|
o.separator ""
|
71
81
|
|
@@ -75,6 +85,7 @@ module Dexter
|
|
75
85
|
o.boolean "--enable-hypopg", "enable the HypoPG extension", default: false
|
76
86
|
o.array "--exclude", "prevent specific tables from being indexed"
|
77
87
|
o.string "--include", "only include specific tables"
|
88
|
+
o.integer "--min-cost", default: 100, help: false
|
78
89
|
o.integer "--min-cost-savings-pct", default: 50, help: false
|
79
90
|
o.string "--tablespace", "tablespace to create indexes"
|
80
91
|
o.separator ""
|
@@ -103,11 +114,17 @@ module Dexter
|
|
103
114
|
|
104
115
|
# TODO don't use global var
|
105
116
|
$log_level = options[:log_level].to_s.downcase
|
106
|
-
|
117
|
+
unless ["error", "info", "debug", "debug2", "debug3"].include?($log_level)
|
118
|
+
raise Error, "Unknown log level"
|
119
|
+
end
|
120
|
+
|
121
|
+
unless [nil, "csv", "json", "sql"].include?(options[:input_format])
|
122
|
+
raise Error, "Unknown input format"
|
123
|
+
end
|
107
124
|
|
108
125
|
[arguments, options]
|
109
126
|
rescue Slop::Error => e
|
110
|
-
raise
|
127
|
+
raise Error, e.message
|
111
128
|
end
|
112
129
|
end
|
113
130
|
end
|
data/lib/dexter/collector.rb
CHANGED
@@ -1,22 +1,17 @@
|
|
1
1
|
module Dexter
|
2
2
|
class Collector
|
3
|
-
def initialize(
|
3
|
+
def initialize(min_time:, min_calls:)
|
4
4
|
@top_queries = {}
|
5
5
|
@new_queries = Set.new
|
6
6
|
@mutex = Mutex.new
|
7
|
-
@min_time =
|
8
|
-
@min_calls =
|
7
|
+
@min_time = min_time * 60000 # convert minutes to ms
|
8
|
+
@min_calls = min_calls
|
9
9
|
end
|
10
10
|
|
11
|
-
def add(query, total_time, calls = 1)
|
12
|
-
fingerprint =
|
13
|
-
|
14
|
-
|
15
|
-
rescue PgQuery::ParseError
|
16
|
-
# do nothing
|
17
|
-
end
|
18
|
-
|
19
|
-
return unless fingerprint
|
11
|
+
def add(query, total_time, calls = 1, keep_all = false)
|
12
|
+
fingerprint = PgQuery.fingerprint(query) rescue nil
|
13
|
+
fingerprint ||= "unknown" if keep_all
|
14
|
+
return if fingerprint.nil?
|
20
15
|
|
21
16
|
@top_queries[fingerprint] ||= {calls: 0, total_time: 0}
|
22
17
|
@top_queries[fingerprint][:calls] += calls
|
@@ -36,16 +31,13 @@ module Dexter
|
|
36
31
|
end
|
37
32
|
|
38
33
|
queries = []
|
39
|
-
@top_queries.each do |
|
40
|
-
if new_queries.include?(
|
41
|
-
|
42
|
-
query.total_time = v[:total_time]
|
43
|
-
query.calls = v[:calls]
|
44
|
-
queries << query
|
34
|
+
@top_queries.each do |fingerprint, query|
|
35
|
+
if new_queries.include?(fingerprint) && query[:total_time] >= @min_time && query[:calls] >= @min_calls
|
36
|
+
queries << Query.new(query[:query], fingerprint, total_time: query[:total_time], calls: query[:calls])
|
45
37
|
end
|
46
38
|
end
|
47
39
|
|
48
|
-
queries
|
40
|
+
queries
|
49
41
|
end
|
50
42
|
end
|
51
43
|
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
module Dexter
|
2
|
+
class ColumnResolver
|
3
|
+
include Logging
|
4
|
+
|
5
|
+
def initialize(connection, queries, log_level:)
|
6
|
+
@connection = connection
|
7
|
+
@queries = queries
|
8
|
+
@log_level = log_level
|
9
|
+
end
|
10
|
+
|
11
|
+
def perform
|
12
|
+
tables = Set.new(@queries.flat_map(&:candidate_tables))
|
13
|
+
columns = tables.any? ? self.columns(tables) : []
|
14
|
+
columns_by_table = columns.group_by { |c| c[:table] }.transform_values { |v| v.to_h { |c| [c[:column], c] } }
|
15
|
+
columns_by_table.default = {}
|
16
|
+
|
17
|
+
@queries.each do |query|
|
18
|
+
log "Finding columns: #{query.statement}" if @log_level == "debug3"
|
19
|
+
columns = Set.new
|
20
|
+
begin
|
21
|
+
find_columns(query.tree).each do |col|
|
22
|
+
last_col = col["fields"].last
|
23
|
+
if last_col["String"]
|
24
|
+
columns << last_col["String"]["sval"]
|
25
|
+
end
|
26
|
+
end
|
27
|
+
rescue JSON::NestingError
|
28
|
+
if @log_level.start_with?("debug")
|
29
|
+
log colorize("ERROR: Cannot get columns", :red)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
possible_columns = []
|
34
|
+
columns.each do |column|
|
35
|
+
query.candidate_tables.each do |table|
|
36
|
+
resolved = columns_by_table.dig(table, column)
|
37
|
+
possible_columns << resolved if resolved
|
38
|
+
end
|
39
|
+
end
|
40
|
+
# use all columns in tables from views (not ideal)
|
41
|
+
(query.tables_from_views & query.candidate_tables).each do |table|
|
42
|
+
possible_columns.concat(columns_by_table[table].values)
|
43
|
+
end
|
44
|
+
query.columns = possible_columns.uniq
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
private
|
49
|
+
|
50
|
+
def find_columns(plan)
|
51
|
+
plan = JSON.parse(plan.to_json, max_nesting: 1000)
|
52
|
+
Indexer.find_by_key(plan, "ColumnRef")
|
53
|
+
end
|
54
|
+
|
55
|
+
def columns(tables)
|
56
|
+
query = <<~SQL
|
57
|
+
SELECT
|
58
|
+
s.nspname || '.' || t.relname AS table_name,
|
59
|
+
a.attname AS column_name,
|
60
|
+
pg_catalog.format_type(a.atttypid, a.atttypmod) AS data_type
|
61
|
+
FROM pg_attribute a
|
62
|
+
JOIN pg_class t on a.attrelid = t.oid
|
63
|
+
JOIN pg_namespace s on t.relnamespace = s.oid
|
64
|
+
WHERE a.attnum > 0
|
65
|
+
AND NOT a.attisdropped
|
66
|
+
AND s.nspname || '.' || t.relname IN (#{tables.size.times.map { |i| "$#{i + 1}" }.join(", ")})
|
67
|
+
ORDER BY
|
68
|
+
1, 2
|
69
|
+
SQL
|
70
|
+
columns = @connection.execute(query, params: tables.to_a)
|
71
|
+
columns.map { |v| {table: v["table_name"], column: v["column_name"], type: v["data_type"]} }
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
module Dexter
|
2
|
+
class Connection
|
3
|
+
include Logging
|
4
|
+
|
5
|
+
def initialize(dbname:, host:, port:, username:, log_sql:)
|
6
|
+
@dbname = dbname
|
7
|
+
@host = host
|
8
|
+
@port = port
|
9
|
+
@username = username
|
10
|
+
@log_sql = log_sql
|
11
|
+
@mutex = Mutex.new
|
12
|
+
end
|
13
|
+
|
14
|
+
def setup(enable_hypopg)
|
15
|
+
if server_version_num < 130000
|
16
|
+
raise Error, "This version of Dexter requires Postgres 13+"
|
17
|
+
end
|
18
|
+
|
19
|
+
check_extension(enable_hypopg)
|
20
|
+
|
21
|
+
execute("SET lock_timeout = '5s'")
|
22
|
+
end
|
23
|
+
|
24
|
+
def execute(query, pretty: true, params: [], use_exec: false)
|
25
|
+
# use exec_params instead of exec when possible for security
|
26
|
+
#
|
27
|
+
# Unlike PQexec, PQexecParams allows at most one SQL command in the given string.
|
28
|
+
# (There can be semicolons in it, but not more than one nonempty command.)
|
29
|
+
# This is a limitation of the underlying protocol, but has some usefulness
|
30
|
+
# as an extra defense against SQL-injection attacks.
|
31
|
+
# https://www.postgresql.org/docs/current/static/libpq-exec.html
|
32
|
+
query = squish(query) if pretty
|
33
|
+
log colorize("[sql] #{query}#{params.any? ? " /*#{params.to_json}*/" : ""}", :cyan) if @log_sql
|
34
|
+
|
35
|
+
@mutex.synchronize do
|
36
|
+
if use_exec
|
37
|
+
conn.exec("#{query} /*dexter*/").to_a
|
38
|
+
else
|
39
|
+
conn.exec_params("#{query} /*dexter*/", params).to_a
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
def server_version_num
|
45
|
+
@server_version_num ||= execute("SHOW server_version_num").first["server_version_num"].to_i
|
46
|
+
end
|
47
|
+
|
48
|
+
private
|
49
|
+
|
50
|
+
def check_extension(enable_hypopg)
|
51
|
+
extension = execute("SELECT installed_version FROM pg_available_extensions WHERE name = 'hypopg'").first
|
52
|
+
|
53
|
+
if extension.nil?
|
54
|
+
raise Error, "Install HypoPG first: https://github.com/ankane/dexter#installation"
|
55
|
+
end
|
56
|
+
|
57
|
+
if extension["installed_version"].nil?
|
58
|
+
if enable_hypopg
|
59
|
+
execute("CREATE EXTENSION hypopg")
|
60
|
+
else
|
61
|
+
raise Error, "Run `CREATE EXTENSION hypopg` or pass --enable-hypopg"
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
def conn
|
67
|
+
@conn ||= begin
|
68
|
+
# set connect timeout if none set
|
69
|
+
ENV["PGCONNECT_TIMEOUT"] ||= "3"
|
70
|
+
|
71
|
+
if @dbname.to_s.start_with?("postgres://", "postgresql://")
|
72
|
+
config = @dbname
|
73
|
+
else
|
74
|
+
config = {
|
75
|
+
host: @host,
|
76
|
+
port: @port,
|
77
|
+
dbname: @dbname,
|
78
|
+
user: @username
|
79
|
+
}.reject { |_, value| value.to_s.empty? }
|
80
|
+
config = config[:dbname] if config.keys == [:dbname] && config[:dbname].include?("=")
|
81
|
+
end
|
82
|
+
PG::Connection.new(config)
|
83
|
+
end
|
84
|
+
rescue PG::ConnectionBad => e
|
85
|
+
raise Error, e.message
|
86
|
+
end
|
87
|
+
|
88
|
+
def squish(str)
|
89
|
+
str.to_s.gsub(/\s+/, " ").strip
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|