pg_insights 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +144 -45
- data/app/controllers/pg_insights/timeline_controller.rb +263 -0
- data/app/jobs/pg_insights/database_snapshot_job.rb +101 -0
- data/app/models/pg_insights/health_check_result.rb +151 -0
- data/app/services/pg_insights/health_check_service.rb +159 -3
- data/app/views/layouts/pg_insights/application.html.erb +1 -0
- data/app/views/pg_insights/timeline/compare.html.erb +997 -0
- data/app/views/pg_insights/timeline/index.html.erb +797 -0
- data/app/views/pg_insights/timeline/show.html.erb +1004 -0
- data/config/routes.rb +9 -5
- data/lib/generators/pg_insights/install_generator.rb +69 -18
- data/lib/pg_insights/version.rb +1 -1
- data/lib/pg_insights.rb +24 -10
- data/lib/tasks/pg_insights.rake +419 -33
- metadata +7 -2
@@ -9,6 +9,7 @@ module PgInsights
|
|
9
9
|
slow_queries
|
10
10
|
table_bloat
|
11
11
|
parameter_settings
|
12
|
+
database_snapshot
|
12
13
|
].freeze
|
13
14
|
|
14
15
|
VALID_STATUSES = %w[pending running success error].freeze
|
@@ -42,5 +43,155 @@ module PgInsights
|
|
42
43
|
threshold ||= PgInsights.health_cache_expiry
|
43
44
|
executed_at && executed_at > threshold.ago
|
44
45
|
end
|
46
|
+
|
47
|
+
# Most recent successful database snapshots, newest first.
#
# limit - maximum number of rows to return (defaults to 90, i.e. roughly
#         one quarter of daily snapshots).
def self.snapshots(limit = 90)
  successful_scope = by_type("database_snapshot").successful
  successful_scope.recent.limit(limit)
end
|
50
|
+
|
51
|
+
# The single most recent successful snapshot, or nil when none exist.
def self.latest_snapshot
  successful_scope = by_type("database_snapshot").successful
  successful_scope.recent.first
end
|
54
|
+
|
55
|
+
# Successful snapshots whose executed_at falls on any day in the given
# inclusive date range, ordered oldest first.
def self.snapshots_between(start_date, end_date)
  window = start_date.beginning_of_day..end_date.end_of_day
  by_type("database_snapshot")
    .successful
    .where(executed_at: window)
    .order(:executed_at)
end
|
61
|
+
|
62
|
+
# First successful snapshot taken on the given calendar day, or nil.
# Accepts anything responding to #to_date (Date, Time, DateTime).
def self.find_snapshot_by_date(date)
  day = date.to_date
  by_type("database_snapshot")
    .successful
    .where("DATE(executed_at) = ?", day)
    .first
end
|
68
|
+
|
69
|
+
# Scans consecutive snapshot pairs within the look-back window and
# reports every pair where parameter values differ.
#
# days_ago - size of the look-back window in days (default 7).
#
# Returns an Array of Hashes, oldest change first, each shaped as
# { detected_at: Time, changes: { param => {from:, to:, ...} } }.
def self.detect_parameter_changes_since(days_ago = 7)
  ordered_snapshots = by_type("database_snapshot")
                        .successful
                        .where("executed_at >= ?", days_ago.days.ago)
                        .order(:executed_at)
                        .to_a

  # Walk adjacent pairs; keep only pairs with at least one changed param.
  ordered_snapshots.each_cons(2).filter_map do |previous_snapshot, current_snapshot|
    diff = compare_snapshots(previous_snapshot, current_snapshot)
    { detected_at: current_snapshot.executed_at, changes: diff } if diff.any?
  end
end
|
89
|
+
|
90
|
+
# Diffs the stored "parameters" section of two snapshots.
#
# Returns a Hash keyed by parameter name; each entry records the old and
# new value, a coarse change classification, and when the newer snapshot
# was taken. Returns {} when either snapshot lacks result_data.
# Note: only parameters present in the newer snapshot are considered, so
# a parameter that disappears entirely is not reported.
def self.compare_snapshots(snapshot1, snapshot2)
  return {} unless snapshot1&.result_data && snapshot2&.result_data

  before = snapshot1.result_data.dig("parameters") || {}
  after  = snapshot2.result_data.dig("parameters") || {}

  after.each_with_object({}) do |(param_name, new_value), changes|
    old_value = before[param_name]
    # Skip identical values and nil <-> "" transitions (not real changes).
    next if old_value == new_value
    next if both_nil_or_empty?(old_value, new_value)

    changes[param_name] = {
      from: old_value,
      to: new_value,
      change_type: determine_change_type(param_name, old_value, new_value),
      detected_at: snapshot2.executed_at
    }
  end
end
|
111
|
+
|
112
|
+
# Deletes snapshots older than the configured retention window.
# Returns the number of rows removed (nil when snapshots are disabled).
def self.cleanup_old_snapshots
  return unless PgInsights.snapshots_available?

  cutoff = PgInsights.snapshot_retention_days.days.ago
  removed = by_type("database_snapshot")
              .where("executed_at < ?", cutoff)
              .delete_all

  Rails.logger.info "PgInsights: Cleaned up #{removed} old snapshots" if removed.positive?
  removed
end
|
123
|
+
|
124
|
+
# Aggregates snapshot history for the timeline charts.
#
# days - size of the look-back window in days (default 30).
#
# Returns a Hash of parallel arrays (one element per snapshot, oldest
# first): formatted timestamps plus the cache-hit-rate, average-query-
# time, bloated-table and total-connection series, and the parameter
# changes detected over the same window.
def self.timeline_data(days = 30)
  # Materialize the relation once: it is mapped five times below, and
  # mapping a live relation would re-run the same SQL query each time.
  snapshots = by_type("database_snapshot")
                .successful
                .where("executed_at >= ?", days.days.ago)
                .order(:executed_at)
                .to_a

  {
    dates: snapshots.map { |s| s.executed_at.strftime("%Y-%m-%d %H:%M") },
    cache_hit_rates: snapshots.map { |s| (s.result_data.dig("metrics", "cache_hit_rate") || 0).to_f },
    avg_query_times: snapshots.map { |s| (s.result_data.dig("metrics", "avg_query_time") || 0).to_f },
    bloated_tables: snapshots.map { |s| (s.result_data.dig("metrics", "bloated_tables") || 0).to_i },
    total_connections: snapshots.map { |s| (s.result_data.dig("metrics", "total_connections") || 0).to_i },
    parameter_changes: detect_parameter_changes_since(days)
  }
end
|
139
|
+
|
140
|
+
private
|
141
|
+
|
142
|
+
# True when both values are "blank" (nil or the empty string), so that a
# nil <-> "" transition is not reported as a parameter change.
# NOTE(review): the bare `private` keyword above this helper does not
# apply to `def self.` singleton methods — these helpers are effectively
# public; `private_class_method` would be needed to hide them.
def self.both_nil_or_empty?(val1, val2)
  [val1, val2].all? { |value| value.nil? || value == "" }
end
|
145
|
+
|
146
|
+
# Classifies a parameter transition as "increase", "decrease", "stable"
# or a generic "change" (used when either value is missing, the
# parameter is non-numeric, or a numeric value cannot be parsed).
def self.determine_change_type(param_name, old_value, new_value)
  return "change" unless old_value && new_value

  unless numeric_parameter?(param_name)
    return old_value == new_value ? "stable" : "change"
  end

  old_numeric = extract_numeric_value(old_value)
  new_numeric = extract_numeric_value(new_value)
  return "change" unless old_numeric && new_numeric

  case new_numeric <=> old_numeric
  when 1 then "increase"
  when -1 then "decrease"
  else "stable"
  end
end
|
166
|
+
|
167
|
+
# True for the PostgreSQL settings whose values are compared numerically
# (sizes and counts); everything else is compared by string equality.
def self.numeric_parameter?(param_name)
  numeric_names = %w[
    autovacuum_max_workers effective_cache_size maintenance_work_mem
    max_connections max_wal_size min_wal_size shared_buffers
    wal_buffers work_mem
  ]
  numeric_names.member?(param_name.to_s)
end
|
174
|
+
|
175
|
+
# Parses a PostgreSQL setting value into a Float: unit-suffixed sizes
# ("4GB", "8kB") are converted to bytes; bare numerics ("100.5") are
# returned as-is.
#
# value - a Numeric, or a String setting value.
#
# Returns a Float, or nil for non-numeric strings (e.g. "on") and for
# anything that is neither Numeric nor String.
def self.extract_numeric_value(value)
  return value.to_f if value.is_a?(Numeric)
  return nil unless value.is_a?(String)

  if (size = value.match(/(?<num>\d+(?:\.\d+)?)\s*(?<unit>kB|MB|GB|TB)/i))
    factor =
      case size[:unit].upcase
      when "KB" then 1024
      when "MB" then 1024**2
      when "GB" then 1024**3
      when "TB" then 1024**4
      else 1 # unreachable given the alternation above; kept defensively
      end
    size[:num].to_f * factor
  # \A/\z anchor the WHOLE string; ^/$ would match any single line of a
  # multi-line value and let garbage like "12\nfoo" parse as 12.0.
  elsif value.match?(/\A\d+(?:\.\d+)?\z/)
    value.to_f
  end
end
|
45
196
|
end
|
46
197
|
end
|
@@ -28,6 +28,12 @@ module PgInsights
|
|
28
28
|
get_cached_result("parameter_settings") || execute_parameter_settings_query
|
29
29
|
end
|
30
30
|
|
31
|
+
# Returns the cached database_snapshot payload when one is fresh,
# otherwise collects a new snapshot. Returns nil (no-op) when snapshot
# storage is not available.
def self.collect_database_snapshot
  return unless PgInsights.snapshots_available?

  cached = get_cached_result("database_snapshot")
  cached || execute_database_snapshot_query
end
|
36
|
+
|
31
37
|
def self.refresh_all!(force_synchronous: false)
|
32
38
|
if force_synchronous || !PgInsights.background_jobs_available?
|
33
39
|
execute_all_checks_synchronously
|
@@ -124,6 +130,10 @@ module PgInsights
|
|
124
130
|
execute_health_check_query("parameter_settings")
|
125
131
|
end
|
126
132
|
|
133
|
+
# Runs the database_snapshot check through the shared dispatcher so it
# follows the same execution path as the other health-check types.
def self.execute_database_snapshot_query
  execute_health_check_query("database_snapshot")
end
|
136
|
+
|
127
137
|
def self.execute_health_check_query(check_type, limit = 10)
|
128
138
|
case check_type
|
129
139
|
when "unused_indexes"
|
@@ -138,6 +148,8 @@ module PgInsights
|
|
138
148
|
execute_table_bloat_sql(limit)
|
139
149
|
when "parameter_settings"
|
140
150
|
execute_parameter_settings_sql
|
151
|
+
when "database_snapshot"
|
152
|
+
execute_database_snapshot_sql
|
141
153
|
else
|
142
154
|
raise ArgumentError, "Unknown check type: #{check_type}"
|
143
155
|
end
|
@@ -220,7 +232,7 @@ module PgInsights
|
|
220
232
|
n_live_tup,
|
221
233
|
CASE#{' '}
|
222
234
|
WHEN (n_live_tup + n_dead_tup) > 0#{' '}
|
223
|
-
THEN round((n_dead_tup::
|
235
|
+
THEN round((n_dead_tup::numeric / (n_live_tup + n_dead_tup) * 100), 2)
|
224
236
|
ELSE 0#{' '}
|
225
237
|
END as dead_tuple_pct,
|
226
238
|
pg_size_pretty(pg_total_relation_size(relid) / (1024*1024)) || ' MB' as table_mb_text,
|
@@ -233,7 +245,7 @@ module PgInsights
|
|
233
245
|
WHERE schemaname = 'public'
|
234
246
|
AND (n_live_tup + n_dead_tup) > 0
|
235
247
|
AND (
|
236
|
-
(n_dead_tup::
|
248
|
+
(n_dead_tup::numeric / (n_live_tup + n_dead_tup)) > 0.1
|
237
249
|
OR n_dead_tup > 1000
|
238
250
|
)
|
239
251
|
AND pg_total_relation_size(relid) > 1024*1024
|
@@ -258,7 +270,6 @@ module PgInsights
|
|
258
270
|
result = execute_query(sql)
|
259
271
|
return result if result.is_a?(Hash) && result[:error]
|
260
272
|
|
261
|
-
# todo basic recommendations (can be improved with system info)
|
262
273
|
recommendations = {
|
263
274
|
"shared_buffers" => "Recommended: 25% of total system RAM.",
|
264
275
|
"work_mem" => "Recommended: Based on RAM, connections, and query complexity. Default is often too low.",
|
@@ -273,6 +284,151 @@ module PgInsights
|
|
273
284
|
end
|
274
285
|
end
|
275
286
|
|
287
|
+
# Assembles the full snapshot document persisted for the timeline views:
# tracked parameter settings, performance metrics, server metadata, and
# a collection timestamp.
def self.execute_database_snapshot_sql
  snapshot = {}
  snapshot[:parameters] = collect_enhanced_parameters
  snapshot[:metrics] = collect_performance_metrics
  snapshot[:metadata] = collect_database_metadata
  snapshot[:collected_at] = Time.current.iso8601
  snapshot
end
|
295
|
+
|
296
|
+
# Fetches the PostgreSQL settings tracked by snapshots from pg_settings.
#
# Returns a Hash of setting name => value string (unit appended directly
# when present, e.g. "4" + "GB" => "4GB"), or {} when the query fails.
def self.collect_enhanced_parameters
  # Hardcoded allow-list — safe to interpolate into the IN (...) clause.
  tracked_settings = [
    "shared_buffers", "work_mem", "effective_cache_size",
    "max_connections", "maintenance_work_mem", "checkpoint_completion_target",
    "wal_buffers", "random_page_cost", "seq_page_cost", "cpu_tuple_cost",
    "autovacuum", "autovacuum_max_workers", "max_wal_size", "min_wal_size"
  ]
  quoted_names = tracked_settings.map { |name| "'#{name}'" }.join(",")

  sql = <<-SQL
    SELECT name, setting, unit, context, vartype
    FROM pg_settings
    WHERE name IN (#{quoted_names})
    ORDER BY name;
  SQL

  rows = execute_query(sql)
  return {} if rows.is_a?(Hash) && rows[:error]

  rows.each_with_object({}) do |row, params|
    value = row["setting"]
    value += row["unit"] if row["unit"].present?
    params[row["name"]] = value
  end
end
|
321
|
+
|
322
|
+
# Collects point-in-time performance metrics for a snapshot document:
# buffer-cache hit rate, query timings (only when the pg_stat_statements
# extension is installed), table-bloat stats, connection counts, and the
# number of tables with heavy sequential scanning.
#
# Returns a Hash of metric name => value. Each sub-query is guarded
# against failure: execute_query reports errors as an {error: ...} Hash,
# and calling row-set readers (.first&.dig) on that Hash would raise
# (Hash#first yields a [key, value] Array), aborting the whole snapshot.
# Failed sub-queries now default to 0 or omit their metrics instead.
def self.collect_performance_metrics
  metrics = {}

  cache_sql = <<-SQL
    SELECT
      CASE
        WHEN sum(heap_blks_hit) + sum(heap_blks_read) = 0 THEN 0
        ELSE round((sum(heap_blks_hit)::numeric / (sum(heap_blks_hit) + sum(heap_blks_read))) * 100, 2)
      END as cache_hit_rate
    FROM pg_statio_user_tables;
  SQL

  cache_result = execute_query(cache_sql)
  metrics["cache_hit_rate"] =
    if snapshot_query_error?(cache_result)
      0
    else
      cache_result.first&.dig("cache_hit_rate") || 0
    end

  # Query-timing stats require pg_stat_statements; skip silently if absent.
  if extension_available?("pg_stat_statements")
    query_sql = <<-SQL
      SELECT
        round(avg(mean_exec_time)::numeric, 2) as avg_query_time,
        round(percentile_cont(0.95) WITHIN GROUP (ORDER BY mean_exec_time)::numeric, 2) as p95_query_time,
        count(*) as total_queries,
        sum(calls) as total_calls
      FROM pg_stat_statements
      WHERE mean_exec_time > 0;
    SQL

    query_result = execute_query(query_sql)
    if !snapshot_query_error?(query_result) && query_result.first
      row = query_result.first
      metrics["avg_query_time"] = row["avg_query_time"]
      metrics["p95_query_time"] = row["p95_query_time"]
      metrics["total_queries"] = row["total_queries"]
      metrics["total_calls"] = row["total_calls"]
    end
  end

  bloat_sql = <<-SQL
    SELECT
      count(*) as bloated_tables,
      round(avg(CASE
        WHEN (n_live_tup + n_dead_tup) > 0
        THEN (n_dead_tup::numeric / (n_live_tup + n_dead_tup) * 100)
        ELSE 0
      END), 2) as avg_bloat_pct
    FROM pg_stat_user_tables
    WHERE schemaname = 'public'
    AND (n_live_tup + n_dead_tup) > 0
    AND (n_dead_tup::numeric / (n_live_tup + n_dead_tup)) > 0.1;
  SQL

  bloat_result = execute_query(bloat_sql)
  if !snapshot_query_error?(bloat_result) && bloat_result.first
    metrics["bloated_tables"] = bloat_result.first["bloated_tables"] || 0
    metrics["avg_bloat_pct"] = bloat_result.first["avg_bloat_pct"] || 0
  end

  conn_sql = <<-SQL
    SELECT
      count(*) as total_connections,
      count(*) FILTER (WHERE state = 'active') as active_connections,
      count(*) FILTER (WHERE state = 'idle') as idle_connections
    FROM pg_stat_activity;
  SQL

  conn_result = execute_query(conn_sql)
  if !snapshot_query_error?(conn_result) && conn_result.first
    row = conn_result.first
    metrics["total_connections"] = row["total_connections"]
    metrics["active_connections"] = row["active_connections"]
    metrics["idle_connections"] = row["idle_connections"]
  end

  seq_scan_sql = <<-SQL
    SELECT count(*) as high_seq_scan_tables
    FROM pg_stat_user_tables
    WHERE schemaname = 'public' AND seq_scan > 100;
  SQL

  seq_result = execute_query(seq_scan_sql)
  metrics["high_seq_scan_tables"] =
    if snapshot_query_error?(seq_result)
      0
    else
      seq_result.first&.dig("high_seq_scan_tables") || 0
    end

  metrics
end

# True when execute_query signalled failure via its {error: ...} Hash
# instead of returning a row set.
def self.snapshot_query_error?(result)
  result.is_a?(Hash) && !!result[:error]
end
|
403
|
+
|
404
|
+
# Collects static server metadata for a snapshot: PostgreSQL version
# string, pretty-printed database size, installed extensions, and a
# collection timestamp. Individual lookup failures yield nil entries
# rather than aborting the snapshot.
def self.collect_database_metadata
  version_rows = execute_query("SELECT version();")
  size_rows = execute_query("SELECT pg_size_pretty(pg_database_size(current_database()));")

  # execute_query reports failure as an {error: ...} Hash; treating that
  # as a row set (.first&.dig / .first&.values) would raise, so map
  # failures to nil instead.
  postgres_version =
    (version_rows.is_a?(Hash) && version_rows[:error]) ? nil : version_rows.first&.dig("version")
  database_size =
    (size_rows.is_a?(Hash) && size_rows[:error]) ? nil : size_rows.first&.values&.first

  {
    postgres_version: postgres_version,
    database_size: database_size,
    extensions: collect_extensions,
    collected_at: Time.current.iso8601
  }
end
|
412
|
+
|
413
|
+
# Lists installed PostgreSQL extension names in alphabetical order, or
# [] when the catalog query fails.
def self.collect_extensions
  rows = execute_query("SELECT extname FROM pg_extension ORDER BY extname;")
  return [] if rows.is_a?(Hash) && rows[:error]

  rows.map { |row| row["extname"] }
end
|
420
|
+
|
421
|
+
# True when the named extension is installed in the current database.
# The name is quoted through the connection adapter before being
# interpolated into SQL. Any failure — a query-level error or a raised
# exception — is logged and reported as unavailable (false).
def self.extension_available?(extension_name)
  quoted_name = ActiveRecord::Base.connection.quote(extension_name)
  rows = execute_query("SELECT 1 FROM pg_extension WHERE extname = #{quoted_name}")

  return false if rows.is_a?(Hash) && rows[:error]

  rows.any?
rescue => e
  Rails.logger.error "PgInsights: Extension availability check failed: #{e.message}"
  false
end
|
431
|
+
|
276
432
|
def self.get_cached_result(check_type)
|
277
433
|
return nil unless defined?(HealthCheckResult)
|
278
434
|
|
@@ -25,6 +25,7 @@
|
|
25
25
|
<div class="header-right">
|
26
26
|
<nav class="header-nav">
|
27
27
|
<%= link_to "Health Dashboard", pg_insights.health_path, class: "nav-link #{'active' if current_page?(pg_insights.health_path)}" %>
|
28
|
+
<%= link_to "Timeline", pg_insights.timeline_path, class: "nav-link #{'active' if current_page?(pg_insights.timeline_path)}" %>
|
28
29
|
<%= link_to "Query Runner", pg_insights.root_path, class: "nav-link #{'active' if current_page?(pg_insights.root_path)}" %>
|
29
30
|
</nav>
|
30
31
|
</div>
|