pgbus 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/controllers/pgbus/api/insights_controller.rb +6 -1
- data/app/controllers/pgbus/insights_controller.rb +2 -0
- data/app/controllers/pgbus/jobs_controller.rb +5 -0
- data/app/frontend/pgbus/modules/charts.js +28 -1
- data/app/models/pgbus/job_stat.rb +85 -13
- data/app/views/pgbus/insights/show.html.erb +71 -4
- data/app/views/pgbus/jobs/_enqueued_table.html.erb +8 -1
- data/config/locales/da.yml +3 -0
- data/config/locales/de.yml +3 -0
- data/config/locales/en.yml +19 -0
- data/config/locales/es.yml +3 -0
- data/config/locales/fi.yml +3 -0
- data/config/locales/fr.yml +3 -0
- data/config/locales/it.yml +3 -0
- data/config/locales/ja.yml +3 -0
- data/config/locales/nb.yml +3 -0
- data/config/locales/nl.yml +3 -0
- data/config/locales/pt.yml +3 -0
- data/config/locales/sv.yml +3 -0
- data/config/routes.rb +1 -0
- data/lib/generators/pgbus/add_job_stats_latency_generator.rb +52 -0
- data/lib/generators/pgbus/templates/add_job_stats_latency.rb.erb +9 -0
- data/lib/pgbus/active_job/executor.rb +24 -5
- data/lib/pgbus/recurring/schedule.rb +86 -0
- data/lib/pgbus/version.rb +1 -1
- data/lib/pgbus/web/data_source.rb +107 -0
- metadata +3 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 6daa2060697774db5018b30b2d7b08ef4f3f962332e627f8fa90ce49cfe136da
|
|
4
|
+
data.tar.gz: 9fe928dba25e277212093a6e1a0443c6451336b2bc996e06375d88eaaf2dd148
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 1977d39318814b8878eef04065e4abf2a73588005542e5b15b679248848fd470c11b685a3a7c2d89d26ecbee0b897fa2968d498b757b0435bf213eba61e48c3d
|
|
7
|
+
data.tar.gz: 0d999aa34400b0a9f85e030d9cd5ac9f5bb0b57e467eee50615a3cf6f19ee70f8c33da688b3d78abf575c4c053e0dab7ecc3fbc2c841390c97d9089b2f238135
|
data/app/controllers/pgbus/api/insights_controller.rb
CHANGED
|
@@ -5,12 +5,17 @@ module Pgbus
|
|
|
5
5
|
class InsightsController < ApplicationController
|
|
6
6
|
def show
|
|
7
7
|
minutes = insights_minutes
|
|
8
|
-
|
|
8
|
+
payload = {
|
|
9
9
|
summary: data_source.job_stats_summary(minutes: minutes),
|
|
10
10
|
throughput: data_source.job_throughput(minutes: minutes),
|
|
11
11
|
status_counts: data_source.job_status_counts(minutes: minutes),
|
|
12
12
|
slowest: data_source.slowest_job_classes(minutes: minutes)
|
|
13
13
|
}
|
|
14
|
+
if Pgbus::JobStat.latency_columns?
|
|
15
|
+
payload[:latency_trend] = data_source.latency_trend(minutes: minutes)
|
|
16
|
+
payload[:latency_by_queue] = data_source.latency_by_queue(minutes: minutes)
|
|
17
|
+
end
|
|
18
|
+
render json: payload
|
|
14
19
|
end
|
|
15
20
|
end
|
|
16
21
|
end
|
data/app/controllers/pgbus/insights_controller.rb
CHANGED
|
@@ -6,6 +6,8 @@ module Pgbus
|
|
|
6
6
|
@minutes = insights_minutes
|
|
7
7
|
@summary = data_source.job_stats_summary(minutes: @minutes)
|
|
8
8
|
@slowest = data_source.slowest_job_classes(minutes: @minutes)
|
|
9
|
+
@latency_by_queue = data_source.latency_by_queue(minutes: @minutes)
|
|
10
|
+
@latency_available = Pgbus::JobStat.latency_columns?
|
|
9
11
|
end
|
|
10
12
|
end
|
|
11
13
|
end
|
data/app/controllers/pgbus/jobs_controller.rb
CHANGED
|
@@ -44,5 +44,10 @@ module Pgbus
|
|
|
44
44
|
count = data_source.discard_all_failed
|
|
45
45
|
redirect_to jobs_path, notice: "Discarded #{count} jobs."
|
|
46
46
|
end
|
|
47
|
+
|
|
48
|
+
def discard_all_enqueued
|
|
49
|
+
count = data_source.discard_all_enqueued
|
|
50
|
+
redirect_to jobs_path, notice: t("pgbus.jobs.index.discard_all_enqueued_notice", count: count)
|
|
51
|
+
end
|
|
47
52
|
end
|
|
48
53
|
end
|
data/app/frontend/pgbus/modules/charts.js
CHANGED
|
@@ -12,13 +12,14 @@ function getThemeColors() {
|
|
|
12
12
|
};
|
|
13
13
|
}
|
|
14
14
|
|
|
15
|
-
let throughputChart, statusChart;
|
|
15
|
+
let throughputChart, statusChart, latencyChart;
|
|
16
16
|
|
|
17
17
|
export function renderCharts(data, i18n) {
|
|
18
18
|
const t = getThemeColors();
|
|
19
19
|
|
|
20
20
|
if (throughputChart) throughputChart.destroy();
|
|
21
21
|
if (statusChart) statusChart.destroy();
|
|
22
|
+
if (latencyChart) latencyChart.destroy();
|
|
22
23
|
|
|
23
24
|
const throughputData = data.throughput.map(p => ({
|
|
24
25
|
x: new Date(p.time).getTime(),
|
|
@@ -64,6 +65,32 @@ export function renderCharts(data, i18n) {
|
|
|
64
65
|
const el = document.querySelector("#status-chart");
|
|
65
66
|
if (el) el.innerHTML = `<p class="text-center text-sm text-gray-400 dark:text-gray-500 pt-24">${i18n.noData || "No data"}</p>`;
|
|
66
67
|
}
|
|
68
|
+
|
|
69
|
+
// Latency chart (only if data is available)
|
|
70
|
+
const latencyEl = document.querySelector("#latency-chart");
|
|
71
|
+
if (latencyEl && data.latency_trend && data.latency_trend.length > 0) {
|
|
72
|
+
const avgData = data.latency_trend.map(p => ({ x: new Date(p.time).getTime(), y: p.avg_ms }));
|
|
73
|
+
const p95Data = data.latency_trend.map(p => ({ x: new Date(p.time).getTime(), y: p.p95_ms }));
|
|
74
|
+
|
|
75
|
+
latencyChart = new ApexCharts(latencyEl, {
|
|
76
|
+
series: [
|
|
77
|
+
{ name: i18n.latencyAvg || "Avg", data: avgData },
|
|
78
|
+
{ name: i18n.latencyP95 || "P95", data: p95Data },
|
|
79
|
+
],
|
|
80
|
+
chart: { type: "line", height: 280, toolbar: { show: false }, background: "transparent", foreColor: t.text },
|
|
81
|
+
stroke: { curve: "smooth", width: [2, 2], dashArray: [0, 5] },
|
|
82
|
+
colors: ["#6366f1", "#f59e0b"],
|
|
83
|
+
xaxis: { type: "datetime", labels: { style: { colors: t.text } } },
|
|
84
|
+
yaxis: { labels: { style: { colors: t.text }, formatter: v => Math.round(v) + "ms" } },
|
|
85
|
+
grid: { borderColor: t.grid },
|
|
86
|
+
tooltip: { theme: t.tooltip },
|
|
87
|
+
dataLabels: { enabled: false },
|
|
88
|
+
legend: { position: "top", labels: { colors: t.text } },
|
|
89
|
+
});
|
|
90
|
+
latencyChart.render();
|
|
91
|
+
} else if (latencyEl) {
|
|
92
|
+
latencyEl.innerHTML = `<p class="text-center text-sm text-gray-400 dark:text-gray-500 pt-24">${i18n.noData || "No data"}</p>`;
|
|
93
|
+
}
|
|
67
94
|
}
|
|
68
95
|
|
|
69
96
|
let themeObserver = null;
|
data/app/models/pgbus/job_stat.rb
CHANGED
|
@@ -10,15 +10,19 @@ module Pgbus
|
|
|
10
10
|
scope :dead_lettered, -> { where(status: "dead_lettered") }
|
|
11
11
|
|
|
12
12
|
# Record a job execution stat. Called by the executor after each job.
|
|
13
|
-
def self.record!(job_class:, queue_name:, status:, duration_ms:)
|
|
13
|
+
def self.record!(job_class:, queue_name:, status:, duration_ms:, enqueue_latency_ms: nil, retry_count: 0)
|
|
14
14
|
return unless table_exists?
|
|
15
15
|
|
|
16
|
-
|
|
16
|
+
attrs = {
|
|
17
17
|
job_class: job_class,
|
|
18
18
|
queue_name: queue_name,
|
|
19
19
|
status: status,
|
|
20
20
|
duration_ms: duration_ms
|
|
21
|
-
|
|
21
|
+
}
|
|
22
|
+
attrs[:enqueue_latency_ms] = enqueue_latency_ms if latency_columns?
|
|
23
|
+
attrs[:retry_count] = retry_count if latency_columns?
|
|
24
|
+
|
|
25
|
+
create!(attrs)
|
|
22
26
|
rescue StandardError => e
|
|
23
27
|
Pgbus.logger.debug { "[Pgbus] Failed to record job stat: #{e.message}" }
|
|
24
28
|
end
|
|
@@ -34,6 +38,15 @@ module Pgbus
|
|
|
34
38
|
@table_exists = false
|
|
35
39
|
end
|
|
36
40
|
|
|
41
|
+
# Memoized — checks if the latency migration has been applied.
|
|
42
|
+
def self.latency_columns?
|
|
43
|
+
return @latency_columns if defined?(@latency_columns)
|
|
44
|
+
|
|
45
|
+
@latency_columns = table_exists? && column_names.include?("enqueue_latency_ms")
|
|
46
|
+
rescue StandardError
|
|
47
|
+
@latency_columns = false
|
|
48
|
+
end
|
|
49
|
+
|
|
37
50
|
# Throughput: jobs per minute bucketed by minute for the last N minutes
|
|
38
51
|
def self.throughput(minutes: 60)
|
|
39
52
|
since(minutes.minutes.ago)
|
|
@@ -67,16 +80,30 @@ module Pgbus
|
|
|
67
80
|
|
|
68
81
|
# Single-query aggregate summary using conditional counts.
|
|
69
82
|
def self.summary(minutes: 60)
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
83
|
+
cols = [
|
|
84
|
+
"COUNT(*)",
|
|
85
|
+
"COUNT(*) FILTER (WHERE status = 'success')",
|
|
86
|
+
"COUNT(*) FILTER (WHERE status = 'failed')",
|
|
87
|
+
"COUNT(*) FILTER (WHERE status = 'dead_lettered')",
|
|
88
|
+
"ROUND(AVG(duration_ms)::numeric, 1)",
|
|
89
|
+
"MAX(duration_ms)"
|
|
90
|
+
]
|
|
91
|
+
if latency_columns?
|
|
92
|
+
cols.push(
|
|
93
|
+
"ROUND(AVG(enqueue_latency_ms) FILTER (WHERE enqueue_latency_ms IS NOT NULL)::numeric, 1)",
|
|
94
|
+
"PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY enqueue_latency_ms) " \
|
|
95
|
+
"FILTER (WHERE enqueue_latency_ms IS NOT NULL)",
|
|
96
|
+
"PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY enqueue_latency_ms) " \
|
|
97
|
+
"FILTER (WHERE enqueue_latency_ms IS NOT NULL)",
|
|
98
|
+
"PERCENTILE_CONT(0.99) WITHIN GROUP (ORDER BY enqueue_latency_ms) " \
|
|
99
|
+
"FILTER (WHERE enqueue_latency_ms IS NOT NULL)",
|
|
100
|
+
"ROUND(AVG(retry_count) FILTER (WHERE retry_count IS NOT NULL)::numeric, 2)"
|
|
101
|
+
)
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
row = since(minutes.minutes.ago).pick(*cols.map { |c| Arel.sql(c) })
|
|
105
|
+
|
|
106
|
+
result = {
|
|
80
107
|
total: row[0].to_i,
|
|
81
108
|
success: row[1].to_i,
|
|
82
109
|
failed: row[2].to_i,
|
|
@@ -84,6 +111,51 @@ module Pgbus
|
|
|
84
111
|
avg_duration_ms: row[4]&.to_f || 0,
|
|
85
112
|
max_duration_ms: row[5].to_i
|
|
86
113
|
}
|
|
114
|
+
|
|
115
|
+
if latency_columns?
|
|
116
|
+
result.merge!(
|
|
117
|
+
avg_latency_ms: row[6]&.to_f || 0,
|
|
118
|
+
p50_latency_ms: row[7]&.to_f || 0,
|
|
119
|
+
p95_latency_ms: row[8]&.to_f || 0,
|
|
120
|
+
p99_latency_ms: row[9]&.to_f || 0,
|
|
121
|
+
avg_retries: row[10]&.to_f || 0
|
|
122
|
+
)
|
|
123
|
+
end
|
|
124
|
+
|
|
125
|
+
result
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
# Latency trend: average enqueue latency per minute bucketed
|
|
129
|
+
def self.latency_trend(minutes: 60)
|
|
130
|
+
return [] unless latency_columns?
|
|
131
|
+
|
|
132
|
+
since(minutes.minutes.ago)
|
|
133
|
+
.where.not(enqueue_latency_ms: nil)
|
|
134
|
+
.group("date_trunc('minute', created_at)")
|
|
135
|
+
.order(Arel.sql("date_trunc('minute', created_at)"))
|
|
136
|
+
.pluck(
|
|
137
|
+
Arel.sql("date_trunc('minute', created_at)"),
|
|
138
|
+
Arel.sql("ROUND(AVG(enqueue_latency_ms))"),
|
|
139
|
+
Arel.sql("ROUND(PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY enqueue_latency_ms))")
|
|
140
|
+
)
|
|
141
|
+
.map { |time, avg, p95| { time: time, avg_ms: avg.to_i, p95_ms: p95.to_i } }
|
|
142
|
+
end
|
|
143
|
+
|
|
144
|
+
# Average latency by queue
|
|
145
|
+
def self.avg_latency_by_queue(minutes: 60)
|
|
146
|
+
return [] unless latency_columns?
|
|
147
|
+
|
|
148
|
+
since(minutes.minutes.ago)
|
|
149
|
+
.where.not(enqueue_latency_ms: nil)
|
|
150
|
+
.group(:queue_name)
|
|
151
|
+
.order(Arel.sql("AVG(enqueue_latency_ms) DESC"))
|
|
152
|
+
.pluck(
|
|
153
|
+
:queue_name,
|
|
154
|
+
Arel.sql("ROUND(AVG(enqueue_latency_ms))"),
|
|
155
|
+
Arel.sql("ROUND(PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY enqueue_latency_ms))"),
|
|
156
|
+
Arel.sql("COUNT(*)")
|
|
157
|
+
)
|
|
158
|
+
.map { |q, avg, p95, count| { queue_name: q, avg_ms: avg.to_i, p95_ms: p95.to_i, count: count.to_i } }
|
|
87
159
|
end
|
|
88
160
|
|
|
89
161
|
# Cleanup old stats
|
data/app/views/pgbus/insights/show.html.erb
CHANGED
|
@@ -51,6 +51,32 @@
|
|
|
51
51
|
</div>
|
|
52
52
|
</div>
|
|
53
53
|
|
|
54
|
+
<% if @latency_available %>
|
|
55
|
+
<!-- Latency summary cards -->
|
|
56
|
+
<div class="grid grid-cols-2 gap-4 sm:grid-cols-3 lg:grid-cols-5 mb-8">
|
|
57
|
+
<div class="rounded-lg bg-white dark:bg-gray-800 p-4 shadow ring-1 ring-gray-200 dark:ring-gray-700">
|
|
58
|
+
<dt class="text-xs font-medium uppercase text-gray-500 dark:text-gray-400"><%= t("pgbus.insights.show.summary.avg_latency") %></dt>
|
|
59
|
+
<dd class="mt-1 text-2xl font-semibold text-gray-900 dark:text-white"><%= pgbus_ms_duration(@summary[:avg_latency_ms]) %></dd>
|
|
60
|
+
</div>
|
|
61
|
+
<div class="rounded-lg bg-white dark:bg-gray-800 p-4 shadow ring-1 ring-gray-200 dark:ring-gray-700">
|
|
62
|
+
<dt class="text-xs font-medium uppercase text-gray-500 dark:text-gray-400"><%= t("pgbus.insights.show.summary.p50_latency") %></dt>
|
|
63
|
+
<dd class="mt-1 text-2xl font-semibold text-gray-900 dark:text-white"><%= pgbus_ms_duration(@summary[:p50_latency_ms]) %></dd>
|
|
64
|
+
</div>
|
|
65
|
+
<div class="rounded-lg bg-white dark:bg-gray-800 p-4 shadow ring-1 ring-gray-200 dark:ring-gray-700">
|
|
66
|
+
<dt class="text-xs font-medium uppercase text-gray-500 dark:text-gray-400"><%= t("pgbus.insights.show.summary.p95_latency") %></dt>
|
|
67
|
+
<dd class="mt-1 text-2xl font-semibold text-yellow-600 dark:text-yellow-400"><%= pgbus_ms_duration(@summary[:p95_latency_ms]) %></dd>
|
|
68
|
+
</div>
|
|
69
|
+
<div class="rounded-lg bg-white dark:bg-gray-800 p-4 shadow ring-1 ring-gray-200 dark:ring-gray-700">
|
|
70
|
+
<dt class="text-xs font-medium uppercase text-gray-500 dark:text-gray-400"><%= t("pgbus.insights.show.summary.p99_latency") %></dt>
|
|
71
|
+
<dd class="mt-1 text-2xl font-semibold text-orange-600 dark:text-orange-400"><%= pgbus_ms_duration(@summary[:p99_latency_ms]) %></dd>
|
|
72
|
+
</div>
|
|
73
|
+
<div class="rounded-lg bg-white dark:bg-gray-800 p-4 shadow ring-1 ring-gray-200 dark:ring-gray-700">
|
|
74
|
+
<dt class="text-xs font-medium uppercase text-gray-500 dark:text-gray-400"><%= t("pgbus.insights.show.summary.avg_retries") %></dt>
|
|
75
|
+
<dd class="mt-1 text-2xl font-semibold text-gray-900 dark:text-white"><%= @summary[:avg_retries]&.round(2) || 0 %></dd>
|
|
76
|
+
</div>
|
|
77
|
+
</div>
|
|
78
|
+
<% end %>
|
|
79
|
+
|
|
54
80
|
<!-- Charts -->
|
|
55
81
|
<div class="grid grid-cols-1 lg:grid-cols-2 gap-6 mb-8">
|
|
56
82
|
<div class="rounded-lg bg-white dark:bg-gray-800 p-5 shadow ring-1 ring-gray-200 dark:ring-gray-700">
|
|
@@ -63,6 +89,46 @@
|
|
|
63
89
|
</div>
|
|
64
90
|
</div>
|
|
65
91
|
|
|
92
|
+
<% if @latency_available %>
|
|
93
|
+
<!-- Latency chart -->
|
|
94
|
+
<div class="grid grid-cols-1 gap-6 mb-8">
|
|
95
|
+
<div class="rounded-lg bg-white dark:bg-gray-800 p-5 shadow ring-1 ring-gray-200 dark:ring-gray-700">
|
|
96
|
+
<h3 class="text-sm font-medium text-gray-700 dark:text-gray-300 mb-4"><%= t("pgbus.insights.show.charts.latency") %></h3>
|
|
97
|
+
<div id="latency-chart" style="height: 280px;"></div>
|
|
98
|
+
</div>
|
|
99
|
+
</div>
|
|
100
|
+
|
|
101
|
+
<!-- Latency by queue -->
|
|
102
|
+
<div class="rounded-lg bg-white dark:bg-gray-800 shadow ring-1 ring-gray-200 dark:ring-gray-700 mb-8">
|
|
103
|
+
<div class="px-5 py-4 border-b border-gray-200 dark:border-gray-700">
|
|
104
|
+
<h3 class="text-sm font-medium text-gray-700 dark:text-gray-300"><%= t("pgbus.insights.show.latency_by_queue.title") %></h3>
|
|
105
|
+
</div>
|
|
106
|
+
<table class="pgbus-table min-w-full divide-y divide-gray-200 dark:divide-gray-700">
|
|
107
|
+
<thead class="bg-gray-50 dark:bg-gray-900">
|
|
108
|
+
<tr>
|
|
109
|
+
<th class="px-4 py-3 text-left text-xs font-medium uppercase text-gray-500 dark:text-gray-400"><%= t("pgbus.insights.show.latency_by_queue.headers.queue") %></th>
|
|
110
|
+
<th class="px-4 py-3 text-right text-xs font-medium uppercase text-gray-500 dark:text-gray-400"><%= t("pgbus.insights.show.latency_by_queue.headers.count") %></th>
|
|
111
|
+
<th class="px-4 py-3 text-right text-xs font-medium uppercase text-gray-500 dark:text-gray-400"><%= t("pgbus.insights.show.latency_by_queue.headers.avg") %></th>
|
|
112
|
+
<th class="px-4 py-3 text-right text-xs font-medium uppercase text-gray-500 dark:text-gray-400"><%= t("pgbus.insights.show.latency_by_queue.headers.p95") %></th>
|
|
113
|
+
</tr>
|
|
114
|
+
</thead>
|
|
115
|
+
<tbody class="divide-y divide-gray-100 dark:divide-gray-700">
|
|
116
|
+
<% @latency_by_queue.each do |row| %>
|
|
117
|
+
<tr class="hover:bg-gray-50 dark:hover:bg-gray-700/50">
|
|
118
|
+
<td data-label="Queue" class="px-4 py-3 text-sm font-medium text-gray-700 dark:text-gray-300"><%= row[:queue_name] %></td>
|
|
119
|
+
<td data-label="Count" class="px-4 py-3 text-sm text-right font-mono text-gray-700 dark:text-gray-300"><%= pgbus_number(row[:count]) %></td>
|
|
120
|
+
<td data-label="Avg" class="px-4 py-3 text-sm text-right font-mono text-gray-700 dark:text-gray-300"><%= pgbus_ms_duration(row[:avg_ms]) %></td>
|
|
121
|
+
<td data-label="P95" class="px-4 py-3 text-sm text-right font-mono text-gray-700 dark:text-gray-300"><%= pgbus_ms_duration(row[:p95_ms]) %></td>
|
|
122
|
+
</tr>
|
|
123
|
+
<% end %>
|
|
124
|
+
<% if @latency_by_queue.empty? %>
|
|
125
|
+
<tr><td colspan="4" class="px-4 py-8 text-center text-sm text-gray-400 dark:text-gray-500"><%= t("pgbus.insights.show.latency_by_queue.empty") %></td></tr>
|
|
126
|
+
<% end %>
|
|
127
|
+
</tbody>
|
|
128
|
+
</table>
|
|
129
|
+
</div>
|
|
130
|
+
<% end %>
|
|
131
|
+
|
|
66
132
|
<!-- Slowest job classes -->
|
|
67
133
|
<div class="rounded-lg bg-white dark:bg-gray-800 shadow ring-1 ring-gray-200 dark:ring-gray-700">
|
|
68
134
|
<div class="px-5 py-4 border-b border-gray-200 dark:border-gray-700">
|
|
@@ -100,6 +166,8 @@
|
|
|
100
166
|
seriesName: "<%= j(t("pgbus.insights.show.charts.series_name")) %>",
|
|
101
167
|
noData: "<%= j(t("pgbus.insights.show.charts.no_data")) %>",
|
|
102
168
|
failedToLoad: "<%= j(t("pgbus.insights.show.charts.failed_to_load")) %>",
|
|
169
|
+
latencyAvg: "<%= j(t("pgbus.insights.show.charts.latency_avg")) %>",
|
|
170
|
+
latencyP95: "<%= j(t("pgbus.insights.show.charts.latency_p95")) %>",
|
|
103
171
|
};
|
|
104
172
|
|
|
105
173
|
let chartData = null;
|
|
@@ -114,9 +182,8 @@
|
|
|
114
182
|
.then(data => { chartData = data; renderCharts(data, i18n); })
|
|
115
183
|
.catch(err => {
|
|
116
184
|
const msg = '<p class="text-center text-sm text-gray-400 dark:text-gray-500 pt-24">' + i18n.failedToLoad + "</p>";
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
if (el2) el2.innerHTML = msg;
|
|
185
|
+
document.querySelectorAll("#throughput-chart, #status-chart, #latency-chart").forEach(el => {
|
|
186
|
+
el.innerHTML = msg;
|
|
187
|
+
});
|
|
121
188
|
});
|
|
122
189
|
</script>
|
data/app/views/pgbus/jobs/_enqueued_table.html.erb
CHANGED
|
@@ -1,6 +1,13 @@
|
|
|
1
1
|
<turbo-frame id="jobs-enqueued" data-auto-refresh data-src="<%= pgbus.jobs_path(request.query_parameters.merge(frame: 'enqueued')) %>">
|
|
2
2
|
<div>
|
|
3
|
-
<
|
|
3
|
+
<div class="flex items-center justify-between mb-3">
|
|
4
|
+
<h2 class="text-lg font-semibold text-gray-900 dark:text-white"><%= t("pgbus.jobs.enqueued_table.title") %></h2>
|
|
5
|
+
<% if @jobs.any? %>
|
|
6
|
+
<%= button_to t("pgbus.jobs.enqueued_table.discard_all"), pgbus.discard_all_enqueued_jobs_path, method: :post,
|
|
7
|
+
class: "rounded-md bg-red-600 px-3 py-2 text-sm font-medium text-white hover:bg-red-500",
|
|
8
|
+
data: { turbo_confirm: t("pgbus.jobs.enqueued_table.discard_all_confirm"), turbo_frame: "_top" } %>
|
|
9
|
+
<% end %>
|
|
10
|
+
</div>
|
|
4
11
|
<div class="overflow-hidden rounded-lg bg-white dark:bg-gray-800 shadow ring-1 ring-gray-200 dark:ring-gray-700">
|
|
5
12
|
<table class="pgbus-table min-w-full divide-y divide-gray-200 dark:divide-gray-700">
|
|
6
13
|
<thead class="bg-gray-50 dark:bg-gray-900">
|
data/config/locales/da.yml
CHANGED
|
@@ -179,6 +179,8 @@ da:
|
|
|
179
179
|
title: Job i kø
|
|
180
180
|
discard: Kassér
|
|
181
181
|
discard_confirm: Kassér denne besked?
|
|
182
|
+
discard_all: Kassér alle
|
|
183
|
+
discard_all_confirm: Kassér alle ventende jobs og frigiv deres låse? Dette kan ikke fortrydes.
|
|
182
184
|
retry: Prøv igen
|
|
183
185
|
retry_confirm: Nulstil synlighedstimeout og prøv igen?
|
|
184
186
|
failed_table:
|
|
@@ -197,6 +199,7 @@ da:
|
|
|
197
199
|
index:
|
|
198
200
|
discard_all: Kassér alle
|
|
199
201
|
discard_all_confirm: Kassér alle mislykkede job?
|
|
202
|
+
discard_all_enqueued_notice: Kasserede %{count} ventende jobs og frigav deres låse.
|
|
200
203
|
retry_all: Forsøg alle igen
|
|
201
204
|
retry_all_confirm: Forsøg alle mislykkede job igen?
|
|
202
205
|
title: Job
|
data/config/locales/de.yml
CHANGED
|
@@ -179,6 +179,8 @@ de:
|
|
|
179
179
|
title: Eingereihte Jobs
|
|
180
180
|
discard: Verwerfen
|
|
181
181
|
discard_confirm: Diese Nachricht verwerfen?
|
|
182
|
+
discard_all: Alle verwerfen
|
|
183
|
+
discard_all_confirm: Alle eingereihten Jobs verwerfen und ihre Sperren freigeben? Dies kann nicht rückgängig gemacht werden.
|
|
182
184
|
retry: Wiederholen
|
|
183
185
|
retry_confirm: Sichtbarkeits-Timeout zurücksetzen und erneut versuchen?
|
|
184
186
|
failed_table:
|
|
@@ -197,6 +199,7 @@ de:
|
|
|
197
199
|
index:
|
|
198
200
|
discard_all: Alle verwerfen
|
|
199
201
|
discard_all_confirm: Alle fehlgeschlagenen Jobs verwerfen?
|
|
202
|
+
discard_all_enqueued_notice: "%{count} eingereihte Jobs verworfen und Sperren freigegeben."
|
|
200
203
|
retry_all: Alle wiederholen
|
|
201
204
|
retry_all_confirm: Alle fehlgeschlagenen Jobs wiederholen?
|
|
202
205
|
title: Jobs
|
data/config/locales/en.yml
CHANGED
|
@@ -128,11 +128,22 @@ en:
|
|
|
128
128
|
show:
|
|
129
129
|
charts:
|
|
130
130
|
failed_to_load: Failed to load chart data
|
|
131
|
+
latency: Queue Latency (ms)
|
|
132
|
+
latency_avg: Avg
|
|
133
|
+
latency_p95: P95
|
|
131
134
|
no_data: No data yet
|
|
132
135
|
series_name: Jobs/min
|
|
133
136
|
status_distribution: Status Distribution
|
|
134
137
|
throughput: Throughput (jobs/min)
|
|
135
138
|
description_html: Job performance metrics for the last %{range}
|
|
139
|
+
latency_by_queue:
|
|
140
|
+
empty: No latency data yet
|
|
141
|
+
headers:
|
|
142
|
+
avg: Avg (ms)
|
|
143
|
+
count: Count
|
|
144
|
+
p95: P95 (ms)
|
|
145
|
+
queue: Queue
|
|
146
|
+
title: Latency by Queue
|
|
136
147
|
slowest:
|
|
137
148
|
empty: No job stats yet
|
|
138
149
|
headers:
|
|
@@ -143,9 +154,14 @@ en:
|
|
|
143
154
|
title: Slowest Job Classes (avg duration)
|
|
144
155
|
summary:
|
|
145
156
|
avg_duration: Avg Duration
|
|
157
|
+
avg_latency: Avg Latency
|
|
158
|
+
avg_retries: Avg Retries
|
|
146
159
|
dead_lettered: Dead Lettered
|
|
147
160
|
failed: Failed
|
|
148
161
|
max_duration: Max Duration
|
|
162
|
+
p50_latency: P50 Latency
|
|
163
|
+
p95_latency: P95 Latency
|
|
164
|
+
p99_latency: P99 Latency
|
|
149
165
|
succeeded: Succeeded
|
|
150
166
|
total_jobs: Total Jobs
|
|
151
167
|
time_ranges:
|
|
@@ -177,6 +193,8 @@ en:
|
|
|
177
193
|
timezone: 'Timezone:'
|
|
178
194
|
visible_at: 'Visible at:'
|
|
179
195
|
discard: Discard
|
|
196
|
+
discard_all: Discard All
|
|
197
|
+
discard_all_confirm: Discard all enqueued jobs and release their locks? This cannot be undone.
|
|
180
198
|
discard_confirm: Discard this message?
|
|
181
199
|
retry: Retry
|
|
182
200
|
retry_confirm: Reset visibility timeout and retry?
|
|
@@ -197,6 +215,7 @@ en:
|
|
|
197
215
|
index:
|
|
198
216
|
discard_all: Discard All
|
|
199
217
|
discard_all_confirm: Discard all failed jobs?
|
|
218
|
+
discard_all_enqueued_notice: Discarded %{count} enqueued jobs and released their locks.
|
|
200
219
|
retry_all: Retry All
|
|
201
220
|
retry_all_confirm: Retry all failed jobs?
|
|
202
221
|
title: Jobs
|
data/config/locales/es.yml
CHANGED
|
@@ -179,6 +179,8 @@ es:
|
|
|
179
179
|
title: Trabajos en Cola
|
|
180
180
|
discard: Descartar
|
|
181
181
|
discard_confirm: "¿Descartar este mensaje?"
|
|
182
|
+
discard_all: Descartar todos
|
|
183
|
+
discard_all_confirm: "¿Descartar todos los trabajos en cola y liberar sus bloqueos? Esta acción no se puede deshacer."
|
|
182
184
|
retry: Reintentar
|
|
183
185
|
retry_confirm: "¿Restablecer tiempo de visibilidad y reintentar?"
|
|
184
186
|
failed_table:
|
|
@@ -197,6 +199,7 @@ es:
|
|
|
197
199
|
index:
|
|
198
200
|
discard_all: Descartar Todo
|
|
199
201
|
discard_all_confirm: "¿Descartar todos los trabajos fallidos?"
|
|
202
|
+
discard_all_enqueued_notice: Se descartaron %{count} trabajos en cola y se liberaron sus bloqueos.
|
|
200
203
|
retry_all: Reintentar Todo
|
|
201
204
|
retry_all_confirm: "¿Reintentar todos los trabajos fallidos?"
|
|
202
205
|
title: Trabajos
|
data/config/locales/fi.yml
CHANGED
|
@@ -179,6 +179,8 @@ fi:
|
|
|
179
179
|
title: Jonotetut työt
|
|
180
180
|
discard: Hylkää
|
|
181
181
|
discard_confirm: Hylätä tämä viesti?
|
|
182
|
+
discard_all: Hylkää kaikki
|
|
183
|
+
discard_all_confirm: Hylkää kaikki jonossa olevat tehtävät ja vapauta lukot? Tätä ei voi perua.
|
|
182
184
|
retry: Yritä uudelleen
|
|
183
185
|
retry_confirm: Nollaa näkyvyysaika ja yritä uudelleen?
|
|
184
186
|
failed_table:
|
|
@@ -197,6 +199,7 @@ fi:
|
|
|
197
199
|
index:
|
|
198
200
|
discard_all: Hylkää kaikki
|
|
199
201
|
discard_all_confirm: Hylätäänkö kaikki epäonnistuneet työt?
|
|
202
|
+
discard_all_enqueued_notice: Hylättiin %{count} jonossa olevaa tehtävää ja vapautettiin lukot.
|
|
200
203
|
retry_all: Yritä uudelleen kaikki
|
|
201
204
|
retry_all_confirm: Yritetäänkö uudelleen kaikki epäonnistuneet työt?
|
|
202
205
|
title: Työt
|
data/config/locales/fr.yml
CHANGED
|
@@ -179,6 +179,8 @@ fr:
|
|
|
179
179
|
title: Travaux en file d'attente
|
|
180
180
|
discard: Rejeter
|
|
181
181
|
discard_confirm: Rejeter ce message ?
|
|
182
|
+
discard_all: Tout supprimer
|
|
183
|
+
discard_all_confirm: Supprimer tous les travaux en file d'attente et libérer leurs verrous ? Cette action est irréversible.
|
|
182
184
|
retry: Réessayer
|
|
183
185
|
retry_confirm: Réinitialiser le délai de visibilité et réessayer ?
|
|
184
186
|
failed_table:
|
|
@@ -197,6 +199,7 @@ fr:
|
|
|
197
199
|
index:
|
|
198
200
|
discard_all: Tout ignorer
|
|
199
201
|
discard_all_confirm: Ignorer tous les travaux échoués ?
|
|
202
|
+
discard_all_enqueued_notice: "%{count} travaux en file d'attente supprimés et verrous libérés."
|
|
200
203
|
retry_all: Tout réessayer
|
|
201
204
|
retry_all_confirm: Réessayer tous les travaux échoués ?
|
|
202
205
|
title: Travaux
|
data/config/locales/it.yml
CHANGED
|
@@ -179,6 +179,8 @@ it:
|
|
|
179
179
|
title: Lavori in coda
|
|
180
180
|
discard: Scarta
|
|
181
181
|
discard_confirm: Scartare questo messaggio?
|
|
182
|
+
discard_all: Scarta tutti
|
|
183
|
+
discard_all_confirm: Scartare tutti i lavori in coda e rilasciare i relativi blocchi? Questa azione non può essere annullata.
|
|
182
184
|
retry: Riprova
|
|
183
185
|
retry_confirm: Reimpostare il timeout di visibilità e riprovare?
|
|
184
186
|
failed_table:
|
|
@@ -197,6 +199,7 @@ it:
|
|
|
197
199
|
index:
|
|
198
200
|
discard_all: Scarta Tutto
|
|
199
201
|
discard_all_confirm: Scartare tutti i lavori falliti?
|
|
202
|
+
discard_all_enqueued_notice: Scartati %{count} lavori in coda e rilasciati i relativi blocchi.
|
|
200
203
|
retry_all: Riprova Tutto
|
|
201
204
|
retry_all_confirm: Riprova tutti i lavori falliti?
|
|
202
205
|
title: Lavori
|
data/config/locales/ja.yml
CHANGED
|
@@ -179,6 +179,8 @@ ja:
|
|
|
179
179
|
title: キューに入れられたジョブ
|
|
180
180
|
discard: 破棄
|
|
181
181
|
discard_confirm: このメッセージを破棄しますか?
|
|
182
|
+
discard_all: すべて破棄
|
|
183
|
+
discard_all_confirm: キュー内のすべてのジョブを破棄し、ロックを解放しますか?この操作は元に戻せません。
|
|
182
184
|
retry: リトライ
|
|
183
185
|
retry_confirm: 可視性タイムアウトをリセットしてリトライしますか?
|
|
184
186
|
failed_table:
|
|
@@ -197,6 +199,7 @@ ja:
|
|
|
197
199
|
index:
|
|
198
200
|
discard_all: すべて破棄
|
|
199
201
|
discard_all_confirm: すべての失敗したジョブを破棄しますか?
|
|
202
|
+
discard_all_enqueued_notice: "%{count}件のキュー内ジョブを破棄し、ロックを解放しました。"
|
|
200
203
|
retry_all: すべてリトライ
|
|
201
204
|
retry_all_confirm: すべての失敗したジョブをリトライしますか?
|
|
202
205
|
title: ジョブ
|
data/config/locales/nb.yml
CHANGED
|
@@ -179,6 +179,8 @@ nb:
|
|
|
179
179
|
title: Kølagte jobber
|
|
180
180
|
discard: Forkast
|
|
181
181
|
discard_confirm: Forkaste denne meldingen?
|
|
182
|
+
discard_all: Forkast alle
|
|
183
|
+
discard_all_confirm: Forkast alle køede jobber og frigi låsene? Dette kan ikke angres.
|
|
182
184
|
retry: Prøv igjen
|
|
183
185
|
retry_confirm: Tilbakestill synlighetstidsavbrudd og prøv igjen?
|
|
184
186
|
failed_table:
|
|
@@ -197,6 +199,7 @@ nb:
|
|
|
197
199
|
index:
|
|
198
200
|
discard_all: Forkast alle
|
|
199
201
|
discard_all_confirm: Forkast alle mislykkede jobber?
|
|
202
|
+
discard_all_enqueued_notice: Forkastet %{count} køede jobber og frigitt låsene.
|
|
200
203
|
retry_all: Prøv alle på nytt
|
|
201
204
|
retry_all_confirm: Prøv alle mislykkede jobber på nytt?
|
|
202
205
|
title: Jobber
|
data/config/locales/nl.yml
CHANGED
|
@@ -179,6 +179,8 @@ nl:
|
|
|
179
179
|
title: Taken in de wachtrij
|
|
180
180
|
discard: Verwerpen
|
|
181
181
|
discard_confirm: Dit bericht verwerpen?
|
|
182
|
+
discard_all: Alles verwijderen
|
|
183
|
+
discard_all_confirm: Alle taken in de wachtrij verwijderen en hun vergrendelingen vrijgeven? Dit kan niet ongedaan worden gemaakt.
|
|
182
184
|
retry: Opnieuw proberen
|
|
183
185
|
retry_confirm: Zichtbaarheidstimeout resetten en opnieuw proberen?
|
|
184
186
|
failed_table:
|
|
@@ -197,6 +199,7 @@ nl:
|
|
|
197
199
|
index:
|
|
198
200
|
discard_all: Alles Verwijderen
|
|
199
201
|
discard_all_confirm: Alle mislukte taken verwijderen?
|
|
202
|
+
discard_all_enqueued_notice: "%{count} taken in de wachtrij verwijderd en vergrendelingen vrijgegeven."
|
|
200
203
|
retry_all: Alles Opnieuw Proberen
|
|
201
204
|
retry_all_confirm: Alle mislukte taken opnieuw proberen?
|
|
202
205
|
title: Taken
|
data/config/locales/pt.yml
CHANGED
|
@@ -179,6 +179,8 @@ pt:
|
|
|
179
179
|
title: Trabalhos Enfileirados
|
|
180
180
|
discard: Descartar
|
|
181
181
|
discard_confirm: Descartar esta mensagem?
|
|
182
|
+
discard_all: Descartar todos
|
|
183
|
+
discard_all_confirm: Descartar todos os trabalhos na fila e liberar seus bloqueios? Esta ação não pode ser desfeita.
|
|
182
184
|
retry: Tentar novamente
|
|
183
185
|
retry_confirm: Redefinir tempo de visibilidade e tentar novamente?
|
|
184
186
|
failed_table:
|
|
@@ -197,6 +199,7 @@ pt:
|
|
|
197
199
|
index:
|
|
198
200
|
discard_all: Descartar Todos
|
|
199
201
|
discard_all_confirm: Descartar todos os trabalhos falhados?
|
|
202
|
+
discard_all_enqueued_notice: Descartados %{count} trabalhos na fila e bloqueios liberados.
|
|
200
203
|
retry_all: Tentar Novamente Todos
|
|
201
204
|
retry_all_confirm: Tentar novamente todos os trabalhos falhados?
|
|
202
205
|
title: Trabalhos
|
data/config/locales/sv.yml
CHANGED
|
@@ -179,6 +179,8 @@ sv:
|
|
|
179
179
|
title: Köade jobb
|
|
180
180
|
discard: Kassera
|
|
181
181
|
discard_confirm: Kassera detta meddelande?
|
|
182
|
+
discard_all: Kassera alla
|
|
183
|
+
discard_all_confirm: Kassera alla köade jobb och frigör deras lås? Detta kan inte ångras.
|
|
182
184
|
retry: Försök igen
|
|
183
185
|
retry_confirm: Återställ synlighetstimeout och försök igen?
|
|
184
186
|
failed_table:
|
|
@@ -197,6 +199,7 @@ sv:
|
|
|
197
199
|
index:
|
|
198
200
|
discard_all: Kassera alla
|
|
199
201
|
discard_all_confirm: Kassera alla misslyckade jobb?
|
|
202
|
+
discard_all_enqueued_notice: Kasserade %{count} köade jobb och frigjorde deras lås.
|
|
200
203
|
retry_all: Försök igen alla
|
|
201
204
|
retry_all_confirm: Försök igen alla misslyckade jobb?
|
|
202
205
|
title: Jobb
|
data/config/routes.rb
CHANGED
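|
[The data/config/routes.rb hunk (+1 -0) is not present in this extract.]
|
data/lib/generators/pgbus/add_job_stats_latency_generator.rb
ADDED
|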
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "rails/generators"
|
|
4
|
+
require "rails/generators/active_record"
|
|
5
|
+
|
|
6
|
+
module Pgbus
|
|
7
|
+
module Generators
|
|
8
|
+
class AddJobStatsLatencyGenerator < Rails::Generators::Base
|
|
9
|
+
include ActiveRecord::Generators::Migration
|
|
10
|
+
|
|
11
|
+
source_root File.expand_path("templates", __dir__)
|
|
12
|
+
|
|
13
|
+
desc "Add enqueue latency and retry count columns to pgbus_job_stats"
|
|
14
|
+
|
|
15
|
+
class_option :database,
|
|
16
|
+
type: :string,
|
|
17
|
+
default: nil,
|
|
18
|
+
desc: "Use a separate database for pgbus tables (e.g. --database=pgbus)"
|
|
19
|
+
|
|
20
|
+
def create_migration_file
|
|
21
|
+
if separate_database?
|
|
22
|
+
migration_template "add_job_stats_latency.rb.erb",
|
|
23
|
+
"db/pgbus_migrate/add_pgbus_job_stats_latency.rb"
|
|
24
|
+
else
|
|
25
|
+
migration_template "add_job_stats_latency.rb.erb",
|
|
26
|
+
"db/migrate/add_pgbus_job_stats_latency.rb"
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
def display_post_install
|
|
31
|
+
say ""
|
|
32
|
+
say "Pgbus job stats latency columns installed!", :green
|
|
33
|
+
say ""
|
|
34
|
+
say "Next steps:"
|
|
35
|
+
say " 1. Run: rails db:migrate#{":#{options[:database]}" if separate_database?}"
|
|
36
|
+
say " 2. Queue latency and retry metrics are now tracked automatically"
|
|
37
|
+
say " 3. View latency insights at /pgbus/insights"
|
|
38
|
+
say ""
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
private
|
|
42
|
+
|
|
43
|
+
def migration_version
|
|
44
|
+
"[#{ActiveRecord::Migration.current_version}]"
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
def separate_database?
|
|
48
|
+
options[:database].present?
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
end
|
|
52
|
+
end
|
|
data/lib/generators/pgbus/templates/add_job_stats_latency.rb.erb
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
class AddPgbusJobStatsLatency < ActiveRecord::Migration<%= migration_version %>
|
|
2
|
+
def change
|
|
3
|
+
add_column :pgbus_job_stats, :enqueue_latency_ms, :bigint
|
|
4
|
+
add_column :pgbus_job_stats, :retry_count, :integer, default: 0
|
|
5
|
+
|
|
6
|
+
add_index :pgbus_job_stats, [:queue_name, :created_at],
|
|
7
|
+
name: "idx_pgbus_job_stats_queue_time"
|
|
8
|
+
end
|
|
9
|
+
end
|
|
data/lib/pgbus/active_job/executor.rb
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "time"
|
|
4
|
+
|
|
3
5
|
module Pgbus
|
|
4
6
|
module ActiveJob
|
|
5
7
|
class Executor
|
|
@@ -20,7 +22,7 @@ module Pgbus
|
|
|
20
22
|
signal_concurrency(payload)
|
|
21
23
|
signal_batch_discarded(payload)
|
|
22
24
|
Uniqueness.release_lock(Uniqueness.extract_key(payload))
|
|
23
|
-
record_stat(payload, queue_name, "dead_lettered", execution_start)
|
|
25
|
+
record_stat(payload, queue_name, "dead_lettered", execution_start, message: message)
|
|
24
26
|
return :dead_lettered
|
|
25
27
|
end
|
|
26
28
|
|
|
@@ -56,12 +58,12 @@ module Pgbus
|
|
|
56
58
|
end
|
|
57
59
|
|
|
58
60
|
instrument("pgbus.job_completed", queue: queue_name, job_class: job_class)
|
|
59
|
-
record_stat(payload, queue_name, "success", execution_start)
|
|
61
|
+
record_stat(payload, queue_name, "success", execution_start, message: message)
|
|
60
62
|
:success
|
|
61
63
|
rescue StandardError => e
|
|
62
64
|
handle_failure(message, queue_name, e)
|
|
63
65
|
instrument("pgbus.job_failed", queue: queue_name, job_class: payload&.dig("job_class"), error: e.class.name)
|
|
64
|
-
record_stat(payload, queue_name, "failed", execution_start)
|
|
66
|
+
record_stat(payload, queue_name, "failed", execution_start, message: message)
|
|
65
67
|
# Don't signal concurrency on transient failure — the job will be retried.
|
|
66
68
|
# Semaphore is released only on success or dead-lettering.
|
|
67
69
|
:failed
|
|
@@ -91,20 +93,37 @@ module Pgbus
|
|
|
91
93
|
::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
|
|
92
94
|
end
|
|
93
95
|
|
|
94
|
-
def record_stat(payload, queue_name, status, start_time)
|
|
96
|
+
def record_stat(payload, queue_name, status, start_time, message: nil)
|
|
95
97
|
return unless config.stats_enabled
|
|
96
98
|
|
|
97
99
|
duration_ms = ((monotonic_now - start_time) * 1000).round
|
|
100
|
+
enqueue_latency_ms = compute_enqueue_latency(message)
|
|
101
|
+
retry_count = message ? [message.read_ct.to_i - 1, 0].max : 0
|
|
102
|
+
|
|
98
103
|
JobStat.record!(
|
|
99
104
|
job_class: payload&.dig("job_class") || "unknown",
|
|
100
105
|
queue_name: queue_name,
|
|
101
106
|
status: status,
|
|
102
|
-
duration_ms: duration_ms
|
|
107
|
+
duration_ms: duration_ms,
|
|
108
|
+
enqueue_latency_ms: enqueue_latency_ms,
|
|
109
|
+
retry_count: retry_count
|
|
103
110
|
)
|
|
104
111
|
rescue StandardError => e
|
|
105
112
|
Pgbus.logger.debug { "[Pgbus] Stat recording failed: #{e.message}" }
|
|
106
113
|
end
|
|
107
114
|
|
|
115
|
+
def compute_enqueue_latency(message)
|
|
116
|
+
return unless message
|
|
117
|
+
|
|
118
|
+
enqueued_at_str = message.enqueued_at
|
|
119
|
+
return unless enqueued_at_str
|
|
120
|
+
|
|
121
|
+
enqueued_at = Time.parse(enqueued_at_str.to_s)
|
|
122
|
+
[((Time.now.utc - enqueued_at) * 1000).round, 0].max
|
|
123
|
+
rescue ArgumentError, TypeError
|
|
124
|
+
nil
|
|
125
|
+
end
|
|
126
|
+
|
|
108
127
|
def handle_failure(_message, _queue_name, error)
|
|
109
128
|
Pgbus.logger.error { "[Pgbus] Job failed: #{error.class}: #{error.message}" }
|
|
110
129
|
Pgbus.logger.debug { error.backtrace&.join("\n") }
|
|
data/lib/pgbus/recurring/schedule.rb
CHANGED
|
@@ -17,10 +17,24 @@ module Pgbus
|
|
|
17
17
|
def enqueue_task(task, run_at:)
|
|
18
18
|
queue = resolve_queue(task)
|
|
19
19
|
|
|
20
|
+
# Check uniqueness lock before enqueuing. If the job class declares
|
|
21
|
+
# ensures_uniqueness, we acquire the lock here so duplicate recurring
|
|
22
|
+
# enqueues are rejected while a previous instance is still queued or running.
|
|
23
|
+
if uniqueness_locked?(task)
|
|
24
|
+
Pgbus.logger.debug do
|
|
25
|
+
"[Pgbus] Recurring task #{task.key} skipped: uniqueness lock held"
|
|
26
|
+
end
|
|
27
|
+
return
|
|
28
|
+
end
|
|
29
|
+
|
|
20
30
|
RecurringExecution.record(task.key, run_at) do
|
|
21
31
|
payload = build_payload(task)
|
|
22
32
|
headers = build_headers(task, run_at)
|
|
23
33
|
|
|
34
|
+
# Inject uniqueness metadata into the payload so the worker knows
|
|
35
|
+
# to release the lock after execution.
|
|
36
|
+
payload = inject_uniqueness_metadata(task, payload)
|
|
37
|
+
|
|
24
38
|
Pgbus.client.ensure_queue(queue)
|
|
25
39
|
Pgbus.client.send_message(queue, payload, headers: headers)
|
|
26
40
|
|
|
@@ -97,6 +111,78 @@ module Pgbus
|
|
|
97
111
|
"pgbus.recurring_schedule" => task.schedule
|
|
98
112
|
}
|
|
99
113
|
end
|
|
114
|
+
|
|
115
|
+
# Check if the job class has ensures_uniqueness and if its lock is currently held.
|
|
116
|
+
# Returns true if the lock is held (skip enqueue), false otherwise.
|
|
117
|
+
def uniqueness_locked?(task)
|
|
118
|
+
return false unless task.class_name
|
|
119
|
+
|
|
120
|
+
job_class = task.class_name.safe_constantize
|
|
121
|
+
return false unless job_class
|
|
122
|
+
return false unless job_class.respond_to?(:pgbus_uniqueness)
|
|
123
|
+
|
|
124
|
+
config = job_class.pgbus_uniqueness
|
|
125
|
+
return false unless config
|
|
126
|
+
return false unless config[:strategy] == :until_executed
|
|
127
|
+
|
|
128
|
+
key = resolve_uniqueness_key(config, task)
|
|
129
|
+
return false unless key
|
|
130
|
+
|
|
131
|
+
# Try to acquire the lock. If it fails, the lock is already held.
|
|
132
|
+
acquired = JobLock.acquire!(
|
|
133
|
+
key,
|
|
134
|
+
job_class: task.class_name,
|
|
135
|
+
job_id: "recurring-#{task.key}",
|
|
136
|
+
state: "queued",
|
|
137
|
+
ttl: config[:lock_ttl]
|
|
138
|
+
)
|
|
139
|
+
# If we acquired it, great — the message will be enqueued with the lock held.
|
|
140
|
+
# If not, a previous instance is still queued/running.
|
|
141
|
+
!acquired
|
|
142
|
+
rescue StandardError => e
|
|
143
|
+
Pgbus.logger.warn { "[Pgbus] Uniqueness check failed for #{task.key}: #{e.message}" }
|
|
144
|
+
false # Fail open — allow enqueue if uniqueness check errors
|
|
145
|
+
end
|
|
146
|
+
|
|
147
|
+
# Resolve the uniqueness key for a recurring task.
|
|
148
|
+
# For no-argument recurring jobs, the key defaults to the class name.
|
|
149
|
+
def resolve_uniqueness_key(config, task)
|
|
150
|
+
key_proc = config[:key]
|
|
151
|
+
args = task.arguments || []
|
|
152
|
+
|
|
153
|
+
if args.empty?
|
|
154
|
+
key_proc.call
|
|
155
|
+
else
|
|
156
|
+
key_proc.call(*args)
|
|
157
|
+
end
|
|
158
|
+
rescue StandardError => e
|
|
159
|
+
Pgbus.logger.warn { "[Pgbus] Could not resolve uniqueness key for #{task.key}: #{e.message}" }
|
|
160
|
+
nil
|
|
161
|
+
end
|
|
162
|
+
|
|
163
|
+
# Inject uniqueness metadata into the payload so the executor releases
|
|
164
|
+
# the lock after the job completes.
|
|
165
|
+
# Only inject for :until_executed strategy — :while_executing locks are
|
|
166
|
+
# acquired at execution time by the executor, not by the scheduler.
|
|
167
|
+
def inject_uniqueness_metadata(task, payload)
|
|
168
|
+
return payload unless task.class_name
|
|
169
|
+
|
|
170
|
+
job_class = task.class_name.safe_constantize
|
|
171
|
+
return payload unless job_class.respond_to?(:pgbus_uniqueness)
|
|
172
|
+
|
|
173
|
+
config = job_class.pgbus_uniqueness
|
|
174
|
+
return payload unless config
|
|
175
|
+
return payload unless config[:strategy] == :until_executed
|
|
176
|
+
|
|
177
|
+
key = resolve_uniqueness_key(config, task)
|
|
178
|
+
return payload unless key
|
|
179
|
+
|
|
180
|
+
payload.merge(
|
|
181
|
+
Pgbus::Uniqueness::METADATA_KEY => key,
|
|
182
|
+
Pgbus::Uniqueness::STRATEGY_KEY => config[:strategy].to_s,
|
|
183
|
+
Pgbus::Uniqueness::TTL_KEY => config[:lock_ttl]
|
|
184
|
+
)
|
|
185
|
+
end
|
|
100
186
|
end
|
|
101
187
|
end
|
|
102
188
|
end
|
data/lib/pgbus/version.rb
CHANGED
|
data/lib/pgbus/web/data_source.rb
CHANGED
|
@@ -113,9 +113,31 @@ module Pgbus
|
|
|
113
113
|
end
|
|
114
114
|
|
|
115
115
|
def discard_job(queue_name, msg_id)
|
|
116
|
+
release_lock_for_message(queue_name, msg_id)
|
|
116
117
|
@client.archive_message(queue_name, msg_id.to_i, prefixed: false)
|
|
117
118
|
end
|
|
118
119
|
|
|
120
|
+
def discard_all_enqueued
|
|
121
|
+
dlq_suffix = Pgbus.configuration.dead_letter_queue_suffix
|
|
122
|
+
queues = queues_with_metrics.reject { |q| q[:name].end_with?(dlq_suffix) }
|
|
123
|
+
total = 0
|
|
124
|
+
|
|
125
|
+
queues.each do |q|
|
|
126
|
+
messages = query_queue_messages_raw(q[:name], 10_000, 0)
|
|
127
|
+
next if messages.empty?
|
|
128
|
+
|
|
129
|
+
release_locks_for_messages(messages)
|
|
130
|
+
|
|
131
|
+
ids = messages.map { |m| m[:msg_id].to_i }
|
|
132
|
+
@client.archive_batch(q[:name], ids, prefixed: false)
|
|
133
|
+
total += ids.size
|
|
134
|
+
rescue StandardError => e
|
|
135
|
+
Pgbus.logger.debug { "[Pgbus::Web] Error discarding enqueued messages from #{q[:name]}: #{e.message}" }
|
|
136
|
+
end
|
|
137
|
+
|
|
138
|
+
total
|
|
139
|
+
end
|
|
140
|
+
|
|
119
141
|
# Failed events
|
|
120
142
|
def failed_events(page: 1, per_page: 25)
|
|
121
143
|
offset = (page - 1) * per_page
|
|
@@ -170,6 +192,9 @@ module Pgbus
|
|
|
170
192
|
end
|
|
171
193
|
|
|
172
194
|
def discard_failed_event(id)
|
|
195
|
+
event = failed_event(id)
|
|
196
|
+
release_lock_for_payload(event["payload"]) if event
|
|
197
|
+
|
|
173
198
|
connection.exec_delete(
|
|
174
199
|
"DELETE FROM pgbus_failed_events WHERE id = $1", "Pgbus Delete Failed Event", [id.to_i]
|
|
175
200
|
)
|
|
@@ -207,6 +232,8 @@ module Pgbus
|
|
|
207
232
|
end
|
|
208
233
|
|
|
209
234
|
def discard_all_failed
|
|
235
|
+
release_locks_for_failed_events
|
|
236
|
+
|
|
210
237
|
result = connection.execute("DELETE FROM pgbus_failed_events")
|
|
211
238
|
result.cmd_tuples
|
|
212
239
|
rescue StandardError => e
|
|
@@ -273,6 +300,7 @@ module Pgbus
|
|
|
273
300
|
|
|
274
301
|
def discard_dlq_message(queue_name, msg_id)
|
|
275
302
|
# queue_name here is the full DLQ name (already prefixed)
|
|
303
|
+
release_lock_for_message(queue_name, msg_id)
|
|
276
304
|
@client.delete_message(queue_name, msg_id.to_i, prefixed: false)
|
|
277
305
|
true
|
|
278
306
|
rescue StandardError => e
|
|
@@ -296,6 +324,8 @@ module Pgbus
|
|
|
296
324
|
messages = dlq_messages(page: 1, per_page: 1000)
|
|
297
325
|
return 0 if messages.empty?
|
|
298
326
|
|
|
327
|
+
release_locks_for_messages(messages)
|
|
328
|
+
|
|
299
329
|
# Group by queue for batch delete — one call per DLQ instead of N calls
|
|
300
330
|
messages.group_by { |m| m[:queue_name] }.sum do |queue_name, msgs|
|
|
301
331
|
ids = msgs.map { |m| m[:msg_id].to_i }
|
|
@@ -552,6 +582,20 @@ module Pgbus
|
|
|
552
582
|
[]
|
|
553
583
|
end
|
|
554
584
|
|
|
585
|
+
def latency_trend(minutes: 60)
|
|
586
|
+
JobStat.latency_trend(minutes: minutes)
|
|
587
|
+
rescue StandardError => e
|
|
588
|
+
Pgbus.logger.debug { "[Pgbus::Web] Error fetching latency trend: #{e.message}" }
|
|
589
|
+
[]
|
|
590
|
+
end
|
|
591
|
+
|
|
592
|
+
def latency_by_queue(minutes: 60)
|
|
593
|
+
JobStat.avg_latency_by_queue(minutes: minutes)
|
|
594
|
+
rescue StandardError => e
|
|
595
|
+
Pgbus.logger.debug { "[Pgbus::Web] Error fetching latency by queue: #{e.message}" }
|
|
596
|
+
[]
|
|
597
|
+
end
|
|
598
|
+
|
|
555
599
|
# Subscriber registry
|
|
556
600
|
def registered_subscribers
|
|
557
601
|
EventBus::Registry.instance.subscribers.map do |s|
|
|
@@ -719,6 +763,69 @@ module Pgbus
|
|
|
719
763
|
Pgbus.logger.debug { "[Pgbus::Web] Invalid recurring task arguments JSON: #{e.message}" }
|
|
720
764
|
[]
|
|
721
765
|
end
|
|
766
|
+
|
|
767
|
+
# --- Lock cleanup helpers ---
|
|
768
|
+
|
|
769
|
+
# Extract uniqueness key from a queue message and release its lock.
|
|
770
|
+
def release_lock_for_message(queue_name, msg_id)
|
|
771
|
+
row = connection.select_one(
|
|
772
|
+
"SELECT * FROM pgmq.q_#{sanitize_name(queue_name)} WHERE msg_id = $1",
|
|
773
|
+
"Pgbus Job Detail",
|
|
774
|
+
[msg_id.to_i]
|
|
775
|
+
)
|
|
776
|
+
return unless row
|
|
777
|
+
|
|
778
|
+
release_lock_for_payload(row["message"])
|
|
779
|
+
rescue StandardError => e
|
|
780
|
+
Pgbus.logger.debug { "[Pgbus::Web] Error releasing lock for message #{msg_id}: #{e.message}" }
|
|
781
|
+
end
|
|
782
|
+
|
|
783
|
+
# Extract uniqueness key from a JSON payload string and release its lock.
|
|
784
|
+
def release_lock_for_payload(payload_str)
|
|
785
|
+
return unless payload_str
|
|
786
|
+
|
|
787
|
+
payload = payload_str.is_a?(String) ? JSON.parse(payload_str) : payload_str
|
|
788
|
+
key = payload[Uniqueness::METADATA_KEY]
|
|
789
|
+
JobLock.release!(key) if key
|
|
790
|
+
rescue JSON::ParserError => e
|
|
791
|
+
Pgbus.logger.debug { "[Pgbus::Web] Error parsing payload for lock release: #{e.message}" }
|
|
792
|
+
end
|
|
793
|
+
|
|
794
|
+
# Extract uniqueness keys from a collection of formatted messages and
|
|
795
|
+
# release all associated locks in a single query.
|
|
796
|
+
def release_locks_for_messages(messages)
|
|
797
|
+
keys = messages.filter_map do |m|
|
|
798
|
+
payload = m[:message]
|
|
799
|
+
next unless payload
|
|
800
|
+
|
|
801
|
+
parsed = payload.is_a?(String) ? JSON.parse(payload) : payload
|
|
802
|
+
parsed[Uniqueness::METADATA_KEY]
|
|
803
|
+
rescue JSON::ParserError
|
|
804
|
+
nil
|
|
805
|
+
end
|
|
806
|
+
|
|
807
|
+
JobLock.where(lock_key: keys).delete_all if keys.any?
|
|
808
|
+
rescue StandardError => e
|
|
809
|
+
Pgbus.logger.debug { "[Pgbus::Web] Error releasing locks for messages: #{e.message}" }
|
|
810
|
+
end
|
|
811
|
+
|
|
812
|
+
# Collect uniqueness keys from all failed events and release their locks.
|
|
813
|
+
def release_locks_for_failed_events
|
|
814
|
+
rows = connection.select_all(
|
|
815
|
+
"SELECT payload FROM pgbus_failed_events", "Pgbus Collect Failed Keys"
|
|
816
|
+
)
|
|
817
|
+
|
|
818
|
+
keys = rows.to_a.filter_map do |row|
|
|
819
|
+
payload = JSON.parse(row["payload"])
|
|
820
|
+
payload[Uniqueness::METADATA_KEY]
|
|
821
|
+
rescue JSON::ParserError
|
|
822
|
+
nil
|
|
823
|
+
end
|
|
824
|
+
|
|
825
|
+
JobLock.where(lock_key: keys).delete_all if keys.any?
|
|
826
|
+
rescue StandardError => e
|
|
827
|
+
Pgbus.logger.debug { "[Pgbus::Web] Error releasing locks for failed events: #{e.message}" }
|
|
828
|
+
end
|
|
722
829
|
end
|
|
723
830
|
end
|
|
724
831
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: pgbus
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3.0
|
|
4
|
+
version: 0.3.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Mikael Henriksson
|
|
@@ -199,12 +199,14 @@ files:
|
|
|
199
199
|
- lib/active_job/queue_adapters/pgbus_adapter.rb
|
|
200
200
|
- lib/generators/pgbus/add_job_locks_generator.rb
|
|
201
201
|
- lib/generators/pgbus/add_job_stats_generator.rb
|
|
202
|
+
- lib/generators/pgbus/add_job_stats_latency_generator.rb
|
|
202
203
|
- lib/generators/pgbus/add_outbox_generator.rb
|
|
203
204
|
- lib/generators/pgbus/add_queue_states_generator.rb
|
|
204
205
|
- lib/generators/pgbus/add_recurring_generator.rb
|
|
205
206
|
- lib/generators/pgbus/install_generator.rb
|
|
206
207
|
- lib/generators/pgbus/templates/add_job_locks.rb.erb
|
|
207
208
|
- lib/generators/pgbus/templates/add_job_stats.rb.erb
|
|
209
|
+
- lib/generators/pgbus/templates/add_job_stats_latency.rb.erb
|
|
208
210
|
- lib/generators/pgbus/templates/add_outbox.rb.erb
|
|
209
211
|
- lib/generators/pgbus/templates/add_queue_states.rb.erb
|
|
210
212
|
- lib/generators/pgbus/templates/add_recurring_tables.rb.erb
|