pgbus 0.3.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2253eb954ac89887e409ae1145105005492ce22b8f96c586dded75713d8cafb0
4
- data.tar.gz: 84908a1e8b4d4d74dc2b713863605af3b0543a67150a9082514ad8c50fd824f2
3
+ metadata.gz: d895049be3463d74d039e29485fc7d26c4c06ca96419e8a46327a4be6538db1f
4
+ data.tar.gz: f685673eaf24780e820486b9eb1f752493caf1765241b0063293d1d8d4b98e25
5
5
  SHA512:
6
- metadata.gz: 44829218ace48e2207c283ca5f0be6209a665cb31c1c636f5e96c4cf83adeadaf01a94a8548e797aec995f99f9d0b6e7984734a8506ffe0af3c5a0c02a4c251d
7
- data.tar.gz: b18ac34999fea1cdda778f4f1f218b8cd8b9a72bec2f0e07560ea4b510c5cd2908a22e4e65a14e87644c0eaabc792df5fa3becc3c49f69154f2ec218815f514a
6
+ metadata.gz: 33cb5c4fbf7ae36a8bb82a780d39bd0a32fd2c3167d0d7aaaed74a60608778a90e32eac53c1db2808ffa5956184298a83478488a8d11edcc89b052ec5fe8b0a5
7
+ data.tar.gz: 4a9cc394b02ee99e1681ab26a86db25c57f38f92e901dab0c2c48c1025224bb4089009abcd865a2432c3b37a988b51511faa64be976709069a4dbef70e3ec5ed
data/Rakefile CHANGED
@@ -31,7 +31,12 @@ namespace :bench do
31
31
  ruby "benchmarks/memory_profile.rb"
32
32
  end
33
33
 
34
- desc "Run all benchmarks"
34
+ desc "Run integration benchmarks (requires PGBUS_DATABASE_URL)"
35
+ task :integration do
36
+ ruby "benchmarks/integration_bench.rb"
37
+ end
38
+
39
+ desc "Run all benchmarks (unit-level, no DB required)"
35
40
  task all: %i[serialization client executor]
36
41
  end
37
42
 
@@ -5,12 +5,17 @@ module Pgbus
5
5
  class InsightsController < ApplicationController
6
6
  def show
7
7
  minutes = insights_minutes
8
- render json: {
8
+ payload = {
9
9
  summary: data_source.job_stats_summary(minutes: minutes),
10
10
  throughput: data_source.job_throughput(minutes: minutes),
11
11
  status_counts: data_source.job_status_counts(minutes: minutes),
12
12
  slowest: data_source.slowest_job_classes(minutes: minutes)
13
13
  }
14
+ if Pgbus::JobStat.latency_columns?
15
+ payload[:latency_trend] = data_source.latency_trend(minutes: minutes)
16
+ payload[:latency_by_queue] = data_source.latency_by_queue(minutes: minutes)
17
+ end
18
+ render json: payload
14
19
  end
15
20
  end
16
21
  end
@@ -6,6 +6,8 @@ module Pgbus
6
6
  @minutes = insights_minutes
7
7
  @summary = data_source.job_stats_summary(minutes: @minutes)
8
8
  @slowest = data_source.slowest_job_classes(minutes: @minutes)
9
+ @latency_by_queue = data_source.latency_by_queue(minutes: @minutes)
10
+ @latency_available = Pgbus::JobStat.latency_columns?
9
11
  end
10
12
  end
11
13
  end
@@ -44,5 +44,10 @@ module Pgbus
44
44
  count = data_source.discard_all_failed
45
45
  redirect_to jobs_path, notice: "Discarded #{count} jobs."
46
46
  end
47
+
48
+ def discard_all_enqueued
49
+ count = data_source.discard_all_enqueued
50
+ redirect_to jobs_path, notice: t("pgbus.jobs.index.discard_all_enqueued_notice", count: count)
51
+ end
47
52
  end
48
53
  end
@@ -12,13 +12,14 @@ function getThemeColors() {
12
12
  };
13
13
  }
14
14
 
15
- let throughputChart, statusChart;
15
+ let throughputChart, statusChart, latencyChart;
16
16
 
17
17
  export function renderCharts(data, i18n) {
18
18
  const t = getThemeColors();
19
19
 
20
20
  if (throughputChart) throughputChart.destroy();
21
21
  if (statusChart) statusChart.destroy();
22
+ if (latencyChart) latencyChart.destroy();
22
23
 
23
24
  const throughputData = data.throughput.map(p => ({
24
25
  x: new Date(p.time).getTime(),
@@ -64,6 +65,32 @@ export function renderCharts(data, i18n) {
64
65
  const el = document.querySelector("#status-chart");
65
66
  if (el) el.innerHTML = `<p class="text-center text-sm text-gray-400 dark:text-gray-500 pt-24">${i18n.noData || "No data"}</p>`;
66
67
  }
68
+
69
+ // Latency chart (only if data is available)
70
+ const latencyEl = document.querySelector("#latency-chart");
71
+ if (latencyEl && data.latency_trend && data.latency_trend.length > 0) {
72
+ const avgData = data.latency_trend.map(p => ({ x: new Date(p.time).getTime(), y: p.avg_ms }));
73
+ const p95Data = data.latency_trend.map(p => ({ x: new Date(p.time).getTime(), y: p.p95_ms }));
74
+
75
+ latencyChart = new ApexCharts(latencyEl, {
76
+ series: [
77
+ { name: i18n.latencyAvg || "Avg", data: avgData },
78
+ { name: i18n.latencyP95 || "P95", data: p95Data },
79
+ ],
80
+ chart: { type: "line", height: 280, toolbar: { show: false }, background: "transparent", foreColor: t.text },
81
+ stroke: { curve: "smooth", width: [2, 2], dashArray: [0, 5] },
82
+ colors: ["#6366f1", "#f59e0b"],
83
+ xaxis: { type: "datetime", labels: { style: { colors: t.text } } },
84
+ yaxis: { labels: { style: { colors: t.text }, formatter: v => Math.round(v) + "ms" } },
85
+ grid: { borderColor: t.grid },
86
+ tooltip: { theme: t.tooltip },
87
+ dataLabels: { enabled: false },
88
+ legend: { position: "top", labels: { colors: t.text } },
89
+ });
90
+ latencyChart.render();
91
+ } else if (latencyEl) {
92
+ latencyEl.innerHTML = `<p class="text-center text-sm text-gray-400 dark:text-gray-500 pt-24">${i18n.noData || "No data"}</p>`;
93
+ }
67
94
  }
68
95
 
69
96
  let themeObserver = null;
@@ -16,42 +16,50 @@ module Pgbus
16
16
  # Atomically try to acquire a lock.
17
17
  # Cleans up expired locks for this key first (crash recovery at acquire time).
18
18
  # Returns true if acquired, false if already locked.
19
- def self.acquire!(lock_key, job_class:, ttl:, job_id: nil, state: "queued", owner_pid: nil, owner_hostname: nil)
19
+ #
20
+ # Uses raw SQL on the hot path to minimize ActiveRecord allocations
21
+ # (~29 objects vs ~304 per acquire+release cycle with AR query builder).
22
+ def self.acquire!(lock_key, job_class:, ttl:, job_id: nil, state: "queued", owner_pid: nil, owner_hostname: nil) # rubocop:disable Naming/PredicateMethod
23
+ expires_at = Time.current + ttl
24
+
20
25
  # Remove any expired lock for this key inline (last-resort TTL recovery)
21
- where(lock_key: lock_key).where("expires_at < ?", Time.current).delete_all
26
+ connection.exec_delete(
27
+ "DELETE FROM #{table_name} WHERE lock_key = $1 AND expires_at < $2",
28
+ "JobLock Expire", [lock_key, Time.current]
29
+ )
22
30
 
23
- result = insert(
24
- {
25
- lock_key: lock_key, job_class: job_class, job_id: job_id,
26
- state: state, owner_pid: owner_pid, owner_hostname: owner_hostname,
27
- expires_at: Time.current + ttl
28
- },
29
- unique_by: :lock_key
31
+ result = connection.exec_query(
32
+ "INSERT INTO #{table_name} (lock_key, job_class, job_id, state, owner_pid, owner_hostname, expires_at) " \
33
+ "VALUES ($1, $2, $3, $4, $5, $6, $7) ON CONFLICT (lock_key) DO NOTHING RETURNING id",
34
+ "JobLock Acquire", [lock_key, job_class, job_id, state, owner_pid, owner_hostname, expires_at]
30
35
  )
31
36
  result.rows.any?
32
- rescue ActiveRecord::RecordNotUnique
33
- false
34
37
  end
35
38
 
36
39
  # Transition a queued lock to executing state and claim ownership.
37
40
  # Called when a worker starts executing a job that was locked at enqueue time.
38
41
  def self.claim_for_execution!(lock_key, owner_pid:, owner_hostname:, ttl:)
39
- where(lock_key: lock_key).update_all(
40
- state: "executing",
41
- owner_pid: owner_pid,
42
- owner_hostname: owner_hostname,
43
- expires_at: Time.current + ttl
42
+ connection.exec_update(
43
+ "UPDATE #{table_name} SET state = $1, owner_pid = $2, owner_hostname = $3, expires_at = $4 " \
44
+ "WHERE lock_key = $5",
45
+ "JobLock Claim", ["executing", owner_pid, owner_hostname, Time.current + ttl, lock_key]
44
46
  )
45
47
  end
46
48
 
47
49
  # Release a lock by key.
48
50
  def self.release!(lock_key)
49
- where(lock_key: lock_key).delete_all
51
+ connection.exec_delete(
52
+ "DELETE FROM #{table_name} WHERE lock_key = $1",
53
+ "JobLock Release", [lock_key]
54
+ )
50
55
  end
51
56
 
52
57
  # Check if a lock is currently held (regardless of expiry — reaper handles orphans).
53
58
  def self.locked?(lock_key)
54
- where(lock_key: lock_key).exists?
59
+ result = connection.select_value(
60
+ "SELECT 1 FROM #{table_name} WHERE lock_key = $1 LIMIT 1", "JobLock Check", [lock_key]
61
+ )
62
+ !result.nil?
55
63
  end
56
64
 
57
65
  # Reap orphaned locks: locks in 'executing' state whose owner_pid
@@ -10,15 +10,19 @@ module Pgbus
10
10
  scope :dead_lettered, -> { where(status: "dead_lettered") }
11
11
 
12
12
  # Record a job execution stat. Called by the executor after each job.
13
- def self.record!(job_class:, queue_name:, status:, duration_ms:)
13
+ def self.record!(job_class:, queue_name:, status:, duration_ms:, enqueue_latency_ms: nil, retry_count: 0)
14
14
  return unless table_exists?
15
15
 
16
- create!(
16
+ attrs = {
17
17
  job_class: job_class,
18
18
  queue_name: queue_name,
19
19
  status: status,
20
20
  duration_ms: duration_ms
21
- )
21
+ }
22
+ attrs[:enqueue_latency_ms] = enqueue_latency_ms if latency_columns?
23
+ attrs[:retry_count] = retry_count if latency_columns?
24
+
25
+ create!(attrs)
22
26
  rescue StandardError => e
23
27
  Pgbus.logger.debug { "[Pgbus] Failed to record job stat: #{e.message}" }
24
28
  end
@@ -34,6 +38,15 @@ module Pgbus
34
38
  @table_exists = false
35
39
  end
36
40
 
41
+ # Memoized — checks if the latency migration has been applied.
42
+ def self.latency_columns?
43
+ return @latency_columns if defined?(@latency_columns)
44
+
45
+ @latency_columns = table_exists? && column_names.include?("enqueue_latency_ms")
46
+ rescue StandardError
47
+ @latency_columns = false
48
+ end
49
+
37
50
  # Throughput: jobs per minute bucketed by minute for the last N minutes
38
51
  def self.throughput(minutes: 60)
39
52
  since(minutes.minutes.ago)
@@ -67,16 +80,30 @@ module Pgbus
67
80
 
68
81
  # Single-query aggregate summary using conditional counts.
69
82
  def self.summary(minutes: 60)
70
- row = since(minutes.minutes.ago).pick(
71
- Arel.sql("COUNT(*)"),
72
- Arel.sql("COUNT(*) FILTER (WHERE status = 'success')"),
73
- Arel.sql("COUNT(*) FILTER (WHERE status = 'failed')"),
74
- Arel.sql("COUNT(*) FILTER (WHERE status = 'dead_lettered')"),
75
- Arel.sql("ROUND(AVG(duration_ms)::numeric, 1)"),
76
- Arel.sql("MAX(duration_ms)")
77
- )
78
-
79
- {
83
+ cols = [
84
+ "COUNT(*)",
85
+ "COUNT(*) FILTER (WHERE status = 'success')",
86
+ "COUNT(*) FILTER (WHERE status = 'failed')",
87
+ "COUNT(*) FILTER (WHERE status = 'dead_lettered')",
88
+ "ROUND(AVG(duration_ms)::numeric, 1)",
89
+ "MAX(duration_ms)"
90
+ ]
91
+ if latency_columns?
92
+ cols.push(
93
+ "ROUND(AVG(enqueue_latency_ms) FILTER (WHERE enqueue_latency_ms IS NOT NULL)::numeric, 1)",
94
+ "PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY enqueue_latency_ms) " \
95
+ "FILTER (WHERE enqueue_latency_ms IS NOT NULL)",
96
+ "PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY enqueue_latency_ms) " \
97
+ "FILTER (WHERE enqueue_latency_ms IS NOT NULL)",
98
+ "PERCENTILE_CONT(0.99) WITHIN GROUP (ORDER BY enqueue_latency_ms) " \
99
+ "FILTER (WHERE enqueue_latency_ms IS NOT NULL)",
100
+ "ROUND(AVG(retry_count) FILTER (WHERE retry_count IS NOT NULL)::numeric, 2)"
101
+ )
102
+ end
103
+
104
+ row = since(minutes.minutes.ago).pick(*cols.map { |c| Arel.sql(c) })
105
+
106
+ result = {
80
107
  total: row[0].to_i,
81
108
  success: row[1].to_i,
82
109
  failed: row[2].to_i,
@@ -84,6 +111,51 @@ module Pgbus
84
111
  avg_duration_ms: row[4]&.to_f || 0,
85
112
  max_duration_ms: row[5].to_i
86
113
  }
114
+
115
+ if latency_columns?
116
+ result.merge!(
117
+ avg_latency_ms: row[6]&.to_f || 0,
118
+ p50_latency_ms: row[7]&.to_f || 0,
119
+ p95_latency_ms: row[8]&.to_f || 0,
120
+ p99_latency_ms: row[9]&.to_f || 0,
121
+ avg_retries: row[10]&.to_f || 0
122
+ )
123
+ end
124
+
125
+ result
126
+ end
127
+
128
+ # Latency trend: average enqueue latency per minute bucketed
129
+ def self.latency_trend(minutes: 60)
130
+ return [] unless latency_columns?
131
+
132
+ since(minutes.minutes.ago)
133
+ .where.not(enqueue_latency_ms: nil)
134
+ .group("date_trunc('minute', created_at)")
135
+ .order(Arel.sql("date_trunc('minute', created_at)"))
136
+ .pluck(
137
+ Arel.sql("date_trunc('minute', created_at)"),
138
+ Arel.sql("ROUND(AVG(enqueue_latency_ms))"),
139
+ Arel.sql("ROUND(PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY enqueue_latency_ms))")
140
+ )
141
+ .map { |time, avg, p95| { time: time, avg_ms: avg.to_i, p95_ms: p95.to_i } }
142
+ end
143
+
144
+ # Average latency by queue
145
+ def self.avg_latency_by_queue(minutes: 60)
146
+ return [] unless latency_columns?
147
+
148
+ since(minutes.minutes.ago)
149
+ .where.not(enqueue_latency_ms: nil)
150
+ .group(:queue_name)
151
+ .order(Arel.sql("AVG(enqueue_latency_ms) DESC"))
152
+ .pluck(
153
+ :queue_name,
154
+ Arel.sql("ROUND(AVG(enqueue_latency_ms))"),
155
+ Arel.sql("ROUND(PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY enqueue_latency_ms))"),
156
+ Arel.sql("COUNT(*)")
157
+ )
158
+ .map { |q, avg, p95, count| { queue_name: q, avg_ms: avg.to_i, p95_ms: p95.to_i, count: count.to_i } }
87
159
  end
88
160
 
89
161
  # Cleanup old stats
@@ -51,6 +51,32 @@
51
51
  </div>
52
52
  </div>
53
53
 
54
+ <% if @latency_available %>
55
+ <!-- Latency summary cards -->
56
+ <div class="grid grid-cols-2 gap-4 sm:grid-cols-3 lg:grid-cols-5 mb-8">
57
+ <div class="rounded-lg bg-white dark:bg-gray-800 p-4 shadow ring-1 ring-gray-200 dark:ring-gray-700">
58
+ <dt class="text-xs font-medium uppercase text-gray-500 dark:text-gray-400"><%= t("pgbus.insights.show.summary.avg_latency") %></dt>
59
+ <dd class="mt-1 text-2xl font-semibold text-gray-900 dark:text-white"><%= pgbus_ms_duration(@summary[:avg_latency_ms]) %></dd>
60
+ </div>
61
+ <div class="rounded-lg bg-white dark:bg-gray-800 p-4 shadow ring-1 ring-gray-200 dark:ring-gray-700">
62
+ <dt class="text-xs font-medium uppercase text-gray-500 dark:text-gray-400"><%= t("pgbus.insights.show.summary.p50_latency") %></dt>
63
+ <dd class="mt-1 text-2xl font-semibold text-gray-900 dark:text-white"><%= pgbus_ms_duration(@summary[:p50_latency_ms]) %></dd>
64
+ </div>
65
+ <div class="rounded-lg bg-white dark:bg-gray-800 p-4 shadow ring-1 ring-gray-200 dark:ring-gray-700">
66
+ <dt class="text-xs font-medium uppercase text-gray-500 dark:text-gray-400"><%= t("pgbus.insights.show.summary.p95_latency") %></dt>
67
+ <dd class="mt-1 text-2xl font-semibold text-yellow-600 dark:text-yellow-400"><%= pgbus_ms_duration(@summary[:p95_latency_ms]) %></dd>
68
+ </div>
69
+ <div class="rounded-lg bg-white dark:bg-gray-800 p-4 shadow ring-1 ring-gray-200 dark:ring-gray-700">
70
+ <dt class="text-xs font-medium uppercase text-gray-500 dark:text-gray-400"><%= t("pgbus.insights.show.summary.p99_latency") %></dt>
71
+ <dd class="mt-1 text-2xl font-semibold text-orange-600 dark:text-orange-400"><%= pgbus_ms_duration(@summary[:p99_latency_ms]) %></dd>
72
+ </div>
73
+ <div class="rounded-lg bg-white dark:bg-gray-800 p-4 shadow ring-1 ring-gray-200 dark:ring-gray-700">
74
+ <dt class="text-xs font-medium uppercase text-gray-500 dark:text-gray-400"><%= t("pgbus.insights.show.summary.avg_retries") %></dt>
75
+ <dd class="mt-1 text-2xl font-semibold text-gray-900 dark:text-white"><%= @summary[:avg_retries]&.round(2) || 0 %></dd>
76
+ </div>
77
+ </div>
78
+ <% end %>
79
+
54
80
  <!-- Charts -->
55
81
  <div class="grid grid-cols-1 lg:grid-cols-2 gap-6 mb-8">
56
82
  <div class="rounded-lg bg-white dark:bg-gray-800 p-5 shadow ring-1 ring-gray-200 dark:ring-gray-700">
@@ -63,6 +89,46 @@
63
89
  </div>
64
90
  </div>
65
91
 
92
+ <% if @latency_available %>
93
+ <!-- Latency chart -->
94
+ <div class="grid grid-cols-1 gap-6 mb-8">
95
+ <div class="rounded-lg bg-white dark:bg-gray-800 p-5 shadow ring-1 ring-gray-200 dark:ring-gray-700">
96
+ <h3 class="text-sm font-medium text-gray-700 dark:text-gray-300 mb-4"><%= t("pgbus.insights.show.charts.latency") %></h3>
97
+ <div id="latency-chart" style="height: 280px;"></div>
98
+ </div>
99
+ </div>
100
+
101
+ <!-- Latency by queue -->
102
+ <div class="rounded-lg bg-white dark:bg-gray-800 shadow ring-1 ring-gray-200 dark:ring-gray-700 mb-8">
103
+ <div class="px-5 py-4 border-b border-gray-200 dark:border-gray-700">
104
+ <h3 class="text-sm font-medium text-gray-700 dark:text-gray-300"><%= t("pgbus.insights.show.latency_by_queue.title") %></h3>
105
+ </div>
106
+ <table class="pgbus-table min-w-full divide-y divide-gray-200 dark:divide-gray-700">
107
+ <thead class="bg-gray-50 dark:bg-gray-900">
108
+ <tr>
109
+ <th class="px-4 py-3 text-left text-xs font-medium uppercase text-gray-500 dark:text-gray-400"><%= t("pgbus.insights.show.latency_by_queue.headers.queue") %></th>
110
+ <th class="px-4 py-3 text-right text-xs font-medium uppercase text-gray-500 dark:text-gray-400"><%= t("pgbus.insights.show.latency_by_queue.headers.count") %></th>
111
+ <th class="px-4 py-3 text-right text-xs font-medium uppercase text-gray-500 dark:text-gray-400"><%= t("pgbus.insights.show.latency_by_queue.headers.avg") %></th>
112
+ <th class="px-4 py-3 text-right text-xs font-medium uppercase text-gray-500 dark:text-gray-400"><%= t("pgbus.insights.show.latency_by_queue.headers.p95") %></th>
113
+ </tr>
114
+ </thead>
115
+ <tbody class="divide-y divide-gray-100 dark:divide-gray-700">
116
+ <% @latency_by_queue.each do |row| %>
117
+ <tr class="hover:bg-gray-50 dark:hover:bg-gray-700/50">
118
+ <td data-label="Queue" class="px-4 py-3 text-sm font-medium text-gray-700 dark:text-gray-300"><%= row[:queue_name] %></td>
119
+ <td data-label="Count" class="px-4 py-3 text-sm text-right font-mono text-gray-700 dark:text-gray-300"><%= pgbus_number(row[:count]) %></td>
120
+ <td data-label="Avg" class="px-4 py-3 text-sm text-right font-mono text-gray-700 dark:text-gray-300"><%= pgbus_ms_duration(row[:avg_ms]) %></td>
121
+ <td data-label="P95" class="px-4 py-3 text-sm text-right font-mono text-gray-700 dark:text-gray-300"><%= pgbus_ms_duration(row[:p95_ms]) %></td>
122
+ </tr>
123
+ <% end %>
124
+ <% if @latency_by_queue.empty? %>
125
+ <tr><td colspan="4" class="px-4 py-8 text-center text-sm text-gray-400 dark:text-gray-500"><%= t("pgbus.insights.show.latency_by_queue.empty") %></td></tr>
126
+ <% end %>
127
+ </tbody>
128
+ </table>
129
+ </div>
130
+ <% end %>
131
+
66
132
  <!-- Slowest job classes -->
67
133
  <div class="rounded-lg bg-white dark:bg-gray-800 shadow ring-1 ring-gray-200 dark:ring-gray-700">
68
134
  <div class="px-5 py-4 border-b border-gray-200 dark:border-gray-700">
@@ -100,6 +166,8 @@
100
166
  seriesName: "<%= j(t("pgbus.insights.show.charts.series_name")) %>",
101
167
  noData: "<%= j(t("pgbus.insights.show.charts.no_data")) %>",
102
168
  failedToLoad: "<%= j(t("pgbus.insights.show.charts.failed_to_load")) %>",
169
+ latencyAvg: "<%= j(t("pgbus.insights.show.charts.latency_avg")) %>",
170
+ latencyP95: "<%= j(t("pgbus.insights.show.charts.latency_p95")) %>",
103
171
  };
104
172
 
105
173
  let chartData = null;
@@ -114,9 +182,8 @@
114
182
  .then(data => { chartData = data; renderCharts(data, i18n); })
115
183
  .catch(err => {
116
184
  const msg = '<p class="text-center text-sm text-gray-400 dark:text-gray-500 pt-24">' + i18n.failedToLoad + "</p>";
117
- const el1 = document.querySelector("#throughput-chart");
118
- const el2 = document.querySelector("#status-chart");
119
- if (el1) el1.innerHTML = msg;
120
- if (el2) el2.innerHTML = msg;
185
+ document.querySelectorAll("#throughput-chart, #status-chart, #latency-chart").forEach(el => {
186
+ el.innerHTML = msg;
187
+ });
121
188
  });
122
189
  </script>
@@ -1,6 +1,13 @@
1
1
  <turbo-frame id="jobs-enqueued" data-auto-refresh data-src="<%= pgbus.jobs_path(request.query_parameters.merge(frame: 'enqueued')) %>">
2
2
  <div>
3
- <h2 class="text-lg font-semibold text-gray-900 dark:text-white mb-3"><%= t("pgbus.jobs.enqueued_table.title") %></h2>
3
+ <div class="flex items-center justify-between mb-3">
4
+ <h2 class="text-lg font-semibold text-gray-900 dark:text-white"><%= t("pgbus.jobs.enqueued_table.title") %></h2>
5
+ <% if @jobs.any? %>
6
+ <%= button_to t("pgbus.jobs.enqueued_table.discard_all"), pgbus.discard_all_enqueued_jobs_path, method: :post,
7
+ class: "rounded-md bg-red-600 px-3 py-2 text-sm font-medium text-white hover:bg-red-500",
8
+ data: { turbo_confirm: t("pgbus.jobs.enqueued_table.discard_all_confirm"), turbo_frame: "_top" } %>
9
+ <% end %>
10
+ </div>
4
11
  <div class="overflow-hidden rounded-lg bg-white dark:bg-gray-800 shadow ring-1 ring-gray-200 dark:ring-gray-700">
5
12
  <table class="pgbus-table min-w-full divide-y divide-gray-200 dark:divide-gray-700">
6
13
  <thead class="bg-gray-50 dark:bg-gray-900">
@@ -179,6 +179,8 @@ da:
179
179
  title: Job i kø
180
180
  discard: Kassér
181
181
  discard_confirm: Kassér denne besked?
182
+ discard_all: Kassér alle
183
+ discard_all_confirm: Kassér alle ventende jobs og frigiv deres låse? Dette kan ikke fortrydes.
182
184
  retry: Prøv igen
183
185
  retry_confirm: Nulstil synlighedstimeout og prøv igen?
184
186
  failed_table:
@@ -197,6 +199,7 @@ da:
197
199
  index:
198
200
  discard_all: Kassér alle
199
201
  discard_all_confirm: Kassér alle mislykkede job?
202
+ discard_all_enqueued_notice: Kasserede %{count} ventende jobs og frigav deres låse.
200
203
  retry_all: Forsøg alle igen
201
204
  retry_all_confirm: Forsøg alle mislykkede job igen?
202
205
  title: Job
@@ -179,6 +179,8 @@ de:
179
179
  title: Eingereihte Jobs
180
180
  discard: Verwerfen
181
181
  discard_confirm: Diese Nachricht verwerfen?
182
+ discard_all: Alle verwerfen
183
+ discard_all_confirm: Alle eingereihten Jobs verwerfen und ihre Sperren freigeben? Dies kann nicht rückgängig gemacht werden.
182
184
  retry: Wiederholen
183
185
  retry_confirm: Sichtbarkeits-Timeout zurücksetzen und erneut versuchen?
184
186
  failed_table:
@@ -197,6 +199,7 @@ de:
197
199
  index:
198
200
  discard_all: Alle verwerfen
199
201
  discard_all_confirm: Alle fehlgeschlagenen Jobs verwerfen?
202
+ discard_all_enqueued_notice: "%{count} eingereihte Jobs verworfen und Sperren freigegeben."
200
203
  retry_all: Alle wiederholen
201
204
  retry_all_confirm: Alle fehlgeschlagenen Jobs wiederholen?
202
205
  title: Jobs
@@ -128,11 +128,22 @@ en:
128
128
  show:
129
129
  charts:
130
130
  failed_to_load: Failed to load chart data
131
+ latency: Queue Latency (ms)
132
+ latency_avg: Avg
133
+ latency_p95: P95
131
134
  no_data: No data yet
132
135
  series_name: Jobs/min
133
136
  status_distribution: Status Distribution
134
137
  throughput: Throughput (jobs/min)
135
138
  description_html: Job performance metrics for the last %{range}
139
+ latency_by_queue:
140
+ empty: No latency data yet
141
+ headers:
142
+ avg: Avg (ms)
143
+ count: Count
144
+ p95: P95 (ms)
145
+ queue: Queue
146
+ title: Latency by Queue
136
147
  slowest:
137
148
  empty: No job stats yet
138
149
  headers:
@@ -143,9 +154,14 @@ en:
143
154
  title: Slowest Job Classes (avg duration)
144
155
  summary:
145
156
  avg_duration: Avg Duration
157
+ avg_latency: Avg Latency
158
+ avg_retries: Avg Retries
146
159
  dead_lettered: Dead Lettered
147
160
  failed: Failed
148
161
  max_duration: Max Duration
162
+ p50_latency: P50 Latency
163
+ p95_latency: P95 Latency
164
+ p99_latency: P99 Latency
149
165
  succeeded: Succeeded
150
166
  total_jobs: Total Jobs
151
167
  time_ranges:
@@ -177,6 +193,8 @@ en:
177
193
  timezone: 'Timezone:'
178
194
  visible_at: 'Visible at:'
179
195
  discard: Discard
196
+ discard_all: Discard All
197
+ discard_all_confirm: Discard all enqueued jobs and release their locks? This cannot be undone.
180
198
  discard_confirm: Discard this message?
181
199
  retry: Retry
182
200
  retry_confirm: Reset visibility timeout and retry?
@@ -197,6 +215,7 @@ en:
197
215
  index:
198
216
  discard_all: Discard All
199
217
  discard_all_confirm: Discard all failed jobs?
218
+ discard_all_enqueued_notice: Discarded %{count} enqueued jobs and released their locks.
200
219
  retry_all: Retry All
201
220
  retry_all_confirm: Retry all failed jobs?
202
221
  title: Jobs
@@ -179,6 +179,8 @@ es:
179
179
  title: Trabajos en Cola
180
180
  discard: Descartar
181
181
  discard_confirm: "¿Descartar este mensaje?"
182
+ discard_all: Descartar todos
183
+ discard_all_confirm: "¿Descartar todos los trabajos en cola y liberar sus bloqueos? Esta acción no se puede deshacer."
182
184
  retry: Reintentar
183
185
  retry_confirm: "¿Restablecer tiempo de visibilidad y reintentar?"
184
186
  failed_table:
@@ -197,6 +199,7 @@ es:
197
199
  index:
198
200
  discard_all: Descartar Todo
199
201
  discard_all_confirm: "¿Descartar todos los trabajos fallidos?"
202
+ discard_all_enqueued_notice: Se descartaron %{count} trabajos en cola y se liberaron sus bloqueos.
200
203
  retry_all: Reintentar Todo
201
204
  retry_all_confirm: "¿Reintentar todos los trabajos fallidos?"
202
205
  title: Trabajos
@@ -179,6 +179,8 @@ fi:
179
179
  title: Jonotetut työt
180
180
  discard: Hylkää
181
181
  discard_confirm: Hylätä tämä viesti?
182
+ discard_all: Hylkää kaikki
183
+ discard_all_confirm: Hylkää kaikki jonossa olevat tehtävät ja vapauta lukot? Tätä ei voi perua.
182
184
  retry: Yritä uudelleen
183
185
  retry_confirm: Nollaa näkyvyysaika ja yritä uudelleen?
184
186
  failed_table:
@@ -197,6 +199,7 @@ fi:
197
199
  index:
198
200
  discard_all: Hylkää kaikki
199
201
  discard_all_confirm: Hylätäänkö kaikki epäonnistuneet työt?
202
+ discard_all_enqueued_notice: Hylättiin %{count} jonossa olevaa tehtävää ja vapautettiin lukot.
200
203
  retry_all: Yritä uudelleen kaikki
201
204
  retry_all_confirm: Yritetäänkö uudelleen kaikki epäonnistuneet työt?
202
205
  title: Työt
@@ -179,6 +179,8 @@ fr:
179
179
  title: Travaux en file d'attente
180
180
  discard: Rejeter
181
181
  discard_confirm: Rejeter ce message ?
182
+ discard_all: Tout supprimer
183
+ discard_all_confirm: Supprimer tous les travaux en file d'attente et libérer leurs verrous ? Cette action est irréversible.
182
184
  retry: Réessayer
183
185
  retry_confirm: Réinitialiser le délai de visibilité et réessayer ?
184
186
  failed_table:
@@ -197,6 +199,7 @@ fr:
197
199
  index:
198
200
  discard_all: Tout ignorer
199
201
  discard_all_confirm: Ignorer tous les travaux échoués ?
202
+ discard_all_enqueued_notice: "%{count} travaux en file d'attente supprimés et verrous libérés."
200
203
  retry_all: Tout réessayer
201
204
  retry_all_confirm: Réessayer tous les travaux échoués ?
202
205
  title: Travaux
@@ -179,6 +179,8 @@ it:
179
179
  title: Lavori in coda
180
180
  discard: Scarta
181
181
  discard_confirm: Scartare questo messaggio?
182
+ discard_all: Scarta tutti
183
+ discard_all_confirm: Scartare tutti i lavori in coda e rilasciare i relativi blocchi? Questa azione non può essere annullata.
182
184
  retry: Riprova
183
185
  retry_confirm: Reimpostare il timeout di visibilità e riprovare?
184
186
  failed_table:
@@ -197,6 +199,7 @@ it:
197
199
  index:
198
200
  discard_all: Scarta Tutto
199
201
  discard_all_confirm: Scartare tutti i lavori falliti?
202
+ discard_all_enqueued_notice: Scartati %{count} lavori in coda e rilasciati i relativi blocchi.
200
203
  retry_all: Riprova Tutto
201
204
  retry_all_confirm: Riprova tutti i lavori falliti?
202
205
  title: Lavori
@@ -179,6 +179,8 @@ ja:
179
179
  title: キューに入れられたジョブ
180
180
  discard: 破棄
181
181
  discard_confirm: このメッセージを破棄しますか?
182
+ discard_all: すべて破棄
183
+ discard_all_confirm: キュー内のすべてのジョブを破棄し、ロックを解放しますか?この操作は元に戻せません。
182
184
  retry: リトライ
183
185
  retry_confirm: 可視性タイムアウトをリセットしてリトライしますか?
184
186
  failed_table:
@@ -197,6 +199,7 @@ ja:
197
199
  index:
198
200
  discard_all: すべて破棄
199
201
  discard_all_confirm: すべての失敗したジョブを破棄しますか?
202
+ discard_all_enqueued_notice: "%{count}件のキュー内ジョブを破棄し、ロックを解放しました。"
200
203
  retry_all: すべてリトライ
201
204
  retry_all_confirm: すべての失敗したジョブをリトライしますか?
202
205
  title: ジョブ
@@ -179,6 +179,8 @@ nb:
179
179
  title: Kølagte jobber
180
180
  discard: Forkast
181
181
  discard_confirm: Forkaste denne meldingen?
182
+ discard_all: Forkast alle
183
+ discard_all_confirm: Forkast alle køede jobber og frigi låsene? Dette kan ikke angres.
182
184
  retry: Prøv igjen
183
185
  retry_confirm: Tilbakestill synlighetstidsavbrudd og prøv igjen?
184
186
  failed_table:
@@ -197,6 +199,7 @@ nb:
197
199
  index:
198
200
  discard_all: Forkast alle
199
201
  discard_all_confirm: Forkast alle mislykkede jobber?
202
+ discard_all_enqueued_notice: Forkastet %{count} køede jobber og frigitt låsene.
200
203
  retry_all: Prøv alle på nytt
201
204
  retry_all_confirm: Prøv alle mislykkede jobber på nytt?
202
205
  title: Jobber
@@ -179,6 +179,8 @@ nl:
179
179
  title: Taken in de wachtrij
180
180
  discard: Verwerpen
181
181
  discard_confirm: Dit bericht verwerpen?
182
+ discard_all: Alles verwijderen
183
+ discard_all_confirm: Alle taken in de wachtrij verwijderen en hun vergrendelingen vrijgeven? Dit kan niet ongedaan worden gemaakt.
182
184
  retry: Opnieuw proberen
183
185
  retry_confirm: Zichtbaarheidstimeout resetten en opnieuw proberen?
184
186
  failed_table:
@@ -197,6 +199,7 @@ nl:
197
199
  index:
198
200
  discard_all: Alles Verwijderen
199
201
  discard_all_confirm: Alle mislukte taken verwijderen?
202
+ discard_all_enqueued_notice: "%{count} taken in de wachtrij verwijderd en vergrendelingen vrijgegeven."
200
203
  retry_all: Alles Opnieuw Proberen
201
204
  retry_all_confirm: Alle mislukte taken opnieuw proberen?
202
205
  title: Taken
@@ -179,6 +179,8 @@ pt:
179
179
  title: Trabalhos Enfileirados
180
180
  discard: Descartar
181
181
  discard_confirm: Descartar esta mensagem?
182
+ discard_all: Descartar todos
183
+ discard_all_confirm: Descartar todos os trabalhos na fila e liberar seus bloqueios? Esta ação não pode ser desfeita.
182
184
  retry: Tentar novamente
183
185
  retry_confirm: Redefinir tempo de visibilidade e tentar novamente?
184
186
  failed_table:
@@ -197,6 +199,7 @@ pt:
197
199
  index:
198
200
  discard_all: Descartar Todos
199
201
  discard_all_confirm: Descartar todos os trabalhos falhados?
202
+ discard_all_enqueued_notice: Descartados %{count} trabalhos na fila e bloqueios liberados.
200
203
  retry_all: Tentar Novamente Todos
201
204
  retry_all_confirm: Tentar novamente todos os trabalhos falhados?
202
205
  title: Trabalhos
@@ -179,6 +179,8 @@ sv:
179
179
  title: Köade jobb
180
180
  discard: Kassera
181
181
  discard_confirm: Kassera detta meddelande?
182
+ discard_all: Kassera alla
183
+ discard_all_confirm: Kassera alla köade jobb och frigör deras lås? Detta kan inte ångras.
182
184
  retry: Försök igen
183
185
  retry_confirm: Återställ synlighetstimeout och försök igen?
184
186
  failed_table:
@@ -197,6 +199,7 @@ sv:
197
199
  index:
198
200
  discard_all: Kassera alla
199
201
  discard_all_confirm: Kassera alla misslyckade jobb?
202
+ discard_all_enqueued_notice: Kasserade %{count} köade jobb och frigjorde deras lås.
200
203
  retry_all: Försök igen alla
201
204
  retry_all_confirm: Försök igen alla misslyckade jobb?
202
205
  title: Jobb
data/config/routes.rb CHANGED
@@ -21,6 +21,7 @@ Pgbus::Engine.routes.draw do
21
21
  collection do
22
22
  post :retry_all
23
23
  post :discard_all
24
+ post :discard_all_enqueued
24
25
  end
25
26
  end
26
27
 
@@ -0,0 +1,52 @@
1
# frozen_string_literal: true

require "rails/generators"
require "rails/generators/active_record"

module Pgbus
  module Generators
    # Generates the migration that adds enqueue-latency and retry-count
    # columns (plus a queue/time composite index) to pgbus_job_stats.
    #
    # With --database=NAME the migration is written under db/pgbus_migrate/
    # so it runs against the separate pgbus database; otherwise it goes to
    # the default db/migrate/ directory.
    class AddJobStatsLatencyGenerator < Rails::Generators::Base
      include ActiveRecord::Generators::Migration

      source_root File.expand_path("templates", __dir__)

      desc "Add enqueue latency and retry count columns to pgbus_job_stats"

      class_option :database,
                   type: :string,
                   default: nil,
                   desc: "Use a separate database for pgbus tables (e.g. --database=pgbus)"

      # Write the migration file. The template is identical for both
      # targets; only the destination directory depends on --database.
      def create_migration_file
        target_dir = separate_database? ? "db/pgbus_migrate" : "db/migrate"
        migration_template "add_job_stats_latency.rb.erb",
                           "#{target_dir}/add_pgbus_job_stats_latency.rb"
      end

      # Print post-install guidance (migrate command, where to see metrics).
      def display_post_install
        say ""
        say "Pgbus job stats latency columns installed!", :green
        say ""
        say "Next steps:"
        say "  1. Run: rails db:migrate#{":#{options[:database]}" if separate_database?}"
        say "  2. Queue latency and retry metrics are now tracked automatically"
        say "  3. View latency insights at /pgbus/insights"
        say ""
      end

      private

      # Rendered inside the ERB template to pin the migration to the
      # ActiveRecord version of the host application.
      def migration_version
        "[#{ActiveRecord::Migration.current_version}]"
      end

      def separate_database?
        options[:database].present?
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
# Adds latency/retry tracking columns to pgbus_job_stats, plus the
# composite (queue_name, created_at) index used by the insights queries.
class AddPgbusJobStatsLatency < ActiveRecord::Migration<%= migration_version %>
  def change
    # Milliseconds between enqueue and execution start; nil when the
    # enqueue timestamp was missing or unparseable.
    add_column :pgbus_job_stats, :enqueue_latency_ms, :bigint
    add_column :pgbus_job_stats, :retry_count, :integer, default: 0

    add_index :pgbus_job_stats, %i[queue_name created_at],
              name: "idx_pgbus_job_stats_queue_time"
  end
end
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "time"
4
+
3
5
  module Pgbus
4
6
  module ActiveJob
5
7
  class Executor
@@ -20,7 +22,7 @@ module Pgbus
20
22
  signal_concurrency(payload)
21
23
  signal_batch_discarded(payload)
22
24
  Uniqueness.release_lock(Uniqueness.extract_key(payload))
23
- record_stat(payload, queue_name, "dead_lettered", execution_start)
25
+ record_stat(payload, queue_name, "dead_lettered", execution_start, message: message)
24
26
  return :dead_lettered
25
27
  end
26
28
 
@@ -56,12 +58,12 @@ module Pgbus
56
58
  end
57
59
 
58
60
  instrument("pgbus.job_completed", queue: queue_name, job_class: job_class)
59
- record_stat(payload, queue_name, "success", execution_start)
61
+ record_stat(payload, queue_name, "success", execution_start, message: message)
60
62
  :success
61
63
  rescue StandardError => e
62
64
  handle_failure(message, queue_name, e)
63
65
  instrument("pgbus.job_failed", queue: queue_name, job_class: payload&.dig("job_class"), error: e.class.name)
64
- record_stat(payload, queue_name, "failed", execution_start)
66
+ record_stat(payload, queue_name, "failed", execution_start, message: message)
65
67
  # Don't signal concurrency on transient failure — the job will be retried.
66
68
  # Semaphore is released only on success or dead-lettering.
67
69
  :failed
@@ -91,20 +93,37 @@ module Pgbus
91
93
  ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
92
94
  end
93
95
 
94
- def record_stat(payload, queue_name, status, start_time)
96
+ def record_stat(payload, queue_name, status, start_time, message: nil)
95
97
  return unless config.stats_enabled
96
98
 
97
99
  duration_ms = ((monotonic_now - start_time) * 1000).round
100
+ enqueue_latency_ms = compute_enqueue_latency(message)
101
+ retry_count = message ? [message.read_ct.to_i - 1, 0].max : 0
102
+
98
103
  JobStat.record!(
99
104
  job_class: payload&.dig("job_class") || "unknown",
100
105
  queue_name: queue_name,
101
106
  status: status,
102
- duration_ms: duration_ms
107
+ duration_ms: duration_ms,
108
+ enqueue_latency_ms: enqueue_latency_ms,
109
+ retry_count: retry_count
103
110
  )
104
111
  rescue StandardError => e
105
112
  Pgbus.logger.debug { "[Pgbus] Stat recording failed: #{e.message}" }
106
113
  end
107
114
 
115
# Milliseconds the message spent waiting in the queue, derived from its
# enqueued_at timestamp. Returns nil when there is no message, no
# timestamp, or the timestamp cannot be parsed; never returns a
# negative value (clock skew is clamped to 0).
def compute_enqueue_latency(message)
  raw_timestamp = message&.enqueued_at
  return unless raw_timestamp

  waited_ms = ((Time.now.utc - Time.parse(raw_timestamp.to_s)) * 1000).round
  waited_ms.negative? ? 0 : waited_ms
rescue ArgumentError, TypeError
  nil
end
126
+
108
127
  def handle_failure(_message, _queue_name, error)
109
128
  Pgbus.logger.error { "[Pgbus] Job failed: #{error.class}: #{error.message}" }
110
129
  Pgbus.logger.debug { error.backtrace&.join("\n") }
@@ -17,10 +17,24 @@ module Pgbus
17
17
  def enqueue_task(task, run_at:)
18
18
  queue = resolve_queue(task)
19
19
 
20
+ # Check uniqueness lock before enqueuing. If the job class declares
21
+ # ensures_uniqueness, we acquire the lock here so duplicate recurring
22
+ # enqueues are rejected while a previous instance is still queued or running.
23
+ if uniqueness_locked?(task)
24
+ Pgbus.logger.debug do
25
+ "[Pgbus] Recurring task #{task.key} skipped: uniqueness lock held"
26
+ end
27
+ return
28
+ end
29
+
20
30
  RecurringExecution.record(task.key, run_at) do
21
31
  payload = build_payload(task)
22
32
  headers = build_headers(task, run_at)
23
33
 
34
+ # Inject uniqueness metadata into the payload so the worker knows
35
+ # to release the lock after execution.
36
+ payload = inject_uniqueness_metadata(task, payload)
37
+
24
38
  Pgbus.client.ensure_queue(queue)
25
39
  Pgbus.client.send_message(queue, payload, headers: headers)
26
40
 
@@ -97,6 +111,78 @@ module Pgbus
97
111
  "pgbus.recurring_schedule" => task.schedule
98
112
  }
99
113
  end
114
+
115
# Returns true when the task's job class declares :until_executed
# uniqueness and the lock is already held (a previous instance is still
# queued or running), meaning this enqueue should be skipped.
#
# NOTE: this predicate deliberately has a side effect — when the lock is
# free it is ACQUIRED here, so the message enqueued afterwards runs
# under that lock until the executor releases it.
def uniqueness_locked?(task)
  return false unless task.class_name

  klass = task.class_name.safe_constantize
  return false unless klass
  return false unless klass.respond_to?(:pgbus_uniqueness)

  settings = klass.pgbus_uniqueness
  return false unless settings
  return false unless settings[:strategy] == :until_executed

  lock_key = resolve_uniqueness_key(settings, task)
  return false unless lock_key

  # Attempt to take the lock; failure means a prior instance holds it.
  got_lock = JobLock.acquire!(
    lock_key,
    job_class: task.class_name,
    job_id: "recurring-#{task.key}",
    state: "queued",
    ttl: settings[:lock_ttl]
  )
  !got_lock
rescue StandardError => e
  Pgbus.logger.warn { "[Pgbus] Uniqueness check failed for #{task.key}: #{e.message}" }
  false # Fail open — allow enqueue if uniqueness check errors
end
146
+
147
# Resolve the uniqueness key for a recurring task by calling the
# configured key proc (config[:key]) with the task's arguments.
# Zero-argument tasks invoke the proc with no arguments. Returns nil
# (and logs a warning) when the proc is missing or raises.
def resolve_uniqueness_key(config, task)
  key_builder = config[:key]
  task_args = task.arguments || []

  task_args.empty? ? key_builder.call : key_builder.call(*task_args)
rescue StandardError => e
  Pgbus.logger.warn { "[Pgbus] Could not resolve uniqueness key for #{task.key}: #{e.message}" }
  nil
end
162
+
163
# Merge uniqueness metadata (key, strategy, TTL) into the payload so
# the executor can release the :until_executed lock once the job
# completes. :while_executing locks are acquired at execution time by
# the executor, so nothing is injected for that strategy. Returns the
# payload unchanged whenever uniqueness does not apply.
def inject_uniqueness_metadata(task, payload)
  return payload unless task.class_name

  klass = task.class_name.safe_constantize
  return payload unless klass.respond_to?(:pgbus_uniqueness)

  settings = klass.pgbus_uniqueness
  return payload unless settings && settings[:strategy] == :until_executed

  lock_key = resolve_uniqueness_key(settings, task)
  return payload unless lock_key

  payload.merge(
    Pgbus::Uniqueness::METADATA_KEY => lock_key,
    Pgbus::Uniqueness::STRATEGY_KEY => settings[:strategy].to_s,
    Pgbus::Uniqueness::TTL_KEY => settings[:lock_ttl]
  )
end
100
186
  end
101
187
  end
102
188
  end
data/lib/pgbus/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
# frozen_string_literal: true

module Pgbus
  # Gem version, bumped on each release.
  VERSION = "0.3.2"
end
@@ -113,9 +113,31 @@ module Pgbus
113
113
  end
114
114
 
115
115
# Archive a single enqueued message. The uniqueness lock (if any) is
# released first, while the message row is still readable — the lock
# key lives in the message payload.
def discard_job(queue_name, msg_id)
  release_lock_for_message(queue_name, msg_id)

  @client.archive_message(queue_name, msg_id.to_i, prefixed: false)
end
118
119
 
120
# Archive every message from all non-DLQ queues, releasing uniqueness
# locks first. Returns the total number of messages discarded.
# Failures on one queue are logged at debug and skipped so a single bad
# queue cannot abort the sweep.
def discard_all_enqueued
  dlq_suffix = Pgbus.configuration.dead_letter_queue_suffix
  discarded = 0

  queues_with_metrics.each do |queue|
    name = queue[:name]
    next if name.end_with?(dlq_suffix)

    messages = query_queue_messages_raw(name, 10_000, 0)
    next if messages.empty?

    # Release locks before archiving — the lock keys are read from the
    # message payloads, which must still exist at this point.
    release_locks_for_messages(messages)

    msg_ids = messages.map { |m| m[:msg_id].to_i }
    @client.archive_batch(name, msg_ids, prefixed: false)
    discarded += msg_ids.size
  rescue StandardError => e
    Pgbus.logger.debug { "[Pgbus::Web] Error discarding enqueued messages from #{name}: #{e.message}" }
  end

  discarded
end
140
+
119
141
  # Failed events
120
142
  def failed_events(page: 1, per_page: 25)
121
143
  offset = (page - 1) * per_page
@@ -170,6 +192,9 @@ module Pgbus
170
192
  end
171
193
 
172
194
  def discard_failed_event(id)
195
+ event = failed_event(id)
196
+ release_lock_for_payload(event["payload"]) if event
197
+
173
198
  connection.exec_delete(
174
199
  "DELETE FROM pgbus_failed_events WHERE id = $1", "Pgbus Delete Failed Event", [id.to_i]
175
200
  )
@@ -207,6 +232,8 @@ module Pgbus
207
232
  end
208
233
 
209
234
  def discard_all_failed
235
+ release_locks_for_failed_events
236
+
210
237
  result = connection.execute("DELETE FROM pgbus_failed_events")
211
238
  result.cmd_tuples
212
239
  rescue StandardError => e
@@ -273,6 +300,7 @@ module Pgbus
273
300
 
274
301
  def discard_dlq_message(queue_name, msg_id)
275
302
  # queue_name here is the full DLQ name (already prefixed)
303
+ release_lock_for_message(queue_name, msg_id)
276
304
  @client.delete_message(queue_name, msg_id.to_i, prefixed: false)
277
305
  true
278
306
  rescue StandardError => e
@@ -296,6 +324,8 @@ module Pgbus
296
324
  messages = dlq_messages(page: 1, per_page: 1000)
297
325
  return 0 if messages.empty?
298
326
 
327
+ release_locks_for_messages(messages)
328
+
299
329
  # Group by queue for batch delete — one call per DLQ instead of N calls
300
330
  messages.group_by { |m| m[:queue_name] }.sum do |queue_name, msgs|
301
331
  ids = msgs.map { |m| m[:msg_id].to_i }
@@ -552,6 +582,20 @@ module Pgbus
552
582
  []
553
583
  end
554
584
 
585
# Enqueue-latency trend rows from JobStat over the last +minutes+.
# Returns [] (and logs at debug) on any error so the dashboard renders.
def latency_trend(minutes: 60)
  JobStat.latency_trend(minutes: minutes)
rescue StandardError => e
  Pgbus.logger.debug { "[Pgbus::Web] Error fetching latency trend: #{e.message}" }
  []
end
591
+
592
# Average enqueue latency per queue over the last +minutes+.
# Returns [] (and logs at debug) on any error so the dashboard renders.
def latency_by_queue(minutes: 60)
  JobStat.avg_latency_by_queue(minutes: minutes)
rescue StandardError => e
  Pgbus.logger.debug { "[Pgbus::Web] Error fetching latency by queue: #{e.message}" }
  []
end
598
+
555
599
  # Subscriber registry
556
600
  def registered_subscribers
557
601
  EventBus::Registry.instance.subscribers.map do |s|
@@ -719,6 +763,69 @@ module Pgbus
719
763
  Pgbus.logger.debug { "[Pgbus::Web] Invalid recurring task arguments JSON: #{e.message}" }
720
764
  []
721
765
  end
766
+
767
# --- Lock cleanup helpers ---

# Look up a queued message by id and release the uniqueness lock its
# payload references, if any. Best-effort: all errors are logged at
# debug and swallowed.
def release_lock_for_message(queue_name, msg_id)
  sql = "SELECT * FROM pgmq.q_#{sanitize_name(queue_name)} WHERE msg_id = $1"
  row = connection.select_one(sql, "Pgbus Job Detail", [msg_id.to_i])

  release_lock_for_payload(row["message"]) if row
rescue StandardError => e
  Pgbus.logger.debug { "[Pgbus::Web] Error releasing lock for message #{msg_id}: #{e.message}" }
end
782
+
783
# Parse a message payload (JSON string or already-parsed Hash) and
# release the uniqueness lock it names, if present. Parse failures are
# logged at debug and ignored.
def release_lock_for_payload(payload_str)
  return unless payload_str

  data = payload_str.is_a?(String) ? JSON.parse(payload_str) : payload_str
  lock_key = data[Uniqueness::METADATA_KEY]
  JobLock.release!(lock_key) if lock_key
rescue JSON::ParserError => e
  Pgbus.logger.debug { "[Pgbus::Web] Error parsing payload for lock release: #{e.message}" }
end
793
+
794
# Collect uniqueness keys from a batch of formatted messages and delete
# all matching locks with a single query. Unparseable payloads are
# skipped; any other error is logged at debug and swallowed.
def release_locks_for_messages(messages)
  lock_keys = messages.filter_map do |msg|
    raw = msg[:message]
    next unless raw

    data = raw.is_a?(String) ? JSON.parse(raw) : raw
    data[Uniqueness::METADATA_KEY]
  rescue JSON::ParserError
    nil
  end

  JobLock.where(lock_key: lock_keys).delete_all unless lock_keys.empty?
rescue StandardError => e
  Pgbus.logger.debug { "[Pgbus::Web] Error releasing locks for messages: #{e.message}" }
end
811
+
812
# Release the uniqueness locks referenced by every failed event's
# payload, batching the lock deletion into one query. Unparseable
# payloads are skipped; any other error is logged at debug and swallowed.
def release_locks_for_failed_events
  rows = connection.select_all(
    "SELECT payload FROM pgbus_failed_events", "Pgbus Collect Failed Keys"
  )

  lock_keys = rows.to_a.filter_map do |row|
    JSON.parse(row["payload"])[Uniqueness::METADATA_KEY]
  rescue JSON::ParserError
    nil
  end

  JobLock.where(lock_key: lock_keys).delete_all unless lock_keys.empty?
rescue StandardError => e
  Pgbus.logger.debug { "[Pgbus::Web] Error releasing locks for failed events: #{e.message}" }
end
828
+ end
722
829
  end
723
830
  end
724
831
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pgbus
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mikael Henriksson
@@ -199,12 +199,14 @@ files:
199
199
  - lib/active_job/queue_adapters/pgbus_adapter.rb
200
200
  - lib/generators/pgbus/add_job_locks_generator.rb
201
201
  - lib/generators/pgbus/add_job_stats_generator.rb
202
+ - lib/generators/pgbus/add_job_stats_latency_generator.rb
202
203
  - lib/generators/pgbus/add_outbox_generator.rb
203
204
  - lib/generators/pgbus/add_queue_states_generator.rb
204
205
  - lib/generators/pgbus/add_recurring_generator.rb
205
206
  - lib/generators/pgbus/install_generator.rb
206
207
  - lib/generators/pgbus/templates/add_job_locks.rb.erb
207
208
  - lib/generators/pgbus/templates/add_job_stats.rb.erb
209
+ - lib/generators/pgbus/templates/add_job_stats_latency.rb.erb
208
210
  - lib/generators/pgbus/templates/add_outbox.rb.erb
209
211
  - lib/generators/pgbus/templates/add_queue_states.rb.erb
210
212
  - lib/generators/pgbus/templates/add_recurring_tables.rb.erb