pgbus 0.3.5 → 0.3.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/generators/pgbus/add_failed_events_index_generator.rb +51 -0
- data/lib/generators/pgbus/templates/add_failed_events_unique_index.rb.erb +7 -0
- data/lib/generators/pgbus/templates/migration.rb.erb +2 -0
- data/lib/pgbus/active_job/executor.rb +13 -2
- data/lib/pgbus/failed_event_recorder.rb +59 -0
- data/lib/pgbus/recurring/schedule.rb +31 -14
- data/lib/pgbus/recurring/scheduler.rb +1 -4
- data/lib/pgbus/version.rb +1 -1
- metadata +4 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 031cb29b5105d5731e862ab92fa1c8872279b3ff0e5dbbbdf595d4733874780a
|
|
4
|
+
data.tar.gz: ab7912d72556e382056a3f8f9cc0edd9f0498706b1958fce1f6546e4f8619eb2
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: f2892d6f44649afe256393e7abfc67369fa8d87ecb159b6e9d5a9e5816ab14a2a328bfd48c6c400866f1a81cb48b91446162d500d71faf06a587fc05e8d3bab6
|
|
7
|
+
data.tar.gz: 3577ea2487cd452e455a43be6dc310511681f003a942960116e41cd79722291a8cdb2a00d103e13950c8a6f996e3054029093e90d9587767e93ef11943969209
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "rails/generators"
|
|
4
|
+
require "rails/generators/active_record"
|
|
5
|
+
|
|
6
|
+
module Pgbus
  module Generators
    # Rails generator that installs the migration adding a unique index on
    # pgbus_failed_events (queue_name, msg_id).
    #
    # Public instance methods are the generator's steps, so their order matters:
    # the migration file is written first, then the follow-up instructions
    # are printed.
    class AddFailedEventsIndexGenerator < Rails::Generators::Base
      include ActiveRecord::Generators::Migration

      source_root File.expand_path("templates", __dir__)

      desc "Add unique index on pgbus_failed_events (queue_name, msg_id) for failure tracking upserts"

      class_option :database,
                   type: :string,
                   default: nil,
                   desc: "Use a separate database for pgbus tables (e.g. --database=pgbus)"

      # Renders the migration template into db/pgbus_migrate when a separate
      # database was requested via --database, otherwise into db/migrate.
      def create_migration_file
        migrations_dir = separate_database? ? "db/pgbus_migrate" : "db/migrate"

        migration_template "add_failed_events_unique_index.rb.erb",
                           "#{migrations_dir}/add_pgbus_failed_events_unique_index.rb"
      end

      # Prints the next steps; the suggested migrate command is suffixed with
      # ":<database>" only when a separate database is in use.
      def display_post_install
        database_suffix = separate_database? ? ":#{options[:database]}" : ""

        say ""
        say "Pgbus failed events unique index added!", :green
        say ""
        say "Next steps:"
        say " 1. Run: rails db:migrate#{database_suffix}"
        say " 2. Failed jobs will now be tracked in the dashboard"
        say ""
      end

      private

      # Bracketed ActiveRecord migration version, e.g. "[7.1]".
      # NOTE(review): presumably interpolated by the ERB template after
      # `ActiveRecord::Migration` — the template body is not visible here.
      def migration_version
        version = ActiveRecord::Migration.current_version
        "[#{version}]"
      end

      # True when the --database option was given a non-blank value.
      def separate_database?
        database = options[:database]
        database.present?
      end
    end
  end
end
|
|
@@ -65,6 +65,8 @@ class CreatePgbusTables < ActiveRecord::Migration<%= migration_version %>
|
|
|
65
65
|
|
|
66
66
|
add_index :pgbus_failed_events, :queue_name, name: "idx_pgbus_failed_events_queue"
|
|
67
67
|
add_index :pgbus_failed_events, :failed_at, name: "idx_pgbus_failed_events_time"
|
|
68
|
+
add_index :pgbus_failed_events, [:queue_name, :msg_id],
|
|
69
|
+
unique: true, name: "idx_pgbus_failed_events_queue_msg"
|
|
68
70
|
|
|
69
71
|
# Concurrency semaphores (counting locks for job concurrency limits)
|
|
70
72
|
create_table :pgbus_semaphores do |t|
|
|
@@ -20,6 +20,7 @@ module Pgbus
|
|
|
20
20
|
|
|
21
21
|
if read_count > config.max_retries
|
|
22
22
|
handle_dead_letter(message, queue_name, payload, source_queue: source_queue)
|
|
23
|
+
FailedEventRecorder.clear!(queue_name: queue_name, msg_id: message.msg_id.to_i)
|
|
23
24
|
signal_concurrency(payload)
|
|
24
25
|
signal_batch_discarded(payload)
|
|
25
26
|
Uniqueness.release_lock(Uniqueness.extract_key(payload))
|
|
@@ -54,6 +55,7 @@ module Pgbus
|
|
|
54
55
|
job = ::ActiveJob::Base.deserialize(payload)
|
|
55
56
|
execute_job(job)
|
|
56
57
|
archive_from(queue_name, message.msg_id.to_i, source_queue: source_queue)
|
|
58
|
+
FailedEventRecorder.clear!(queue_name: queue_name, msg_id: message.msg_id.to_i)
|
|
57
59
|
job_succeeded = true
|
|
58
60
|
end
|
|
59
61
|
|
|
@@ -61,7 +63,7 @@ module Pgbus
|
|
|
61
63
|
record_stat(payload, queue_name, "success", execution_start, message: message)
|
|
62
64
|
:success
|
|
63
65
|
rescue StandardError => e
|
|
64
|
-
handle_failure(message, queue_name, e)
|
|
66
|
+
handle_failure(message, queue_name, e, payload: payload)
|
|
65
67
|
instrument("pgbus.job_failed", queue: queue_name, job_class: payload&.dig("job_class"), error: e.class.name)
|
|
66
68
|
record_stat(payload, queue_name, "failed", execution_start, message: message)
|
|
67
69
|
# Don't signal concurrency on transient failure — the job will be retried.
|
|
@@ -143,13 +145,22 @@ module Pgbus
|
|
|
143
145
|
[((Time.now.utc - enqueued_at) * 1000).round, 0].max
|
|
144
146
|
end
|
|
145
147
|
|
|
146
|
-
def handle_failure(message, queue_name, error)
|
|
148
|
+
def handle_failure(message, queue_name, error, payload: nil)
|
|
147
149
|
Pgbus.logger.error { "[Pgbus] Job failed: #{error.class}: #{error.message}" }
|
|
148
150
|
Pgbus.logger.debug { error.backtrace&.join("\n") }
|
|
149
151
|
|
|
152
|
+
# Record failure for dashboard visibility.
|
|
150
153
|
# Message visibility timeout will expire and it becomes available again.
|
|
151
154
|
# read_ct tracks delivery attempts — when it exceeds max_retries,
|
|
152
155
|
# the next read will route to DLQ.
|
|
156
|
+
FailedEventRecorder.record!(
|
|
157
|
+
queue_name: queue_name,
|
|
158
|
+
msg_id: message.msg_id.to_i,
|
|
159
|
+
payload: payload || message.message,
|
|
160
|
+
headers: message.respond_to?(:headers) ? message.headers : nil,
|
|
161
|
+
error: error,
|
|
162
|
+
retry_count: [message.read_ct.to_i - 1, 0].max
|
|
163
|
+
)
|
|
153
164
|
end
|
|
154
165
|
|
|
155
166
|
def instrument(event_name, payload = {})
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Pgbus
  # Keeps pgbus_failed_events in sync with job failures for dashboard visibility.
  #
  # A message is identified by (queue_name, msg_id). Recording uses
  # INSERT ... ON CONFLICT DO UPDATE on that pair, so a message has at most
  # one row, which always reflects its latest error. Both operations are
  # best-effort: any StandardError is rescued and logged at debug level
  # instead of being raised to the caller.
  class FailedEventRecorder
    class << self
      # Inserts the failure row for a message, or refreshes the existing one.
      #
      # On conflict only error_class, error_message, backtrace, retry_count
      # and failed_at are overwritten; the stored payload and headers are
      # left as first recorded.
      #
      # @param queue_name [String] queue the message was read from
      # @param msg_id [#to_i] message id within the queue
      # @param payload [String, Object] stored as-is when a String, else JSON-encoded
      # @param headers [String, Object, nil] stored as-is when a String or nil, else JSON-encoded
      # @param error [Exception] the failure being recorded
      # @param retry_count [Integer] value written to the retry_count column
      def record!(queue_name:, msg_id:, payload:, headers:, error:, retry_count:)
        db = connection

        upsert_sql = <<~SQL.squish
          INSERT INTO pgbus_failed_events
          (queue_name, msg_id, payload, headers, error_class, error_message, backtrace, retry_count, failed_at)
          VALUES ($1, $2, $3, $4, $5, $6, $7, $8, CURRENT_TIMESTAMP)
          ON CONFLICT (queue_name, msg_id) DO UPDATE SET
          error_class = EXCLUDED.error_class,
          error_message = EXCLUDED.error_message,
          backtrace = EXCLUDED.backtrace,
          retry_count = EXCLUDED.retry_count,
          failed_at = EXCLUDED.failed_at
        SQL

        payload_json = payload.is_a?(String) ? payload : JSON.generate(payload)
        headers_json =
          if headers.is_a?(String) || headers.nil?
            headers
          else
            JSON.generate(headers)
          end

        binds = [
          queue_name,
          msg_id.to_i,
          payload_json,
          headers_json,
          error.class.name,
          # Cap stored message length and backtrace depth.
          error.message.to_s.truncate(10_000),
          error.backtrace&.first(30)&.join("\n"),
          retry_count
        ]

        db.exec_query(upsert_sql, "FailedEvent Record", binds)
      rescue StandardError => e
        Pgbus.logger.debug { "[Pgbus] Failed to record failed event: #{e.message}" }
      end

      # Deletes the failure row for (queue_name, msg_id), if one exists.
      #
      # @param queue_name [String] queue the message belongs to
      # @param msg_id [#to_i] message id within the queue
      def clear!(queue_name:, msg_id:)
        db = connection
        delete_sql = "DELETE FROM pgbus_failed_events WHERE queue_name = $1 AND msg_id = $2"

        db.exec_delete(delete_sql, "FailedEvent Clear", [queue_name, msg_id.to_i])
      rescue StandardError => e
        Pgbus.logger.debug { "[Pgbus] Failed to clear failed event: #{e.message}" }
      end

      private

      # Uses BusRecord's connection when that class is defined and connected,
      # otherwise falls back to ActiveRecord::Base's connection.
      def connection
        return BusRecord.connection if defined?(BusRecord) && BusRecord.connected?

        ActiveRecord::Base.connection
      end
    end
  end
end
|
|
@@ -11,7 +11,10 @@ module Pgbus
|
|
|
11
11
|
end
|
|
12
12
|
|
|
13
13
|
def due_tasks(time = Time.current)
|
|
14
|
-
tasks.
|
|
14
|
+
tasks.filter_map do |task|
|
|
15
|
+
run_at = canonical_run_at(task, time)
|
|
16
|
+
[task, run_at] if run_at
|
|
17
|
+
end
|
|
15
18
|
end
|
|
16
19
|
|
|
17
20
|
def enqueue_task(task, run_at:)
|
|
@@ -34,6 +37,12 @@ module Pgbus
|
|
|
34
37
|
end
|
|
35
38
|
end
|
|
36
39
|
rescue AlreadyRecorded
|
|
40
|
+
# AlreadyRecorded means this (task_key, run_at) was already enqueued.
|
|
41
|
+
# If we acquired a NEW lock (prior lock was already released because the
|
|
42
|
+
# job completed), release it — no message will use it. If we didn't
|
|
43
|
+
# acquire a lock (nil or :already_locked), there's nothing to release.
|
|
44
|
+
# In either case, we are NOT opening a race window because the job for
|
|
45
|
+
# this run_at already ran or is running.
|
|
37
46
|
release_uniqueness_lock(acquired_key)
|
|
38
47
|
Pgbus.logger.debug { "[Pgbus] Recurring task #{task.key} already enqueued for #{run_at.iso8601}" }
|
|
39
48
|
rescue StandardError
|
|
@@ -75,23 +84,31 @@ module Pgbus
|
|
|
75
84
|
end
|
|
76
85
|
end
|
|
77
86
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
87
|
+
# Returns the canonical run_at time if the task is due, or nil if not.
|
|
88
|
+
# This ensures a consistent run_at regardless of which tick detects
|
|
89
|
+
# the cron occurrence — fixing a bug where match?(t) at the exact
|
|
90
|
+
# boundary returns previous_time=T-1, while a tick 1s later gets
|
|
91
|
+
# previous_time=T, producing different run_at values for the same
|
|
92
|
+
# cron occurrence and bypassing RecurringExecution deduplication.
|
|
93
|
+
def canonical_run_at(task, time)
|
|
82
94
|
cron = task.parsed_schedule
|
|
83
|
-
return
|
|
84
|
-
|
|
85
|
-
#
|
|
86
|
-
|
|
95
|
+
return nil unless cron
|
|
96
|
+
|
|
97
|
+
# Exact boundary hit: cron.match?(time) is true.
|
|
98
|
+
# previous_time returns the PRIOR occurrence here, but the cron
|
|
99
|
+
# time that fired is `time` itself (truncated to the minute).
|
|
100
|
+
if cron.match?(time)
|
|
101
|
+
# Fugit next_time from 1 second before gives us the current cron time
|
|
102
|
+
return cron.next_time(time - 1).to_t
|
|
103
|
+
end
|
|
87
104
|
|
|
88
|
-
#
|
|
89
|
-
# still fire it (handles the case where we tick slightly after the
|
|
90
|
-
# cron time). The window is the scheduler interval.
|
|
105
|
+
# Within the scheduler interval window after the cron time.
|
|
91
106
|
prev = task.previous_time(time)
|
|
92
|
-
return
|
|
107
|
+
return nil unless prev
|
|
93
108
|
|
|
94
|
-
(time - prev) <= @config.recurring_schedule_interval
|
|
109
|
+
return prev if (time - prev) <= @config.recurring_schedule_interval
|
|
110
|
+
|
|
111
|
+
nil
|
|
95
112
|
end
|
|
96
113
|
|
|
97
114
|
def resolve_queue(task)
|
|
@@ -40,10 +40,7 @@ module Pgbus
|
|
|
40
40
|
end
|
|
41
41
|
|
|
42
42
|
def tick(now)
|
|
43
|
-
schedule.due_tasks(now).each do |task|
|
|
44
|
-
run_at = task.previous_time(now)
|
|
45
|
-
next unless run_at
|
|
46
|
-
|
|
43
|
+
schedule.due_tasks(now).each do |task, run_at|
|
|
47
44
|
schedule.enqueue_task(task, run_at: run_at)
|
|
48
45
|
@last_runs[task.key] = now
|
|
49
46
|
rescue StandardError => e
|
data/lib/pgbus/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: pgbus
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3.5
|
|
4
|
+
version: 0.3.7
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Mikael Henriksson
|
|
@@ -198,6 +198,7 @@ files:
|
|
|
198
198
|
- config/routes.rb
|
|
199
199
|
- exe/pgbus
|
|
200
200
|
- lib/active_job/queue_adapters/pgbus_adapter.rb
|
|
201
|
+
- lib/generators/pgbus/add_failed_events_index_generator.rb
|
|
201
202
|
- lib/generators/pgbus/add_job_locks_generator.rb
|
|
202
203
|
- lib/generators/pgbus/add_job_stats_generator.rb
|
|
203
204
|
- lib/generators/pgbus/add_job_stats_latency_generator.rb
|
|
@@ -206,6 +207,7 @@ files:
|
|
|
206
207
|
- lib/generators/pgbus/add_recurring_generator.rb
|
|
207
208
|
- lib/generators/pgbus/install_generator.rb
|
|
208
209
|
- lib/generators/pgbus/migrate_job_locks_generator.rb
|
|
210
|
+
- lib/generators/pgbus/templates/add_failed_events_unique_index.rb.erb
|
|
209
211
|
- lib/generators/pgbus/templates/add_job_locks.rb.erb
|
|
210
212
|
- lib/generators/pgbus/templates/add_job_stats.rb.erb
|
|
211
213
|
- lib/generators/pgbus/templates/add_job_stats_latency.rb.erb
|
|
@@ -240,6 +242,7 @@ files:
|
|
|
240
242
|
- lib/pgbus/event_bus/publisher.rb
|
|
241
243
|
- lib/pgbus/event_bus/registry.rb
|
|
242
244
|
- lib/pgbus/event_bus/subscriber.rb
|
|
245
|
+
- lib/pgbus/failed_event_recorder.rb
|
|
243
246
|
- lib/pgbus/instrumentation.rb
|
|
244
247
|
- lib/pgbus/outbox.rb
|
|
245
248
|
- lib/pgbus/outbox/poller.rb
|