pgbus 0.7.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/pgbus/active_job/executor.rb +25 -3
- data/lib/pgbus/client/ensure_stream_queue.rb +3 -1
- data/lib/pgbus/client.rb +37 -1
- data/lib/pgbus/execution_pools/async_pool.rb +12 -1
- data/lib/pgbus/process/supervisor.rb +8 -0
- data/lib/pgbus/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: c45dd364f341b6819b7901f583e058dbe761b375210e4162f19faf75917e3043
|
|
4
|
+
data.tar.gz: ed5ee189a3ff3d7fe0610deada3b2daba3726a2647d7762c58735e0771c9e0eb
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: e28c032dc7b4f2cba37bd709c4a45030bcedd90274b86a1499d55dd4f4e255769e580983023602a42267b8728068a7eb11f7fdcbc8d12bd3efd83f35a50f241b
|
|
7
|
+
data.tar.gz: dc6d8d5d2e4feebbf7d940c173f53700549b5364c0e15df3f6afac1d72ecc6b1222127d85649016e9c6590e3c184eb9365da95c424a21a98be683862f4a24e67
|
data/lib/pgbus/active_job/executor.rb
CHANGED
|
@@ -13,6 +13,10 @@ module Pgbus
|
|
|
13
13
|
@stat_buffer = stat_buffer
|
|
14
14
|
end
|
|
15
15
|
|
|
16
|
+
# Exceptions we never want to swallow — let the process die/signal propagate.
|
|
17
|
+
FATAL_EXCEPTIONS = [SystemExit, Interrupt, SignalException, NoMemoryError, SystemStackError].freeze
|
|
18
|
+
private_constant :FATAL_EXCEPTIONS
|
|
19
|
+
|
|
16
20
|
def execute(message, queue_name, source_queue: nil)
|
|
17
21
|
execution_start = monotonic_now
|
|
18
22
|
payload = JSON.parse(message.message)
|
|
@@ -51,18 +55,36 @@ module Pgbus
|
|
|
51
55
|
|
|
52
56
|
job_succeeded = false
|
|
53
57
|
|
|
58
|
+
# Debug-level phase markers. Silent at INFO+, but invaluable when a
|
|
59
|
+
# fiber interrupt or connection issue loses control flow between phases
|
|
60
|
+
# (issue #126). Each line identifies msg_id + phase so the gap is
|
|
61
|
+
# visible in logs: "deserialized" without "archived" means the job
|
|
62
|
+
# ran but its message was never archived.
|
|
63
|
+
msg_id = message.msg_id.to_i
|
|
54
64
|
Instrumentation.instrument("pgbus.executor.execute", queue: queue_name, job_class: job_class) do
|
|
65
|
+
Pgbus.logger.debug { "[Pgbus] Executor phase=deserialize msg_id=#{msg_id} job=#{job_class}" }
|
|
55
66
|
job = ::ActiveJob::Base.deserialize(payload)
|
|
67
|
+
Pgbus.logger.debug { "[Pgbus] Executor phase=perform msg_id=#{msg_id} job=#{job_class}" }
|
|
56
68
|
execute_job(job)
|
|
57
|
-
|
|
58
|
-
|
|
69
|
+
Pgbus.logger.debug { "[Pgbus] Executor phase=archive msg_id=#{msg_id} job=#{job_class}" }
|
|
70
|
+
archive_from(queue_name, msg_id, source_queue: source_queue)
|
|
71
|
+
FailedEventRecorder.clear!(queue_name: queue_name, msg_id: msg_id)
|
|
59
72
|
job_succeeded = true
|
|
73
|
+
Pgbus.logger.debug { "[Pgbus] Executor phase=succeeded msg_id=#{msg_id} job=#{job_class}" }
|
|
60
74
|
end
|
|
61
75
|
|
|
62
76
|
instrument("pgbus.job_completed", queue: queue_name, job_class: job_class)
|
|
63
77
|
record_stat(payload, queue_name, "success", execution_start, message: message)
|
|
64
78
|
:success
|
|
65
|
-
rescue
|
|
79
|
+
rescue *FATAL_EXCEPTIONS
|
|
80
|
+
# Process-fatal: propagate so the supervisor/OS can react.
|
|
81
|
+
raise
|
|
82
|
+
rescue Exception => e # rubocop:disable Lint/RescueException
|
|
83
|
+
# Widened from StandardError to catch Async::Stop / Async::Cancel
|
|
84
|
+
# (both inherit from Exception, not StandardError) under execution_mode: :async.
|
|
85
|
+
# Before this, a fiber interruption between perform_now and archive_from
|
|
86
|
+
# silently lost control flow — no failed event row, no job_failed
|
|
87
|
+
# notification, uniqueness lock held until VT expired. See issue #126.
|
|
66
88
|
handle_failure(message, queue_name, e, payload: payload)
|
|
67
89
|
instrument("pgbus.job_failed", queue: queue_name, job_class: payload&.dig("job_class"), error: e.class.name)
|
|
68
90
|
record_stat(payload, queue_name, "failed", execution_start, message: message)
|
data/lib/pgbus/client/ensure_stream_queue.rb
CHANGED
|
@@ -27,7 +27,9 @@ module Pgbus
|
|
|
27
27
|
# sensitive and need every broadcast to fire a NOTIFY, even
|
|
28
28
|
# when several are batched within a single millisecond.
|
|
29
29
|
# Override the throttle to 0 specifically for stream queues.
|
|
30
|
-
|
|
30
|
+
# Use the idempotent path to avoid deadlocks when multiple
|
|
31
|
+
# processes race to set up the same stream queue.
|
|
32
|
+
synchronized { enable_notify_if_needed(full_name, 0) }
|
|
31
33
|
|
|
32
34
|
# CREATE INDEX IF NOT EXISTS is idempotent in Postgres but still
|
|
33
35
|
# requires a roundtrip and a brief ACCESS SHARE lock on the archive
|
data/lib/pgbus/client.rb
CHANGED
|
@@ -457,12 +457,48 @@ module Pgbus
|
|
|
457
457
|
synchronized do
|
|
458
458
|
@pgmq.create(full_name)
|
|
459
459
|
tune_autovacuum(full_name)
|
|
460
|
-
|
|
460
|
+
enable_notify_if_needed(full_name, NOTIFY_THROTTLE_MS)
|
|
461
461
|
end
|
|
462
462
|
true
|
|
463
463
|
end
|
|
464
464
|
end
|
|
465
465
|
|
|
466
|
+
def enable_notify_if_needed(full_name, throttle_ms)
|
|
467
|
+
return unless config.listen_notify
|
|
468
|
+
return if notify_trigger_current?(full_name, throttle_ms)
|
|
469
|
+
|
|
470
|
+
@pgmq.enable_notify_insert(full_name, throttle_interval_ms: throttle_ms)
|
|
471
|
+
end
|
|
472
|
+
|
|
473
|
+
# Check whether the NOTIFY trigger already exists on this queue with the
|
|
474
|
+
# expected throttle interval. When it does, we can skip the destructive
|
|
475
|
+
# DROP TRIGGER + CREATE TRIGGER cycle that causes deadlocks when multiple
|
|
476
|
+
# forked processes race during bootstrap.
|
|
477
|
+
def notify_trigger_current?(full_name, throttle_ms)
|
|
478
|
+
with_raw_connection do |conn|
|
|
479
|
+
result = conn.exec_params(<<~SQL, [full_name, throttle_ms])
|
|
480
|
+
SELECT 1
|
|
481
|
+
FROM pg_trigger t
|
|
482
|
+
JOIN pg_class c ON t.tgrelid = c.oid
|
|
483
|
+
JOIN pg_namespace n ON c.relnamespace = n.oid
|
|
484
|
+
WHERE n.nspname = 'pgmq'
|
|
485
|
+
AND c.relname = pgmq.format_table_name($1, 'q')
|
|
486
|
+
AND t.tgname = 'trigger_notify_queue_insert_listeners'
|
|
487
|
+
AND EXISTS (
|
|
488
|
+
SELECT 1 FROM pgmq.notify_insert_throttle
|
|
489
|
+
WHERE queue_name = $1
|
|
490
|
+
AND throttle_interval_ms = $2
|
|
491
|
+
)
|
|
492
|
+
LIMIT 1
|
|
493
|
+
SQL
|
|
494
|
+
result.ntuples.positive?
|
|
495
|
+
end
|
|
496
|
+
rescue StandardError
|
|
497
|
+
# If we can't check (e.g. pgmq schema not fully ready), fall back to
|
|
498
|
+
# the unconditional path — same behavior as before this fix.
|
|
499
|
+
false
|
|
500
|
+
end
|
|
501
|
+
|
|
466
502
|
def tune_autovacuum(queue_name)
|
|
467
503
|
with_raw_connection do |conn|
|
|
468
504
|
conn.exec(AutovacuumTuning.sql_for_queue(queue_name))
|
data/lib/pgbus/execution_pools/async_pool.rb
CHANGED
|
@@ -128,9 +128,20 @@ module Pgbus
|
|
|
128
128
|
nil
|
|
129
129
|
end
|
|
130
130
|
|
|
131
|
+
# Supervisor-level rescue: catch any Exception raised from the user
|
|
132
|
+
# block so capacity is always restored and the failure is logged.
|
|
133
|
+
# The `async` gem uses Async::Stop / Async::Cancel (Exception subclasses,
|
|
134
|
+
# NOT StandardError) to cancel tasks, and prior to issue #126 those
|
|
135
|
+
# would leak past `rescue StandardError` and silently vanish.
|
|
136
|
+
# Process-fatal signals still propagate so the supervisor can react.
|
|
137
|
+
FATAL_EXCEPTIONS = [SystemExit, Interrupt, SignalException, NoMemoryError, SystemStackError].freeze
|
|
138
|
+
private_constant :FATAL_EXCEPTIONS
|
|
139
|
+
|
|
131
140
|
def perform(block)
|
|
132
141
|
block.call
|
|
133
|
-
rescue
|
|
142
|
+
rescue *FATAL_EXCEPTIONS
|
|
143
|
+
raise
|
|
144
|
+
rescue Exception => e # rubocop:disable Lint/RescueException
|
|
134
145
|
Pgbus.logger.error { "[Pgbus] Async pool fiber error: #{e.class}: #{e.message}" }
|
|
135
146
|
ensure
|
|
136
147
|
restore_capacity
|
data/lib/pgbus/process/supervisor.rb
CHANGED
|
@@ -21,6 +21,14 @@ module Pgbus
|
|
|
21
21
|
|
|
22
22
|
Pgbus.logger.info { "[Pgbus] Supervisor starting pid=#{::Process.pid}" }
|
|
23
23
|
|
|
24
|
+
# Bootstrap queues once in the parent process before forking children.
|
|
25
|
+
# This avoids the deadlock that occurs when multiple forked children
|
|
26
|
+
# race to call enable_notify_insert (DROP TRIGGER + CREATE TRIGGER)
|
|
27
|
+
# concurrently on the same queue tables. Children still call
|
|
28
|
+
# bootstrap_queues post-fork but the idempotent check in
|
|
29
|
+
# notify_trigger_current? makes those calls cheap no-ops.
|
|
30
|
+
bootstrap_queues
|
|
31
|
+
|
|
24
32
|
boot_processes
|
|
25
33
|
monitor_loop
|
|
26
34
|
ensure
|
data/lib/pgbus/version.rb
CHANGED