pgbus 0.7.1 → 0.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/pgbus/active_job/executor.rb +11 -11
- data/lib/pgbus/configuration.rb +6 -0
- data/lib/pgbus/failed_event_recorder.rb +12 -0
- data/lib/pgbus/process/worker.rb +16 -0
- data/lib/pgbus/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: b1a647a2e485cace147ca201861ff4613e9c14f72f8fd8be9060f64d6eccfff5
|
|
4
|
+
data.tar.gz: bf3ca37850d549c6e9ed86bedbad1d88f9a50462b86a2d8a09113de7c1227d0f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: b32dac214071005fa5974478f2cb8bd4d8a270efc3e6bc94141b74c28cebfea0dfa84e41ad9bf1ec2a10c8ab0d09ae08d0df989f0894b8d5cbc6a461698ecf5b
|
|
7
|
+
data.tar.gz: ea55e1a29e53e491063c34aaea3a643498d1567968e64833d9a38deef2bcd7a48dfb35e377601d347fa8e05e55a5647a77dbe958d9584fcdc8669a362ff253a8
|
data/lib/pgbus/active_job/executor.rb
CHANGED
|
@@ -19,7 +19,11 @@ module Pgbus
|
|
|
19
19
|
|
|
20
20
|
def execute(message, queue_name, source_queue: nil)
|
|
21
21
|
execution_start = monotonic_now
|
|
22
|
+
tag = "msg_id=#{message.msg_id} queue=#{queue_name} read_ct=#{message.read_ct}"
|
|
23
|
+
Pgbus.logger.debug { "[Pgbus::Executor] start #{tag}" }
|
|
24
|
+
|
|
22
25
|
payload = JSON.parse(message.message)
|
|
26
|
+
job_class = payload["job_class"]
|
|
23
27
|
read_count = message.read_ct.to_i
|
|
24
28
|
|
|
25
29
|
if read_count > config.max_retries
|
|
@@ -29,10 +33,9 @@ module Pgbus
|
|
|
29
33
|
signal_batch_discarded(payload)
|
|
30
34
|
Uniqueness.release_lock(Uniqueness.extract_key(payload))
|
|
31
35
|
record_stat(payload, queue_name, "dead_lettered", execution_start, message: message)
|
|
36
|
+
Pgbus.logger.debug { "[Pgbus::Executor] dead_lettered #{tag} job_class=#{job_class}" }
|
|
32
37
|
return :dead_lettered
|
|
33
38
|
end
|
|
34
|
-
|
|
35
|
-
job_class = payload["job_class"]
|
|
36
39
|
uniqueness_key = Uniqueness.extract_key(payload)
|
|
37
40
|
uniqueness_strategy = Uniqueness.extract_strategy(payload)
|
|
38
41
|
|
|
@@ -53,28 +56,24 @@ module Pgbus
|
|
|
53
56
|
end
|
|
54
57
|
end
|
|
55
58
|
|
|
59
|
+
Pgbus.logger.debug { "[Pgbus::Executor] deserialized #{tag} job_class=#{job_class}" }
|
|
56
60
|
job_succeeded = false
|
|
57
61
|
|
|
58
|
-
# Debug-level phase markers. Silent at INFO+, but invaluable when a
|
|
59
|
-
# fiber interrupt or connection issue loses control flow between phases
|
|
60
|
-
# (issue #126). Each line identifies msg_id + phase so the gap is
|
|
61
|
-
# visible in logs: "deserialized" without "archived" means the job
|
|
62
|
-
# ran but its message was never archived.
|
|
63
62
|
msg_id = message.msg_id.to_i
|
|
64
63
|
Instrumentation.instrument("pgbus.executor.execute", queue: queue_name, job_class: job_class) do
|
|
65
|
-
Pgbus.logger.debug { "[Pgbus] Executor phase=deserialize msg_id=#{msg_id} job=#{job_class}" }
|
|
66
64
|
job = ::ActiveJob::Base.deserialize(payload)
|
|
67
|
-
Pgbus.logger.debug { "[Pgbus]
|
|
65
|
+
Pgbus.logger.debug { "[Pgbus::Executor] running #{tag} job_class=#{job_class}" }
|
|
68
66
|
execute_job(job)
|
|
69
|
-
Pgbus.logger.debug { "[Pgbus]
|
|
67
|
+
Pgbus.logger.debug { "[Pgbus::Executor] perform_returned #{tag} job_class=#{job_class}" }
|
|
70
68
|
archive_from(queue_name, msg_id, source_queue: source_queue)
|
|
69
|
+
Pgbus.logger.debug { "[Pgbus::Executor] archived #{tag} job_class=#{job_class}" }
|
|
71
70
|
FailedEventRecorder.clear!(queue_name: queue_name, msg_id: msg_id)
|
|
72
71
|
job_succeeded = true
|
|
73
|
-
Pgbus.logger.debug { "[Pgbus] Executor phase=succeeded msg_id=#{msg_id} job=#{job_class}" }
|
|
74
72
|
end
|
|
75
73
|
|
|
76
74
|
instrument("pgbus.job_completed", queue: queue_name, job_class: job_class)
|
|
77
75
|
record_stat(payload, queue_name, "success", execution_start, message: message)
|
|
76
|
+
Pgbus.logger.debug { "[Pgbus::Executor] done #{tag} job_class=#{job_class}" }
|
|
78
77
|
:success
|
|
79
78
|
rescue *FATAL_EXCEPTIONS
|
|
80
79
|
# Process-fatal: propagate so the supervisor/OS can react.
|
|
@@ -88,6 +87,7 @@ module Pgbus
|
|
|
88
87
|
handle_failure(message, queue_name, e, payload: payload)
|
|
89
88
|
instrument("pgbus.job_failed", queue: queue_name, job_class: payload&.dig("job_class"), error: e.class.name)
|
|
90
89
|
record_stat(payload, queue_name, "failed", execution_start, message: message)
|
|
90
|
+
Pgbus.logger.debug { "[Pgbus::Executor] failed #{tag} job_class=#{payload&.dig("job_class")} error=#{e.class}" }
|
|
91
91
|
# Don't signal concurrency on transient failure — the job will be retried.
|
|
92
92
|
# Semaphore is released only on success or dead-lettering.
|
|
93
93
|
:failed
|
data/lib/pgbus/configuration.rb
CHANGED
|
@@ -85,6 +85,10 @@ module Pgbus
|
|
|
85
85
|
# Requires a matching entry in config/database.yml under the "pgbus" key.
|
|
86
86
|
attr_accessor :connects_to
|
|
87
87
|
|
|
88
|
+
# Zombie message detection — logs a warning when a message is redelivered
|
|
89
|
+
# (read_ct > 1) without any prior failure recorded in pgbus_failed_events.
|
|
90
|
+
attr_accessor :zombie_detection
|
|
91
|
+
|
|
88
92
|
# Job stats
|
|
89
93
|
attr_accessor :stats_enabled
|
|
90
94
|
attr_reader :stats_retention # rubocop:disable Style/AccessorGrouping
|
|
@@ -160,6 +164,8 @@ module Pgbus
|
|
|
160
164
|
@skip_recurring = false
|
|
161
165
|
@recurring_execution_retention = 7 * 24 * 3600 # 7 days
|
|
162
166
|
|
|
167
|
+
@zombie_detection = true
|
|
168
|
+
|
|
163
169
|
@stats_enabled = true
|
|
164
170
|
@stats_retention = 30 * 24 * 3600 # 30 days
|
|
165
171
|
|
data/lib/pgbus/failed_event_recorder.rb
CHANGED
|
@@ -35,6 +35,18 @@ module Pgbus
|
|
|
35
35
|
ErrorReporter.report(e, { action: "record_failed_event", queue: queue_name, msg_id: msg_id })
|
|
36
36
|
end
|
|
37
37
|
|
|
38
|
+
def exists?(queue_name:, msg_id:)
|
|
39
|
+
result = connection.select_value(
|
|
40
|
+
"SELECT 1 FROM pgbus_failed_events WHERE queue_name = $1 AND msg_id = $2 LIMIT 1",
|
|
41
|
+
"FailedEvent Exists",
|
|
42
|
+
[queue_name, msg_id.to_i]
|
|
43
|
+
)
|
|
44
|
+
!result.nil?
|
|
45
|
+
rescue StandardError => e
|
|
46
|
+
Pgbus.logger.debug { "[Pgbus] FailedEvent exists? check failed: #{e.class}: #{e.message}" }
|
|
47
|
+
false
|
|
48
|
+
end
|
|
49
|
+
|
|
38
50
|
def clear!(queue_name:, msg_id:)
|
|
39
51
|
connection.exec_delete(
|
|
40
52
|
"DELETE FROM pgbus_failed_events WHERE queue_name = $1 AND msg_id = $2",
|
data/lib/pgbus/process/worker.rb
CHANGED
|
@@ -126,6 +126,7 @@ module Pgbus
|
|
|
126
126
|
|
|
127
127
|
@rate_counter.increment(:dequeued, tagged_messages.size)
|
|
128
128
|
tagged_messages.each do |queue_name, message, source_queue|
|
|
129
|
+
detect_zombie(queue_name, message)
|
|
129
130
|
@in_flight.increment
|
|
130
131
|
@pool.post { process_message(message, queue_name, source_queue: source_queue) }
|
|
131
132
|
end
|
|
@@ -285,6 +286,21 @@ module Pgbus
|
|
|
285
286
|
Pgbus.logger.error { "[Pgbus] Queue table missing: #{error.message}" }
|
|
286
287
|
end
|
|
287
288
|
|
|
289
|
+
def detect_zombie(queue_name, message)
|
|
290
|
+
return unless config.zombie_detection
|
|
291
|
+
return unless message.read_ct.to_i > 1
|
|
292
|
+
|
|
293
|
+
return if FailedEventRecorder.exists?(queue_name: queue_name, msg_id: message.msg_id.to_i)
|
|
294
|
+
|
|
295
|
+
Pgbus.logger.warn do
|
|
296
|
+
"[Pgbus] Zombie message redelivered: queue=#{queue_name} msg_id=#{message.msg_id} " \
|
|
297
|
+
"read_ct=#{message.read_ct} — previous read did not record a failure. " \
|
|
298
|
+
"The worker may have crashed mid-execute or the executor silently dropped the job."
|
|
299
|
+
end
|
|
300
|
+
rescue StandardError => e
|
|
301
|
+
Pgbus.logger.debug { "[Pgbus] Zombie detection failed: #{e.class}: #{e.message}" }
|
|
302
|
+
end
|
|
303
|
+
|
|
288
304
|
def check_recycle
|
|
289
305
|
return unless @lifecycle.running? && recycle_needed?
|
|
290
306
|
|
data/lib/pgbus/version.rb
CHANGED