pgbus 0.7.1 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c45dd364f341b6819b7901f583e058dbe761b375210e4162f19faf75917e3043
4
- data.tar.gz: ed5ee189a3ff3d7fe0610deada3b2daba3726a2647d7762c58735e0771c9e0eb
3
+ metadata.gz: b1a647a2e485cace147ca201861ff4613e9c14f72f8fd8be9060f64d6eccfff5
4
+ data.tar.gz: bf3ca37850d549c6e9ed86bedbad1d88f9a50462b86a2d8a09113de7c1227d0f
5
5
  SHA512:
6
- metadata.gz: e28c032dc7b4f2cba37bd709c4a45030bcedd90274b86a1499d55dd4f4e255769e580983023602a42267b8728068a7eb11f7fdcbc8d12bd3efd83f35a50f241b
7
- data.tar.gz: dc6d8d5d2e4feebbf7d940c173f53700549b5364c0e15df3f6afac1d72ecc6b1222127d85649016e9c6590e3c184eb9365da95c424a21a98be683862f4a24e67
6
+ metadata.gz: b32dac214071005fa5974478f2cb8bd4d8a270efc3e6bc94141b74c28cebfea0dfa84e41ad9bf1ec2a10c8ab0d09ae08d0df989f0894b8d5cbc6a461698ecf5b
7
+ data.tar.gz: ea55e1a29e53e491063c34aaea3a643498d1567968e64833d9a38deef2bcd7a48dfb35e377601d347fa8e05e55a5647a77dbe958d9584fcdc8669a362ff253a8
@@ -19,7 +19,11 @@ module Pgbus
19
19
 
20
20
  def execute(message, queue_name, source_queue: nil)
21
21
  execution_start = monotonic_now
22
+ tag = "msg_id=#{message.msg_id} queue=#{queue_name} read_ct=#{message.read_ct}"
23
+ Pgbus.logger.debug { "[Pgbus::Executor] start #{tag}" }
24
+
22
25
  payload = JSON.parse(message.message)
26
+ job_class = payload["job_class"]
23
27
  read_count = message.read_ct.to_i
24
28
 
25
29
  if read_count > config.max_retries
@@ -29,10 +33,9 @@ module Pgbus
29
33
  signal_batch_discarded(payload)
30
34
  Uniqueness.release_lock(Uniqueness.extract_key(payload))
31
35
  record_stat(payload, queue_name, "dead_lettered", execution_start, message: message)
36
+ Pgbus.logger.debug { "[Pgbus::Executor] dead_lettered #{tag} job_class=#{job_class}" }
32
37
  return :dead_lettered
33
38
  end
34
-
35
- job_class = payload["job_class"]
36
39
  uniqueness_key = Uniqueness.extract_key(payload)
37
40
  uniqueness_strategy = Uniqueness.extract_strategy(payload)
38
41
 
@@ -53,28 +56,24 @@ module Pgbus
53
56
  end
54
57
  end
55
58
 
59
+ Pgbus.logger.debug { "[Pgbus::Executor] deserialized #{tag} job_class=#{job_class}" }
56
60
  job_succeeded = false
57
61
 
58
- # Debug-level phase markers. Silent at INFO+, but invaluable when a
59
- # fiber interrupt or connection issue loses control flow between phases
60
- # (issue #126). Each line identifies msg_id + phase so the gap is
61
- # visible in logs: "deserialized" without "archived" means the job
62
- # ran but its message was never archived.
63
62
  msg_id = message.msg_id.to_i
64
63
  Instrumentation.instrument("pgbus.executor.execute", queue: queue_name, job_class: job_class) do
65
- Pgbus.logger.debug { "[Pgbus] Executor phase=deserialize msg_id=#{msg_id} job=#{job_class}" }
66
64
  job = ::ActiveJob::Base.deserialize(payload)
67
- Pgbus.logger.debug { "[Pgbus] Executor phase=perform msg_id=#{msg_id} job=#{job_class}" }
65
+ Pgbus.logger.debug { "[Pgbus::Executor] running #{tag} job_class=#{job_class}" }
68
66
  execute_job(job)
69
- Pgbus.logger.debug { "[Pgbus] Executor phase=archive msg_id=#{msg_id} job=#{job_class}" }
67
+ Pgbus.logger.debug { "[Pgbus::Executor] perform_returned #{tag} job_class=#{job_class}" }
70
68
  archive_from(queue_name, msg_id, source_queue: source_queue)
69
+ Pgbus.logger.debug { "[Pgbus::Executor] archived #{tag} job_class=#{job_class}" }
71
70
  FailedEventRecorder.clear!(queue_name: queue_name, msg_id: msg_id)
72
71
  job_succeeded = true
73
- Pgbus.logger.debug { "[Pgbus] Executor phase=succeeded msg_id=#{msg_id} job=#{job_class}" }
74
72
  end
75
73
 
76
74
  instrument("pgbus.job_completed", queue: queue_name, job_class: job_class)
77
75
  record_stat(payload, queue_name, "success", execution_start, message: message)
76
+ Pgbus.logger.debug { "[Pgbus::Executor] done #{tag} job_class=#{job_class}" }
78
77
  :success
79
78
  rescue *FATAL_EXCEPTIONS
80
79
  # Process-fatal: propagate so the supervisor/OS can react.
@@ -88,6 +87,7 @@ module Pgbus
88
87
  handle_failure(message, queue_name, e, payload: payload)
89
88
  instrument("pgbus.job_failed", queue: queue_name, job_class: payload&.dig("job_class"), error: e.class.name)
90
89
  record_stat(payload, queue_name, "failed", execution_start, message: message)
90
+ Pgbus.logger.debug { "[Pgbus::Executor] failed #{tag} job_class=#{payload&.dig("job_class")} error=#{e.class}" }
91
91
  # Don't signal concurrency on transient failure — the job will be retried.
92
92
  # Semaphore is released only on success or dead-lettering.
93
93
  :failed
@@ -85,6 +85,10 @@ module Pgbus
85
85
  # Requires a matching entry in config/database.yml under the "pgbus" key.
86
86
  attr_accessor :connects_to
87
87
 
88
+ # Zombie message detection — logs a warning when a message is redelivered
89
+ # (read_ct > 1) without any prior failure recorded in pgbus_failed_events.
90
+ attr_accessor :zombie_detection
91
+
88
92
  # Job stats
89
93
  attr_accessor :stats_enabled
90
94
  attr_reader :stats_retention # rubocop:disable Style/AccessorGrouping
@@ -160,6 +164,8 @@ module Pgbus
160
164
  @skip_recurring = false
161
165
  @recurring_execution_retention = 7 * 24 * 3600 # 7 days
162
166
 
167
+ @zombie_detection = true
168
+
163
169
  @stats_enabled = true
164
170
  @stats_retention = 30 * 24 * 3600 # 30 days
165
171
 
@@ -35,6 +35,18 @@ module Pgbus
35
35
  ErrorReporter.report(e, { action: "record_failed_event", queue: queue_name, msg_id: msg_id })
36
36
  end
37
37
 
38
+ def exists?(queue_name:, msg_id:)
39
+ result = connection.select_value(
40
+ "SELECT 1 FROM pgbus_failed_events WHERE queue_name = $1 AND msg_id = $2 LIMIT 1",
41
+ "FailedEvent Exists",
42
+ [queue_name, msg_id.to_i]
43
+ )
44
+ !result.nil?
45
+ rescue StandardError => e
46
+ Pgbus.logger.debug { "[Pgbus] FailedEvent exists? check failed: #{e.class}: #{e.message}" }
47
+ false
48
+ end
49
+
38
50
  def clear!(queue_name:, msg_id:)
39
51
  connection.exec_delete(
40
52
  "DELETE FROM pgbus_failed_events WHERE queue_name = $1 AND msg_id = $2",
@@ -126,6 +126,7 @@ module Pgbus
126
126
 
127
127
  @rate_counter.increment(:dequeued, tagged_messages.size)
128
128
  tagged_messages.each do |queue_name, message, source_queue|
129
+ detect_zombie(queue_name, message)
129
130
  @in_flight.increment
130
131
  @pool.post { process_message(message, queue_name, source_queue: source_queue) }
131
132
  end
@@ -285,6 +286,21 @@ module Pgbus
285
286
  Pgbus.logger.error { "[Pgbus] Queue table missing: #{error.message}" }
286
287
  end
287
288
 
289
+ def detect_zombie(queue_name, message)
290
+ return unless config.zombie_detection
291
+ return unless message.read_ct.to_i > 1
292
+
293
+ return if FailedEventRecorder.exists?(queue_name: queue_name, msg_id: message.msg_id.to_i)
294
+
295
+ Pgbus.logger.warn do
296
+ "[Pgbus] Zombie message redelivered: queue=#{queue_name} msg_id=#{message.msg_id} " \
297
+ "read_ct=#{message.read_ct} — previous read did not record a failure. " \
298
+ "The worker may have crashed mid-execute or the executor silently dropped the job."
299
+ end
300
+ rescue StandardError => e
301
+ Pgbus.logger.debug { "[Pgbus] Zombie detection failed: #{e.class}: #{e.message}" }
302
+ end
303
+
288
304
  def check_recycle
289
305
  return unless @lifecycle.running? && recycle_needed?
290
306
 
data/lib/pgbus/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Pgbus
4
- VERSION = "0.7.1"
4
+ VERSION = "0.7.2"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pgbus
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.1
4
+ version: 0.7.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mikael Henriksson