pgbus 0.3.5 → 0.3.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a25921e6a7a0ac72023501978b6dec0b38645a18b45cece73bf15ddbc655dae8
4
- data.tar.gz: 810980a58f382aad948b660a9102cdb6ea3a3af57c3768f1205c3582b0705003
3
+ metadata.gz: 031cb29b5105d5731e862ab92fa1c8872279b3ff0e5dbbbdf595d4733874780a
4
+ data.tar.gz: ab7912d72556e382056a3f8f9cc0edd9f0498706b1958fce1f6546e4f8619eb2
5
5
  SHA512:
6
- metadata.gz: 597fbc986e88b2c37339156de5f21a576d50f45bab5f72343dc3eaed02b730d35dce777285f65b63f3264f4a91d76fa0b7a001635a6c103b08ef95454c6dc71f
7
- data.tar.gz: 2a305826eb3b9b0618e64c99e0724c2daf020047933e83d231a35866d8e8b1e334a45c45c03a7fbef577b8dd2243686dc4e351595a671a8d8cd1a63f940602ed
6
+ metadata.gz: f2892d6f44649afe256393e7abfc67369fa8d87ecb159b6e9d5a9e5816ab14a2a328bfd48c6c400866f1a81cb48b91446162d500d71faf06a587fc05e8d3bab6
7
+ data.tar.gz: 3577ea2487cd452e455a43be6dc310511681f003a942960116e41cd79722291a8cdb2a00d103e13950c8a6f996e3054029093e90d9587767e93ef11943969209
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "rails/generators"
4
+ require "rails/generators/active_record"
5
+
6
+ module Pgbus
7
+ module Generators
8
+ class AddFailedEventsIndexGenerator < Rails::Generators::Base
9
+ include ActiveRecord::Generators::Migration
10
+
11
+ source_root File.expand_path("templates", __dir__)
12
+
13
+ desc "Add unique index on pgbus_failed_events (queue_name, msg_id) for failure tracking upserts"
14
+
15
+ class_option :database,
16
+ type: :string,
17
+ default: nil,
18
+ desc: "Use a separate database for pgbus tables (e.g. --database=pgbus)"
19
+
20
+ def create_migration_file
21
+ if separate_database?
22
+ migration_template "add_failed_events_unique_index.rb.erb",
23
+ "db/pgbus_migrate/add_pgbus_failed_events_unique_index.rb"
24
+ else
25
+ migration_template "add_failed_events_unique_index.rb.erb",
26
+ "db/migrate/add_pgbus_failed_events_unique_index.rb"
27
+ end
28
+ end
29
+
30
+ def display_post_install
31
+ say ""
32
+ say "Pgbus failed events unique index added!", :green
33
+ say ""
34
+ say "Next steps:"
35
+ say " 1. Run: rails db:migrate#{":#{options[:database]}" if separate_database?}"
36
+ say " 2. Failed jobs will now be tracked in the dashboard"
37
+ say ""
38
+ end
39
+
40
+ private
41
+
42
+ def migration_version
43
+ "[#{ActiveRecord::Migration.current_version}]"
44
+ end
45
+
46
+ def separate_database?
47
+ options[:database].present?
48
+ end
49
+ end
50
+ end
51
+ end
@@ -0,0 +1,7 @@
1
+ class AddPgbusFailedEventsUniqueIndex < ActiveRecord::Migration<%= migration_version %>
2
+ def change
3
+ add_index :pgbus_failed_events, [:queue_name, :msg_id],
4
+ unique: true, name: "idx_pgbus_failed_events_queue_msg",
5
+ if_not_exists: true
6
+ end
7
+ end
@@ -65,6 +65,8 @@ class CreatePgbusTables < ActiveRecord::Migration<%= migration_version %>
65
65
 
66
66
  add_index :pgbus_failed_events, :queue_name, name: "idx_pgbus_failed_events_queue"
67
67
  add_index :pgbus_failed_events, :failed_at, name: "idx_pgbus_failed_events_time"
68
+ add_index :pgbus_failed_events, [:queue_name, :msg_id],
69
+ unique: true, name: "idx_pgbus_failed_events_queue_msg"
68
70
 
69
71
  # Concurrency semaphores (counting locks for job concurrency limits)
70
72
  create_table :pgbus_semaphores do |t|
@@ -20,6 +20,7 @@ module Pgbus
20
20
 
21
21
  if read_count > config.max_retries
22
22
  handle_dead_letter(message, queue_name, payload, source_queue: source_queue)
23
+ FailedEventRecorder.clear!(queue_name: queue_name, msg_id: message.msg_id.to_i)
23
24
  signal_concurrency(payload)
24
25
  signal_batch_discarded(payload)
25
26
  Uniqueness.release_lock(Uniqueness.extract_key(payload))
@@ -54,6 +55,7 @@ module Pgbus
54
55
  job = ::ActiveJob::Base.deserialize(payload)
55
56
  execute_job(job)
56
57
  archive_from(queue_name, message.msg_id.to_i, source_queue: source_queue)
58
+ FailedEventRecorder.clear!(queue_name: queue_name, msg_id: message.msg_id.to_i)
57
59
  job_succeeded = true
58
60
  end
59
61
 
@@ -61,7 +63,7 @@ module Pgbus
61
63
  record_stat(payload, queue_name, "success", execution_start, message: message)
62
64
  :success
63
65
  rescue StandardError => e
64
- handle_failure(message, queue_name, e)
66
+ handle_failure(message, queue_name, e, payload: payload)
65
67
  instrument("pgbus.job_failed", queue: queue_name, job_class: payload&.dig("job_class"), error: e.class.name)
66
68
  record_stat(payload, queue_name, "failed", execution_start, message: message)
67
69
  # Don't signal concurrency on transient failure — the job will be retried.
@@ -143,13 +145,22 @@ module Pgbus
143
145
  [((Time.now.utc - enqueued_at) * 1000).round, 0].max
144
146
  end
145
147
 
146
- def handle_failure(_message, _queue_name, error)
148
+ def handle_failure(message, queue_name, error, payload: nil)
147
149
  Pgbus.logger.error { "[Pgbus] Job failed: #{error.class}: #{error.message}" }
148
150
  Pgbus.logger.debug { error.backtrace&.join("\n") }
149
151
 
152
+ # Record failure for dashboard visibility.
150
153
  # Message visibility timeout will expire and it becomes available again.
151
154
  # read_ct tracks delivery attempts — when it exceeds max_retries,
152
155
  # the next read will route to DLQ.
156
+ FailedEventRecorder.record!(
157
+ queue_name: queue_name,
158
+ msg_id: message.msg_id.to_i,
159
+ payload: payload || message.message,
160
+ headers: message.respond_to?(:headers) ? message.headers : nil,
161
+ error: error,
162
+ retry_count: [message.read_ct.to_i - 1, 0].max
163
+ )
153
164
  end
154
165
 
155
166
  def instrument(event_name, payload = {})
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Pgbus
4
+ # Records job failures to pgbus_failed_events for dashboard visibility.
5
+ # Uses upsert (INSERT ON CONFLICT UPDATE) keyed on (queue_name, msg_id)
6
+ # so each message has at most one failed_event row tracking its latest error.
7
+ class FailedEventRecorder
8
+ class << self
9
+ def record!(queue_name:, msg_id:, payload:, headers:, error:, retry_count:)
10
+ connection.exec_query(
11
+ <<~SQL.squish,
12
+ INSERT INTO pgbus_failed_events
13
+ (queue_name, msg_id, payload, headers, error_class, error_message, backtrace, retry_count, failed_at)
14
+ VALUES ($1, $2, $3, $4, $5, $6, $7, $8, CURRENT_TIMESTAMP)
15
+ ON CONFLICT (queue_name, msg_id) DO UPDATE SET
16
+ error_class = EXCLUDED.error_class,
17
+ error_message = EXCLUDED.error_message,
18
+ backtrace = EXCLUDED.backtrace,
19
+ retry_count = EXCLUDED.retry_count,
20
+ failed_at = EXCLUDED.failed_at
21
+ SQL
22
+ "FailedEvent Record",
23
+ [
24
+ queue_name,
25
+ msg_id.to_i,
26
+ payload.is_a?(String) ? payload : JSON.generate(payload),
27
+ headers.is_a?(String) ? headers : headers&.then { |h| JSON.generate(h) },
28
+ error.class.name,
29
+ error.message.to_s.truncate(10_000),
30
+ error.backtrace&.first(30)&.join("\n"),
31
+ retry_count
32
+ ]
33
+ )
34
+ rescue StandardError => e
35
+ Pgbus.logger.debug { "[Pgbus] Failed to record failed event: #{e.message}" }
36
+ end
37
+
38
+ def clear!(queue_name:, msg_id:)
39
+ connection.exec_delete(
40
+ "DELETE FROM pgbus_failed_events WHERE queue_name = $1 AND msg_id = $2",
41
+ "FailedEvent Clear",
42
+ [queue_name, msg_id.to_i]
43
+ )
44
+ rescue StandardError => e
45
+ Pgbus.logger.debug { "[Pgbus] Failed to clear failed event: #{e.message}" }
46
+ end
47
+
48
+ private
49
+
50
+ def connection
51
+ if defined?(BusRecord) && BusRecord.connected?
52
+ BusRecord.connection
53
+ else
54
+ ActiveRecord::Base.connection
55
+ end
56
+ end
57
+ end
58
+ end
59
+ end
@@ -11,7 +11,10 @@ module Pgbus
11
11
  end
12
12
 
13
13
  def due_tasks(time = Time.current)
14
- tasks.select { |task| task_due?(task, time) }
14
+ tasks.filter_map do |task|
15
+ run_at = canonical_run_at(task, time)
16
+ [task, run_at] if run_at
17
+ end
15
18
  end
16
19
 
17
20
  def enqueue_task(task, run_at:)
@@ -34,6 +37,12 @@ module Pgbus
34
37
  end
35
38
  end
36
39
  rescue AlreadyRecorded
40
+ # AlreadyRecorded means this (task_key, run_at) was already enqueued.
41
+ # If we acquired a NEW lock (prior lock was already released because the
42
+ # job completed), release it — no message will use it. If we didn't
43
+ # acquire a lock (nil or :already_locked), there's nothing to release.
44
+ # In either case, we are NOT opening a race window because the job for
45
+ # this run_at already ran or is running.
37
46
  release_uniqueness_lock(acquired_key)
38
47
  Pgbus.logger.debug { "[Pgbus] Recurring task #{task.key} already enqueued for #{run_at.iso8601}" }
39
48
  rescue StandardError
@@ -75,23 +84,31 @@ module Pgbus
75
84
  end
76
85
  end
77
86
 
78
- def task_due?(task, time)
79
- # A task is due when its most recent cron occurrence (previous_time)
80
- # falls within the current tick window. We also check match? to
81
- # handle the exact-boundary case where time == cron time.
87
+ # Returns the canonical run_at time if the task is due, or nil if not.
88
+ # This ensures a consistent run_at regardless of which tick detects
89
+ # the cron occurrence, fixing a bug where a tick at the exact
90
+ # boundary (match?(t)) sees previous_time=T-1, while a tick 1s later gets
91
+ # previous_time=T, producing different run_at values for the same
92
+ # cron occurrence and bypassing RecurringExecution deduplication.
93
+ def canonical_run_at(task, time)
82
94
  cron = task.parsed_schedule
83
- return false unless cron
84
-
85
- # Check if `time` itself matches the cron (exact boundary hit)
86
- return true if cron.match?(time)
95
+ return nil unless cron
96
+
97
+ # Exact boundary hit: cron.match?(time) is true.
98
+ # previous_time returns the PRIOR occurrence here, but the cron
99
+ # time that fired is `time` itself (truncated to the minute).
100
+ if cron.match?(time)
101
+ # Fugit next_time from 1 second before gives us the current cron time
102
+ return cron.next_time(time - 1).to_t
103
+ end
87
104
 
88
- # Check if the previous occurrence was recent enough that we should
89
- # still fire it (handles the case where we tick slightly after the
90
- # cron time). The window is the scheduler interval.
105
+ # Within the scheduler interval window after the cron time.
91
106
  prev = task.previous_time(time)
92
- return false unless prev
107
+ return nil unless prev
93
108
 
94
- (time - prev) <= @config.recurring_schedule_interval
109
+ return prev if (time - prev) <= @config.recurring_schedule_interval
110
+
111
+ nil
95
112
  end
96
113
 
97
114
  def resolve_queue(task)
@@ -40,10 +40,7 @@ module Pgbus
40
40
  end
41
41
 
42
42
  def tick(now)
43
- schedule.due_tasks(now).each do |task|
44
- run_at = task.previous_time(now)
45
- next unless run_at
46
-
43
+ schedule.due_tasks(now).each do |task, run_at|
47
44
  schedule.enqueue_task(task, run_at: run_at)
48
45
  @last_runs[task.key] = now
49
46
  rescue StandardError => e
data/lib/pgbus/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Pgbus
4
- VERSION = "0.3.5"
4
+ VERSION = "0.3.7"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pgbus
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.5
4
+ version: 0.3.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mikael Henriksson
@@ -198,6 +198,7 @@ files:
198
198
  - config/routes.rb
199
199
  - exe/pgbus
200
200
  - lib/active_job/queue_adapters/pgbus_adapter.rb
201
+ - lib/generators/pgbus/add_failed_events_index_generator.rb
201
202
  - lib/generators/pgbus/add_job_locks_generator.rb
202
203
  - lib/generators/pgbus/add_job_stats_generator.rb
203
204
  - lib/generators/pgbus/add_job_stats_latency_generator.rb
@@ -206,6 +207,7 @@ files:
206
207
  - lib/generators/pgbus/add_recurring_generator.rb
207
208
  - lib/generators/pgbus/install_generator.rb
208
209
  - lib/generators/pgbus/migrate_job_locks_generator.rb
210
+ - lib/generators/pgbus/templates/add_failed_events_unique_index.rb.erb
209
211
  - lib/generators/pgbus/templates/add_job_locks.rb.erb
210
212
  - lib/generators/pgbus/templates/add_job_stats.rb.erb
211
213
  - lib/generators/pgbus/templates/add_job_stats_latency.rb.erb
@@ -240,6 +242,7 @@ files:
240
242
  - lib/pgbus/event_bus/publisher.rb
241
243
  - lib/pgbus/event_bus/registry.rb
242
244
  - lib/pgbus/event_bus/subscriber.rb
245
+ - lib/pgbus/failed_event_recorder.rb
243
246
  - lib/pgbus/instrumentation.rb
244
247
  - lib/pgbus/outbox.rb
245
248
  - lib/pgbus/outbox/poller.rb