pgbus 0.1.4 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +326 -11
- data/app/controllers/pgbus/api/insights_controller.rb +16 -0
- data/app/controllers/pgbus/insights_controller.rb +10 -0
- data/app/controllers/pgbus/locks_controller.rb +9 -0
- data/app/controllers/pgbus/outbox_controller.rb +10 -0
- data/app/controllers/pgbus/queues_controller.rb +10 -0
- data/app/helpers/pgbus/application_helper.rb +34 -0
- data/app/models/pgbus/job_lock.rb +82 -0
- data/app/models/pgbus/job_stat.rb +94 -0
- data/app/models/pgbus/outbox_entry.rb +10 -0
- data/app/models/pgbus/queue_state.rb +33 -0
- data/app/views/layouts/pgbus/application.html.erb +33 -8
- data/app/views/pgbus/dashboard/_stats_cards.html.erb +24 -18
- data/app/views/pgbus/insights/show.html.erb +161 -0
- data/app/views/pgbus/locks/index.html.erb +53 -0
- data/app/views/pgbus/outbox/index.html.erb +55 -0
- data/app/views/pgbus/queues/_queues_list.html.erb +15 -1
- data/config/routes.rb +7 -0
- data/lib/generators/pgbus/add_job_locks_generator.rb +52 -0
- data/lib/generators/pgbus/add_job_stats_generator.rb +52 -0
- data/lib/generators/pgbus/add_outbox_generator.rb +52 -0
- data/lib/generators/pgbus/add_queue_states_generator.rb +51 -0
- data/lib/generators/pgbus/add_recurring_generator.rb +1 -1
- data/lib/generators/pgbus/install_generator.rb +1 -1
- data/lib/generators/pgbus/templates/add_job_locks.rb.erb +21 -0
- data/lib/generators/pgbus/templates/add_job_stats.rb.erb +18 -0
- data/lib/generators/pgbus/templates/add_outbox.rb.erb +25 -0
- data/lib/generators/pgbus/templates/add_queue_states.rb.erb +16 -0
- data/lib/generators/pgbus/upgrade_pgmq_generator.rb +1 -1
- data/lib/pgbus/active_job/adapter.rb +64 -9
- data/lib/pgbus/active_job/executor.rb +67 -5
- data/lib/pgbus/circuit_breaker.rb +112 -0
- data/lib/pgbus/client.rb +127 -50
- data/lib/pgbus/configuration.rb +55 -1
- data/lib/pgbus/dedup_cache.rb +76 -0
- data/lib/pgbus/engine.rb +1 -0
- data/lib/pgbus/event_bus/handler.rb +13 -2
- data/lib/pgbus/outbox/poller.rb +117 -0
- data/lib/pgbus/outbox.rb +30 -0
- data/lib/pgbus/process/consumer_priority.rb +64 -0
- data/lib/pgbus/process/dispatcher.rb +75 -0
- data/lib/pgbus/process/heartbeat.rb +3 -1
- data/lib/pgbus/process/lifecycle.rb +111 -0
- data/lib/pgbus/process/queue_lock.rb +87 -0
- data/lib/pgbus/process/supervisor.rb +46 -6
- data/lib/pgbus/process/wake_signal.rb +53 -0
- data/lib/pgbus/process/worker.rb +117 -21
- data/lib/pgbus/queue_factory.rb +62 -0
- data/lib/pgbus/rate_counter.rb +81 -0
- data/lib/pgbus/recurring/schedule.rb +1 -1
- data/lib/pgbus/uniqueness.rb +169 -0
- data/lib/pgbus/version.rb +1 -1
- data/lib/pgbus/web/data_source.rb +136 -2
- data/lib/pgbus.rb +9 -0
- metadata +31 -1
|
# frozen_string_literal: true

module Pgbus
  module Outbox
    # Background process that drains the transactional outbox table.
    #
    # Repeatedly selects unpublished OutboxEntry rows with
    # FOR UPDATE SKIP LOCKED (so several pollers can run concurrently
    # without contending on or double-delivering the same rows) and
    # forwards each one to the bus, stamping published_at on success.
    class Poller
      include Process::SignalHandler

      attr_reader :config

      # config: a Pgbus configuration object providing
      # outbox_poll_interval and outbox_batch_size.
      def initialize(config: Pgbus.configuration)
        @config = config
        @shutting_down = false
      end

      # Main loop: poll, publish, sleep — until a shutdown signal arrives.
      def run
        setup_signals
        start_heartbeat
        Pgbus.logger.info { "[Pgbus] Outbox poller started: interval=#{config.outbox_poll_interval}s" }

        loop do
          break if @shutting_down

          process_signals
          break if @shutting_down

          poll_and_publish
          break if @shutting_down

          interruptible_sleep(config.outbox_poll_interval)
        end

        shutdown
      end

      def graceful_shutdown
        @shutting_down = true
      end

      def immediate_shutdown
        @shutting_down = true
      end

      # Drain all currently-unpublished entries in batches.
      # Returns the number of entries successfully published, or 0 on error.
      def poll_and_publish
        published = 0

        loop do
          succeeded, fetched = publish_batch
          published += succeeded

          # Stop when nothing succeeded (empty table, or every entry in the
          # batch failed) or the batch came back short (no more rows).
          break if succeeded.zero? || fetched < config.outbox_batch_size
        end

        Pgbus.logger.debug { "[Pgbus] Outbox published #{published} entries" } if published.positive?
        published
      rescue StandardError => e
        Pgbus.logger.error { "[Pgbus] Outbox poll error: #{e.message}" }
        0
      end

      private

      # Lock and publish a single batch inside one transaction.
      # Returns [succeeded_count, fetched_count].
      #
      # Deliberately contains no `break`/`return` inside the transaction
      # block: since Rails 6.1 a non-local exit from a transaction block
      # can roll the transaction back, which would discard the
      # published_at updates already written and cause those entries to
      # be published a second time on the next poll.
      def publish_batch
        succeeded = 0
        fetched = 0

        OutboxEntry.transaction do
          entries = OutboxEntry.unpublished
                               .order(:id)
                               .limit(config.outbox_batch_size)
                               .lock("FOR UPDATE SKIP LOCKED")
                               .to_a
          fetched = entries.size

          entries.each do |entry|
            succeeded += 1 if publish_entry(entry)
          end
        end

        [succeeded, fetched]
      end

      # Route one entry to the bus: topic publish when a routing_key is
      # present, direct queue send otherwise. Marks the row published on
      # success; logs and returns false on failure so the caller can
      # count it and retry on a later poll.
      def publish_entry(entry)
        if entry.routing_key.present?
          Pgbus.client.publish_to_topic(
            entry.routing_key,
            entry.payload,
            headers: entry.headers,
            delay: entry.delay || 0
          )
        else
          Pgbus.client.send_message(
            entry.queue_name,
            entry.payload,
            headers: entry.headers,
            delay: entry.delay || 0,
            priority: entry.priority
          )
        end

        entry.update!(published_at: Time.current)
        true
      rescue StandardError => e
        Pgbus.logger.error { "[Pgbus] Failed to publish outbox entry #{entry.id}: #{e.message}" }
        false
      end

      # Register this process in pgbus_processes so the dashboard and
      # reapers can see it is alive.
      def start_heartbeat
        @heartbeat = Process::Heartbeat.new(
          kind: "outbox_poller",
          metadata: { pid: ::Process.pid }
        )
        @heartbeat.start
      end

      def shutdown
        @heartbeat&.stop
        restore_signals
        Pgbus.logger.info { "[Pgbus] Outbox poller stopped" }
      end
    end
  end
end
data/lib/pgbus/outbox.rb
ADDED
|
# frozen_string_literal: true

module Pgbus
  # Transactional-outbox entry points. Call these inside your own database
  # transaction; the background Poller delivers the rows afterwards, so a
  # rollback of your transaction also discards the pending messages.
  module Outbox
    module_function

    # Record a direct queue message in the outbox table.
    # Falls back to the configured default priority when none is given.
    def publish(queue_name, payload, headers: nil, priority: nil, delay: 0)
      effective_priority = priority || Pgbus.configuration.default_priority
      OutboxEntry.create!(
        queue_name: queue_name,
        payload: payload,
        headers: headers,
        priority: effective_priority,
        delay: delay
      )
    end

    # Record a topic event in the outbox table, wrapping the payload in
    # the standard event envelope.
    def publish_event(routing_key, payload, headers: nil)
      OutboxEntry.create!(
        routing_key: routing_key,
        payload: EventBus::Publisher.build_event_data(payload),
        headers: headers
      )
    end

    # Synchronously drain pending entries (handy in tests and consoles).
    def flush!
      Poller.new.poll_and_publish
    end
  end
end
|
# frozen_string_literal: true

module Pgbus
  module Process
    # Implements consumer priority by checking whether higher-priority
    # workers are active for the same queues. When a higher-priority
    # worker is healthy, lower-priority workers yield by using a longer
    # polling interval.
    #
    # Inspired by LavinMQ's consumer priority, where higher-priority
    # consumers are served first and lower-priority consumers wait
    # until all higher-priority consumers are at their prefetch limit.
    module ConsumerPriority
      # True when a healthy worker with strictly higher priority shares
      # at least one of the given queues. Any error during the check is
      # logged at debug level and treated as "don't yield".
      def self.should_yield?(queues:, my_priority:, my_pid:)
        my_priority < max_active_priority(queues, my_pid)
      rescue StandardError => e
        Pgbus.logger.debug { "[Pgbus] Consumer priority check failed: #{e.message}" }
        false
      end

      # Highest consumer_priority among recently-heartbeating workers
      # that share at least one queue with +queues+, excluding the
      # current worker (matched by PID). Returns 0 when none qualify.
      def self.max_active_priority(queues, my_pid)
        conn = Pgbus.configuration.connects_to ? Pgbus::ApplicationRecord.connection : ActiveRecord::Base.connection
        rows = conn.select_all(
          "SELECT metadata FROM pgbus_processes WHERE kind = 'worker' AND pid != $1 AND last_heartbeat_at > $2",
          "Pgbus ConsumerPriority",
          [my_pid, Time.now.utc - Heartbeat::ALIVE_THRESHOLD]
        )

        rows.reduce(0) do |best, row|
          meta = row["metadata"]
          meta = JSON.parse(meta) if meta.is_a?(String)
          next best unless meta

          shared = queues & (meta["queues"] || [])
          next best if shared.empty?

          [best, meta["consumer_priority"] || 0].max
        end
      end

      # Effective polling interval for a worker: the base interval when
      # it is the (joint-)highest priority, otherwise a 3x backoff so
      # higher-priority workers win the race for messages.
      def self.effective_polling_interval(base_interval:, my_priority:, max_priority:)
        return base_interval if my_priority >= max_priority

        base_interval * 3
      end
    end
  end
end
|
@@ -11,6 +11,10 @@ module Pgbus
|
|
|
11
11
|
CONCURRENCY_INTERVAL = 300 # Run concurrency cleanup every 5 minutes
|
|
12
12
|
BATCH_CLEANUP_INTERVAL = 3600 # Run batch cleanup every hour
|
|
13
13
|
RECURRING_CLEANUP_INTERVAL = 3600 # Run recurring execution cleanup every hour
|
|
14
|
+
ARCHIVE_COMPACTION_INTERVAL = 3600 # Run archive compaction every hour
|
|
15
|
+
OUTBOX_CLEANUP_INTERVAL = 3600 # Run outbox cleanup every hour
|
|
16
|
+
JOB_LOCK_CLEANUP_INTERVAL = 300 # Run job lock cleanup every 5 minutes
|
|
17
|
+
STATS_CLEANUP_INTERVAL = 3600 # Run stats cleanup every hour
|
|
14
18
|
|
|
15
19
|
attr_reader :config
|
|
16
20
|
|
|
@@ -22,6 +26,10 @@ module Pgbus
|
|
|
22
26
|
@last_concurrency_at = Time.now
|
|
23
27
|
@last_batch_cleanup_at = Time.now
|
|
24
28
|
@last_recurring_cleanup_at = Time.now
|
|
29
|
+
@last_archive_compaction_at = Time.now
|
|
30
|
+
@last_outbox_cleanup_at = Time.now
|
|
31
|
+
@last_job_lock_cleanup_at = Time.now
|
|
32
|
+
@last_stats_cleanup_at = Time.now
|
|
25
33
|
end
|
|
26
34
|
|
|
27
35
|
def run
|
|
@@ -64,6 +72,10 @@ module Pgbus
|
|
|
64
72
|
run_if_due(now, :@last_concurrency_at, CONCURRENCY_INTERVAL) { cleanup_concurrency }
|
|
65
73
|
run_if_due(now, :@last_batch_cleanup_at, BATCH_CLEANUP_INTERVAL) { cleanup_batches }
|
|
66
74
|
run_if_due(now, :@last_recurring_cleanup_at, RECURRING_CLEANUP_INTERVAL) { cleanup_recurring_executions }
|
|
75
|
+
run_if_due(now, :@last_archive_compaction_at, archive_compaction_interval) { compact_archives }
|
|
76
|
+
run_if_due(now, :@last_outbox_cleanup_at, OUTBOX_CLEANUP_INTERVAL) { cleanup_outbox }
|
|
77
|
+
run_if_due(now, :@last_job_lock_cleanup_at, JOB_LOCK_CLEANUP_INTERVAL) { cleanup_job_locks }
|
|
78
|
+
run_if_due(now, :@last_stats_cleanup_at, STATS_CLEANUP_INTERVAL) { cleanup_stats }
|
|
67
79
|
end
|
|
68
80
|
|
|
69
81
|
# Only update the timestamp when the block succeeds.
|
|
@@ -121,6 +133,69 @@ module Pgbus
|
|
|
121
133
|
Pgbus.logger.warn { "[Pgbus] Batch cleanup failed: #{e.message}" }
|
|
122
134
|
end
|
|
123
135
|
|
|
136
|
+
# Delete job-stat rows older than the configured retention window.
# No-op when stats are disabled or no positive retention is configured.
def cleanup_stats
  return unless config.stats_enabled

  retention = config.stats_retention
  return unless retention&.positive?

  cutoff = Time.now.utc - retention
  removed = JobStat.cleanup!(older_than: cutoff)
  Pgbus.logger.debug { "[Pgbus] Cleaned up #{removed} old job stats" } if removed.positive?
end
|
|
145
|
+
|
|
146
|
+
# Two-phase job-lock reclamation:
#   1. Primary: reap orphaned locks whose owner worker is no longer
#      alive, cross-referencing (owner_pid, owner_hostname) against
#      pgbus_processes heartbeats.
#   2. Last resort: drop locks whose TTL has expired — covers the case
#      where even the reaper/supervisor died and locks were abandoned.
# Intentionally no rescue: run_if_due handles errors and retries next tick.
def cleanup_job_locks
  orphans = JobLock.reap_orphaned!
  Pgbus.logger.info { "[Pgbus] Reaped #{orphans} orphaned job locks" } if orphans.positive?

  stale = JobLock.cleanup_expired!
  Pgbus.logger.debug { "[Pgbus] Cleaned up #{stale} expired job locks" } if stale.positive?
end
|
|
158
|
+
|
|
159
|
+
# Purge outbox rows that were published before the retention cutoff.
# No-op when the outbox is disabled or retention is not a positive value.
def cleanup_outbox
  return unless config.outbox_enabled

  retention = config.outbox_retention
  return unless retention&.positive?

  cutoff = Time.now.utc - retention
  removed = OutboxEntry.published_before(cutoff).delete_all
  Pgbus.logger.debug { "[Pgbus] Cleaned up #{removed} published outbox entries" } if removed.positive?
rescue StandardError => e
  Pgbus.logger.warn { "[Pgbus] Outbox cleanup failed: #{e.message}" }
end
|
|
170
|
+
|
|
171
|
+
# Compaction cadence in seconds: the configured value when present,
# otherwise the hourly ARCHIVE_COMPACTION_INTERVAL default.
def archive_compaction_interval
  config.archive_compaction_interval || ARCHIVE_COMPACTION_INTERVAL
end
|
|
174
|
+
|
|
175
|
+
# Walk every pgmq queue belonging to this installation (matched by the
# configured queue prefix) and purge archive entries older than the
# retention cutoff, in batches. A failure on one queue is logged and
# does not stop compaction of the remaining queues.
def compact_archives
  retention = config.archive_retention
  return unless retention&.positive?

  cutoff = Time.now.utc - retention
  batch_size = config.archive_compaction_batch_size || 1000
  prefix = "#{config.queue_prefix}_"

  conn = config.connects_to ? Pgbus::ApplicationRecord.connection : ActiveRecord::Base.connection
  all_queues = conn.select_values("SELECT queue_name FROM pgmq.meta ORDER BY queue_name")

  all_queues.each do |full_name|
    next unless full_name.start_with?(prefix)

    bare_name = full_name.delete_prefix(prefix)
    deleted = Pgbus.client.purge_archive(bare_name, older_than: cutoff, batch_size: batch_size)
    Pgbus.logger.debug { "[Pgbus] Compacted #{deleted} archive entries from #{full_name}" } if deleted.positive?
  rescue StandardError => e
    # Per-queue failure: log and continue with the next queue.
    Pgbus.logger.warn { "[Pgbus] Archive compaction failed for #{full_name}: #{e.message}" }
  end
rescue StandardError => e
  Pgbus.logger.warn { "[Pgbus] Archive compaction failed: #{e.message}" }
end
|
|
198
|
+
|
|
124
199
|
def cleanup_recurring_executions
|
|
125
200
|
retention = config.recurring_execution_retention
|
|
126
201
|
return unless retention&.positive?
|
|
@@ -11,9 +11,10 @@ module Pgbus
|
|
|
11
11
|
|
|
12
12
|
attr_reader :process_entry
|
|
13
13
|
|
|
14
|
-
def initialize(kind:, metadata: {})
|
|
14
|
+
def initialize(kind:, metadata: {}, on_beat: nil)
|
|
15
15
|
@kind = kind
|
|
16
16
|
@metadata = metadata
|
|
17
|
+
@on_beat = on_beat
|
|
17
18
|
@timer = nil
|
|
18
19
|
end
|
|
19
20
|
|
|
@@ -31,6 +32,7 @@ module Pgbus
|
|
|
31
32
|
def beat
|
|
32
33
|
return unless @process_id
|
|
33
34
|
|
|
35
|
+
@on_beat&.call
|
|
34
36
|
ProcessEntry.where(id: @process_id).update_all(last_heartbeat_at: Time.current)
|
|
35
37
|
rescue StandardError => e
|
|
36
38
|
Pgbus.logger.warn { "[Pgbus] Heartbeat failed: #{e.message}" }
|
|
# frozen_string_literal: true

require "concurrent"

module Pgbus
  module Process
    # Thread-safe worker lifecycle state machine inspired by LavinMQ's
    # QueueState.
    #
    # States:
    #   :starting → initial state, setting up
    #   :running  → actively processing messages
    #   :paused   → temporarily stopped (manual or circuit breaker)
    #   :draining → finishing in-flight work before stopping
    #   :stopped  → terminal state
    #
    # Transitions:
    #   starting → running
    #   running  → paused | draining | stopped
    #   paused   → running | draining | stopped
    #   draining → stopped
    class Lifecycle
      STATES = %i[starting running paused draining stopped].freeze

      TRANSITIONS = {
        starting: %i[running stopped],
        running: %i[paused draining stopped],
        paused: %i[running draining stopped],
        draining: %i[stopped],
        stopped: []
      }.freeze

      attr_reader :state

      def initialize
        @state = :starting
        @mutex = Mutex.new
        @callbacks = Hash.new { |h, k| h[k] = [] }
      end

      # Atomically validate and perform a transition, firing callbacks.
      # Raises InvalidTransition when the move is not allowed.
      #
      # NOTE(review): callbacks fire while the state mutex is held, so a
      # callback that itself calls transition_to! will deadlock (Mutex is
      # not reentrant). Callbacks should stay short and non-reentrant.
      def transition_to!(new_state)
        @mutex.synchronize do
          validate_transition!(new_state)
          previous = @state
          @state = new_state
          fire_callbacks(previous, new_state)
          new_state
        end
      end

      # Non-raising variant: returns false when the transition is invalid.
      def transition_to(new_state)
        transition_to!(new_state)
      rescue InvalidTransition
        false
      end

      # Register a callback. +event+ is either :"<from>_to_<to>" for a
      # specific transition, or :any (invoked with old and new state).
      def on(event, &block)
        @callbacks[event] << block
      end

      # Predicate per state: starting?, running?, paused?, draining?, stopped?
      STATES.each do |state_name|
        define_method(:"#{state_name}?") { @state == state_name }
      end

      # A worker is "active" while it still owns resources (running or paused).
      def active?
        running? || paused?
      end

      # Messages may only be processed in the :running state.
      def can_process?
        running?
      end

      # :stopped is the only terminal state.
      def terminal?
        stopped?
      end

      private

      # Reject unknown states (ArgumentError) and disallowed moves
      # (InvalidTransition) before mutating @state.
      def validate_transition!(new_state)
        raise ArgumentError, "Unknown state: #{new_state}. Valid states: #{STATES.join(", ")}" unless STATES.include?(new_state)

        return if TRANSITIONS[@state].include?(new_state)

        raise InvalidTransition, "Cannot transition from #{@state} to #{new_state}. " \
                                 "Valid transitions: #{TRANSITIONS[@state].join(", ")}"
      end

      # Run transition-specific callbacks first, then :any callbacks.
      def fire_callbacks(old_state, new_state)
        @callbacks[:"#{old_state}_to_#{new_state}"].each(&:call)
        @callbacks[:any].each { |cb| cb.call(old_state, new_state) }
      end
    end

    class InvalidTransition < Pgbus::Error; end
  end
end
|
# frozen_string_literal: true

# concurrent-ruby is required explicitly: this file uses Concurrent::Map
# and must not rely on another file having loaded the gem first.
require "concurrent"
require "zlib"

module Pgbus
  module Process
    # Manages PostgreSQL advisory locks for single-active-consumer mode.
    # Only one worker process can hold the lock for a given queue at a time.
    # Other workers skip the queue and process other queues instead.
    #
    # Uses pg_try_advisory_lock (non-blocking) so workers never wait —
    # they simply skip queues they can't lock and try again next cycle.
    #
    # Locks are session-level and automatically released when the connection
    # closes (including on crash), so no manual cleanup is needed.
    class QueueLock
      # Fixed namespace to avoid collision with application advisory locks.
      # The value is an arbitrary but stable tag (the ASCII bytes "Pgbu");
      # any constant would do as long as it never changes between releases.
      LOCK_NAMESPACE = 0x5067_6275

      def initialize
        # queue_name => lock_id, for locks held by this process.
        @held_locks = Concurrent::Map.new
      end

      # Try to acquire an advisory lock for the given queue name.
      # Returns true if acquired (or already held), false if another
      # process holds it or the query fails.
      def try_lock(queue_name)
        return true if @held_locks[queue_name]

        lock_id = lock_id_for(queue_name)
        # lock_id and LOCK_NAMESPACE are internally-computed integers,
        # so interpolating them into SQL is injection-safe.
        acquired = connection.select_value(
          "SELECT pg_try_advisory_lock(#{LOCK_NAMESPACE}, #{lock_id})"
        )

        if acquired
          @held_locks[queue_name] = lock_id
          true
        else
          false
        end
      rescue StandardError => e
        Pgbus.logger.warn { "[Pgbus] Advisory lock failed for #{queue_name}: #{e.message}" }
        false
      end

      # Release the advisory lock for a queue. Called during shutdown.
      def unlock(queue_name)
        lock_id = @held_locks.delete(queue_name)
        return unless lock_id

        connection.select_value(
          "SELECT pg_advisory_unlock(#{LOCK_NAMESPACE}, #{lock_id})"
        )
      rescue StandardError => e
        Pgbus.logger.warn { "[Pgbus] Advisory unlock failed for #{queue_name}: #{e.message}" }
      end

      # Release all held locks. Snapshot the key set first so we never
      # iterate the map while unlock is deleting from it.
      def unlock_all
        @held_locks.keys.each { |queue_name| unlock(queue_name) }
      end

      def locked?(queue_name)
        @held_locks.key?(queue_name)
      end

      def held_queues
        @held_locks.keys
      end

      private

      # Stable 31-bit hash of the queue name, used as the second key of
      # the two-int advisory lock pair.
      def lock_id_for(queue_name)
        Zlib.crc32(queue_name.to_s) & 0x7FFFFFFF
      end

      # Respect a dedicated pgbus database when one is configured.
      def connection
        if Pgbus.configuration.connects_to
          Pgbus::ApplicationRecord.connection
        else
          ActiveRecord::Base.connection
        end
      end
    end
  end
end
|
|
@@ -55,17 +55,25 @@ module Pgbus
|
|
|
55
55
|
|
|
56
56
|
# Boot event consumers if configured
|
|
57
57
|
boot_consumers
|
|
58
|
+
|
|
59
|
+
# Boot outbox poller if configured
|
|
60
|
+
boot_outbox_poller
|
|
58
61
|
end
|
|
59
62
|
|
|
60
63
|
def fork_worker(worker_config)
|
|
61
64
|
queues = worker_config[:queues] || worker_config["queues"] || [config.default_queue]
|
|
62
65
|
threads = worker_config[:threads] || worker_config["threads"] || 5
|
|
66
|
+
single_active = worker_config[:single_active_consumer] || worker_config["single_active_consumer"] || false
|
|
67
|
+
priority = worker_config[:consumer_priority] || worker_config["consumer_priority"] || 0
|
|
63
68
|
|
|
64
69
|
pid = fork do
|
|
65
70
|
restore_signals
|
|
66
|
-
|
|
71
|
+
setup_child_process
|
|
67
72
|
load_rails_app
|
|
68
|
-
worker = Worker.new(
|
|
73
|
+
worker = Worker.new(
|
|
74
|
+
queues: queues, threads: threads, config: config,
|
|
75
|
+
single_active_consumer: single_active, consumer_priority: priority
|
|
76
|
+
)
|
|
69
77
|
worker.run
|
|
70
78
|
end
|
|
71
79
|
|
|
@@ -83,7 +91,7 @@ module Pgbus
|
|
|
83
91
|
def fork_dispatcher
|
|
84
92
|
pid = fork do
|
|
85
93
|
restore_signals
|
|
86
|
-
|
|
94
|
+
setup_child_process
|
|
87
95
|
load_rails_app
|
|
88
96
|
dispatcher = Dispatcher.new(config: config)
|
|
89
97
|
dispatcher.run
|
|
@@ -110,7 +118,7 @@ module Pgbus
|
|
|
110
118
|
def fork_scheduler
|
|
111
119
|
pid = fork do
|
|
112
120
|
restore_signals
|
|
113
|
-
|
|
121
|
+
setup_child_process
|
|
114
122
|
load_rails_app
|
|
115
123
|
load_recurring_config
|
|
116
124
|
scheduler = Recurring::Scheduler.new(config: config)
|
|
@@ -165,7 +173,7 @@ module Pgbus
|
|
|
165
173
|
|
|
166
174
|
pid = fork do
|
|
167
175
|
restore_signals
|
|
168
|
-
|
|
176
|
+
setup_child_process
|
|
169
177
|
load_rails_app
|
|
170
178
|
consumer = Consumer.new(topics: topics, threads: threads, config: config)
|
|
171
179
|
consumer.run
|
|
@@ -182,6 +190,32 @@ module Pgbus
|
|
|
182
190
|
Pgbus.logger.error { "[Pgbus] Fork failed for consumer: #{e.message}" }
|
|
183
191
|
end
|
|
184
192
|
|
|
193
|
+
# Fork the outbox poller child process, but only when the transactional
# outbox feature is enabled in configuration.
def boot_outbox_poller
  fork_outbox_poller if config.outbox_enabled
end
|
|
198
|
+
|
|
199
|
+
# Fork a child process that runs the outbox poller loop, registering it
# in @forks so the monitor loop can restart it if it dies. Fork-level
# resource errors (EAGAIN/ENOMEM) are logged rather than raised.
def fork_outbox_poller
  pid = fork do
    restore_signals
    setup_child_process
    load_rails_app
    Outbox::Poller.new(config: config).run
  end

  if pid
    @forks[pid] = { type: :outbox_poller }
    Pgbus.logger.info { "[Pgbus] Forked outbox poller pid=#{pid}" }
  else
    Pgbus.logger.error { "[Pgbus] Failed to fork outbox poller" }
  end
rescue Errno::EAGAIN, Errno::ENOMEM => e
  Pgbus.logger.error { "[Pgbus] Fork failed for outbox poller: #{e.message}" }
end
|
|
218
|
+
|
|
185
219
|
def monitor_loop
|
|
186
220
|
loop do
|
|
187
221
|
break if @shutting_down && @forks.empty?
|
|
@@ -223,6 +257,8 @@ module Pgbus
|
|
|
223
257
|
fork_scheduler
|
|
224
258
|
when :consumer
|
|
225
259
|
fork_consumer(info[:config])
|
|
260
|
+
when :outbox_poller
|
|
261
|
+
fork_outbox_poller
|
|
226
262
|
end
|
|
227
263
|
end
|
|
228
264
|
|
|
@@ -234,7 +270,11 @@ module Pgbus
|
|
|
234
270
|
end
|
|
235
271
|
end
|
|
236
272
|
|
|
237
|
-
def
|
|
273
|
+
def setup_child_process
|
|
274
|
+
# Reset the PGMQ client so this forked process gets a fresh
|
|
275
|
+
# PG::Connection instead of inheriting the parent's (which is
|
|
276
|
+
# in undefined state post-fork and not thread-safe to share).
|
|
277
|
+
Pgbus.reset_client!
|
|
238
278
|
%w[INT TERM QUIT].each do |sig|
|
|
239
279
|
trap(sig) { @shutting_down = true }
|
|
240
280
|
end
|
|
# frozen_string_literal: true

require "concurrent"

module Pgbus
  module Process
    # Wake signal inspired by LavinMQ's BoolChannel pattern.
    # Replaces polling-based coordination with instant state-change signaling.
    #
    # IMPORTANT: Single-waiter only. +wait+ resets the event immediately
    # after waking, so concurrent waiters may miss notifications. Callers
    # must ensure only one thread calls +wait+ at a time. In pgbus this is
    # guaranteed because each Worker has exactly one main loop thread that
    # calls +wait+, while +notify!+ may be called from any thread
    # (signal handlers, lifecycle transitions).
    #
    # Usage:
    #   signal = WakeSignal.new
    #   # In the worker thread (the single waiter):
    #   signal.wait(timeout: 5)   # blocks until signaled or timeout
    #   # From any other thread:
    #   signal.notify!            # wakes the waiting thread
    class WakeSignal
      def initialize
        @event = Concurrent::Event.new
      end

      # Block until +notify!+ is called or the timeout expires.
      # Returns true if signaled, false on timeout. The event is reset
      # after waking — which is exactly why only one waiter is supported.
      def wait(timeout: nil)
        signaled = @event.wait(timeout)
        @event.reset
        signaled
      end

      # Wake the (single) waiting thread; the signal stays pending until
      # the next reset if nobody is currently waiting.
      def notify!
        @event.set
      end

      # True when a notification is pending; never blocks.
      def pending?
        @event.set?
      end

      # Discard any pending notification.
      def reset!
        @event.reset
      end
    end
  end
end