pgbus 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.bun-version +1 -0
- data/.claude/commands/architect.md +100 -0
- data/.claude/commands/github-review-comments.md +237 -0
- data/.claude/commands/lfg.md +271 -0
- data/.claude/commands/review-pr.md +69 -0
- data/.claude/commands/security.md +122 -0
- data/.claude/commands/tdd.md +148 -0
- data/.claude/rules/agents.md +49 -0
- data/.claude/rules/coding-style.md +91 -0
- data/.claude/rules/git-workflow.md +56 -0
- data/.claude/rules/performance.md +73 -0
- data/.claude/rules/testing.md +67 -0
- data/CHANGELOG.md +5 -0
- data/CLAUDE.md +80 -0
- data/CODE_OF_CONDUCT.md +10 -0
- data/LICENSE.txt +21 -0
- data/README.md +417 -0
- data/Rakefile +14 -0
- data/app/controllers/pgbus/api/stats_controller.rb +11 -0
- data/app/controllers/pgbus/application_controller.rb +35 -0
- data/app/controllers/pgbus/dashboard_controller.rb +27 -0
- data/app/controllers/pgbus/dead_letter_controller.rb +50 -0
- data/app/controllers/pgbus/events_controller.rb +23 -0
- data/app/controllers/pgbus/jobs_controller.rb +48 -0
- data/app/controllers/pgbus/processes_controller.rb +10 -0
- data/app/controllers/pgbus/queues_controller.rb +21 -0
- data/app/helpers/pgbus/application_helper.rb +69 -0
- data/app/views/layouts/pgbus/application.html.erb +76 -0
- data/app/views/pgbus/dashboard/_processes_table.html.erb +30 -0
- data/app/views/pgbus/dashboard/_queues_table.html.erb +39 -0
- data/app/views/pgbus/dashboard/_recent_failures.html.erb +33 -0
- data/app/views/pgbus/dashboard/_stats_cards.html.erb +28 -0
- data/app/views/pgbus/dashboard/show.html.erb +10 -0
- data/app/views/pgbus/dead_letter/_messages_table.html.erb +40 -0
- data/app/views/pgbus/dead_letter/index.html.erb +15 -0
- data/app/views/pgbus/dead_letter/show.html.erb +52 -0
- data/app/views/pgbus/events/index.html.erb +57 -0
- data/app/views/pgbus/events/show.html.erb +28 -0
- data/app/views/pgbus/jobs/_enqueued_table.html.erb +34 -0
- data/app/views/pgbus/jobs/_failed_table.html.erb +45 -0
- data/app/views/pgbus/jobs/index.html.erb +16 -0
- data/app/views/pgbus/jobs/show.html.erb +57 -0
- data/app/views/pgbus/processes/_processes_table.html.erb +37 -0
- data/app/views/pgbus/processes/index.html.erb +3 -0
- data/app/views/pgbus/queues/_queues_list.html.erb +41 -0
- data/app/views/pgbus/queues/index.html.erb +3 -0
- data/app/views/pgbus/queues/show.html.erb +49 -0
- data/bun.lock +18 -0
- data/config/routes.rb +45 -0
- data/docs/README.md +28 -0
- data/docs/switch_from_good_job.md +279 -0
- data/docs/switch_from_sidekiq.md +226 -0
- data/docs/switch_from_solid_queue.md +247 -0
- data/exe/pgbus +7 -0
- data/lib/generators/pgbus/install_generator.rb +56 -0
- data/lib/generators/pgbus/templates/migration.rb.erb +114 -0
- data/lib/generators/pgbus/templates/pgbus.yml.erb +74 -0
- data/lib/generators/pgbus/templates/pgbus_binstub.erb +7 -0
- data/lib/pgbus/active_job/adapter.rb +109 -0
- data/lib/pgbus/active_job/executor.rb +107 -0
- data/lib/pgbus/batch.rb +153 -0
- data/lib/pgbus/cli.rb +84 -0
- data/lib/pgbus/client.rb +162 -0
- data/lib/pgbus/concurrency/blocked_execution.rb +74 -0
- data/lib/pgbus/concurrency/semaphore.rb +66 -0
- data/lib/pgbus/concurrency.rb +65 -0
- data/lib/pgbus/config_loader.rb +27 -0
- data/lib/pgbus/configuration.rb +99 -0
- data/lib/pgbus/engine.rb +31 -0
- data/lib/pgbus/event.rb +31 -0
- data/lib/pgbus/event_bus/handler.rb +76 -0
- data/lib/pgbus/event_bus/publisher.rb +42 -0
- data/lib/pgbus/event_bus/registry.rb +54 -0
- data/lib/pgbus/event_bus/subscriber.rb +30 -0
- data/lib/pgbus/process/consumer.rb +113 -0
- data/lib/pgbus/process/dispatcher.rb +154 -0
- data/lib/pgbus/process/heartbeat.rb +71 -0
- data/lib/pgbus/process/signal_handler.rb +49 -0
- data/lib/pgbus/process/supervisor.rb +198 -0
- data/lib/pgbus/process/worker.rb +153 -0
- data/lib/pgbus/serializer.rb +43 -0
- data/lib/pgbus/version.rb +5 -0
- data/lib/pgbus/web/authentication.rb +24 -0
- data/lib/pgbus/web/data_source.rb +406 -0
- data/lib/pgbus.rb +49 -0
- data/package.json +9 -0
- data/sig/pgbus.rbs +4 -0
- metadata +198 -0
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "concurrent"
|
|
4
|
+
|
|
5
|
+
module Pgbus
|
|
6
|
+
module Process
|
|
7
|
+
# Event-bus consumer process.
#
# Resolves the queues of every registered subscriber whose pattern overlaps
# one of the configured topic filters, polls those queues through
# +Pgbus.client+, runs the matching handlers on a fixed-size thread pool and
# archives each message once its handlers have finished without raising.
class Consumer
  include SignalHandler

  attr_reader :topics, :threads, :config

  # @param topics [String, Array<String>] topic filter(s) this consumer serves
  # @param threads [Integer] size of the handler thread pool
  # @param config [Object] Pgbus configuration; +polling_interval+ is read from it
  def initialize(topics:, threads: 3, config: Pgbus.configuration)
    @topics = Array(topics)
    @threads = threads
    @config = config
    @shutting_down = false
    @queue_names = []
    @pool = Concurrent::FixedThreadPool.new(threads)
    @registry = EventBus::Registry.instance
  end

  # Main loop: installs signal traps, registers the heartbeat, resolves the
  # queues to poll and then consumes until a shutdown is requested.
  # Cleanup runs from +ensure+ so the pool, heartbeat row and signal traps
  # are released even when the loop is left through an exception.
  def run
    setup_signals
    start_heartbeat
    setup_subscriptions
    Pgbus.logger.info { "[Pgbus] Consumer started: topics=#{topics.join(",")} threads=#{threads}" }

    loop do
      break if @shutting_down

      process_signals
      # Re-check after draining signals: once the pool has been killed by
      # immediate_shutdown, posting further work to it would be rejected.
      break if @shutting_down

      consume
    end
  ensure
    shutdown
  end

  # Requests a stop after the current loop iteration; queued work still
  # gets up to the pool-termination timeout in #shutdown to finish.
  def graceful_shutdown
    @shutting_down = true
  end

  # Requests a stop and kills the thread pool right away.
  def immediate_shutdown
    @shutting_down = true
    @pool.kill
  end

  private

  # Collects the distinct queue names of all subscribers overlapping at
  # least one configured topic filter.
  def setup_subscriptions
    matching = @registry.subscribers.select do |subscriber|
      topics.any? { |filter| pattern_overlaps?(filter, subscriber.pattern) }
    end
    @queue_names = matching.map(&:queue_name).uniq
  end

  # Reads as many messages as the pool can take and schedules them; sleeps
  # for +config.polling_interval+ when there is no capacity or no message.
  def consume
    # NOTE(review): queue_length only counts tasks waiting in the pool's
    # queue, not tasks currently running — confirm this is the intended
    # capacity measure.
    idle = @pool.max_length - @pool.queue_length
    return sleep(config.polling_interval) if idle <= 0

    pending = read_messages(idle)
    if pending.empty?
      sleep(config.polling_interval)
      return
    end

    pending.each do |queue_name, message|
      @pool.post { handle_message(message, queue_name) }
    end
  end

  # Reads at most +limit+ messages in total across all queues, asking each
  # queue only for the capacity that is still left. Requesting +limit+ from
  # every queue and truncating afterwards would leave the surplus messages
  # read but never processed.
  #
  # @param limit [Integer] maximum number of messages to fetch
  # @return [Array<Array>] pairs of +[queue_name, message]+
  def read_messages(limit)
    collected = []
    @queue_names.each do |queue_name|
      remaining = limit - collected.size
      break if remaining <= 0

      batch = Pgbus.client.read_batch(queue_name, qty: remaining) || []
      batch.each { |message| collected << [queue_name, message] }
    end
    collected
  end

  # Runs every handler registered for the message's routing key, then
  # archives the message. Any error is logged and the message is left
  # unarchived.
  #
  # @param message [Object] queue message; +message+ (JSON text) and +msg_id+ are read
  # @param source_queue [String, nil] queue the message was read from; used
  #   for archiving when the message itself does not report a queue name
  def handle_message(message, source_queue = nil)
    raw = JSON.parse(message.message)
    routing_key = raw.dig("headers", "routing_key") || raw["routing_key"]

    @registry.handlers_for(routing_key || "").each do |subscriber|
      subscriber.handler_class.new.process(message)
    end

    reported = message.queue_name if message.respond_to?(:queue_name)
    queue_name = reported || source_queue || @queue_names.first
    Pgbus.client.archive_message(queue_name, message.msg_id.to_i)
  rescue StandardError => e
    Pgbus.logger.error { "[Pgbus] Consumer error: #{e.class}: #{e.message}" }
  end

  # Decides whether a consumer topic filter and a subscription pattern can
  # refer to the same routing keys. True when the two are equal, when the
  # subscription starts with the filter (minus a trailing ".#"), or when
  # the dot-separated segments overlap under wildcard matching. A filter
  # merely ending in "#" no longer matches unrelated subscriptions.
  def pattern_overlaps?(topic_filter, subscription_pattern)
    return true if topic_filter == subscription_pattern
    return true if subscription_pattern.start_with?(topic_filter.delete_suffix(".#"))

    segments_overlap?(topic_filter.split("."), subscription_pattern.split("."))
  end

  # Segment-wise overlap test for two patterns.
  # Assumes "#" stands for zero or more segments and "*" for exactly one
  # segment — TODO confirm against EventBus::Registry's matching rules.
  def segments_overlap?(left, right)
    return right.all? { |segment| segment == "#" } if left.empty?
    return left.all? { |segment| segment == "#" } if right.empty?

    if left.first == "#"
      segments_overlap?(left.drop(1), right) || segments_overlap?(left, right.drop(1))
    elsif right.first == "#"
      segments_overlap?(left, right.drop(1)) || segments_overlap?(left.drop(1), right)
    elsif left.first == "*" || right.first == "*" || left.first == right.first
      segments_overlap?(left.drop(1), right.drop(1))
    else
      false
    end
  end

  # Registers this process as a "consumer" and starts the periodic heartbeat.
  def start_heartbeat
    @heartbeat = Heartbeat.new(
      kind: "consumer",
      metadata: { topics: topics, threads: threads, pid: ::Process.pid }
    )
    @heartbeat.start
  end

  # Stops accepting work, waits up to 30 seconds for in-flight handlers,
  # then removes the heartbeat and restores the previous signal handlers.
  def shutdown
    @pool.shutdown
    @pool.wait_for_termination(30)
    @heartbeat&.stop
    restore_signals
    Pgbus.logger.info { "[Pgbus] Consumer stopped" }
  end
end
|
|
112
|
+
end
|
|
113
|
+
end
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Pgbus
|
|
4
|
+
module Process
|
|
5
|
+
# Maintenance process: wakes up every +config.dispatch_interval+ seconds and
# runs the periodic cleanup tasks whose interval has elapsed.
class Dispatcher
  include SignalHandler

  # Maintenance runs on coarser intervals than the main loop
  CLEANUP_INTERVAL = 3600 # Run idempotency cleanup every hour
  REAP_INTERVAL = 300 # Run stale process reaping every 5 minutes
  CONCURRENCY_INTERVAL = 300 # Run concurrency cleanup every 5 minutes
  BATCH_CLEANUP_INTERVAL = 3600 # Run batch cleanup every hour

  # Maintenance method name => minimum number of seconds between two runs.
  MAINTENANCE_TASKS = {
    cleanup_processed_events: CLEANUP_INTERVAL,
    reap_stale_processes: REAP_INTERVAL,
    cleanup_concurrency: CONCURRENCY_INTERVAL,
    cleanup_batches: BATCH_CLEANUP_INTERVAL
  }.freeze

  attr_reader :config

  # @param config [Object] Pgbus configuration; +dispatch_interval+ and
  #   +idempotency_ttl+ are read from it
  def initialize(config: Pgbus.configuration)
    @config = config
    @shutting_down = false
    # Every task counts as "just run" at boot, so the first execution
    # happens one full interval after start.
    started = monotonic_now
    @last_run_at = MAINTENANCE_TASKS.keys.to_h { |task| [task, started] }
  end

  # Main loop. Cleanup runs from +ensure+ so the heartbeat row and signal
  # traps are released even when the loop is left through an exception.
  def run
    setup_signals
    start_heartbeat
    Pgbus.logger.info do
      "[Pgbus] Dispatcher started: interval=#{config.dispatch_interval}s"
    end

    loop do
      break if @shutting_down

      process_signals
      break if @shutting_down

      run_maintenance
      break if @shutting_down

      sleep(config.dispatch_interval)
    end
  ensure
    shutdown
  end

  # Requests the main loop to stop.
  def graceful_shutdown
    @shutting_down = true
  end

  # Same as #graceful_shutdown: the dispatcher has no in-flight work to abort.
  def immediate_shutdown
    @shutting_down = true
  end

  private

  # Monotonic timestamp in seconds; unlike wall-clock time it is not
  # affected by system clock adjustments.
  def monotonic_now
    ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
  end

  # Runs each maintenance task whose interval has elapsed since its last run.
  def run_maintenance
    now = monotonic_now

    MAINTENANCE_TASKS.each do |task, interval|
      next if now - @last_run_at[task] < interval

      send(task)
      @last_run_at[task] = now
    end
  rescue StandardError => e
    Pgbus.logger.error { "[Pgbus] Dispatcher maintenance error: #{e.message}" }
  end

  # Deletes processed-event rows older than +config.idempotency_ttl+
  # seconds. Skipped without ActiveRecord or without a positive TTL.
  def cleanup_processed_events
    return unless defined?(ActiveRecord::Base)

    ttl = config.idempotency_ttl
    return unless ttl&.positive?

    deleted = ActiveRecord::Base.connection.delete(
      "DELETE FROM pgbus_processed_events WHERE processed_at < $1",
      "Pgbus Idempotency Cleanup",
      [Time.now.utc - ttl]
    )
    Pgbus.logger.debug { "[Pgbus] Cleaned up #{deleted} expired processed events" } if deleted.positive?
  rescue StandardError => e
    Pgbus.logger.warn { "[Pgbus] Idempotency cleanup failed: #{e.message}" }
  end

  # Deletes process rows whose last heartbeat is older than
  # +Heartbeat::ALIVE_THRESHOLD+ seconds.
  def reap_stale_processes
    return unless defined?(ActiveRecord::Base)

    threshold = Heartbeat::ALIVE_THRESHOLD
    deleted = ActiveRecord::Base.connection.delete(
      "DELETE FROM pgbus_processes WHERE last_heartbeat_at < $1",
      "Pgbus Stale Process Reap",
      [Time.now.utc - threshold]
    )
    Pgbus.logger.info { "[Pgbus] Reaped #{deleted} stale processes" } if deleted.positive?
  rescue StandardError => e
    Pgbus.logger.warn { "[Pgbus] Stale process reaping failed: #{e.message}" }
  end

  # Expires stale semaphores, releases one blocked execution for each
  # expired key, then expires stale blocked executions.
  def cleanup_concurrency
    expired_keys = Concurrency::Semaphore.expire_stale
    expired_keys.each do |row|
      release_blocked_for_key(row["key"])
    end

    orphaned = Concurrency::BlockedExecution.expire_stale
    Pgbus.logger.debug { "[Pgbus] Expired #{orphaned} orphaned blocked executions" } if orphaned.positive?
  rescue StandardError => e
    Pgbus.logger.warn { "[Pgbus] Concurrency cleanup failed: #{e.message}" }
  end

  # Releases the next blocked execution for +key+, if any, and re-enqueues
  # its payload on its queue.
  def release_blocked_for_key(key)
    released = Concurrency::BlockedExecution.release_next(key)
    return unless released

    Pgbus.client.send_message(released[:queue_name], released[:payload])
    Pgbus.logger.debug { "[Pgbus] Released blocked execution for key: #{key}" }
  rescue StandardError => e
    Pgbus.logger.warn { "[Pgbus] Failed to release blocked execution for #{key}: #{e.message}" }
  end

  # Asks Batch to clean up with a cutoff seven days in the past.
  def cleanup_batches
    deleted = Batch.cleanup(older_than: Time.now.utc - (7 * 24 * 3600)) # 7 days
    Pgbus.logger.debug { "[Pgbus] Cleaned up #{deleted} finished batches" } if deleted.positive?
  rescue StandardError => e
    Pgbus.logger.warn { "[Pgbus] Batch cleanup failed: #{e.message}" }
  end

  # Registers this process as a "dispatcher" and starts the periodic heartbeat.
  def start_heartbeat
    @heartbeat = Heartbeat.new(kind: "dispatcher", metadata: { pid: ::Process.pid })
    @heartbeat.start
  end

  # Removes the heartbeat and restores the previous signal handlers.
  def shutdown
    @heartbeat&.stop
    restore_signals
    Pgbus.logger.info { "[Pgbus] Dispatcher stopped" }
  end
end
|
|
153
|
+
end
|
|
154
|
+
end
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "concurrent"
|
|
4
|
+
|
|
5
|
+
module Pgbus
|
|
6
|
+
module Process
|
|
7
|
+
# Registers the current process in the +pgbus_processes+ table and refreshes
# its +last_heartbeat_at+ column on a timer until stopped. Every database
# step is skipped when ActiveRecord is not loaded, and failures are logged
# rather than raised.
class Heartbeat
  INTERVAL = 60 # seconds
  ALIVE_THRESHOLD = 300 # 5 minutes

  # NOTE(review): nothing in this class assigns @process_record, so this
  # reader returns nil; kept for interface compatibility.
  attr_reader :process_record

  # @param kind [String] process kind stored in the +kind+ column
  # @param metadata [Hash] extra data stored as JSON in the +metadata+ column
  def initialize(kind:, metadata: {})
    @kind = kind
    @metadata = metadata
    @timer = nil
    @process_id = nil
  end

  # Inserts the process row and starts a timer calling #beat every
  # +INTERVAL+ seconds.
  def start
    register_process
    @timer = Concurrent::TimerTask.new(execution_interval: INTERVAL) { beat }
    @timer.execute
  end

  # Stops the timer and deletes the process row.
  def stop
    @timer&.shutdown
    deregister_process
  end

  # Refreshes +last_heartbeat_at+ for the registered row. Does nothing when
  # registration did not yield an id.
  def beat
    return unless @process_id && defined?(ActiveRecord::Base)

    # exec_update accepts bind values; connection.execute only takes
    # (sql, name), so passing binds to it raises ArgumentError.
    ActiveRecord::Base.connection.exec_update(
      "UPDATE pgbus_processes SET last_heartbeat_at = NOW() WHERE id = $1",
      "Pgbus Heartbeat",
      [@process_id]
    )
  rescue StandardError => e
    Pgbus.logger.warn { "[Pgbus] Heartbeat failed: #{e.message}" }
  end

  private

  # Inserts the process row and remembers the generated id.
  def register_process
    return unless defined?(ActiveRecord::Base)

    # exec_query runs the statement verbatim. The SQL already carries its
    # own RETURNING clause, which exec_insert may duplicate on PostgreSQL.
    result = ActiveRecord::Base.connection.exec_query(
      "INSERT INTO pgbus_processes (kind, hostname, pid, metadata, last_heartbeat_at, created_at, updated_at) " \
      "VALUES ($1, $2, $3, $4, NOW(), NOW(), NOW()) RETURNING id",
      "Pgbus Register Process",
      [@kind, Socket.gethostname, ::Process.pid, JSON.generate(@metadata)]
    )
    @process_id = result.first["id"]
  rescue StandardError => e
    Pgbus.logger.warn { "[Pgbus] Process registration failed: #{e.message}" }
  end

  # Deletes the process row; errors are ignored on purpose.
  def deregister_process
    return unless @process_id && defined?(ActiveRecord::Base)

    ActiveRecord::Base.connection.exec_delete(
      "DELETE FROM pgbus_processes WHERE id = $1",
      "Pgbus Deregister Process",
      [@process_id]
    )
  rescue StandardError
    # Best effort — process is exiting anyway
  end
end
|
|
70
|
+
end
|
|
71
|
+
end
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Pgbus
|
|
4
|
+
module Process
|
|
5
|
+
# Mixin giving a process class deferred signal handling: the trap handlers
# only enqueue the signal name, and the host's main loop calls
# #process_signals to act on queued signals outside of trap context.
#
# Including classes are expected to override #graceful_shutdown and
# #immediate_shutdown.
module SignalHandler
  # Exposes the signal queue on the including class.
  def self.included(base)
    base.attr_reader :signal_queue
  end

  # Installs traps for INT, TERM and QUIT that enqueue the signal name,
  # remembering the handlers that were in place before.
  def setup_signals
    @signal_queue = Queue.new
    @previous_handlers = {}

    %w[INT TERM QUIT].each do |sig|
      @previous_handlers[sig] = trap(sig) { @signal_queue << sig }
    end
  end

  # Reinstalls the handlers saved by #setup_signals; a no-op when
  # #setup_signals has not been called.
  def restore_signals
    @previous_handlers&.each do |sig, handler|
      trap(sig, handler || "DEFAULT")
    end
  end

  # Drains the signal queue: INT and TERM trigger #graceful_shutdown, QUIT
  # triggers #immediate_shutdown, anything else is ignored.
  def process_signals
    while (sig = next_pending_signal)
      case sig
      when "INT", "TERM"
        graceful_shutdown
      when "QUIT"
        immediate_shutdown
      end
    end
  end

  # Hook invoked for INT/TERM; must be overridden by the including class.
  #
  # @raise [NotImplementedError] when not overridden
  def graceful_shutdown
    raise NotImplementedError
  end

  # Hook invoked for QUIT; must be overridden by the including class.
  #
  # @raise [NotImplementedError] when not overridden
  def immediate_shutdown
    raise NotImplementedError
  end

  private

  # Pops the next queued signal name without blocking.
  #
  # @return [String, nil] nil when the queue is empty or was never set up
  def next_pending_signal
    @signal_queue&.pop(true)
  rescue ThreadError
    # Raised by a non-blocking pop on an empty queue. Only this is rescued,
    # so unrelated errors are no longer silently swallowed.
    nil
  end
end
|
|
48
|
+
end
|
|
49
|
+
end
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Pgbus
|
|
4
|
+
module Process
|
|
5
|
+
# Parent process: forks one child per configured worker, one dispatcher and
# one child per configured event consumer, restarts children that exit
# while the supervisor is running, and forwards shutdown signals to them.
class Supervisor
  include SignalHandler

  FORK_WAIT = 1 # seconds between fork checks

  attr_reader :config

  # @param config [Object] Pgbus configuration; +workers+, +event_consumers+
  #   and +default_queue+ are read from it
  def initialize(config: Pgbus.configuration)
    @config = config
    @forks = {}
    @shutting_down = false
  end

  # Boots all children and monitors them until shutdown; #shutdown always
  # runs, including when booting or monitoring raises.
  def run
    setup_signals
    start_heartbeat

    Pgbus.logger.info { "[Pgbus] Supervisor starting pid=#{::Process.pid}" }

    boot_processes
    monitor_loop
  ensure
    shutdown
  end

  # Stops restarting children and sends them TERM.
  def graceful_shutdown
    Pgbus.logger.info { "[Pgbus] Supervisor: graceful shutdown requested" }
    @shutting_down = true
    signal_children("TERM")
  end

  # Stops restarting children and sends them QUIT.
  def immediate_shutdown
    Pgbus.logger.warn { "[Pgbus] Supervisor: immediate shutdown requested" }
    @shutting_down = true
    signal_children("QUIT")
  end

  private

  # Forks workers, then the dispatcher, then the event consumers.
  def boot_processes
    config.workers.each do |worker_config|
      fork_worker(worker_config)
    end

    fork_dispatcher

    boot_consumers
  end

  # Forks a child that resets the inherited signal handlers, loads the
  # Rails application and then runs the given block, and records +info+
  # under the child's pid so the child can be restarted later.
  #
  # @param info [Hash] restart information (+:type+ and optionally +:config+)
  # @return [Integer] pid of the forked child
  def fork_child(info)
    pid = fork do
      restore_signals
      setup_child_signals
      load_rails_app
      yield
    end

    @forks[pid] = info
    pid
  end

  # @param worker_config [Hash] may carry +queues+ and +threads+ under
  #   symbol or string keys
  def fork_worker(worker_config)
    # Array() also accepts a single queue name given as a String.
    queues = Array(worker_config[:queues] || worker_config["queues"] || [config.default_queue])
    threads = worker_config[:threads] || worker_config["threads"] || 5

    pid = fork_child(type: :worker, config: worker_config) do
      Worker.new(queues: queues, threads: threads, config: config).run
    end

    Pgbus.logger.info { "[Pgbus] Forked worker pid=#{pid} queues=#{queues.join(",")}" }
  end

  def fork_dispatcher
    pid = fork_child(type: :dispatcher) do
      Dispatcher.new(config: config).run
    end

    Pgbus.logger.info { "[Pgbus] Forked dispatcher pid=#{pid}" }
  end

  # Forks one consumer per entry of +config.event_consumers+, if configured.
  def boot_consumers
    return unless config.event_consumers

    config.event_consumers.each do |consumer_config|
      fork_consumer(consumer_config)
    end
  end

  # @param consumer_config [Hash] may carry +topics+ and +threads+ under
  #   symbol or string keys
  def fork_consumer(consumer_config)
    # Array() keeps the log line below from failing on a missing or
    # single-String topics entry after the child has already been forked.
    topics = Array(consumer_config[:topics] || consumer_config["topics"])
    threads = consumer_config[:threads] || consumer_config["threads"] || 3

    pid = fork_child(type: :consumer, config: consumer_config) do
      Consumer.new(topics: topics, threads: threads, config: config).run
    end

    Pgbus.logger.info { "[Pgbus] Forked consumer pid=#{pid} topics=#{topics.join(",")}" }
  end

  # Polls signals and child exits every +FORK_WAIT+ seconds until a
  # shutdown was requested and every child has exited.
  # NOTE(review): there is no upper bound on this wait, so the forced KILL
  # in #shutdown is not reached while a child ignores the signal — confirm
  # that is intended.
  def monitor_loop
    loop do
      break if @shutting_down && @forks.empty?

      process_signals
      reap_children
      sleep(FORK_WAIT)
    end
  end

  # Collects every child that has exited, without blocking. Exits are only
  # logged during shutdown; otherwise the child is restarted.
  def reap_children
    loop do
      pid, status = ::Process.waitpid2(-1, ::Process::WNOHANG)
      break unless pid

      info = @forks.delete(pid)
      next unless info

      if @shutting_down
        Pgbus.logger.info { "[Pgbus] Child #{info[:type]} pid=#{pid} exited (status=#{status&.exitstatus})" }
      else
        Pgbus.logger.warn do
          "[Pgbus] Child #{info[:type]} pid=#{pid} exited unexpectedly (status=#{status&.exitstatus}), restarting..."
        end
        restart_child(info)
      end
    end
  rescue Errno::ECHILD
    # No children left to wait for.
    nil
  end

  # Forks a replacement for an exited child based on its recorded info.
  def restart_child(info)
    case info[:type]
    when :worker
      fork_worker(info[:config])
    when :dispatcher
      fork_dispatcher
    when :consumer
      fork_consumer(info[:config])
    end
  end

  # Sends +sig+ to every tracked child, ignoring children already gone.
  def signal_children(sig)
    @forks.each_key do |pid|
      ::Process.kill(sig, pid)
    rescue Errno::ESRCH
      # Process already gone
    end
  end

  # Traps installed in a freshly forked child; they only set a flag.
  def setup_child_signals
    %w[INT TERM QUIT].each do |sig|
      trap(sig) { @shutting_down = true }
    end
  end

  # Eager-loads the Rails application when Rails is present.
  def load_rails_app
    return unless defined?(Rails)

    Rails.application.eager_load! if Rails.application.respond_to?(:eager_load!)
  end

  # Registers this process as a "supervisor" and starts the periodic heartbeat.
  def start_heartbeat
    @heartbeat = Heartbeat.new(
      kind: "supervisor",
      metadata: { pid: ::Process.pid, hostname: Socket.gethostname }
    )
    @heartbeat.start
  end

  # Waits up to 30 seconds for the children to exit, force-kills whatever
  # is left, then removes the heartbeat and restores the signal handlers.
  def shutdown
    # When this is reached through an error rather than a signal, the
    # children have not been told to stop yet and reap_children would keep
    # restarting them; flag the shutdown and ask them to terminate first.
    unless @shutting_down
      @shutting_down = true
      signal_children("TERM")
    end

    deadline = Time.now + 30

    until @forks.empty? || Time.now > deadline
      reap_children
      sleep(0.5)
    end

    # Force kill any remaining
    signal_children("KILL") unless @forks.empty?

    @heartbeat&.stop
    restore_signals
    Pgbus.logger.info { "[Pgbus] Supervisor stopped" }
  end
end
|
|
197
|
+
end
|
|
198
|
+
end
|