pgbus 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +326 -11
- data/app/controllers/pgbus/api/insights_controller.rb +16 -0
- data/app/controllers/pgbus/insights_controller.rb +10 -0
- data/app/controllers/pgbus/locks_controller.rb +9 -0
- data/app/helpers/pgbus/application_helper.rb +28 -0
- data/app/models/pgbus/job_lock.rb +82 -0
- data/app/models/pgbus/job_stat.rb +94 -0
- data/app/views/layouts/pgbus/application.html.erb +31 -8
- data/app/views/pgbus/dashboard/_processes_table.html.erb +6 -6
- data/app/views/pgbus/dashboard/_queues_table.html.erb +6 -6
- data/app/views/pgbus/dashboard/_recent_failures.html.erb +5 -5
- data/app/views/pgbus/dashboard/_stats_cards.html.erb +20 -20
- data/app/views/pgbus/dashboard/show.html.erb +1 -1
- data/app/views/pgbus/dead_letter/_messages_table.html.erb +12 -12
- data/app/views/pgbus/dead_letter/index.html.erb +1 -1
- data/app/views/pgbus/dead_letter/show.html.erb +10 -10
- data/app/views/pgbus/events/index.html.erb +15 -15
- data/app/views/pgbus/events/show.html.erb +5 -5
- data/app/views/pgbus/insights/show.html.erb +161 -0
- data/app/views/pgbus/jobs/_enqueued_table.html.erb +13 -13
- data/app/views/pgbus/jobs/_failed_table.html.erb +7 -7
- data/app/views/pgbus/jobs/index.html.erb +1 -1
- data/app/views/pgbus/jobs/show.html.erb +10 -10
- data/app/views/pgbus/locks/index.html.erb +53 -0
- data/app/views/pgbus/outbox/index.html.erb +12 -12
- data/app/views/pgbus/processes/_processes_table.html.erb +6 -6
- data/app/views/pgbus/processes/index.html.erb +1 -1
- data/app/views/pgbus/queues/_queues_list.html.erb +5 -5
- data/app/views/pgbus/queues/index.html.erb +1 -1
- data/app/views/pgbus/queues/show.html.erb +7 -7
- data/app/views/pgbus/recurring_tasks/_tasks_table.html.erb +6 -6
- data/app/views/pgbus/recurring_tasks/index.html.erb +1 -1
- data/app/views/pgbus/recurring_tasks/show.html.erb +22 -22
- data/config/routes.rb +3 -0
- data/lib/generators/pgbus/add_job_locks_generator.rb +52 -0
- data/lib/generators/pgbus/add_job_stats_generator.rb +52 -0
- data/lib/generators/pgbus/add_outbox_generator.rb +1 -1
- data/lib/generators/pgbus/add_queue_states_generator.rb +1 -1
- data/lib/generators/pgbus/add_recurring_generator.rb +1 -1
- data/lib/generators/pgbus/install_generator.rb +1 -1
- data/lib/generators/pgbus/templates/add_job_locks.rb.erb +21 -0
- data/lib/generators/pgbus/templates/add_job_stats.rb.erb +18 -0
- data/lib/generators/pgbus/upgrade_pgmq_generator.rb +1 -1
- data/lib/pgbus/active_job/adapter.rb +58 -4
- data/lib/pgbus/active_job/executor.rb +45 -0
- data/lib/pgbus/client.rb +8 -22
- data/lib/pgbus/configuration.rb +6 -0
- data/lib/pgbus/engine.rb +1 -0
- data/lib/pgbus/process/consumer_priority.rb +64 -0
- data/lib/pgbus/process/dispatcher.rb +29 -0
- data/lib/pgbus/process/queue_lock.rb +87 -0
- data/lib/pgbus/process/supervisor.rb +6 -1
- data/lib/pgbus/process/wake_signal.rb +53 -0
- data/lib/pgbus/process/worker.rb +36 -6
- data/lib/pgbus/queue_factory.rb +62 -0
- data/lib/pgbus/uniqueness.rb +169 -0
- data/lib/pgbus/version.rb +1 -1
- data/lib/pgbus/web/data_source.rb +49 -0
- data/lib/pgbus.rb +1 -0
- metadata +17 -1
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "rails/generators"
|
|
4
|
+
require "rails/generators/active_record"
|
|
5
|
+
|
|
6
|
+
module Pgbus
|
|
7
|
+
module Generators
|
|
8
|
+
# Rails generator that copies the pgbus job stats migration into the host
# application and prints follow-up instructions.
#
# With --database=NAME the migration is written to db/pgbus_migrate/ instead
# of db/migrate/, and the printed migrate command is suffixed with :NAME.
class AddJobStatsGenerator < Rails::Generators::Base
  include ActiveRecord::Generators::Migration

  source_root File.expand_path("templates", __dir__)

  desc "Add job stats table for dashboard insights and performance tracking"

  class_option :database,
               type: :string,
               default: nil,
               desc: "Use a separate database for pgbus tables (e.g. --database=pgbus)"

  # Generator step: render the migration template into the directory that
  # matches the selected database.
  def create_migration_file
    target_dir = separate_database? ? "db/pgbus_migrate" : "db/migrate"
    migration_template "add_job_stats.rb.erb", "#{target_dir}/add_pgbus_job_stats.rb"
  end

  # Generator step: tell the user what to do after the migration file exists.
  def display_post_install
    database_suffix = separate_database? ? ":#{options[:database]}" : ""

    say ""
    say "Pgbus job stats table installed!", :green
    say ""
    say "Next steps:"
    say " 1. Run: rails db:migrate#{database_suffix}"
    say " 2. Stats collection is enabled by default"
    say " 3. View insights at /pgbus/insights"
    say ""
  end

  private

  # Version suffix interpolated into the template's migration superclass,
  # e.g. "[7.1]".
  def migration_version
    "[#{ActiveRecord::Migration.current_version}]"
  end

  # True when a --database option was supplied.
  def separate_database?
    options[:database].present?
  end
end
|
|
51
|
+
end
|
|
52
|
+
end
|
|
@@ -17,7 +17,7 @@ module Pgbus
|
|
|
17
17
|
default: nil,
|
|
18
18
|
desc: "Use a separate database for pgbus tables (e.g. --database=pgbus)"
|
|
19
19
|
|
|
20
|
-
def
|
|
20
|
+
def create_migration_file
|
|
21
21
|
if separate_database?
|
|
22
22
|
migration_template "add_outbox.rb.erb",
|
|
23
23
|
"db/pgbus_migrate/add_pgbus_outbox.rb"
|
|
@@ -17,7 +17,7 @@ module Pgbus
|
|
|
17
17
|
default: nil,
|
|
18
18
|
desc: "Use a separate database for pgbus tables (e.g. --database=pgbus)"
|
|
19
19
|
|
|
20
|
-
def
|
|
20
|
+
def create_migration_file
|
|
21
21
|
if separate_database?
|
|
22
22
|
migration_template "add_queue_states.rb.erb",
|
|
23
23
|
"db/pgbus_migrate/add_pgbus_queue_states.rb"
|
|
@@ -17,7 +17,7 @@ module Pgbus
|
|
|
17
17
|
default: nil,
|
|
18
18
|
desc: "Use a separate database for pgbus tables (e.g. --database=pgbus)"
|
|
19
19
|
|
|
20
|
-
def
|
|
20
|
+
def create_migration_file
|
|
21
21
|
if separate_database?
|
|
22
22
|
migration_template "add_recurring_tables.rb.erb",
|
|
23
23
|
"db/pgbus_migrate/add_pgbus_recurring_tables.rb"
|
|
@@ -24,7 +24,7 @@ module Pgbus
|
|
|
24
24
|
desc: "Use a separate database for pgbus tables (e.g. --database=pgbus). " \
|
|
25
25
|
"Migrations go to db/pgbus_migrate/ and schema to db/pgbus_schema.rb"
|
|
26
26
|
|
|
27
|
-
def
|
|
27
|
+
def create_migration_file
|
|
28
28
|
if separate_database?
|
|
29
29
|
migration_template "migration.rb.erb",
|
|
30
30
|
"db/pgbus_migrate/create_pgbus_tables.rb"
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# Creates the pgbus_job_locks table together with its three indexes:
# a unique index on lock_key, an index on expires_at, and a composite
# index on (state, owner_pid).
class AddPgbusJobLocks < ActiveRecord::Migration<%= migration_version %>
  def change
    create_table :pgbus_job_locks do |t|
      t.string :lock_key, null: false
      t.string :job_class, null: false
      t.string :job_id
      t.string :state, null: false, default: "queued"
      t.integer :owner_pid
      t.string :owner_hostname
      t.datetime :locked_at, null: false, default: -> { "CURRENT_TIMESTAMP" }
      t.datetime :expires_at, null: false

      # Indexes are declared with the table so the whole schema reads in one place.
      t.index :lock_key, unique: true, name: "idx_pgbus_job_locks_key"
      t.index :expires_at, name: "idx_pgbus_job_locks_expires"
      t.index [:state, :owner_pid], name: "idx_pgbus_job_locks_reaper"
    end
  end
end
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# Creates the pgbus_job_stats table (one row per recorded job execution)
# with indexes on created_at, (job_class, created_at) and (status, created_at).
class AddPgbusJobStats < ActiveRecord::Migration<%= migration_version %>
  def change
    create_table :pgbus_job_stats do |t|
      t.string :job_class, null: false
      t.string :queue_name, null: false
      t.string :status, null: false
      t.integer :duration_ms, null: false, default: 0
      t.datetime :created_at, null: false, default: -> { "CURRENT_TIMESTAMP" }

      # Indexes are declared with the table so the whole schema reads in one place.
      t.index :created_at, name: "idx_pgbus_job_stats_time"
      t.index [:job_class, :created_at], name: "idx_pgbus_job_stats_class_time"
      t.index [:status, :created_at], name: "idx_pgbus_job_stats_status_time"
    end
  end
end
|
|
@@ -17,7 +17,7 @@ module Pgbus
|
|
|
17
17
|
default: nil,
|
|
18
18
|
desc: "Use a separate database for pgbus tables (e.g. --database=pgbus)"
|
|
19
19
|
|
|
20
|
-
def
|
|
20
|
+
def create_migration_file
|
|
21
21
|
if separate_database?
|
|
22
22
|
migration_template "upgrade_pgmq.rb.erb",
|
|
23
23
|
"db/pgbus_migrate/upgrade_pgmq_to_v#{target_version_slug}.rb"
|
|
@@ -9,8 +9,11 @@ module Pgbus
|
|
|
9
9
|
queue = active_job.queue_name || Pgbus.configuration.default_queue
|
|
10
10
|
payload_hash = Serializer.serialize_job_hash(active_job)
|
|
11
11
|
payload_hash = Concurrency.inject_metadata(active_job, payload_hash)
|
|
12
|
+
payload_hash = Uniqueness.inject_metadata(active_job, payload_hash)
|
|
12
13
|
payload_hash = inject_batch_metadata(payload_hash)
|
|
13
14
|
|
|
15
|
+
return active_job if uniqueness_rejected?(active_job, payload_hash)
|
|
16
|
+
|
|
14
17
|
enqueue_with_concurrency(active_job, queue, payload_hash)
|
|
15
18
|
end
|
|
16
19
|
|
|
@@ -18,14 +21,27 @@ module Pgbus
|
|
|
18
21
|
queue = active_job.queue_name || Pgbus.configuration.default_queue
|
|
19
22
|
payload_hash = Serializer.serialize_job_hash(active_job)
|
|
20
23
|
payload_hash = Concurrency.inject_metadata(active_job, payload_hash)
|
|
24
|
+
payload_hash = Uniqueness.inject_metadata(active_job, payload_hash)
|
|
21
25
|
payload_hash = inject_batch_metadata(payload_hash)
|
|
22
26
|
delay = [(timestamp - Time.now.to_f).ceil, 0].max
|
|
23
27
|
|
|
28
|
+
return active_job if uniqueness_rejected?(active_job, payload_hash)
|
|
29
|
+
|
|
24
30
|
enqueue_with_concurrency(active_job, queue, payload_hash, delay: delay)
|
|
25
31
|
end
|
|
26
32
|
|
|
27
33
|
def enqueue_all(active_jobs)
|
|
28
|
-
|
|
34
|
+
# Jobs with uniqueness must go through individual enqueue to acquire locks
|
|
35
|
+
unique, bulk = active_jobs.partition { |j| Uniqueness.uniqueness_config(j) }
|
|
36
|
+
unique.each do |j|
|
|
37
|
+
if j.scheduled_at && j.scheduled_at > Time.now
|
|
38
|
+
enqueue_at(j, j.scheduled_at.to_f)
|
|
39
|
+
else
|
|
40
|
+
enqueue(j)
|
|
41
|
+
end
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
bulk.group_by { |j| j.queue_name || Pgbus.configuration.default_queue }.each do |queue, jobs|
|
|
29
45
|
enqueue_immediate(queue, jobs.reject { |j| j.scheduled_at && j.scheduled_at > Time.now })
|
|
30
46
|
jobs.select { |j| j.scheduled_at && j.scheduled_at > Time.now }.each { |j| enqueue_at(j, j.scheduled_at.to_f) }
|
|
31
47
|
end
|
|
@@ -54,10 +70,20 @@ module Pgbus
|
|
|
54
70
|
active_job.provider_job_id = msg_id
|
|
55
71
|
end
|
|
56
72
|
|
|
73
|
+
Thread.current[:pgbus_acquired_uniqueness_key] = nil
|
|
57
74
|
active_job
|
|
58
|
-
rescue
|
|
59
|
-
|
|
60
|
-
|
|
75
|
+
rescue StandardError => e
|
|
76
|
+
# Roll back the uniqueness lock if enqueue failed
|
|
77
|
+
rollback_key = Thread.current[:pgbus_acquired_uniqueness_key]
|
|
78
|
+
if rollback_key
|
|
79
|
+
begin
|
|
80
|
+
Uniqueness.release_lock(rollback_key)
|
|
81
|
+
rescue StandardError => rollback_error
|
|
82
|
+
Pgbus.logger.warn { "[Pgbus] Lock rollback failed: #{rollback_error.message}" }
|
|
83
|
+
end
|
|
84
|
+
Thread.current[:pgbus_acquired_uniqueness_key] = nil
|
|
85
|
+
end
|
|
86
|
+
raise e
|
|
61
87
|
end
|
|
62
88
|
|
|
63
89
|
def concurrency_config(active_job)
|
|
@@ -81,6 +107,34 @@ module Pgbus
|
|
|
81
107
|
end
|
|
82
108
|
end
|
|
83
109
|
|
|
110
|
+
# Decides whether an enqueue must be dropped because of a uniqueness conflict.
#
# Returns false (proceed with the enqueue) when the payload carries no
# uniqueness key, when no enqueue-time lock is needed (:no_lock, e.g. the
# :while_executing strategy), or when the lock was acquired. In the acquired
# case the key is remembered in a thread-local so it can be released if the
# enqueue itself fails.
#
# Any other lock result is a conflict: on_conflict :reject raises
# JobNotUnique; :discard and :log write a log line; every non-reject policy
# returns true (do not enqueue).
def uniqueness_rejected?(active_job, payload_hash)
  key = Uniqueness.extract_key(payload_hash)
  return false unless key

  case Uniqueness.acquire_enqueue_lock(key, active_job)
  when :no_lock
    false
  when :acquired
    Thread.current[:pgbus_acquired_uniqueness_key] = key
    false
  else
    policy = Uniqueness.uniqueness_config(active_job)[:on_conflict]
    raise JobNotUnique, "Job #{active_job.class.name} is already locked" if policy == :reject

    if policy == :discard
      Pgbus.logger.info { "[Pgbus] Discarding duplicate job #{active_job.class.name}" }
    elsif policy == :log
      Pgbus.logger.warn { "[Pgbus] Duplicate job #{active_job.class.name} detected" }
    end
    true
  end
end
|
|
137
|
+
|
|
84
138
|
def inject_batch_metadata(payload_hash)
|
|
85
139
|
batch_id = Thread.current[:pgbus_batch_id]
|
|
86
140
|
return payload_hash unless batch_id
|
|
@@ -11,6 +11,7 @@ module Pgbus
|
|
|
11
11
|
end
|
|
12
12
|
|
|
13
13
|
def execute(message, queue_name, source_queue: nil)
|
|
14
|
+
execution_start = monotonic_now
|
|
14
15
|
payload = JSON.parse(message.message)
|
|
15
16
|
read_count = message.read_ct.to_i
|
|
16
17
|
|
|
@@ -18,10 +19,32 @@ module Pgbus
|
|
|
18
19
|
handle_dead_letter(message, queue_name, payload, source_queue: source_queue)
|
|
19
20
|
signal_concurrency(payload)
|
|
20
21
|
signal_batch_discarded(payload)
|
|
22
|
+
Uniqueness.release_lock(Uniqueness.extract_key(payload))
|
|
23
|
+
record_stat(payload, queue_name, "dead_lettered", execution_start)
|
|
21
24
|
return :dead_lettered
|
|
22
25
|
end
|
|
23
26
|
|
|
24
27
|
job_class = payload["job_class"]
|
|
28
|
+
uniqueness_key = Uniqueness.extract_key(payload)
|
|
29
|
+
uniqueness_strategy = Uniqueness.extract_strategy(payload)
|
|
30
|
+
uniqueness_ttl = payload[Uniqueness::TTL_KEY] || Uniqueness::DEFAULT_LOCK_TTL
|
|
31
|
+
|
|
32
|
+
if uniqueness_key
|
|
33
|
+
case uniqueness_strategy
|
|
34
|
+
when :until_executed
|
|
35
|
+
# Transition the queued lock to executing state with our PID.
|
|
36
|
+
# The lock was acquired at enqueue time — now we claim ownership
|
|
37
|
+
# so the reaper can correlate it with our heartbeat.
|
|
38
|
+
Uniqueness.claim_for_execution!(uniqueness_key, ttl: uniqueness_ttl)
|
|
39
|
+
when :while_executing
|
|
40
|
+
# Acquire the lock now. If another worker is already executing
|
|
41
|
+
# this job, skip it — VT will expire and it'll be retried.
|
|
42
|
+
unless Uniqueness.acquire_execution_lock(uniqueness_key, payload)
|
|
43
|
+
Pgbus.logger.info { "[Pgbus] Skipping duplicate execution for #{job_class}" }
|
|
44
|
+
return :skipped
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
end
|
|
25
48
|
|
|
26
49
|
job_succeeded = false
|
|
27
50
|
|
|
@@ -33,10 +56,12 @@ module Pgbus
|
|
|
33
56
|
end
|
|
34
57
|
|
|
35
58
|
instrument("pgbus.job_completed", queue: queue_name, job_class: job_class)
|
|
59
|
+
record_stat(payload, queue_name, "success", execution_start)
|
|
36
60
|
:success
|
|
37
61
|
rescue StandardError => e
|
|
38
62
|
handle_failure(message, queue_name, e)
|
|
39
63
|
instrument("pgbus.job_failed", queue: queue_name, job_class: payload&.dig("job_class"), error: e.class.name)
|
|
64
|
+
record_stat(payload, queue_name, "failed", execution_start)
|
|
40
65
|
# Don't signal concurrency on transient failure — the job will be retried.
|
|
41
66
|
# Semaphore is released only on success or dead-lettering.
|
|
42
67
|
:failed
|
|
@@ -47,6 +72,8 @@ module Pgbus
|
|
|
47
72
|
if job_succeeded
|
|
48
73
|
signal_concurrency(payload)
|
|
49
74
|
signal_batch_completed(payload)
|
|
75
|
+
# Release uniqueness lock on successful completion (both strategies)
|
|
76
|
+
Uniqueness.release_lock(uniqueness_key) if uniqueness_key
|
|
50
77
|
end
|
|
51
78
|
end
|
|
52
79
|
|
|
@@ -60,6 +87,24 @@ module Pgbus
|
|
|
60
87
|
end
|
|
61
88
|
end
|
|
62
89
|
|
|
90
|
+
# Current reading of the monotonic clock as a Float number of seconds.
# Used for measuring elapsed time; unaffected by wall-clock adjustments.
def monotonic_now
  ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :float_second)
end
|
|
93
|
+
|
|
94
|
+
# Persists one job-execution stat row via JobStat.record!.
#
# payload    - parsed job payload (may be nil); its "job_class" entry is
#              recorded, falling back to "unknown".
# queue_name - queue the job was read from.
# status     - status string to store (callers pass e.g. "success", "failed").
# start_time - monotonic_now reading taken when execution started.
#
# Does nothing when config.stats_enabled is falsy. Any StandardError is
# logged at debug level and swallowed so stat recording never fails a job.
def record_stat(payload, queue_name, status, start_time)
  return unless config.stats_enabled

  elapsed_seconds = monotonic_now - start_time
  attributes = {
    job_class: payload&.dig("job_class") || "unknown",
    queue_name: queue_name,
    status: status,
    duration_ms: (elapsed_seconds * 1000).round
  }
  JobStat.record!(**attributes)
rescue StandardError => e
  Pgbus.logger.debug { "[Pgbus] Stat recording failed: #{e.message}" }
end
|
|
107
|
+
|
|
63
108
|
def handle_failure(_message, _queue_name, error)
|
|
64
109
|
Pgbus.logger.error { "[Pgbus] Job failed: #{error.class}: #{error.message}" }
|
|
65
110
|
Pgbus.logger.debug { error.backtrace&.join("\n") }
|
data/lib/pgbus/client.rb
CHANGED
|
@@ -32,14 +32,11 @@ module Pgbus
|
|
|
32
32
|
)
|
|
33
33
|
@pgmq_mutex = Mutex.new
|
|
34
34
|
@queues_created = Concurrent::Map.new
|
|
35
|
+
@queue_strategy = QueueFactory.for(config)
|
|
35
36
|
end
|
|
36
37
|
|
|
37
38
|
def ensure_queue(name)
|
|
38
|
-
|
|
39
|
-
config.priority_queue_names(name).each { |pq| ensure_single_queue(pq) }
|
|
40
|
-
else
|
|
41
|
-
ensure_single_queue(config.queue_name(name))
|
|
42
|
-
end
|
|
39
|
+
@queue_strategy.physical_queue_names(name).each { |pq| ensure_single_queue(pq) }
|
|
43
40
|
rescue PGMQ::Errors::ConnectionError => e
|
|
44
41
|
raise Pgbus::SchemaNotReady,
|
|
45
42
|
"PGMQ schema is not available (#{e.message}). Run `rails db:migrate` for the pgbus database."
|
|
@@ -56,7 +53,7 @@ module Pgbus
|
|
|
56
53
|
end
|
|
57
54
|
|
|
58
55
|
def send_message(queue_name, payload, headers: nil, delay: 0, priority: nil)
|
|
59
|
-
target =
|
|
56
|
+
target = @queue_strategy.target_queue(queue_name, priority)
|
|
60
57
|
ensure_queue(queue_name)
|
|
61
58
|
Instrumentation.instrument("pgbus.client.send_message", queue: target) do
|
|
62
59
|
synchronized { @pgmq.produce(target, serialize(payload), headers: headers && serialize(headers), delay: delay) }
|
|
@@ -90,7 +87,11 @@ module Pgbus
|
|
|
90
87
|
# Read from priority sub-queues, highest priority (p0) first.
|
|
91
88
|
# Returns [priority_queue_name, messages] pairs.
|
|
92
89
|
def read_batch_prioritized(queue_name, qty:, vt: nil)
|
|
93
|
-
|
|
90
|
+
unless @queue_strategy.priority?
|
|
91
|
+
return (read_batch(queue_name, qty: qty, vt: vt) || []).map do |m|
|
|
92
|
+
[config.queue_name(queue_name), m]
|
|
93
|
+
end
|
|
94
|
+
end
|
|
94
95
|
|
|
95
96
|
remaining = qty
|
|
96
97
|
results = []
|
|
@@ -239,21 +240,6 @@ module Pgbus
|
|
|
239
240
|
end
|
|
240
241
|
end
|
|
241
242
|
|
|
242
|
-
def priority_enabled?
|
|
243
|
-
config.priority_levels && config.priority_levels > 1
|
|
244
|
-
end
|
|
245
|
-
|
|
246
|
-
def resolve_target_queue(queue_name, priority)
|
|
247
|
-
if priority_enabled? && priority
|
|
248
|
-
clamped = priority.clamp(0, config.priority_levels - 1)
|
|
249
|
-
config.priority_queue_name(queue_name, clamped)
|
|
250
|
-
elsif priority_enabled?
|
|
251
|
-
config.priority_queue_name(queue_name, config.default_priority.clamp(0, config.priority_levels - 1))
|
|
252
|
-
else
|
|
253
|
-
config.queue_name(queue_name)
|
|
254
|
-
end
|
|
255
|
-
end
|
|
256
|
-
|
|
257
243
|
# Serialize all PGMQ operations through a single mutex.
|
|
258
244
|
# PG::Connection is not thread-safe — concurrent access from worker
|
|
259
245
|
# threads causes segfaults and result corruption.
|
data/lib/pgbus/configuration.rb
CHANGED
|
@@ -59,6 +59,9 @@ module Pgbus
|
|
|
59
59
|
# Requires a matching entry in config/database.yml under the "pgbus" key.
|
|
60
60
|
attr_accessor :connects_to
|
|
61
61
|
|
|
62
|
+
# Job stats
|
|
63
|
+
attr_accessor :stats_retention, :stats_enabled
|
|
64
|
+
|
|
62
65
|
# Web dashboard
|
|
63
66
|
attr_accessor :web_auth, :web_refresh_interval, :web_per_page, :web_live_updates, :web_data_source
|
|
64
67
|
|
|
@@ -120,6 +123,9 @@ module Pgbus
|
|
|
120
123
|
@skip_recurring = false
|
|
121
124
|
@recurring_execution_retention = 7 * 24 * 3600 # 7 days
|
|
122
125
|
|
|
126
|
+
@stats_enabled = true
|
|
127
|
+
@stats_retention = 7 * 24 * 3600 # 7 days
|
|
128
|
+
|
|
123
129
|
@connects_to = nil
|
|
124
130
|
|
|
125
131
|
@web_auth = nil
|
data/lib/pgbus/engine.rb
CHANGED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Pgbus
|
|
4
|
+
module Process
|
|
5
|
+
# Implements consumer priority by checking whether higher-priority
|
|
6
|
+
# workers are active for the same queues. When a higher-priority
|
|
7
|
+
# worker is healthy and not at its prefetch limit, lower-priority
|
|
8
|
+
# workers yield by using a longer polling interval.
|
|
9
|
+
#
|
|
10
|
+
# Inspired by LavinMQ's consumer priority where higher-priority
|
|
11
|
+
# consumers are served first and lower-priority consumers wait
|
|
12
|
+
# until all higher-priority consumers are at their prefetch limit.
|
|
13
|
+
module ConsumerPriority
  # Factor applied to the base polling interval of a worker that yields.
  YIELD_BACKOFF_FACTOR = 3

  # Check if this worker should yield to a higher-priority worker.
  #
  # Returns true when another live worker sharing at least one of +queues+
  # reports a consumer_priority strictly greater than +my_priority+.
  # Any error during the lookup is logged at debug level and treated as
  # "do not yield".
  def self.should_yield?(queues:, my_priority:, my_pid:)
    my_priority < max_active_priority(queues, my_pid)
  rescue StandardError => e
    Pgbus.logger.debug { "[Pgbus] Consumer priority check failed: #{e.message}" }
    false
  end

  # Returns the highest consumer_priority among workers whose heartbeat is
  # newer than Heartbeat::ALIVE_THRESHOLD and that share at least one queue
  # with +queues+, excluding the current worker (by PID). Never returns less
  # than 0.
  #
  # Rows whose metadata cannot be interpreted (invalid JSON, not a hash,
  # non-numeric priority) are skipped individually so that one bad heartbeat
  # row does not disable the priority check for every worker.
  def self.max_active_priority(queues, my_pid)
    conn = Pgbus.configuration.connects_to ? Pgbus::ApplicationRecord.connection : ActiveRecord::Base.connection
    rows = conn.select_all(
      "SELECT metadata FROM pgbus_processes WHERE kind = 'worker' AND pid != $1 AND last_heartbeat_at > $2",
      "Pgbus ConsumerPriority",
      [my_pid, Time.now.utc - Heartbeat::ALIVE_THRESHOLD]
    )

    # Compare names as strings: metadata read back from JSON always has
    # string queue names, while callers may pass symbols.
    wanted = Array(queues).map(&:to_s)
    priorities = rows.filter_map { |row| priority_if_overlapping(row["metadata"], wanted) }
    [0, *priorities].max
  end

  # Calculate the effective polling interval for this worker.
  # A worker at or above +max_priority+ keeps +base_interval+; a
  # lower-priority worker backs off to YIELD_BACKOFF_FACTOR times that.
  def self.effective_polling_interval(base_interval:, my_priority:, max_priority:)
    my_priority >= max_priority ? base_interval : base_interval * YIELD_BACKOFF_FACTOR
  end

  # Extracts the consumer priority from one worker's metadata, or nil when
  # the worker shares no queue with +wanted+ or the metadata is unusable.
  def self.priority_if_overlapping(metadata, wanted)
    metadata = JSON.parse(metadata) if metadata.is_a?(String)
    return unless metadata.is_a?(Hash)

    other_queues = Array(metadata["queues"]).map(&:to_s)
    return if (wanted & other_queues).empty?

    Integer(metadata["consumer_priority"] || 0)
  rescue JSON::ParserError, ArgumentError, TypeError
    nil
  end
  private_class_method :priority_if_overlapping
end
|
|
63
|
+
end
|
|
64
|
+
end
|
|
@@ -13,6 +13,8 @@ module Pgbus
|
|
|
13
13
|
RECURRING_CLEANUP_INTERVAL = 3600 # Run recurring execution cleanup every hour
|
|
14
14
|
ARCHIVE_COMPACTION_INTERVAL = 3600 # Run archive compaction every hour
|
|
15
15
|
OUTBOX_CLEANUP_INTERVAL = 3600 # Run outbox cleanup every hour
|
|
16
|
+
JOB_LOCK_CLEANUP_INTERVAL = 300 # Run job lock cleanup every 5 minutes
|
|
17
|
+
STATS_CLEANUP_INTERVAL = 3600 # Run stats cleanup every hour
|
|
16
18
|
|
|
17
19
|
attr_reader :config
|
|
18
20
|
|
|
@@ -26,6 +28,8 @@ module Pgbus
|
|
|
26
28
|
@last_recurring_cleanup_at = Time.now
|
|
27
29
|
@last_archive_compaction_at = Time.now
|
|
28
30
|
@last_outbox_cleanup_at = Time.now
|
|
31
|
+
@last_job_lock_cleanup_at = Time.now
|
|
32
|
+
@last_stats_cleanup_at = Time.now
|
|
29
33
|
end
|
|
30
34
|
|
|
31
35
|
def run
|
|
@@ -70,6 +74,8 @@ module Pgbus
|
|
|
70
74
|
run_if_due(now, :@last_recurring_cleanup_at, RECURRING_CLEANUP_INTERVAL) { cleanup_recurring_executions }
|
|
71
75
|
run_if_due(now, :@last_archive_compaction_at, archive_compaction_interval) { compact_archives }
|
|
72
76
|
run_if_due(now, :@last_outbox_cleanup_at, OUTBOX_CLEANUP_INTERVAL) { cleanup_outbox }
|
|
77
|
+
run_if_due(now, :@last_job_lock_cleanup_at, JOB_LOCK_CLEANUP_INTERVAL) { cleanup_job_locks }
|
|
78
|
+
run_if_due(now, :@last_stats_cleanup_at, STATS_CLEANUP_INTERVAL) { cleanup_stats }
|
|
73
79
|
end
|
|
74
80
|
|
|
75
81
|
# Only update the timestamp when the block succeeds.
|
|
@@ -127,6 +133,29 @@ module Pgbus
|
|
|
127
133
|
Pgbus.logger.warn { "[Pgbus] Batch cleanup failed: #{e.message}" }
|
|
128
134
|
end
|
|
129
135
|
|
|
136
|
+
# Deletes job stat rows older than the configured retention window.
# Skipped when stats are disabled or stats_retention is nil / not positive.
# Logs the number of deleted rows at debug level when any were removed.
def cleanup_stats
  return unless config.stats_enabled

  window = config.stats_retention
  return unless window&.positive?

  cutoff = Time.now.utc - window
  removed = JobStat.cleanup!(older_than: cutoff)
  return unless removed.positive?

  Pgbus.logger.debug { "[Pgbus] Cleaned up #{removed} old job stats" }
end
|
|
145
|
+
|
|
146
|
+
# Two-stage job lock cleanup.
#
# Stage 1 calls JobLock.reap_orphaned! (locks whose owning worker is no
# longer alive); stage 2 calls JobLock.cleanup_expired! as a last resort for
# locks whose TTL has passed. Errors are deliberately not rescued here so
# that run_if_due sees the failure and retries on the next tick.
def cleanup_job_locks
  orphaned_count = JobLock.reap_orphaned!
  if orphaned_count.positive?
    Pgbus.logger.info { "[Pgbus] Reaped #{orphaned_count} orphaned job locks" }
  end

  expired_count = JobLock.cleanup_expired!
  return unless expired_count.positive?

  Pgbus.logger.debug { "[Pgbus] Cleaned up #{expired_count} expired job locks" }
end
|
|
158
|
+
|
|
130
159
|
def cleanup_outbox
|
|
131
160
|
return unless config.outbox_enabled
|
|
132
161
|
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "zlib"
|
|
4
|
+
|
|
5
|
+
module Pgbus
|
|
6
|
+
module Process
|
|
7
|
+
# Manages PostgreSQL advisory locks for single-active-consumer mode.
|
|
8
|
+
# Only one worker process can hold the lock for a given queue at a time.
|
|
9
|
+
# Other workers skip the queue and process other queues instead.
|
|
10
|
+
#
|
|
11
|
+
# Uses pg_try_advisory_lock (non-blocking) so workers never wait —
|
|
12
|
+
# they simply skip queues they can't lock and try again next cycle.
|
|
13
|
+
#
|
|
14
|
+
# Locks are session-level and automatically released when the connection
|
|
15
|
+
# closes (including on crash), so no manual cleanup is needed.
|
|
16
|
+
class QueueLock
  # Fixed first key of the two-key advisory lock, used to avoid collisions
  # with application advisory locks. The value is the ASCII bytes "Pgbu"
  # (0x50 0x67 0x62 0x75) and fits in a signed 32-bit integer.
  LOCK_NAMESPACE = 0x5067_6275

  def initialize
    # queue name => advisory lock id currently held by this object
    @held_locks = Concurrent::Map.new
  end

  # Try to acquire an advisory lock for the given queue name (non-blocking).
  #
  # Returns true if acquired (or already recorded as held by this object),
  # false if the lock could not be taken or the query failed. Failures are
  # logged as warnings, never raised.
  def try_lock(queue_name)
    return true if @held_locks[queue_name]

    lock_id = lock_id_for(queue_name)
    result = connection.select_value(
      "SELECT pg_try_advisory_lock(#{LOCK_NAMESPACE}, #{lock_id})"
    )
    # Only an explicit boolean true (or the un-typecast "t") counts; a raw
    # "f" string is truthy in Ruby and must not be mistaken for success.
    return false unless pg_true?(result)

    @held_locks[queue_name] = lock_id
    true
  rescue StandardError => e
    Pgbus.logger.warn { "[Pgbus] Advisory lock failed for #{queue_name}: #{e.message}" }
    false
  end

  # Release the advisory lock for a queue. Called during shutdown.
  #
  # Returns nil when this object holds no lock for the queue; otherwise the
  # value returned by pg_advisory_unlock. A false result means the database
  # session did not hold the lock and is reported as a warning.
  # NOTE(review): session-level locks belong to one database connection; if
  # the pool hands out a different connection than the one used in try_lock,
  # the unlock cannot succeed -- confirm callers use a stable connection.
  def unlock(queue_name)
    lock_id = @held_locks.delete(queue_name)
    return unless lock_id

    released = connection.select_value(
      "SELECT pg_advisory_unlock(#{LOCK_NAMESPACE}, #{lock_id})"
    )
    unless pg_true?(released)
      Pgbus.logger.warn { "[Pgbus] Advisory lock for #{queue_name} was not held by this session" }
    end
    released
  rescue StandardError => e
    Pgbus.logger.warn { "[Pgbus] Advisory unlock failed for #{queue_name}: #{e.message}" }
  end

  # Release all held locks. Iterates over a snapshot of the held queue names
  # because unlock removes entries as it goes.
  def unlock_all
    held_queues.each { |queue_name| unlock(queue_name) }
  end

  # True when this object has recorded a lock for the queue.
  def locked?(queue_name)
    @held_locks.key?(queue_name)
  end

  # Names of all queues this object has recorded locks for.
  def held_queues
    @held_locks.keys
  end

  private

  # Stable, non-negative 31-bit id derived from the queue name (CRC32 with
  # the top bit masked off) so it fits a signed 32-bit lock key.
  def lock_id_for(queue_name)
    Zlib.crc32(queue_name.to_s) & 0x7FFFFFFF
  end

  # Connection used for lock queries: the pgbus-specific record class when
  # connects_to is configured, otherwise the application's default.
  def connection
    if Pgbus.configuration.connects_to
      Pgbus::ApplicationRecord.connection
    else
      ActiveRecord::Base.connection
    end
  end

  # Interprets a boolean query result, accepting the typecast value and the
  # raw PostgreSQL text form.
  def pg_true?(value)
    value == true || value == "t"
  end
end
|
|
86
|
+
end
|
|
87
|
+
end
|
|
@@ -63,12 +63,17 @@ module Pgbus
|
|
|
63
63
|
def fork_worker(worker_config)
|
|
64
64
|
queues = worker_config[:queues] || worker_config["queues"] || [config.default_queue]
|
|
65
65
|
threads = worker_config[:threads] || worker_config["threads"] || 5
|
|
66
|
+
single_active = worker_config[:single_active_consumer] || worker_config["single_active_consumer"] || false
|
|
67
|
+
priority = worker_config[:consumer_priority] || worker_config["consumer_priority"] || 0
|
|
66
68
|
|
|
67
69
|
pid = fork do
|
|
68
70
|
restore_signals
|
|
69
71
|
setup_child_process
|
|
70
72
|
load_rails_app
|
|
71
|
-
worker = Worker.new(
|
|
73
|
+
worker = Worker.new(
|
|
74
|
+
queues: queues, threads: threads, config: config,
|
|
75
|
+
single_active_consumer: single_active, consumer_priority: priority
|
|
76
|
+
)
|
|
72
77
|
worker.run
|
|
73
78
|
end
|
|
74
79
|
|