pgbus 0.3.2 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/controllers/pgbus/dead_letter_controller.rb +17 -0
- data/app/controllers/pgbus/jobs_controller.rb +36 -0
- data/app/controllers/pgbus/locks_controller.rb +25 -0
- data/app/frontend/pgbus/application.js +45 -0
- data/app/models/pgbus/blocked_execution.rb +1 -1
- data/app/models/pgbus/job_lock.rb +16 -8
- data/app/models/pgbus/uniqueness_key.rb +36 -0
- data/app/views/pgbus/dead_letter/_messages_table.html.erb +22 -2
- data/app/views/pgbus/dead_letter/index.html.erb +9 -1
- data/app/views/pgbus/jobs/_enqueued_table.html.erb +36 -6
- data/app/views/pgbus/jobs/_failed_table.html.erb +35 -4
- data/app/views/pgbus/locks/index.html.erb +53 -28
- data/config/locales/da.yml +19 -23
- data/config/locales/de.yml +19 -23
- data/config/locales/en.yml +48 -22
- data/config/locales/es.yml +19 -23
- data/config/locales/fi.yml +19 -23
- data/config/locales/fr.yml +19 -23
- data/config/locales/it.yml +19 -23
- data/config/locales/ja.yml +19 -23
- data/config/locales/nb.yml +19 -23
- data/config/locales/nl.yml +19 -23
- data/config/locales/pt.yml +19 -23
- data/config/locales/sv.yml +19 -23
- data/config/routes.rb +12 -1
- data/lib/generators/pgbus/migrate_job_locks_generator.rb +56 -0
- data/lib/generators/pgbus/templates/add_uniqueness_keys.rb.erb +13 -0
- data/lib/generators/pgbus/templates/migrate_job_locks_to_uniqueness_keys.rb.erb +33 -0
- data/lib/pgbus/active_job/adapter.rb +9 -4
- data/lib/pgbus/active_job/executor.rb +38 -19
- data/lib/pgbus/circuit_breaker.rb +2 -2
- data/lib/pgbus/client.rb +18 -2
- data/lib/pgbus/concurrency/blocked_execution.rb +3 -3
- data/lib/pgbus/concurrency/semaphore.rb +2 -2
- data/lib/pgbus/process/dispatcher.rb +53 -26
- data/lib/pgbus/process/worker.rb +7 -3
- data/lib/pgbus/recurring/schedule.rb +39 -36
- data/lib/pgbus/recurring/scheduler.rb +1 -1
- data/lib/pgbus/stat_buffer.rb +92 -0
- data/lib/pgbus/uniqueness.rb +24 -39
- data/lib/pgbus/version.rb +1 -1
- data/lib/pgbus/web/data_source.rb +46 -15
- metadata +6 -1
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "rails/generators"
|
|
4
|
+
require "rails/generators/active_record"
|
|
5
|
+
|
|
6
|
+
module Pgbus
  module Generators
    # Generates the migration that replaces the legacy pgbus_job_locks table
    # with the pgbus_uniqueness_keys table.
    #
    # With --database=NAME the migration is written under db/pgbus_migrate;
    # without it, under db/migrate.
    class MigrateJobLocksGenerator < Rails::Generators::Base
      include ActiveRecord::Generators::Migration

      MIGRATION_TEMPLATE = "migrate_job_locks_to_uniqueness_keys.rb.erb"
      MIGRATION_FILENAME = "migrate_pgbus_job_locks_to_uniqueness_keys.rb"

      source_root File.expand_path("templates", __dir__)

      desc "Migrate pgbus_job_locks to lightweight pgbus_uniqueness_keys table"

      class_option :database,
                   type: :string,
                   default: nil,
                   desc: "Use a separate database for pgbus tables (e.g. --database=pgbus)"

      # Render the migration template into the directory for the target database.
      def create_migration_file
        migration_template MIGRATION_TEMPLATE, File.join(pgbus_migration_dir, MIGRATION_FILENAME)
      end

      # Print what the generated migration does and the steps to apply it.
      #
      # The wording mirrors the migration template: it creates a four-column
      # table, refuses to run while pgbus_job_locks still has rows, and does
      # not copy any existing locks.
      def display_post_install
        say ""
        say "Pgbus uniqueness keys migration created!", :green
        say ""
        say "This migration will:"
        say " 1. Create the new pgbus_uniqueness_keys table (4 columns, 1 index)"
        say " 2. Abort if pgbus_job_locks still contains rows (existing locks are not copied)"
        say " 3. Drop the old pgbus_job_locks table (8 columns, 3 indexes)"
        say ""
        say "Next steps:"
        say " 1. Stop pgbus workers and wait for pgbus_job_locks to drain"
        say " 2. Run: rails db:migrate#{":#{options[:database]}" if separate_database?}"
        say " 3. Restart pgbus: bin/pgbus start"
        say ""
      end

      private

      # Version suffix interpolated after ActiveRecord::Migration in the template.
      def migration_version
        "[#{ActiveRecord::Migration.current_version}]"
      end

      # True when the --database option was given a non-blank value.
      def separate_database?
        options[:database].present?
      end

      # Directory that receives the generated migration file.
      def pgbus_migration_dir
        separate_database? ? "db/pgbus_migrate" : "db/migrate"
      end
    end
  end
end
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
class AddPgbusUniquenessKeys < ActiveRecord::Migration<%= migration_version %>
|
|
2
|
+
def change
|
|
3
|
+
create_table :pgbus_uniqueness_keys, id: false do |t|
|
|
4
|
+
t.string :lock_key, null: false
|
|
5
|
+
t.string :queue_name, null: false
|
|
6
|
+
t.bigint :msg_id, null: false
|
|
7
|
+
t.datetime :created_at, null: false, default: -> { "CURRENT_TIMESTAMP" }
|
|
8
|
+
end
|
|
9
|
+
|
|
10
|
+
add_index :pgbus_uniqueness_keys, :lock_key,
|
|
11
|
+
unique: true, name: "idx_pgbus_uniqueness_keys_key"
|
|
12
|
+
end
|
|
13
|
+
end
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
class MigratePgbusJobLocksToUniquenessKeys < ActiveRecord::Migration<%= migration_version %>
|
|
2
|
+
def up
|
|
3
|
+
# Create the new lightweight uniqueness keys table
|
|
4
|
+
unless table_exists?(:pgbus_uniqueness_keys)
|
|
5
|
+
create_table :pgbus_uniqueness_keys, id: false do |t|
|
|
6
|
+
t.string :lock_key, null: false
|
|
7
|
+
t.string :queue_name, null: false
|
|
8
|
+
t.bigint :msg_id, null: false
|
|
9
|
+
t.datetime :created_at, null: false, default: -> { "CURRENT_TIMESTAMP" }
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
add_index :pgbus_uniqueness_keys, :lock_key,
|
|
13
|
+
unique: true, name: "idx_pgbus_uniqueness_keys_key"
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
# Drop the old table. Require it to be empty — active locks should be
|
|
17
|
+
# drained before migrating (stop workers, let VT expire, restart).
|
|
18
|
+
if table_exists?(:pgbus_job_locks)
|
|
19
|
+
count = execute("SELECT COUNT(*) FROM pgbus_job_locks").first["count"].to_i
|
|
20
|
+
if count > 0
|
|
21
|
+
raise "pgbus_job_locks has #{count} active lock(s). " \
|
|
22
|
+
"Drain workers and wait for locks to clear before migrating."
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
drop_table :pgbus_job_locks
|
|
26
|
+
end
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
def down
|
|
30
|
+
raise ActiveRecord::IrreversibleMigration,
|
|
31
|
+
"Cannot safely reconstruct pgbus_job_locks from pgbus_uniqueness_keys"
|
|
32
|
+
end
|
|
33
|
+
end
|
|
@@ -23,7 +23,7 @@ module Pgbus
|
|
|
23
23
|
payload_hash = Concurrency.inject_metadata(active_job, payload_hash)
|
|
24
24
|
payload_hash = Uniqueness.inject_metadata(active_job, payload_hash)
|
|
25
25
|
payload_hash = inject_batch_metadata(payload_hash)
|
|
26
|
-
delay = [(timestamp - Time.
|
|
26
|
+
delay = [(timestamp - Time.current.to_f).ceil, 0].max
|
|
27
27
|
|
|
28
28
|
return active_job if uniqueness_rejected?(active_job, payload_hash)
|
|
29
29
|
|
|
@@ -34,7 +34,7 @@ module Pgbus
|
|
|
34
34
|
# Jobs with uniqueness must go through individual enqueue to acquire locks
|
|
35
35
|
unique, bulk = active_jobs.partition { |j| Uniqueness.uniqueness_config(j) }
|
|
36
36
|
unique.each do |j|
|
|
37
|
-
if j
|
|
37
|
+
if scheduled_in_future?(j)
|
|
38
38
|
enqueue_at(j, j.scheduled_at.to_f)
|
|
39
39
|
else
|
|
40
40
|
enqueue(j)
|
|
@@ -42,8 +42,9 @@ module Pgbus
|
|
|
42
42
|
end
|
|
43
43
|
|
|
44
44
|
bulk.group_by { |j| j.queue_name || Pgbus.configuration.default_queue }.each do |queue, jobs|
|
|
45
|
-
|
|
46
|
-
|
|
45
|
+
immediate, scheduled = jobs.partition { |j| !scheduled_in_future?(j) }
|
|
46
|
+
enqueue_immediate(queue, immediate)
|
|
47
|
+
scheduled.each { |j| enqueue_at(j, j.scheduled_at.to_f) }
|
|
47
48
|
end
|
|
48
49
|
|
|
49
50
|
active_jobs.count
|
|
@@ -159,6 +160,10 @@ module Pgbus
|
|
|
159
160
|
raise
|
|
160
161
|
end
|
|
161
162
|
|
|
163
|
+
# Whether the job carries a scheduled_at that lies after the current time.
# Returns the falsy scheduled_at value itself (normally nil) when none is set.
def scheduled_in_future?(job)
  run_at = job.scheduled_at
  return run_at unless run_at

  run_at > Time.current
end
|
|
166
|
+
|
|
162
167
|
def enqueue_after_transaction_commit?
|
|
163
168
|
true
|
|
164
169
|
end
|
|
@@ -7,9 +7,10 @@ module Pgbus
|
|
|
7
7
|
class Executor
|
|
8
8
|
attr_reader :client, :config
|
|
9
9
|
|
|
10
|
-
def initialize(client: Pgbus.client, config: Pgbus.configuration)
|
|
10
|
+
def initialize(client: Pgbus.client, config: Pgbus.configuration, stat_buffer: nil)
|
|
11
11
|
@client = client
|
|
12
12
|
@config = config
|
|
13
|
+
@stat_buffer = stat_buffer
|
|
13
14
|
end
|
|
14
15
|
|
|
15
16
|
def execute(message, queue_name, source_queue: nil)
|
|
@@ -29,15 +30,14 @@ module Pgbus
|
|
|
29
30
|
job_class = payload["job_class"]
|
|
30
31
|
uniqueness_key = Uniqueness.extract_key(payload)
|
|
31
32
|
uniqueness_strategy = Uniqueness.extract_strategy(payload)
|
|
32
|
-
uniqueness_ttl = payload[Uniqueness::TTL_KEY] || Uniqueness::DEFAULT_LOCK_TTL
|
|
33
33
|
|
|
34
34
|
if uniqueness_key
|
|
35
35
|
case uniqueness_strategy
|
|
36
36
|
when :until_executed
|
|
37
|
-
#
|
|
38
|
-
# The
|
|
39
|
-
#
|
|
40
|
-
|
|
37
|
+
# No claim step needed — PGMQ's visibility timeout is the execution lock.
|
|
38
|
+
# The uniqueness key row was inserted at enqueue time and will be
|
|
39
|
+
# released on completion or DLQ.
|
|
40
|
+
nil
|
|
41
41
|
when :while_executing
|
|
42
42
|
# Acquire the lock now. If another worker is already executing
|
|
43
43
|
# this job, skip it — VT will expire and it'll be retried.
|
|
@@ -96,18 +96,20 @@ module Pgbus
|
|
|
96
96
|
def record_stat(payload, queue_name, status, start_time, message: nil)
|
|
97
97
|
return unless config.stats_enabled
|
|
98
98
|
|
|
99
|
-
|
|
100
|
-
enqueue_latency_ms = compute_enqueue_latency(message)
|
|
101
|
-
retry_count = message ? [message.read_ct.to_i - 1, 0].max : 0
|
|
102
|
-
|
|
103
|
-
JobStat.record!(
|
|
99
|
+
attrs = {
|
|
104
100
|
job_class: payload&.dig("job_class") || "unknown",
|
|
105
101
|
queue_name: queue_name,
|
|
106
102
|
status: status,
|
|
107
|
-
duration_ms:
|
|
108
|
-
enqueue_latency_ms:
|
|
109
|
-
retry_count:
|
|
110
|
-
|
|
103
|
+
duration_ms: ((monotonic_now - start_time) * 1000).round,
|
|
104
|
+
enqueue_latency_ms: compute_enqueue_latency(message),
|
|
105
|
+
retry_count: message ? [message.read_ct.to_i - 1, 0].max : 0
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
if @stat_buffer
|
|
109
|
+
@stat_buffer.push(attrs)
|
|
110
|
+
else
|
|
111
|
+
JobStat.record!(**attrs)
|
|
112
|
+
end
|
|
111
113
|
rescue StandardError => e
|
|
112
114
|
Pgbus.logger.debug { "[Pgbus] Stat recording failed: #{e.message}" }
|
|
113
115
|
end
|
|
@@ -115,15 +117,32 @@ module Pgbus
|
|
|
115
117
|
def compute_enqueue_latency(message)
|
|
116
118
|
return unless message
|
|
117
119
|
|
|
118
|
-
|
|
119
|
-
return unless
|
|
120
|
+
enqueued_at = message.enqueued_at
|
|
121
|
+
return unless enqueued_at
|
|
120
122
|
|
|
121
|
-
|
|
122
|
-
|
|
123
|
+
# Fast path: numeric epoch (float seconds) avoids Time.parse entirely.
|
|
124
|
+
# PGMQ returns enqueued_at as a Time or string depending on the driver.
|
|
125
|
+
case enqueued_at
|
|
126
|
+
when Numeric
|
|
127
|
+
[((Time.now.to_f - enqueued_at) * 1000).round, 0].max
|
|
128
|
+
when Time
|
|
129
|
+
[((Time.now.utc - enqueued_at.utc) * 1000).round, 0].max
|
|
130
|
+
else
|
|
131
|
+
parse_enqueue_latency_from_string(enqueued_at.to_s)
|
|
132
|
+
end
|
|
123
133
|
rescue ArgumentError, TypeError
|
|
124
134
|
nil
|
|
125
135
|
end
|
|
126
136
|
|
|
137
|
+
# Milliseconds elapsed since the timestamp in +str+, never negative.
#
# A string that does not end in "Z" or a four-digit offset (+HHMM / +HH:MM)
# is read as UTC, so the local system timezone cannot skew the result.
# NOTE(review): two-digit offsets such as "+00" do not match the pattern and
# also get " UTC" appended — confirm Time.parse handles that input as intended.
def parse_enqueue_latency_from_string(str)
  has_zone = str.match?(/[+-]\d{2}:?\d{2}\s*$|Z\s*$/i)
  stamp = Time.parse(has_zone ? str : "#{str} UTC")
  elapsed_ms = ((Time.now.utc - stamp) * 1000).round
  elapsed_ms.negative? ? 0 : elapsed_ms
end
|
|
145
|
+
|
|
127
146
|
def handle_failure(_message, _queue_name, error)
|
|
128
147
|
Pgbus.logger.error { "[Pgbus] Job failed: #{error.class}: #{error.message}" }
|
|
129
148
|
Pgbus.logger.debug { error.backtrace&.join("\n") }
|
|
@@ -29,10 +29,10 @@ module Pgbus
|
|
|
29
29
|
|
|
30
30
|
def paused?(queue_name)
|
|
31
31
|
cached = @pause_cache[queue_name]
|
|
32
|
-
return cached[:paused] if cached && (Time.
|
|
32
|
+
return cached[:paused] if cached && (Time.current - cached[:checked_at]) < @pause_cache_ttl
|
|
33
33
|
|
|
34
34
|
paused = check_paused(queue_name)
|
|
35
|
-
@pause_cache[queue_name] = { paused: paused, checked_at: Time.
|
|
35
|
+
@pause_cache[queue_name] = { paused: paused, checked_at: Time.current }
|
|
36
36
|
paused
|
|
37
37
|
end
|
|
38
38
|
|
data/lib/pgbus/client.rb
CHANGED
|
@@ -76,8 +76,7 @@ module Pgbus
|
|
|
76
76
|
def send_batch(queue_name, payloads, headers: nil, delay: 0)
|
|
77
77
|
full_name = config.queue_name(queue_name)
|
|
78
78
|
ensure_queue(queue_name)
|
|
79
|
-
serialized = payloads
|
|
80
|
-
serialized_headers = headers&.map { |h| h.nil? ? nil : serialize(h) }
|
|
79
|
+
serialized, serialized_headers = serialize_batch(payloads, headers)
|
|
81
80
|
Instrumentation.instrument("pgbus.client.send_batch", queue: full_name, size: payloads.size) do
|
|
82
81
|
synchronized { @pgmq.produce_batch(full_name, serialized, headers: serialized_headers, delay: delay) }
|
|
83
82
|
end
|
|
@@ -378,5 +377,22 @@ module Pgbus
|
|
|
378
377
|
JSON.generate(data)
|
|
379
378
|
end
|
|
380
379
|
end
|
|
380
|
+
|
|
381
|
+
# Serialize a batch of payloads and, when given, their headers.
#
# Every payload goes through #serialize. Headers are optional: a nil
# +headers+ yields nil, and a nil entry inside +headers+ stays nil.
# Each header is serialized independently of the payload count, so a
# headers array longer than +payloads+ is not silently truncated to nils.
#
# Returns [serialized_payloads, serialized_headers_or_nil].
def serialize_batch(payloads, headers)
  serialized = payloads.map { |payload| serialize(payload) }
  serialized_headers = headers&.map { |header| header.nil? ? nil : serialize(header) }

  [serialized, serialized_headers]
end
|
|
381
397
|
end
|
|
382
398
|
end
|
|
@@ -13,7 +13,7 @@ module Pgbus
|
|
|
13
13
|
queue_name: queue_name,
|
|
14
14
|
payload: JSON.generate(payload),
|
|
15
15
|
priority: priority,
|
|
16
|
-
expires_at: Time.
|
|
16
|
+
expires_at: Time.current + duration
|
|
17
17
|
)
|
|
18
18
|
end
|
|
19
19
|
|
|
@@ -45,7 +45,7 @@ module Pgbus
|
|
|
45
45
|
# Delete blocked executions that have expired.
|
|
46
46
|
# Returns the count of deleted rows.
|
|
47
47
|
def expire_stale
|
|
48
|
-
Pgbus::BlockedExecution.expired(Time.
|
|
48
|
+
Pgbus::BlockedExecution.expired(Time.current).delete_all
|
|
49
49
|
end
|
|
50
50
|
|
|
51
51
|
# Count blocked executions for a given key. Useful for testing/monitoring.
|
|
@@ -59,7 +59,7 @@ module Pgbus
|
|
|
59
59
|
scheduled_at = payload["scheduled_at"]
|
|
60
60
|
return default_delay unless scheduled_at
|
|
61
61
|
|
|
62
|
-
[Time.parse(scheduled_at).to_f - Time.
|
|
62
|
+
[Time.parse(scheduled_at).to_f - Time.current.to_f, 0].max.ceil
|
|
63
63
|
rescue StandardError
|
|
64
64
|
default_delay
|
|
65
65
|
end
|
|
@@ -7,7 +7,7 @@ module Pgbus
|
|
|
7
7
|
# Attempt to acquire a slot in the semaphore for the given key.
|
|
8
8
|
# Returns :acquired if a slot was available, :blocked if the limit is reached.
|
|
9
9
|
def acquire(key, max_value, duration)
|
|
10
|
-
expires_at = Time.
|
|
10
|
+
expires_at = Time.current + duration
|
|
11
11
|
Pgbus::Semaphore.acquire!(key, max_value, expires_at)
|
|
12
12
|
end
|
|
13
13
|
|
|
@@ -23,7 +23,7 @@ module Pgbus
|
|
|
23
23
|
result = Pgbus::Semaphore.connection.exec_query(
|
|
24
24
|
"DELETE FROM pgbus_semaphores WHERE expires_at < $1 RETURNING key",
|
|
25
25
|
"Pgbus Semaphore Expire",
|
|
26
|
-
[Time.
|
|
26
|
+
[Time.current]
|
|
27
27
|
)
|
|
28
28
|
result.rows.map { |row| { "key" => row[0] } }
|
|
29
29
|
end
|
|
@@ -21,15 +21,15 @@ module Pgbus
|
|
|
21
21
|
def initialize(config: Pgbus.configuration)
|
|
22
22
|
@config = config
|
|
23
23
|
@shutting_down = false
|
|
24
|
-
@last_cleanup_at =
|
|
25
|
-
@last_reap_at =
|
|
26
|
-
@last_concurrency_at =
|
|
27
|
-
@last_batch_cleanup_at =
|
|
28
|
-
@last_recurring_cleanup_at =
|
|
29
|
-
@last_archive_compaction_at =
|
|
30
|
-
@last_outbox_cleanup_at =
|
|
31
|
-
@last_job_lock_cleanup_at =
|
|
32
|
-
@last_stats_cleanup_at =
|
|
24
|
+
@last_cleanup_at = monotonic_now
|
|
25
|
+
@last_reap_at = monotonic_now
|
|
26
|
+
@last_concurrency_at = monotonic_now
|
|
27
|
+
@last_batch_cleanup_at = monotonic_now
|
|
28
|
+
@last_recurring_cleanup_at = monotonic_now
|
|
29
|
+
@last_archive_compaction_at = monotonic_now
|
|
30
|
+
@last_outbox_cleanup_at = monotonic_now
|
|
31
|
+
@last_job_lock_cleanup_at = monotonic_now
|
|
32
|
+
@last_stats_cleanup_at = monotonic_now
|
|
33
33
|
end
|
|
34
34
|
|
|
35
35
|
def run
|
|
@@ -65,7 +65,7 @@ module Pgbus
|
|
|
65
65
|
private
|
|
66
66
|
|
|
67
67
|
def run_maintenance
|
|
68
|
-
now =
|
|
68
|
+
now = monotonic_now
|
|
69
69
|
|
|
70
70
|
run_if_due(now, :@last_cleanup_at, CLEANUP_INTERVAL) { cleanup_processed_events }
|
|
71
71
|
run_if_due(now, :@last_reap_at, REAP_INTERVAL) { reap_stale_processes }
|
|
@@ -93,7 +93,7 @@ module Pgbus
|
|
|
93
93
|
ttl = config.idempotency_ttl
|
|
94
94
|
return unless ttl&.positive?
|
|
95
95
|
|
|
96
|
-
deleted = ProcessedEvent.expired(Time.
|
|
96
|
+
deleted = ProcessedEvent.expired(Time.current - ttl).delete_all
|
|
97
97
|
Pgbus.logger.debug { "[Pgbus] Cleaned up #{deleted} expired processed events" } if deleted.positive?
|
|
98
98
|
rescue StandardError => e
|
|
99
99
|
Pgbus.logger.warn { "[Pgbus] Idempotency cleanup failed: #{e.message}" }
|
|
@@ -101,7 +101,7 @@ module Pgbus
|
|
|
101
101
|
|
|
102
102
|
def reap_stale_processes
|
|
103
103
|
threshold = Heartbeat::ALIVE_THRESHOLD
|
|
104
|
-
deleted = ProcessEntry.stale(Time.
|
|
104
|
+
deleted = ProcessEntry.stale(Time.current - threshold).delete_all
|
|
105
105
|
Pgbus.logger.info { "[Pgbus] Reaped #{deleted} stale processes" } if deleted.positive?
|
|
106
106
|
rescue StandardError => e
|
|
107
107
|
Pgbus.logger.warn { "[Pgbus] Stale process reaping failed: #{e.message}" }
|
|
@@ -127,7 +127,7 @@ module Pgbus
|
|
|
127
127
|
end
|
|
128
128
|
|
|
129
129
|
def cleanup_batches
|
|
130
|
-
deleted = Batch.cleanup(older_than: Time.
|
|
130
|
+
deleted = Batch.cleanup(older_than: Time.current - (7 * 24 * 3600)) # 7 days
|
|
131
131
|
Pgbus.logger.debug { "[Pgbus] Cleaned up #{deleted} finished batches" } if deleted.positive?
|
|
132
132
|
rescue StandardError => e
|
|
133
133
|
Pgbus.logger.warn { "[Pgbus] Batch cleanup failed: #{e.message}" }
|
|
@@ -139,21 +139,44 @@ module Pgbus
|
|
|
139
139
|
retention = config.stats_retention
|
|
140
140
|
return unless retention&.positive?
|
|
141
141
|
|
|
142
|
-
deleted = JobStat.cleanup!(older_than: Time.
|
|
142
|
+
deleted = JobStat.cleanup!(older_than: Time.current - retention)
|
|
143
143
|
Pgbus.logger.debug { "[Pgbus] Cleaned up #{deleted} old job stats" } if deleted.positive?
|
|
144
144
|
end
|
|
145
145
|
|
|
146
146
|
def cleanup_job_locks
|
|
147
|
-
#
|
|
148
|
-
#
|
|
149
|
-
|
|
150
|
-
|
|
147
|
+
# Clean up orphaned uniqueness keys whose msg_id no longer exists
|
|
148
|
+
# in any PGMQ queue. This handles the rare case where a message is
|
|
149
|
+
# lost (e.g., queue table truncated) but the uniqueness key remains.
|
|
150
|
+
reaped = reap_orphaned_uniqueness_keys
|
|
151
|
+
Pgbus.logger.info { "[Pgbus] Reaped #{reaped} orphaned uniqueness keys" } if reaped.positive?
|
|
152
|
+
end
|
|
153
|
+
|
|
154
|
+
# Delete uniqueness keys older than twice the configured visibility timeout.
#
# Age is the only signal used. Rows with msg_id 0 (placeholders and
# :while_executing locks) and rows with a real msg_id are both kept until
# they pass the threshold and reaped afterwards, so msg_id does not affect
# the outcome.
#
# The cleanup is one DELETE filtered on created_at. It does not load the
# table into memory, and it cannot remove a row that was released and
# re-acquired under the same lock_key while the cleanup was running.
#
# Returns the number of rows deleted, or 0 if the cleanup raised (the error
# is logged at warn level).
def reap_orphaned_uniqueness_keys
  threshold = Time.current - (config.visibility_timeout * 2)

  UniquenessKey.where("created_at < ?", threshold).delete_all
rescue StandardError => e
  Pgbus.logger.warn { "[Pgbus] Uniqueness key cleanup failed: #{e.message}" }
  0
end
|
|
158
181
|
|
|
159
182
|
def cleanup_outbox
|
|
@@ -162,7 +185,7 @@ module Pgbus
|
|
|
162
185
|
retention = config.outbox_retention
|
|
163
186
|
return unless retention&.positive?
|
|
164
187
|
|
|
165
|
-
deleted = OutboxEntry.published_before(Time.
|
|
188
|
+
deleted = OutboxEntry.published_before(Time.current - retention).delete_all
|
|
166
189
|
Pgbus.logger.debug { "[Pgbus] Cleaned up #{deleted} published outbox entries" } if deleted.positive?
|
|
167
190
|
rescue StandardError => e
|
|
168
191
|
Pgbus.logger.warn { "[Pgbus] Outbox cleanup failed: #{e.message}" }
|
|
@@ -176,7 +199,7 @@ module Pgbus
|
|
|
176
199
|
retention = config.archive_retention
|
|
177
200
|
return unless retention&.positive?
|
|
178
201
|
|
|
179
|
-
cutoff = Time.
|
|
202
|
+
cutoff = Time.current - retention
|
|
180
203
|
batch_size = config.archive_compaction_batch_size || 1000
|
|
181
204
|
prefix = config.queue_prefix
|
|
182
205
|
|
|
@@ -200,12 +223,16 @@ module Pgbus
|
|
|
200
223
|
retention = config.recurring_execution_retention
|
|
201
224
|
return unless retention&.positive?
|
|
202
225
|
|
|
203
|
-
deleted = RecurringExecution.older_than(Time.
|
|
226
|
+
deleted = RecurringExecution.older_than(Time.current - retention).delete_all
|
|
204
227
|
Pgbus.logger.debug { "[Pgbus] Cleaned up #{deleted} old recurring executions" } if deleted.positive?
|
|
205
228
|
rescue StandardError => e
|
|
206
229
|
Pgbus.logger.warn { "[Pgbus] Recurring execution cleanup failed: #{e.message}" }
|
|
207
230
|
end
|
|
208
231
|
|
|
232
|
+
# Current reading of the monotonic clock, in seconds as a Float.
# Used for interval checks so wall-clock adjustments cannot affect them.
def monotonic_now
  ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :float_second)
end
|
|
235
|
+
|
|
209
236
|
def start_heartbeat
|
|
210
237
|
@heartbeat = Heartbeat.new(kind: "dispatcher", metadata: { pid: ::Process.pid })
|
|
211
238
|
@heartbeat.start
|
data/lib/pgbus/process/worker.rb
CHANGED
|
@@ -23,8 +23,10 @@ module Pgbus
|
|
|
23
23
|
@jobs_failed = Concurrent::AtomicFixnum.new(0)
|
|
24
24
|
@in_flight = Concurrent::AtomicFixnum.new(0)
|
|
25
25
|
@rate_counter = RateCounter.new(:processed, :failed, :dequeued)
|
|
26
|
-
@started_at = Time.
|
|
27
|
-
@
|
|
26
|
+
@started_at = Time.current
|
|
27
|
+
@started_at_monotonic = monotonic_now
|
|
28
|
+
@stat_buffer = config.stats_enabled ? Pgbus::StatBuffer.new : nil
|
|
29
|
+
@executor = Pgbus::ActiveJob::Executor.new(stat_buffer: @stat_buffer)
|
|
28
30
|
@pool = Concurrent::FixedThreadPool.new(threads)
|
|
29
31
|
@circuit_breaker = Pgbus::CircuitBreaker.new(config: config)
|
|
30
32
|
@queue_lock = QueueLock.new if @single_active_consumer
|
|
@@ -61,6 +63,7 @@ module Pgbus
|
|
|
61
63
|
break if @lifecycle.draining? && @pool.queue_length.zero?
|
|
62
64
|
|
|
63
65
|
claim_and_execute if @lifecycle.can_process?
|
|
66
|
+
@stat_buffer&.flush_if_due
|
|
64
67
|
@wake_signal.wait(timeout: config.polling_interval) if @lifecycle.draining? || @lifecycle.paused?
|
|
65
68
|
end
|
|
66
69
|
|
|
@@ -271,7 +274,7 @@ module Pgbus
|
|
|
271
274
|
end
|
|
272
275
|
|
|
273
276
|
def exceeded_max_lifetime?
|
|
274
|
-
return false unless config.max_worker_lifetime && (
|
|
277
|
+
return false unless config.max_worker_lifetime && (monotonic_now - @started_at_monotonic) > config.max_worker_lifetime
|
|
275
278
|
|
|
276
279
|
Pgbus.logger.info { "[Pgbus] Worker recycling: lifetime exceeded" }
|
|
277
280
|
true
|
|
@@ -317,6 +320,7 @@ module Pgbus
|
|
|
317
320
|
Pgbus.logger.info { "[Pgbus] Worker draining thread pool..." }
|
|
318
321
|
@pool.shutdown
|
|
319
322
|
@pool.wait_for_termination(30)
|
|
323
|
+
@stat_buffer&.stop
|
|
320
324
|
@queue_lock&.unlock_all
|
|
321
325
|
@heartbeat&.stop
|
|
322
326
|
restore_signals
|
|
@@ -10,29 +10,19 @@ module Pgbus
|
|
|
10
10
|
@tasks = load_tasks
|
|
11
11
|
end
|
|
12
12
|
|
|
13
|
-
def due_tasks(time = Time.
|
|
13
|
+
def due_tasks(time = Time.current)
|
|
14
14
|
tasks.select { |task| task_due?(task, time) }
|
|
15
15
|
end
|
|
16
16
|
|
|
17
17
|
def enqueue_task(task, run_at:)
|
|
18
18
|
queue = resolve_queue(task)
|
|
19
|
+
acquired_key = acquire_uniqueness_lock(task)
|
|
19
20
|
|
|
20
|
-
|
|
21
|
-
# ensures_uniqueness, we acquire the lock here so duplicate recurring
|
|
22
|
-
# enqueues are rejected while a previous instance is still queued or running.
|
|
23
|
-
if uniqueness_locked?(task)
|
|
24
|
-
Pgbus.logger.debug do
|
|
25
|
-
"[Pgbus] Recurring task #{task.key} skipped: uniqueness lock held"
|
|
26
|
-
end
|
|
27
|
-
return
|
|
28
|
-
end
|
|
21
|
+
return if acquired_key == :already_locked
|
|
29
22
|
|
|
30
23
|
RecurringExecution.record(task.key, run_at) do
|
|
31
24
|
payload = build_payload(task)
|
|
32
25
|
headers = build_headers(task, run_at)
|
|
33
|
-
|
|
34
|
-
# Inject uniqueness metadata into the payload so the worker knows
|
|
35
|
-
# to release the lock after execution.
|
|
36
26
|
payload = inject_uniqueness_metadata(task, payload)
|
|
37
27
|
|
|
38
28
|
Pgbus.client.ensure_queue(queue)
|
|
@@ -44,7 +34,11 @@ module Pgbus
|
|
|
44
34
|
end
|
|
45
35
|
end
|
|
46
36
|
rescue AlreadyRecorded
|
|
37
|
+
release_uniqueness_lock(acquired_key)
|
|
47
38
|
Pgbus.logger.debug { "[Pgbus] Recurring task #{task.key} already enqueued for #{run_at.iso8601}" }
|
|
39
|
+
rescue StandardError
|
|
40
|
+
release_uniqueness_lock(acquired_key)
|
|
41
|
+
raise
|
|
48
42
|
end
|
|
49
43
|
|
|
50
44
|
def build_payload(task)
|
|
@@ -112,36 +106,45 @@ module Pgbus
|
|
|
112
106
|
}
|
|
113
107
|
end
|
|
114
108
|
|
|
115
|
-
#
|
|
116
|
-
# Returns
|
|
117
|
-
|
|
118
|
-
|
|
109
|
+
# Try to take the uniqueness lock for a recurring task before it is enqueued.
#
# Result:
#   nil             — nothing to lock (no job class, no :until_executed
#                     uniqueness config, no key) or the lock attempt raised;
#                     the caller proceeds without a lock
#   :already_locked — another instance holds the lock; the caller skips the enqueue
#   String          — the lock key that was acquired; the caller must release it
#                     if the enqueue fails
def acquire_uniqueness_lock(task)
  class_name = task.class_name
  return nil unless class_name

  job_class = class_name.safe_constantize
  return nil unless job_class.respond_to?(:pgbus_uniqueness)

  settings = job_class.pgbus_uniqueness
  return nil unless settings && settings[:strategy] == :until_executed

  lock_key = resolve_uniqueness_key(settings, task)
  return nil unless lock_key

  # msg_id 0 is a placeholder: no message has been produced yet.
  return lock_key if UniquenessKey.acquire!(lock_key, queue_name: resolve_queue(task), msg_id: 0)

  Pgbus.logger.debug { "[Pgbus] Recurring task #{task.key} skipped: uniqueness lock held" }
  :already_locked
rescue StandardError => e
  Pgbus.logger.warn { "[Pgbus] Uniqueness lock failed for #{task.key}: #{e.message}" }
  nil # Fail open — allow enqueue if lock check errors
end
|
|
140
|
+
|
|
141
|
+
# Give back a lock obtained from acquire_uniqueness_lock.
# A nil or :already_locked argument means no lock is held, so nothing is done.
# A failed release is logged at warn level and not re-raised.
def release_uniqueness_lock(key)
  UniquenessKey.release!(key) unless key.nil? || key == :already_locked
rescue StandardError => e
  Pgbus.logger.warn { "[Pgbus] Lock rollback failed: #{e.message}" }
end
|
|
146
149
|
|
|
147
150
|
# Resolve the uniqueness key for a recurring task.
|