postburner 1.0.0.pre.15 → 1.0.0.pre.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +132 -16
- data/app/concerns/postburner/execution.rb +39 -2
- data/app/concerns/postburner/insertion.rb +22 -9
- data/app/concerns/postburner/properties.rb +102 -5
- data/app/models/postburner/job.rb +12 -3
- data/lib/postburner/configuration.rb +4 -0
- data/lib/postburner/scheduler.rb +60 -11
- data/lib/postburner/tube.rb +3 -5
- data/lib/postburner/version.rb +1 -1
- data/lib/postburner/worker.rb +122 -66
- data/lib/postburner.rb +9 -10
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 05d87a7d1949eb64ef1918b4bb0f0cdeb24796097c370f90576b3a158d0be5a7
|
|
4
|
+
data.tar.gz: 857304a0b925574ab66d6ad215e6cbe3bdba8c109ecb9a34e96e8ab0ca40aac3
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: af92d06c7d796c0db2bb766986bc9b27ad46e9958103e11871538e3224b43a054d80fb50b7bb0b5f8d93de1a11331e10b10d26058c219047e15530f6a36a1b9e
|
|
7
|
+
data.tar.gz: 351f86f49998e5350b7b705a4f77f68a91c29340995dc5531b964bbdb40b6bb55f3c4202459cdc7453429c7e3b7ceaa4ca934628fc8503305f9630eb9c287142
|
data/README.md
CHANGED
|
@@ -1,12 +1,8 @@
|
|
|
1
1
|
# Postburner
|
|
2
2
|
|
|
3
|
-
Fast Beanstalkd-backed job queue with **optional PostgreSQL records**.
|
|
3
|
+
Fast Beanstalkd-backed job queue with **optional PostgreSQL records via ActiveRecord**.
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
- **Fast jobs**: Fast execution via Beanstalkd
|
|
7
|
-
- **Tracked jobs**: Audited jobs logs, timing, errors, and statistics
|
|
8
|
-
|
|
9
|
-
Built for production environments where you want fast background processing for most jobs, but comprehensive auditing for critical operations.
|
|
5
|
+
Built for the real world where you may want fast background processing for most jobs, but comprehensive auditing for critical operations.
|
|
10
6
|
|
|
11
7
|
- **ActiveJob Adapter** - To use with Rails, ActionMailer, ActiveStorage
|
|
12
8
|
- **Dual-mode execution** - Beanstalkd only or tracked (database backed)
|
|
@@ -97,12 +93,14 @@ bundle exec rake postburner:work WORKER=default
|
|
|
97
93
|
- [Postburner::Job](#postburnerjob)
|
|
98
94
|
- [Scheduler](#scheduler)
|
|
99
95
|
- [Job Management](#job-management)
|
|
96
|
+
- [Writing Jobs](#writing-jobs)
|
|
100
97
|
- [Queue Strategies](#queue-strategies)
|
|
101
98
|
- [Testing](#testing)
|
|
102
99
|
- [Workers](#workers)
|
|
103
100
|
- [Configuration](#configuration)
|
|
104
101
|
- [Callbacks](#callbacks)
|
|
105
102
|
- [Instrumentation](#instrumentation)
|
|
103
|
+
- [Logging](#logging)
|
|
106
104
|
- [Why Beanstalkd?](#why-beanstalkd)
|
|
107
105
|
- [Beanstalkd Integration](#beanstalkd-integration)
|
|
108
106
|
- [Installation](#installation)
|
|
@@ -177,16 +175,77 @@ config.active_job.queue_adapter = :postburner
|
|
|
177
175
|
#config.active_job.queue_name_prefix = Postburner.tube_prefix(Rails.env) # i.e. "postburner.#{Rails.env}"
|
|
178
176
|
config.action_mailer.deliver_later_queue_name = 'mailers' # gets prefixed by config.active_job.queue_name_prefix
|
|
179
177
|
|
|
180
|
-
|
|
181
178
|
bundle exec postburner # start with bin/postburner
|
|
182
179
|
bundle exec rake postburner:work # or with rake task
|
|
183
180
|
```
|
|
184
181
|
|
|
185
|
-
|
|
182
|
+
### Enqueueing Jobs
|
|
186
183
|
|
|
187
|
-
|
|
184
|
+
```ruby
|
|
185
|
+
# ActiveJob (standard Rails API)
|
|
186
|
+
SendEmailJob.perform_later(user_id) # Enqueue immediately
|
|
187
|
+
SendEmailJob.set(wait: 1.hour).perform_later(user_id) # Delay by duration
|
|
188
|
+
SendEmailJob.set(wait_until: Date.tomorrow.noon).perform_later(user_id) # Run at specific time
|
|
189
|
+
SendEmailJob.set(queue: 'critical').perform_later(user_id) # Override queue
|
|
190
|
+
SendEmailJob.set(priority: 0).perform_later(user_id) # Override priority
|
|
191
|
+
|
|
192
|
+
# Postburner::Job (always tracked, full API)
|
|
193
|
+
job = ProcessPayment.create!(args: { 'payment_id' => 123 })
|
|
194
|
+
job.queue! # Enqueue immediately
|
|
195
|
+
job.queue!(delay: 1.hour) # Delay by duration
|
|
196
|
+
job.queue!(at: Date.tomorrow.noon) # Run at specific time
|
|
197
|
+
job.queue!(queue: 'critical') # Override queue
|
|
198
|
+
job.queue!(priority: 0, ttr: 600) # Set priority and TTR
|
|
199
|
+
```
|
|
188
200
|
|
|
189
|
-
|
|
201
|
+
### ActiveJob vs Postburner::Job TL;DR
|
|
202
|
+
|
|
203
|
+
`Postburner::Job` is a simple subclass of `ActiveRecord`, so the normal
|
|
204
|
+
`ActiveRecord` API applies! Thus the workflow is to create an instance,
|
|
205
|
+
then queue it!
|
|
206
|
+
|
|
207
|
+
| Operation | ActiveJob | Postburner::Job |
|
|
208
|
+
|-----------|-----------|-----------------|
|
|
209
|
+
| **Enqueue immediately** | `MyJob.perform_later(args)` | `MyJob.create!(args: {}).queue!` |
|
|
210
|
+
| **Delay** | `.set(wait: 1.hour)` | `job.queue!(delay: 1.hour)` |
|
|
211
|
+
| **Run at** | `.set(wait_until: time)` | `job.queue!(at: time)` |
|
|
212
|
+
| **Set queue** | `.set(queue: 'critical')` | `job.queue!(queue: 'critical')` |
|
|
213
|
+
| **Set priority** | `.set(priority: 0)` | `job.queue!(priority: 0)` |
|
|
214
|
+
| **Set TTR** | `.set(ttr: 300)` | `job.queue!(ttr: 300)` |
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
| Retries | ActiveJob | Postburner::Job |
|
|
218
|
+
|---------|-----------|-----------------|
|
|
219
|
+
| **Default** | No retries (discarded) | No retries (buried) |
|
|
220
|
+
| **Disable retries** | `discard_on StandardError` | *(default behavior)* |
|
|
221
|
+
| **Enable retries** | `retry_on StandardError` | `max_retries 5` |
|
|
222
|
+
|
|
223
|
+
- **ActiveJob**: No automatic worker-level retries. Use ActiveJob's `retry_on`/`discard_on` for retry behavior. Failed jobs without retry configuration are discarded.
|
|
224
|
+
- **Postburner::Job**: No automatic retries by default. On failure, the job is buried in Beanstalkd for inspection. Use `max_retries` (1-32) to enable retries with exponential backoff (2^n seconds).
|
|
225
|
+
|
|
226
|
+
```ruby
|
|
227
|
+
# ActiveJob: Use retry_on for retries
|
|
228
|
+
class MyActiveJob < ApplicationJob
|
|
229
|
+
retry_on StandardError, wait: :polynomially_longer, attempts: 5
|
|
230
|
+
|
|
231
|
+
def perform(args)
|
|
232
|
+
# ...
|
|
233
|
+
end
|
|
234
|
+
end
|
|
235
|
+
|
|
236
|
+
# Postburner::Job: Use max_retries for retries
|
|
237
|
+
class MyJob < Postburner::Job
|
|
238
|
+
max_retries 5 # Exponential backoff: 1s, 2s, 4s, 8s, 16s
|
|
239
|
+
end
|
|
240
|
+
|
|
241
|
+
# Or use a fixed delay
|
|
242
|
+
class MyJob < Postburner::Job
|
|
243
|
+
max_retries 5
|
|
244
|
+
retry_delay 10 # 10 seconds between retries
|
|
245
|
+
end
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
## Usage
|
|
190
249
|
|
|
191
250
|
### Default Jobs
|
|
192
251
|
|
|
@@ -335,7 +394,7 @@ job.duration # Execution time in milliseconds
|
|
|
335
394
|
job.lag # Queue lag in milliseconds
|
|
336
395
|
```
|
|
337
396
|
|
|
338
|
-
###
|
|
397
|
+
### Postburner::Job Usage
|
|
339
398
|
|
|
340
399
|
Direct `Postburner::Job` subclasses are **always tracked**:
|
|
341
400
|
|
|
@@ -363,6 +422,15 @@ job.queue!(delay: 1.hour)
|
|
|
363
422
|
job.queue!(at: 2.days.from_now)
|
|
364
423
|
```
|
|
365
424
|
|
|
425
|
+
> **Note:** The `args` parameter in `perform(args)` is optional. It's a convenience accessor to `self.args`, which is stored in a JSONB column on the job record. You can omit the parameter and access args directly:
|
|
426
|
+
>
|
|
427
|
+
> ```ruby
|
|
428
|
+
> def perform
|
|
429
|
+
> payment = Payment.find(self.args['payment_id'])
|
|
430
|
+
> # ...
|
|
431
|
+
> end
|
|
432
|
+
> ```
|
|
433
|
+
|
|
366
434
|
#### Instance-Level Queue Configuration
|
|
367
435
|
|
|
368
436
|
Override queue priority and TTR per job instance for dynamic behavior:
|
|
@@ -775,6 +843,14 @@ job.errata # Array of exceptions with backtraces
|
|
|
775
843
|
job.attempts # Array of attempt timestamps
|
|
776
844
|
```
|
|
777
845
|
|
|
846
|
+
## Writing Jobs
|
|
847
|
+
|
|
848
|
+
Pay attention to the following when writing jobs:
|
|
849
|
+
|
|
850
|
+
**Job Idempotency:** Jobs should be designed to be idempotent and safely re-runnable. Like all job queues, Postburner provides at-least-once delivery—in rare cases outside of Postburner's control (e.g. network issues), a job may be executed more than once.
|
|
851
|
+
|
|
852
|
+
**TTR (Time-to-Run):** If a job exceeds its TTR without completion, Beanstalkd releases it back to the queue while still running—causing duplicate execution. For long-running jobs, call `extend!` periodically to reset the TTR, or set a sufficiently high TTR value. You must include the `Postburner::Beanstalkd` or `Postburner::Tracked` module with `ActiveJob` to use `extend!`.
|
|
853
|
+
|
|
778
854
|
## Queue Strategies
|
|
779
855
|
|
|
780
856
|
Postburner uses different strategies to control job execution. These affect `Postburner::Job` subclasses (not ActiveJob classes).
|
|
@@ -1412,9 +1488,9 @@ Postburner emits ActiveSupport::Notifications events following Rails conventions
|
|
|
1412
1488
|
|-------|------|--------------|
|
|
1413
1489
|
| `perform_start.job.postburner` | Before job execution begins | `:job`, `:beanstalk_job_id` |
|
|
1414
1490
|
| `perform.job.postburner` | Around job execution (includes duration) | `:job`, `:beanstalk_job_id` |
|
|
1415
|
-
| `retry.job.postburner` | When
|
|
1416
|
-
| `retry_stopped.job.postburner` | When
|
|
1417
|
-
| `discard.job.postburner` | When default
|
|
1491
|
+
| `retry.job.postburner` | When Postburner::Job is retried | `:job`, `:beanstalk_job_id`, `:error`, `:wait`, `:attempt` |
|
|
1492
|
+
| `retry_stopped.job.postburner` | When Postburner::Job exhausts retries (buried) | `:job`, `:beanstalk_job_id`, `:error` |
|
|
1493
|
+
| `discard.job.postburner` | When default ActiveJob fails (discarded) | `:job`, `:beanstalk_job_id`, `:error` |
|
|
1418
1494
|
| `enqueue.job.postburner` | When job is enqueued for immediate execution | `:job` |
|
|
1419
1495
|
| `enqueue_at.job.postburner` | When job is enqueued with delay | `:job`, `:scheduled_at` |
|
|
1420
1496
|
|
|
@@ -1525,6 +1601,46 @@ end
|
|
|
1525
1601
|
|
|
1526
1602
|
**Note:** These events complement (don't replace) ActiveJob's built-in instrumentation events like `enqueue.active_job` and `perform.active_job`.
|
|
1527
1603
|
|
|
1604
|
+
## Logging
|
|
1605
|
+
|
|
1606
|
+
### Configuration
|
|
1607
|
+
|
|
1608
|
+
Set the log level in your Rails environment configuration:
|
|
1609
|
+
|
|
1610
|
+
```ruby
|
|
1611
|
+
# config/environments/production.rb
|
|
1612
|
+
config.log_level = :info # Default, recommended for production
|
|
1613
|
+
|
|
1614
|
+
# config/environments/development.rb
|
|
1615
|
+
config.log_level = :debug # Verbose logging for development
|
|
1616
|
+
```
|
|
1617
|
+
|
|
1618
|
+
Postburner uses `Rails.logger`, so standard Rails log level configuration applies.
|
|
1619
|
+
|
|
1620
|
+
### Debugging
|
|
1621
|
+
|
|
1622
|
+
Set the log level to `debug` for verbose logging:
|
|
1623
|
+
|
|
1624
|
+
```ruby
|
|
1625
|
+
config.log_level = :debug
|
|
1626
|
+
```
|
|
1627
|
+
|
|
1628
|
+
### Custom Job Logging
|
|
1629
|
+
|
|
1630
|
+
```ruby
|
|
1631
|
+
class ProcessPayment < ApplicationJob
|
|
1632
|
+
include Postburner::Tracked
|
|
1633
|
+
|
|
1634
|
+
def perform(payment_id)
|
|
1635
|
+
log "Starting payment processing for $#{payment.amount}" # Stored in database
|
|
1636
|
+
payment.charge!
|
|
1637
|
+
log! "Payment charged successfully" # Saved immediately
|
|
1638
|
+
end
|
|
1639
|
+
end
|
|
1640
|
+
```
|
|
1641
|
+
|
|
1642
|
+
These job-specific logs are stored in the database (for tracked jobs only) and are separate from Rails application logs, providing a complete audit trail for critical operations.
|
|
1643
|
+
|
|
1528
1644
|
## Why Beanstalkd?
|
|
1529
1645
|
|
|
1530
1646
|
Beanstalkd is a simple, fast, and reliable queue system. It is a good choice for production environments where you want fast background processing for most jobs, but comprehensive auditing for critical operations.
|
|
@@ -1649,7 +1765,7 @@ class BackgroundTask < Postburner::Job
|
|
|
1649
1765
|
end
|
|
1650
1766
|
```
|
|
1651
1767
|
|
|
1652
|
-
**
|
|
1768
|
+
**Example Priority Ranges:**
|
|
1653
1769
|
|
|
1654
1770
|
| Priority Range | Use Case | Examples |
|
|
1655
1771
|
|---------------|----------|----------|
|
|
@@ -1665,7 +1781,7 @@ Set default priority in `config/postburner.yml`:
|
|
|
1665
1781
|
|
|
1666
1782
|
```yaml
|
|
1667
1783
|
production:
|
|
1668
|
-
default_priority: 65536 # Default
|
|
1784
|
+
default_priority: 65536 # Default without explicit priority set
|
|
1669
1785
|
default_ttr: 300
|
|
1670
1786
|
```
|
|
1671
1787
|
|
|
@@ -134,7 +134,7 @@ module Postburner
|
|
|
134
134
|
|
|
135
135
|
run_callbacks :processing do
|
|
136
136
|
begin
|
|
137
|
-
self.perform(args)
|
|
137
|
+
method(:perform).arity == 0 ? self.perform : self.perform(args)
|
|
138
138
|
rescue Exception => exception
|
|
139
139
|
self.persist_metadata!
|
|
140
140
|
self.log! '[Postburner] Exception raised during perform prevented completion.'
|
|
@@ -162,7 +162,7 @@ module Postburner
|
|
|
162
162
|
|
|
163
163
|
rescue Exception => exception
|
|
164
164
|
self.log_exception!(exception)
|
|
165
|
-
|
|
165
|
+
handle_retry_or_raise(exception)
|
|
166
166
|
end
|
|
167
167
|
end # run_callbacks :attempt
|
|
168
168
|
|
|
@@ -170,6 +170,43 @@ module Postburner
|
|
|
170
170
|
|
|
171
171
|
private
|
|
172
172
|
|
|
173
|
+
# Handles retry logic or re-raises exception.
|
|
174
|
+
#
|
|
175
|
+
# If job has retries configured and hasn't exceeded max_retries, requeues
|
|
176
|
+
# the job with the configured delay. Otherwise, re-raises the exception
|
|
177
|
+
# for the worker to handle (typically by burying).
|
|
178
|
+
#
|
|
179
|
+
# Instruments with ActiveSupport::Notifications:
|
|
180
|
+
# - retry.job.postburner: When job is requeued for retry
|
|
181
|
+
#
|
|
182
|
+
# @param exception [Exception] The exception that caused the failure
|
|
183
|
+
#
|
|
184
|
+
# @raise [Exception] Re-raises if no retries configured or max exceeded
|
|
185
|
+
#
|
|
186
|
+
# @api private
|
|
187
|
+
#
|
|
188
|
+
def handle_retry_or_raise(exception)
|
|
189
|
+
if should_retry?
|
|
190
|
+
delay = retry_delay_for_attempt(attempt_count - 1)
|
|
191
|
+
|
|
192
|
+
self.log!("RETRY: attempt #{attempt_count}/#{self.class.max_retries}, delay #{delay}s")
|
|
193
|
+
|
|
194
|
+
# Instrument retry event
|
|
195
|
+
job_payload = Postburner::Instrumentation.job_payload_from_model(self, beanstalk_job_id: self.bkid)
|
|
196
|
+
ActiveSupport::Notifications.instrument('retry.job.postburner', {
|
|
197
|
+
job: job_payload,
|
|
198
|
+
beanstalk_job_id: self.bkid,
|
|
199
|
+
error: exception,
|
|
200
|
+
wait: delay,
|
|
201
|
+
attempt: attempt_count
|
|
202
|
+
})
|
|
203
|
+
|
|
204
|
+
requeue!(delay: delay)
|
|
205
|
+
else
|
|
206
|
+
raise exception
|
|
207
|
+
end
|
|
208
|
+
end
|
|
209
|
+
|
|
173
210
|
# Records an attempt and calculates execution lag.
|
|
174
211
|
#
|
|
175
212
|
# Appends current time to attempts array, sets attempting_at on first attempt,
|
|
@@ -37,10 +37,12 @@ module Postburner
|
|
|
37
37
|
# @param options [Hash] Queue options
|
|
38
38
|
# @option options [Time, ActiveSupport::Duration] :at Absolute time to run the job
|
|
39
39
|
# @option options [Integer, ActiveSupport::Duration] :delay Seconds to delay execution
|
|
40
|
-
# @option options [Integer] :
|
|
41
|
-
# @option options [Integer] :
|
|
40
|
+
# @option options [Integer] :priority Priority (0-4294967295, 0 = HIGHEST), sets instance attribute
|
|
41
|
+
# @option options [Integer] :pri Beanstalkd priority (pass-through, for backwards compatibility)
|
|
42
|
+
# @option options [Integer] :ttr Time-to-run in seconds (1-4294967295, 0 is silently changed to 1)
|
|
43
|
+
# @option options [String] :queue Queue name override
|
|
42
44
|
#
|
|
43
|
-
# @return [
|
|
45
|
+
# @return [true] on success (including if already queued)
|
|
44
46
|
#
|
|
45
47
|
# @raise [ActiveRecord::RecordInvalid] if job is not valid
|
|
46
48
|
# @raise [AlreadyProcessed] if job was already processed
|
|
@@ -57,17 +59,25 @@ module Postburner
|
|
|
57
59
|
# job.queue!(at: '2025-01-15 09:00:00'.in_time_zone)
|
|
58
60
|
# job.queue!(at: Time.parse('2025-01-15 09:00:00 EST'))
|
|
59
61
|
#
|
|
60
|
-
# @example Queue with priority
|
|
61
|
-
# job.queue!(
|
|
62
|
+
# @example Queue with priority and TTR
|
|
63
|
+
# job.queue!(priority: 0, ttr: 600)
|
|
64
|
+
#
|
|
65
|
+
# @example Queue to specific queue
|
|
66
|
+
# job.queue!(queue: 'critical', delay: 30.minutes)
|
|
62
67
|
#
|
|
63
68
|
# @see #requeue!
|
|
64
69
|
# @see Postburner.queue_strategy
|
|
65
70
|
#
|
|
66
71
|
def queue!(options={})
|
|
67
|
-
return if self.queued_at.present? && self.bkid.present?
|
|
72
|
+
return true if self.queued_at.present? && self.bkid.present?
|
|
68
73
|
raise ActiveRecord::RecordInvalid, "Can't queue unless valid." unless self.valid?
|
|
69
74
|
raise AlreadyProcessed, "Processed at #{self.processed_at}" if self.processed_at
|
|
70
75
|
|
|
76
|
+
# Extract and set instance-level overrides
|
|
77
|
+
self.priority = options.delete(:priority) if options.key?(:priority)
|
|
78
|
+
self.ttr = options.delete(:ttr) if options.key?(:ttr)
|
|
79
|
+
self.queue_name = options.delete(:queue) if options.key?(:queue)
|
|
80
|
+
|
|
71
81
|
at = options.delete(:at)
|
|
72
82
|
now = Time.current
|
|
73
83
|
|
|
@@ -86,6 +96,8 @@ module Postburner
|
|
|
86
96
|
run_callbacks :enqueue do
|
|
87
97
|
self.save!
|
|
88
98
|
end
|
|
99
|
+
|
|
100
|
+
true
|
|
89
101
|
end
|
|
90
102
|
|
|
91
103
|
# Re-queues an existing job by removing it from Beanstalkd and queueing again.
|
|
@@ -96,10 +108,11 @@ module Postburner
|
|
|
96
108
|
# @param options [Hash] Queue options (same as {#queue!})
|
|
97
109
|
# @option options [Time, ActiveSupport::Duration] :at Absolute time to run the job
|
|
98
110
|
# @option options [Integer, ActiveSupport::Duration] :delay Seconds to delay execution
|
|
99
|
-
# @option options [Integer] :
|
|
100
|
-
# @option options [Integer] :ttr Time-to-run in seconds
|
|
111
|
+
# @option options [Integer] :priority Priority (0-4294967295, lower = higher priority)
|
|
112
|
+
# @option options [Integer] :ttr Time-to-run in seconds (1-4294967295)
|
|
113
|
+
# @option options [String] :queue Queue name override
|
|
101
114
|
#
|
|
102
|
-
# @return [
|
|
115
|
+
# @return [true] on success
|
|
103
116
|
#
|
|
104
117
|
# @raise [ActiveRecord::RecordInvalid] if job is not valid
|
|
105
118
|
# @raise [Beaneater::NotConnected] if Beanstalkd connection fails
|
|
@@ -6,6 +6,26 @@ module Postburner
|
|
|
6
6
|
# Provides DSL methods for configuring queue behavior (name, priority, TTR, retries).
|
|
7
7
|
# Defines configurable properties for job queue management.
|
|
8
8
|
#
|
|
9
|
+
# == Retry Behavior
|
|
10
|
+
#
|
|
11
|
+
# By default, Postburner::Job does NOT retry failed jobs. When a job raises an
|
|
12
|
+
# exception, it is buried in Beanstalkd for inspection. This differs from default
|
|
13
|
+
# ActiveJob behavior (5 retries with 2^n second backoff, ~31s total).
|
|
14
|
+
#
|
|
15
|
+
# Use +max_retries+ to enable automatic retries:
|
|
16
|
+
#
|
|
17
|
+
# class MyJob < Postburner::Job
|
|
18
|
+
# max_retries 5 # Retry up to 5 times with exponential backoff (1s, 2s, 4s, 8s, 16s)
|
|
19
|
+
# end
|
|
20
|
+
#
|
|
21
|
+
# The default +retry_delay+ is exponential backoff (2^n seconds), matching
|
|
22
|
+
# Postburner's default ActiveJob behavior. Override with a fixed delay or custom proc:
|
|
23
|
+
#
|
|
24
|
+
# retry_delay 10 # Fixed 10 second delay between retries
|
|
25
|
+
#
|
|
26
|
+
# # Match ActiveJob's :exponentially_longer (polynomial: (n+1)^4 + 2)
|
|
27
|
+
# retry_delay ->(n) { ((n + 1) ** 4) + 2 } # 3s, 18s, 83s, 258s, 627s
|
|
28
|
+
#
|
|
9
29
|
# @example Basic usage
|
|
10
30
|
# class ProcessPayment < Postburner::Job
|
|
11
31
|
# queue 'critical'
|
|
@@ -23,9 +43,11 @@ module Postburner
|
|
|
23
43
|
|
|
24
44
|
included do
|
|
25
45
|
# Instance-level queue configuration (overrides class-level defaults)
|
|
26
|
-
attr_writer :priority, :ttr
|
|
46
|
+
attr_writer :priority, :ttr, :queue_name
|
|
27
47
|
|
|
28
|
-
|
|
48
|
+
# Class-level overrides (all default to nil).
|
|
49
|
+
# Global defaults are defined in Postburner::Configuration (lib/postburner/configuration.rb).
|
|
50
|
+
class_attribute :postburner_queue_name, default: nil
|
|
29
51
|
class_attribute :postburner_priority, default: nil
|
|
30
52
|
class_attribute :postburner_ttr, default: nil
|
|
31
53
|
class_attribute :postburner_max_retries, default: nil
|
|
@@ -106,7 +128,13 @@ module Postburner
|
|
|
106
128
|
|
|
107
129
|
# Sets or returns maximum number of job retries.
|
|
108
130
|
#
|
|
109
|
-
#
|
|
131
|
+
# By default, Postburner::Job does NOT retry (max_retries is nil).
|
|
132
|
+
# Failed jobs are buried in Beanstalkd for inspection.
|
|
133
|
+
# Set max_retries to enable automatic retries on failure.
|
|
134
|
+
#
|
|
135
|
+
# Values of nil or 0 disable retries. Maximum allowed value is 32.
|
|
136
|
+
#
|
|
137
|
+
# @param retries [Integer, nil] Max retries (0-32), or nil to get current value
|
|
110
138
|
#
|
|
111
139
|
# @return [Integer, nil] Current max retries when getting, nil when setting
|
|
112
140
|
#
|
|
@@ -116,9 +144,11 @@ module Postburner
|
|
|
116
144
|
# @example Get max retries
|
|
117
145
|
# ProcessPayment.max_retries # => 3
|
|
118
146
|
#
|
|
147
|
+
# @see retry_delay
|
|
148
|
+
#
|
|
119
149
|
def max_retries(retries = nil)
|
|
120
150
|
if retries
|
|
121
|
-
self.postburner_max_retries = retries
|
|
151
|
+
self.postburner_max_retries = [[retries.to_i, 0].max, 32].min
|
|
122
152
|
nil
|
|
123
153
|
else
|
|
124
154
|
postburner_max_retries
|
|
@@ -166,8 +196,13 @@ module Postburner
|
|
|
166
196
|
# job = MyJob.create!(args: {})
|
|
167
197
|
# job.queue_name # => 'critical'
|
|
168
198
|
#
|
|
199
|
+
# @example Instance-level override
|
|
200
|
+
# job = MyJob.create!(args: {})
|
|
201
|
+
# job.queue_name = 'urgent'
|
|
202
|
+
# job.queue_name # => 'urgent'
|
|
203
|
+
#
|
|
169
204
|
def queue_name
|
|
170
|
-
self.class.queue
|
|
205
|
+
@queue_name || self.class.queue || Postburner.configuration.default_queue_name
|
|
171
206
|
end
|
|
172
207
|
|
|
173
208
|
# Returns the full tube name with environment prefix.
|
|
@@ -217,5 +252,67 @@ module Postburner
|
|
|
217
252
|
@ttr || self.class.ttr
|
|
218
253
|
end
|
|
219
254
|
|
|
255
|
+
# Checks if this job should retry after a failure.
|
|
256
|
+
#
|
|
257
|
+
# Returns true if max_retries is configured and the current attempt count
|
|
258
|
+
# is less than max_retries.
|
|
259
|
+
#
|
|
260
|
+
# @return [Boolean] true if job should retry, false otherwise
|
|
261
|
+
#
|
|
262
|
+
# @example
|
|
263
|
+
# class MyJob < Postburner::Job
|
|
264
|
+
# max_retries 3
|
|
265
|
+
# end
|
|
266
|
+
#
|
|
267
|
+
# job = MyJob.create!(args: {})
|
|
268
|
+
# job.should_retry? # => true (attempt_count is 0)
|
|
269
|
+
# # After 3 failed attempts...
|
|
270
|
+
# job.should_retry? # => false
|
|
271
|
+
#
|
|
272
|
+
def should_retry?
|
|
273
|
+
max = self.class.max_retries || Postburner.configuration.default_max_retries
|
|
274
|
+
attempt_count.to_i < max.to_i
|
|
275
|
+
end
|
|
276
|
+
|
|
277
|
+
# Calculates the retry delay for the given attempt.
|
|
278
|
+
#
|
|
279
|
+
# Uses the class-level retry_delay configuration, which can be:
|
|
280
|
+
# - Integer: Fixed delay in seconds
|
|
281
|
+
# - Proc: Called with attempt number (0-based), returns delay in seconds
|
|
282
|
+
# - nil: Falls back to exponential backoff (2 ** attempt seconds)
|
|
283
|
+
#
|
|
284
|
+
# @param attempt [Integer] The attempt number (0-based: 0 for first retry)
|
|
285
|
+
#
|
|
286
|
+
# @return [Integer] Delay in seconds before next retry
|
|
287
|
+
#
|
|
288
|
+
# @example Fixed delay
|
|
289
|
+
# class MyJob < Postburner::Job
|
|
290
|
+
# max_retries 3
|
|
291
|
+
# retry_delay 10
|
|
292
|
+
# end
|
|
293
|
+
# job.retry_delay_for_attempt(0) # => 10
|
|
294
|
+
#
|
|
295
|
+
# @example Exponential backoff (2s, 4s, 8s...)
|
|
296
|
+
# class MyJob < Postburner::Job
|
|
297
|
+
# max_retries 5
|
|
298
|
+
# retry_delay ->(n) { 2 ** (n + 1) }
|
|
299
|
+
# end
|
|
300
|
+
# job.retry_delay_for_attempt(0) # => 2
|
|
301
|
+
# job.retry_delay_for_attempt(1) # => 4
|
|
302
|
+
# job.retry_delay_for_attempt(2) # => 8
|
|
303
|
+
#
|
|
304
|
+
def retry_delay_for_attempt(attempt)
|
|
305
|
+
delay_config = self.class.retry_delay || Postburner.configuration.default_retry_delay
|
|
306
|
+
|
|
307
|
+
case delay_config
|
|
308
|
+
when Proc
|
|
309
|
+
delay_config.call(attempt).to_i
|
|
310
|
+
when Integer
|
|
311
|
+
delay_config
|
|
312
|
+
else
|
|
313
|
+
2 ** attempt # Fallback to default exponential backoff
|
|
314
|
+
end
|
|
315
|
+
end
|
|
316
|
+
|
|
220
317
|
end
|
|
221
318
|
end
|
|
@@ -96,13 +96,13 @@ module Postburner
|
|
|
96
96
|
#
|
|
97
97
|
# @abstract Subclasses must implement this method
|
|
98
98
|
#
|
|
99
|
-
# @param args [Hash] Job arguments from the args JSONB column
|
|
99
|
+
# @param args [Hash] Job arguments from the args JSONB column (optional)
|
|
100
100
|
#
|
|
101
101
|
# @return [void]
|
|
102
102
|
#
|
|
103
103
|
# @raise [NotImplementedError] if subclass does not implement this method
|
|
104
104
|
#
|
|
105
|
-
# @example
|
|
105
|
+
# @example With args parameter
|
|
106
106
|
# class ProcessPayment < Postburner::Job
|
|
107
107
|
# def perform(args)
|
|
108
108
|
# payment = Payment.find(args['payment_id'])
|
|
@@ -111,14 +111,23 @@ module Postburner
|
|
|
111
111
|
# end
|
|
112
112
|
# end
|
|
113
113
|
#
|
|
114
|
+
# @example Without args parameter (access via self.args)
|
|
115
|
+
# class CleanupJob < Postburner::Job
|
|
116
|
+
# def perform
|
|
117
|
+
# log "Cleaning up #{self.args['table']}"
|
|
118
|
+
# # self.args is always available
|
|
119
|
+
# end
|
|
120
|
+
# end
|
|
121
|
+
#
|
|
114
122
|
# @note Use {#log} or {#log!} within perform to add entries to the job's audit trail
|
|
115
123
|
# @note Exceptions will be caught, logged to errata, and re-raised
|
|
124
|
+
# @note Args are always accessible via self.args regardless of method signature
|
|
116
125
|
#
|
|
117
126
|
# @see #perform!
|
|
118
127
|
# @see #log
|
|
119
128
|
# @see #log_exception
|
|
120
129
|
#
|
|
121
|
-
def perform(args)
|
|
130
|
+
def perform(args=nil)
|
|
122
131
|
raise NotImplementedError, "Subclasses must implement the perform method"
|
|
123
132
|
end
|
|
124
133
|
|
|
@@ -22,6 +22,7 @@ module Postburner
|
|
|
22
22
|
class Configuration
|
|
23
23
|
# Global settings
|
|
24
24
|
attr_accessor :beanstalk_url, :logger, :default_priority, :default_ttr
|
|
25
|
+
attr_accessor :default_queue_name, :default_max_retries, :default_retry_delay
|
|
25
26
|
attr_accessor :default_scheduler_interval, :default_scheduler_priority
|
|
26
27
|
attr_accessor :enqueue_options
|
|
27
28
|
|
|
@@ -52,6 +53,9 @@ module Postburner
|
|
|
52
53
|
@logger = options[:logger] || (defined?(Rails) ? Rails.logger : Logger.new(STDOUT))
|
|
53
54
|
@default_priority = options[:default_priority] || 65536
|
|
54
55
|
@default_ttr = options[:default_ttr] || 300
|
|
56
|
+
@default_queue_name = options[:default_queue_name] || 'default'
|
|
57
|
+
@default_max_retries = options[:default_max_retries] || 0
|
|
58
|
+
@default_retry_delay = options[:default_retry_delay] || ->(n) { 2 ** n }
|
|
55
59
|
@default_scheduler_interval = options[:default_scheduler_interval] || 300
|
|
56
60
|
@default_scheduler_priority = options[:default_scheduler_priority] || 100
|
|
57
61
|
@enqueue_options = options[:enqueue_options]
|
data/lib/postburner/scheduler.rb
CHANGED
|
@@ -136,6 +136,16 @@ module Postburner
|
|
|
136
136
|
# scheduler.perform
|
|
137
137
|
#
|
|
138
138
|
def perform
|
|
139
|
+
# Self-deduplicate: if another watchdog exists in the queue, exit early
|
|
140
|
+
# and let that one handle scheduling. This naturally resolves duplicate
|
|
141
|
+
# watchdogs that can occur from race conditions.
|
|
142
|
+
@skip_requeue = false
|
|
143
|
+
if another_watchdog_queued?
|
|
144
|
+
logger.info "[Postburner::Scheduler] Another watchdog already queued, exiting to deduplicate"
|
|
145
|
+
@skip_requeue = true
|
|
146
|
+
return
|
|
147
|
+
end
|
|
148
|
+
|
|
139
149
|
logger.info "[Postburner::Scheduler] Starting scheduler run"
|
|
140
150
|
|
|
141
151
|
ActiveSupport::Notifications.instrument('perform_start.scheduler.postburner', {
|
|
@@ -161,15 +171,24 @@ module Postburner
|
|
|
161
171
|
|
|
162
172
|
# Use advisory lock to coordinate multiple workers
|
|
163
173
|
ActiveSupport::Notifications.instrument('perform.scheduler.postburner', payload) do
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
174
|
+
begin
|
|
175
|
+
lock_acquired = Postburner::AdvisoryLock.with_lock(AdvisoryLock::SCHEDULER_LOCK_KEY, blocking: false) do
|
|
176
|
+
process_all_schedules
|
|
177
|
+
true
|
|
178
|
+
end
|
|
179
|
+
|
|
180
|
+
if lock_acquired
|
|
181
|
+
logger.info "[Postburner::Scheduler] Scheduler run complete"
|
|
182
|
+
else
|
|
183
|
+
logger.info "[Postburner::Scheduler] Could not acquire lock, skipping"
|
|
184
|
+
end
|
|
185
|
+
rescue ActiveRecord::ConnectionTimeoutError => e
|
|
186
|
+
# This can happen if the connection pool is exhausted
|
|
187
|
+
# Log cleanly and let the watchdog retry on next interval
|
|
188
|
+
logger.warn "[Postburner::Scheduler] Database connection pool exhausted. Skipping scheduler run (need advisory lock), will retry on next interval"
|
|
189
|
+
logger.debug "[Postburner::Scheduler] Check database.yml pool and max_connections i.e. pool >= needed connection count from web/job workers"
|
|
190
|
+
logger.debug "[Postburner::Scheduler] ActiveRecord Connection timeout: #{e.message}"
|
|
191
|
+
lock_acquired = false
|
|
173
192
|
end
|
|
174
193
|
|
|
175
194
|
# Update payload with final stats (mutates the hash subscribers receive)
|
|
@@ -180,8 +199,8 @@ module Postburner
|
|
|
180
199
|
payload[:orphans_enqueued] = @orphans_enqueued
|
|
181
200
|
end
|
|
182
201
|
ensure
|
|
183
|
-
#
|
|
184
|
-
requeue_watchdog
|
|
202
|
+
# Re-queue watchdog for next run (unless we're deduplicating)
|
|
203
|
+
requeue_watchdog unless @skip_requeue
|
|
185
204
|
end
|
|
186
205
|
|
|
187
206
|
# Class method to enqueue watchdog to Beanstalkd
|
|
@@ -321,6 +340,32 @@ module Postburner
|
|
|
321
340
|
|
|
322
341
|
private
|
|
323
342
|
|
|
343
|
+
# Check if another watchdog is already queued in Beanstalkd.
|
|
344
|
+
#
|
|
345
|
+
# Used for self-deduplication: if this watchdog sees another one queued,
|
|
346
|
+
# it exits early and lets that one handle scheduling. This resolves
|
|
347
|
+
# duplicate watchdogs that can occur from race conditions.
|
|
348
|
+
#
|
|
349
|
+
# @return [Boolean] true if another watchdog exists (ready or delayed)
|
|
350
|
+
#
|
|
351
|
+
# @api private
|
|
352
|
+
#
|
|
353
|
+
def another_watchdog_queued?
|
|
354
|
+
Postburner.connected do |conn|
|
|
355
|
+
tube_name = Postburner.scheduler_tube_name
|
|
356
|
+
tube = conn.beanstalk.tubes[tube_name]
|
|
357
|
+
stats = tube.stats
|
|
358
|
+
|
|
359
|
+
# Check for ready or delayed jobs (not counting reserved, which is us)
|
|
360
|
+
# Note: beaneater transforms hyphenated beanstalkd stats to underscores
|
|
361
|
+
queued_count = stats.current_jobs_ready.to_i + stats.current_jobs_delayed.to_i
|
|
362
|
+
queued_count > 0
|
|
363
|
+
end
|
|
364
|
+
rescue => e
|
|
365
|
+
logger.debug "[Postburner::Scheduler] Could not check for duplicate watchdogs: #{e.message}"
|
|
366
|
+
false # Assume no duplicate if we can't check
|
|
367
|
+
end
|
|
368
|
+
|
|
324
369
|
# Process all enabled schedules.
|
|
325
370
|
#
|
|
326
371
|
# Iterates through all enabled schedules and calls process_schedule for each.
|
|
@@ -472,6 +517,10 @@ module Postburner
|
|
|
472
517
|
|
|
473
518
|
self.class.enqueue_watchdog
|
|
474
519
|
end
|
|
520
|
+
rescue ActiveRecord::ConnectionTimeoutError => e
|
|
521
|
+
# Connection pool exhausted - workers will recreate watchdog on next timeout
|
|
522
|
+
logger.warn "[Postburner::Scheduler] Could not re-queue watchdog (connection pool exhausted), workers will recreate on timeout"
|
|
523
|
+
logger.debug "[Postburner::Scheduler] Connection timeout details: #{e.message}"
|
|
475
524
|
rescue => e
|
|
476
525
|
logger.error "[Postburner::Scheduler] Failed to re-queue watchdog: #{e.class} - #{e.message}"
|
|
477
526
|
# This is critical - if watchdog isn't re-queued, scheduling stops
|
data/lib/postburner/tube.rb
CHANGED
|
@@ -31,10 +31,9 @@ module Postburner
|
|
|
31
31
|
# Just pass the last known id to after for the next batch.
|
|
32
32
|
#
|
|
33
33
|
def jobs(count=20, limit: 1000, after: nil)
|
|
34
|
-
#
|
|
34
|
+
# Note: beaneater transforms hyphenated beanstalkd stats to underscores
|
|
35
35
|
stats = @tube.stats
|
|
36
|
-
|
|
37
|
-
tube_name = stats_hash['name']
|
|
36
|
+
tube_name = stats.name
|
|
38
37
|
|
|
39
38
|
jobs = Array.new
|
|
40
39
|
|
|
@@ -48,8 +47,7 @@ module Postburner
|
|
|
48
47
|
job = @tube.client.jobs.find(i)
|
|
49
48
|
if job
|
|
50
49
|
job_stats = job.stats
|
|
51
|
-
|
|
52
|
-
jobs << job if job_stats_hash['tube'] == tube_name
|
|
50
|
+
jobs << job if job_stats.tube == tube_name
|
|
53
51
|
end
|
|
54
52
|
break if jobs.length >= count
|
|
55
53
|
end
|
data/lib/postburner/version.rb
CHANGED
data/lib/postburner/worker.rb
CHANGED
|
@@ -136,6 +136,28 @@ module Postburner
|
|
|
136
136
|
@shutdown
|
|
137
137
|
end
|
|
138
138
|
|
|
139
|
+
# Checks if this process has been orphaned (parent died).
|
|
140
|
+
#
|
|
141
|
+
# When the parent process dies, the kernel re-parents children to init (PID 1).
|
|
142
|
+
# Detecting this allows forked children to exit gracefully instead of running
|
|
143
|
+
# indefinitely as orphans.
|
|
144
|
+
#
|
|
145
|
+
# @return [Boolean] true if parent PID is 1, false otherwise
|
|
146
|
+
def orphaned?
|
|
147
|
+
Process.ppid == 1
|
|
148
|
+
end
|
|
149
|
+
|
|
150
|
+
# Calculates exponential backoff sleep duration for reconnection attempts.
|
|
151
|
+
#
|
|
152
|
+
# Uses exponential backoff starting at 1 second and doubling each attempt,
|
|
153
|
+
# capped at 32 seconds to prevent excessively long waits.
|
|
154
|
+
#
|
|
155
|
+
# @param attempts [Integer] Number of consecutive failed attempts (0-based)
|
|
156
|
+
# @return [Integer] Sleep duration in seconds (1, 2, 4, 8, 16, or 32)
|
|
157
|
+
def reconnect_backoff(attempts)
|
|
158
|
+
[2**attempts, 32].min
|
|
159
|
+
end
|
|
160
|
+
|
|
139
161
|
private
|
|
140
162
|
|
|
141
163
|
# Returns the worker configuration hash.
|
|
@@ -148,13 +170,17 @@ module Postburner
|
|
|
148
170
|
|
|
149
171
|
# Sets up signal handlers for graceful shutdown.
|
|
150
172
|
#
|
|
151
|
-
#
|
|
173
|
+
# Trapped signals:
|
|
174
|
+
# - TERM: Graceful termination request (systemd, kill, process managers)
|
|
175
|
+
# - INT: Interrupt from keyboard (Ctrl+C)
|
|
176
|
+
# - HUP: Hangup signal when controlling terminal dies (prevents orphaned children)
|
|
152
177
|
#
|
|
153
178
|
# @return [void]
|
|
154
179
|
# @api private
|
|
155
180
|
def setup_signal_handlers
|
|
156
181
|
Signal.trap('TERM') { shutdown }
|
|
157
182
|
Signal.trap('INT') { shutdown }
|
|
183
|
+
Signal.trap('HUP') { shutdown }
|
|
158
184
|
end
|
|
159
185
|
|
|
160
186
|
# Expands queue name to full tube name with environment prefix.
|
|
@@ -245,17 +271,19 @@ module Postburner
|
|
|
245
271
|
def process_jobs
|
|
246
272
|
connection = Postburner::Connection.new
|
|
247
273
|
timeout = worker_config[:timeout]
|
|
274
|
+
reconnect_attempts = 0
|
|
248
275
|
|
|
249
276
|
watch_queues(connection, config.queue_names)
|
|
250
277
|
|
|
251
278
|
until shutdown? || (@gc_limit && @jobs_processed.value >= @gc_limit)
|
|
252
279
|
begin
|
|
253
|
-
job = connection.beanstalk.tubes.reserve(timeout
|
|
280
|
+
job = connection.beanstalk.tubes.reserve(timeout)
|
|
254
281
|
|
|
255
282
|
if job
|
|
256
283
|
logger.debug "[Postburner::Worker] Thread #{Thread.current.object_id} reserved job #{job.id}"
|
|
257
284
|
execute_job(job)
|
|
258
285
|
@jobs_processed.increment
|
|
286
|
+
reconnect_attempts = 0 # Reset backoff on successful job execution
|
|
259
287
|
else
|
|
260
288
|
ensure_scheduler_watchdog!(connection)
|
|
261
289
|
end
|
|
@@ -263,8 +291,10 @@ module Postburner
|
|
|
263
291
|
ensure_scheduler_watchdog!(connection)
|
|
264
292
|
next
|
|
265
293
|
rescue Beaneater::NotConnected => e
|
|
266
|
-
|
|
267
|
-
|
|
294
|
+
backoff = reconnect_backoff(reconnect_attempts)
|
|
295
|
+
logger.error "[Postburner::Worker] Thread disconnected: #{e.message}, reconnecting in #{backoff}s (attempt #{reconnect_attempts + 1})"
|
|
296
|
+
sleep backoff
|
|
297
|
+
reconnect_attempts += 1
|
|
268
298
|
connection.reconnect!
|
|
269
299
|
watch_queues(connection, config.queue_names)
|
|
270
300
|
rescue => e
|
|
@@ -359,6 +389,11 @@ module Postburner
|
|
|
359
389
|
end
|
|
360
390
|
|
|
361
391
|
until shutdown? || (gc_limit && jobs_processed.value >= gc_limit)
|
|
392
|
+
if orphaned?
|
|
393
|
+
logger.error "[Postburner::Worker] Fork #{fork_num} detected parent died (orphaned), initiating shutdown"
|
|
394
|
+
shutdown
|
|
395
|
+
break
|
|
396
|
+
end
|
|
362
397
|
sleep 0.5
|
|
363
398
|
end
|
|
364
399
|
|
|
@@ -391,17 +426,19 @@ module Postburner
|
|
|
391
426
|
def process_jobs_in_fork(fork_num, jobs_processed, gc_limit)
|
|
392
427
|
connection = Postburner::Connection.new
|
|
393
428
|
timeout = worker_config[:timeout]
|
|
429
|
+
reconnect_attempts = 0
|
|
394
430
|
|
|
395
431
|
watch_queues(connection, config.queue_names)
|
|
396
432
|
|
|
397
433
|
until shutdown? || (gc_limit && jobs_processed.value >= gc_limit)
|
|
398
434
|
begin
|
|
399
|
-
job = connection.beanstalk.tubes.reserve(timeout
|
|
435
|
+
job = connection.beanstalk.tubes.reserve(timeout)
|
|
400
436
|
|
|
401
437
|
if job
|
|
402
438
|
logger.debug "[Postburner::Worker] Fork #{fork_num} thread #{Thread.current.object_id} reserved job #{job.id}"
|
|
403
439
|
execute_job(job)
|
|
404
440
|
jobs_processed.increment
|
|
441
|
+
reconnect_attempts = 0 # Reset backoff on successful job execution
|
|
405
442
|
else
|
|
406
443
|
ensure_scheduler_watchdog!(connection)
|
|
407
444
|
end
|
|
@@ -409,8 +446,10 @@ module Postburner
|
|
|
409
446
|
ensure_scheduler_watchdog!(connection)
|
|
410
447
|
next
|
|
411
448
|
rescue Beaneater::NotConnected => e
|
|
412
|
-
|
|
413
|
-
|
|
449
|
+
backoff = reconnect_backoff(reconnect_attempts)
|
|
450
|
+
logger.error "[Postburner::Worker] Thread disconnected: #{e.message}, reconnecting in #{backoff}s (attempt #{reconnect_attempts + 1})"
|
|
451
|
+
sleep backoff
|
|
452
|
+
reconnect_attempts += 1
|
|
414
453
|
connection.reconnect!
|
|
415
454
|
watch_queues(connection, config.queue_names)
|
|
416
455
|
rescue => e
|
|
@@ -549,12 +588,16 @@ module Postburner
|
|
|
549
588
|
|
|
550
589
|
# Handles job execution errors with retry logic.
|
|
551
590
|
#
|
|
552
|
-
# For
|
|
553
|
-
#
|
|
554
|
-
#
|
|
591
|
+
# For Postburner::Job: The job handles its own retries in perform!. If an
|
|
592
|
+
# exception bubbles up here, it means no retries configured or max exceeded.
|
|
593
|
+
# Buries the job for inspection, reports to Rails.error, and emits
|
|
594
|
+
# retry_stopped.job.postburner event.
|
|
595
|
+
#
|
|
596
|
+
# For default ActiveJob: No automatic retries here (use ActiveJob's retry_on/discard_on).
|
|
597
|
+
# The job is reported to Rails.error, instrumented as discard.job.postburner, and deleted.
|
|
555
598
|
#
|
|
556
599
|
# Instruments with ActiveSupport::Notifications:
|
|
557
|
-
# - retry_stopped.job.postburner: When
|
|
600
|
+
# - retry_stopped.job.postburner: When Postburner::Job is buried after failure
|
|
558
601
|
#
|
|
559
602
|
# @param beanstalk_job [Beaneater::Job] Failed job
|
|
560
603
|
# @param error [Exception] The error that caused the failure
|
|
@@ -562,24 +605,14 @@ module Postburner
|
|
|
562
605
|
# @api private
|
|
563
606
|
def handle_error(beanstalk_job, error)
|
|
564
607
|
logger.error "[Postburner] Job failed: #{error.class} - #{error.message}"
|
|
565
|
-
logger.error error.backtrace.join("\n")
|
|
566
608
|
|
|
567
609
|
begin
|
|
568
610
|
payload = JSON.parse(beanstalk_job.body)
|
|
569
611
|
|
|
570
612
|
if payload['tracked'] || Postburner::ActiveJob::Payload.legacy_format?(payload)
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
job_payload = Postburner::Instrumentation.job_payload_from_hash(payload, beanstalk_job_id: beanstalk_job.id)
|
|
574
|
-
ActiveSupport::Notifications.instrument('retry_stopped.job.postburner', {
|
|
575
|
-
job: job_payload,
|
|
576
|
-
beanstalk_job_id: beanstalk_job.id,
|
|
577
|
-
error: error
|
|
578
|
-
})
|
|
579
|
-
|
|
580
|
-
beanstalk_job.bury
|
|
613
|
+
handle_postburner_job_error(beanstalk_job, payload, error)
|
|
581
614
|
else
|
|
582
|
-
|
|
615
|
+
handle_default_job_error(beanstalk_job, payload, error)
|
|
583
616
|
end
|
|
584
617
|
rescue => retry_error
|
|
585
618
|
logger.error "[Postburner] Error handling failure: #{retry_error.message}"
|
|
@@ -587,62 +620,85 @@ module Postburner
|
|
|
587
620
|
end
|
|
588
621
|
end
|
|
589
622
|
|
|
590
|
-
# Handles
|
|
623
|
+
# Handles errors for Postburner::Job (including tracked ActiveJob).
|
|
591
624
|
#
|
|
592
|
-
#
|
|
593
|
-
#
|
|
625
|
+
# The job handles its own retries in perform!. If the exception bubbles up
|
|
626
|
+
# here, it means either no retries are configured or max_retries was exceeded.
|
|
627
|
+
# Buries the job for inspection.
|
|
594
628
|
#
|
|
595
629
|
# Instruments with ActiveSupport::Notifications:
|
|
596
|
-
# -
|
|
597
|
-
# - discard.job.postburner: When job is discarded after max retries
|
|
630
|
+
# - retry_stopped.job.postburner: When job is buried
|
|
598
631
|
#
|
|
599
|
-
# @param beanstalk_job [Beaneater::Job] Failed job
|
|
600
|
-
# @param payload [Hash] Parsed job body
|
|
632
|
+
# @param beanstalk_job [Beaneater::Job] Failed job
|
|
633
|
+
# @param payload [Hash] Parsed job body
|
|
601
634
|
# @param error [Exception] The error that caused the failure
|
|
602
635
|
# @return [void]
|
|
603
636
|
# @api private
|
|
604
|
-
def
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
637
|
+
def handle_postburner_job_error(beanstalk_job, payload, error)
|
|
638
|
+
job_id = if Postburner::ActiveJob::Payload.legacy_format?(payload)
|
|
639
|
+
payload['args']&.first
|
|
640
|
+
else
|
|
641
|
+
payload['postburner_job_id']
|
|
642
|
+
end
|
|
608
643
|
|
|
609
|
-
|
|
610
|
-
payload['retry_count'] = retry_count + 1
|
|
611
|
-
payload['executions'] = (payload['executions'] || 0) + 1
|
|
644
|
+
job_class_name = payload['job_class'] || payload['class']
|
|
612
645
|
|
|
613
|
-
|
|
646
|
+
# Log the error with backtrace (Postburner::Job doesn't use ActiveJob's logging)
|
|
647
|
+
logger.error "[Postburner] #{job_class_name}##{job_id} failed: #{error.class} - #{error.message}"
|
|
648
|
+
logger.error error.backtrace.join("\n")
|
|
614
649
|
|
|
615
|
-
|
|
650
|
+
logger.info "[Postburner] Burying #{job_class_name}##{job_id} for inspection (bkid: #{beanstalk_job.id})"
|
|
616
651
|
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
)
|
|
625
|
-
end
|
|
652
|
+
# Report to Rails error reporter for integration with error tracking services
|
|
653
|
+
Rails.error.report(error, handled: false, context: {
|
|
654
|
+
job_class: job_class_name,
|
|
655
|
+
job_id: job_id,
|
|
656
|
+
beanstalk_job_id: beanstalk_job.id,
|
|
657
|
+
queue_name: payload['queue_name']
|
|
658
|
+
})
|
|
626
659
|
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
})
|
|
660
|
+
job_payload = Postburner::Instrumentation.job_payload_from_hash(payload, beanstalk_job_id: beanstalk_job.id)
|
|
661
|
+
ActiveSupport::Notifications.instrument('retry_stopped.job.postburner', {
|
|
662
|
+
job: job_payload,
|
|
663
|
+
beanstalk_job_id: beanstalk_job.id,
|
|
664
|
+
error: error
|
|
665
|
+
})
|
|
634
666
|
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
667
|
+
beanstalk_job.bury
|
|
668
|
+
end
|
|
669
|
+
|
|
670
|
+
# Handles errors for default ActiveJob jobs.
|
|
671
|
+
#
|
|
672
|
+
# Discards the job and reports to Rails.error. No automatic retries -
|
|
673
|
+
# use ActiveJob's retry_on/discard_on for retry behavior.
|
|
674
|
+
#
|
|
675
|
+
# Instruments with ActiveSupport::Notifications:
|
|
676
|
+
# - discard.job.postburner: When job is discarded
|
|
677
|
+
#
|
|
678
|
+
# @param beanstalk_job [Beaneater::Job] Failed job
|
|
679
|
+
# @param payload [Hash] Parsed job body
|
|
680
|
+
# @param error [Exception] The error that caused the failure
|
|
681
|
+
# @return [void]
|
|
682
|
+
# @api private
|
|
683
|
+
def handle_default_job_error(beanstalk_job, payload, error)
|
|
684
|
+
job_payload = Postburner::Instrumentation.job_payload_from_hash(payload, beanstalk_job_id: beanstalk_job.id)
|
|
685
|
+
|
|
686
|
+
# Report to Rails error reporter for integration with error tracking services
|
|
687
|
+
Rails.error.report(error, handled: false, context: {
|
|
688
|
+
job_class: payload['job_class'],
|
|
689
|
+
job_id: payload['job_id'],
|
|
690
|
+
beanstalk_job_id: beanstalk_job.id,
|
|
691
|
+
queue_name: payload['queue_name']
|
|
692
|
+
})
|
|
693
|
+
|
|
694
|
+
ActiveSupport::Notifications.instrument('discard.job.postburner', {
|
|
695
|
+
job: job_payload,
|
|
696
|
+
beanstalk_job_id: beanstalk_job.id,
|
|
697
|
+
error: error
|
|
698
|
+
})
|
|
699
|
+
|
|
700
|
+
logger.error "[Postburner] Discarding #{payload['job_class']} (#{payload['job_id']})"
|
|
701
|
+
beanstalk_job.delete
|
|
646
702
|
end
|
|
647
703
|
|
|
648
704
|
# Watches all configured queues in Beanstalkd.
|
data/lib/postburner.rb
CHANGED
|
@@ -559,19 +559,18 @@ module Postburner
|
|
|
559
559
|
tubes_to_inspect.each do |tube|
|
|
560
560
|
begin
|
|
561
561
|
stats = tube.stats
|
|
562
|
-
#
|
|
563
|
-
stats_hash = stats.instance_variable_get(:@hash) || {}
|
|
562
|
+
# Note: beaneater transforms hyphenated beanstalkd stats to underscores
|
|
564
563
|
|
|
565
564
|
tube_data = {
|
|
566
565
|
name: tube.name,
|
|
567
|
-
ready:
|
|
568
|
-
delayed:
|
|
569
|
-
buried:
|
|
570
|
-
reserved:
|
|
571
|
-
total: (
|
|
572
|
-
(
|
|
573
|
-
(
|
|
574
|
-
(
|
|
566
|
+
ready: stats.current_jobs_ready || 0,
|
|
567
|
+
delayed: stats.current_jobs_delayed || 0,
|
|
568
|
+
buried: stats.current_jobs_buried || 0,
|
|
569
|
+
reserved: stats.current_jobs_reserved || 0,
|
|
570
|
+
total: (stats.current_jobs_ready || 0) +
|
|
571
|
+
(stats.current_jobs_delayed || 0) +
|
|
572
|
+
(stats.current_jobs_buried || 0) +
|
|
573
|
+
(stats.current_jobs_reserved || 0)
|
|
575
574
|
}
|
|
576
575
|
rescue Beaneater::NotFoundError
|
|
577
576
|
# Tube doesn't exist yet, skip it
|