postburner 1.0.0.pre.11 → 1.0.0.pre.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. checksums.yaml +4 -4
  2. data/README.md +961 -555
  3. data/app/concerns/postburner/commands.rb +1 -1
  4. data/app/concerns/postburner/execution.rb +11 -11
  5. data/app/concerns/postburner/insertion.rb +1 -1
  6. data/app/concerns/postburner/logging.rb +2 -2
  7. data/app/concerns/postburner/statistics.rb +1 -1
  8. data/app/models/postburner/job.rb +27 -4
  9. data/app/models/postburner/mailer.rb +1 -1
  10. data/app/models/postburner/schedule.rb +703 -0
  11. data/app/models/postburner/schedule_execution.rb +353 -0
  12. data/app/views/postburner/jobs/show.html.haml +3 -3
  13. data/lib/generators/postburner/install/install_generator.rb +1 -0
  14. data/lib/generators/postburner/install/templates/config/postburner.yml +15 -6
  15. data/lib/generators/postburner/install/templates/migrations/create_postburner_schedules.rb.erb +71 -0
  16. data/lib/postburner/active_job/adapter.rb +3 -3
  17. data/lib/postburner/active_job/payload.rb +5 -0
  18. data/lib/postburner/advisory_lock.rb +123 -0
  19. data/lib/postburner/configuration.rb +43 -7
  20. data/lib/postburner/connection.rb +7 -6
  21. data/lib/postburner/runner.rb +26 -3
  22. data/lib/postburner/scheduler.rb +427 -0
  23. data/lib/postburner/strategies/immediate_test_queue.rb +24 -7
  24. data/lib/postburner/strategies/nice_queue.rb +1 -1
  25. data/lib/postburner/strategies/null_queue.rb +2 -2
  26. data/lib/postburner/strategies/test_queue.rb +2 -2
  27. data/lib/postburner/time_helpers.rb +4 -2
  28. data/lib/postburner/tube.rb +9 -1
  29. data/lib/postburner/version.rb +1 -1
  30. data/lib/postburner/worker.rb +684 -0
  31. data/lib/postburner.rb +32 -13
  32. metadata +7 -3
  33. data/lib/postburner/workers/base.rb +0 -205
  34. data/lib/postburner/workers/worker.rb +0 -396
@@ -0,0 +1,684 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'concurrent'
4
+ require 'postburner/scheduler'
5
+
6
module Postburner
  # Puma-style worker with configurable forks and threads.
  #
  # Processes jobs from Beanstalkd queues using a scalable architecture that
  # adapts from development to production. Workers can run in single-process
  # mode (forks: 0) or multi-process mode (forks: 1+), each with configurable
  # thread pools.
  #
  # ## Architecture
  #
  # ### Single Process Mode (forks: 0)
  #
  #   Main Process
  #   └─ Thread Pool (N threads watching all queues)
  #
  # ### Multi-Process Mode (forks: 1+)
  #
  #   Parent Process
  #   ├─ Fork 0
  #   │   └─ Thread Pool (N threads watching all queues)
  #   ├─ Fork 1
  #   │   └─ Thread Pool (N threads watching all queues)
  #   └─ Fork 2
  #       └─ Thread Pool (N threads watching all queues)
  #
  # ## Scaling Strategy
  #
  # **Development** (forks: 0, threads: 1):
  # Single-threaded, single-process for simplest debugging.
  #
  # **Staging** (forks: 0, threads: 10):
  # Multi-threaded, single-process for moderate concurrency.
  #
  # **Production** (forks: 4, threads: 10):
  # 4 processes × 10 threads = 40 concurrent jobs.
  #
  # ## Connection Model
  #
  # Each worker thread maintains its own Beanstalkd connection for thread safety.
  #
  # ## GC Limit
  #
  # Workers support a GC limit (gc_limit) that causes the process to exit
  # after processing N jobs. In multi-process mode, the parent automatically
  # respawns exited children. This prevents memory bloat in long-running workers.
  #
  # ## Scheduler Integration
  #
  # Workers automatically watch the scheduler tube and process watchdog jobs.
  # On reserve timeout, workers check if a watchdog exists and create one if
  # missing, ensuring the scheduler continues running even if jobs are sparse.
  #
  # @example Starting a worker programmatically
  #   config = Postburner::Configuration.load_yaml('config/postburner.yml', 'production', 'default')
  #   worker = Postburner::Worker.new(config)
  #   worker.start
  #
  # @example Configuration in postburner.yml
  #   production:
  #     workers:
  #       default:
  #         forks: 4
  #         threads: 10
  #         gc_limit: 5000
  #         queues:
  #           - default
  #           - critical
  #
  # @see Postburner::Runner
  # @see Postburner::Configuration
  # @see Postburner::Scheduler
  #
  class Worker
    # @!attribute [r] config
    #   @return [Postburner::Configuration] The worker configuration
    # @!attribute [r] logger
    #   @return [Logger] The logger instance
    attr_reader :config, :logger

    # Initialize a new worker.
    #
    # @param config [Postburner::Configuration] Worker configuration loaded from YAML
    #
    # @example
    #   config = Postburner::Configuration.load_yaml('config/postburner.yml', 'production', 'default')
    #   worker = Postburner::Worker.new(config)
    #
    def initialize(config)
      @config = config
      @logger = config.logger
      @shutdown = false
      setup_signal_handlers
    end

    # Starts the worker.
    #
    # Detects whether to run in single-process mode (forks: 0) or
    # multi-process mode (forks: 1+) and starts accordingly.
    #
    # @return [void]
    def start
      logger.info "[Postburner::Worker] Starting worker '#{worker_config[:name]}'..."
      logger.info "[Postburner::Worker] Queues: #{config.queue_names.join(', ')}"
      logger.info "[Postburner::Worker] Config: #{worker_config[:forks]} forks, #{worker_config[:threads]} threads, gc_limit: #{worker_config[:gc_limit] || 'unlimited'}, timeout: #{worker_config[:timeout]}s"
      all_tubes = config.expanded_tube_names + [config.scheduler_tube_name]
      logger.info "[Postburner] #{config.beanstalk_url} known tubes: #{all_tubes.join(', ')}"
      log_next_scheduler_watchdog

      if worker_config[:forks] > 0
        start_forked_mode
      else
        start_single_process_mode
      end
    end

    # Initiates graceful shutdown.
    #
    # Sets shutdown flag to stop processing new jobs. Current jobs
    # are allowed to finish.
    #
    # @return [void]
    def shutdown
      @shutdown = true
    end

    # Checks if shutdown has been requested.
    #
    # @return [Boolean] true if shutdown requested, false otherwise
    def shutdown?
      @shutdown
    end

    private

    # Returns the worker configuration hash.
    #
    # @return [Hash] Worker config with :name, :queues, :forks, :threads, :gc_limit, :timeout
    # @api private
    def worker_config
      config.worker_config
    end

    # Sets up signal handlers for graceful shutdown.
    #
    # Traps TERM and INT signals to initiate graceful shutdown.
    #
    # @return [void]
    # @api private
    def setup_signal_handlers
      Signal.trap('TERM') { shutdown }
      Signal.trap('INT') { shutdown }
    end

    # Expands queue name to full tube name with environment prefix.
    #
    # @param queue_name [String] Base queue name
    # @return [String] Full tube name (e.g., 'postburner.production.critical')
    # @api private
    def expand_tube_name(queue_name)
      config.expand_tube_name(queue_name)
    end

    # Logs info about the next scheduler watchdog job.
    #
    # Peeks at the scheduler tube to find the next watchdog job and logs
    # when it will run. Useful for debugging scheduler timing.
    #
    # @return [void]
    # @api private
    def log_next_scheduler_watchdog
      tube_name = config.scheduler_tube_name

      Postburner.connected do |conn|
        tube = conn.beanstalk.tubes[tube_name]

        # Check delayed first, then ready
        job = tube.peek(:delayed) rescue nil
        job ||= tube.peek(:ready) rescue nil

        if job
          stats = job.stats
          time_left = stats.time_left.to_i
          at = Time.current + time_left

          logger.info "[Postburner::Worker] Next scheduler watchdog at #{at.iso8601} (job #{job.id}, time-left: #{time_left}s); config: #{config.default_scheduler_interval}s"
        else
          logger.info "[Postburner::Worker] Schedule watchdog not found (tube: #{tube_name})"
        end
      end
    rescue => e
      # Fixed: previously logged under the [Postburner::Scheduler] tag even
      # though this failure originates in the Worker.
      logger.warn "[Postburner::Worker] Failed to lookup next scheduler watchdog: #{e.message}"
    end

    # Starts worker in single-process mode (forks: 0).
    #
    # Creates a thread pool and processes jobs until shutdown or GC limit
    # is reached. Exits with code 99 when GC limit is reached to signal
    # the need for a restart.
    #
    # @return [void]
    # @api private
    def start_single_process_mode
      logger.info "[Postburner::Worker] Mode: Single process (forks: 0)"

      @jobs_processed = Concurrent::AtomicFixnum.new(0)
      @gc_limit = worker_config[:gc_limit]

      thread_count = worker_config[:threads]
      @pool = Concurrent::FixedThreadPool.new(thread_count)

      thread_count.times do
        @pool.post { process_jobs }
      end

      # Park the main thread until shutdown is requested or the GC limit
      # is hit; worker threads update @jobs_processed as they complete jobs.
      until shutdown? || (@gc_limit && @jobs_processed.value >= @gc_limit)
        sleep 0.5
      end

      logger.info "[Postburner::Worker] Shutting down..."
      @pool.shutdown
      @pool.wait_for_termination(worker_config[:shutdown_timeout])

      if @gc_limit && @jobs_processed.value >= @gc_limit
        logger.debug "[Postburner::Worker] Reached GC limit (#{@jobs_processed.value} jobs), exiting for restart..."
        # Exit code 99 signals the supervisor (Runner or parent process)
        # that this is a planned GC restart, not a failure.
        exit 99
      else
        logger.info "[Postburner::Worker] Shutdown complete"
      end
    end

    # Processes jobs in a single thread (single-process mode).
    #
    # Delegates to {#run_job_loop} using the process-wide atomic counter
    # and GC limit set up by {#start_single_process_mode}.
    #
    # @return [void]
    # @api private
    def process_jobs
      run_job_loop(@jobs_processed, @gc_limit)
    end

    # Processes jobs in a single thread within a fork.
    #
    # Delegates to {#run_job_loop} using the fork-local shared counter.
    #
    # @param fork_num [Integer] Fork identifier for logging
    # @param jobs_processed [Concurrent::AtomicFixnum] Shared job counter
    # @param gc_limit [Integer, nil] Maximum jobs before exit (nil = unlimited)
    # @return [void]
    # @api private
    def process_jobs_in_fork(fork_num, jobs_processed, gc_limit)
      run_job_loop(jobs_processed, gc_limit, fork_num: fork_num)
    end

    # Shared reserve/execute loop used by every worker thread.
    #
    # Reserves jobs from watched tubes and executes them. Handles connection
    # errors with automatic reconnection. Checks for scheduler watchdog on
    # reserve timeout. Each thread owns its own Beanstalkd connection.
    #
    # @param jobs_processed [Concurrent::AtomicFixnum] Shared job counter
    # @param gc_limit [Integer, nil] Maximum jobs before exit (nil = unlimited)
    # @param fork_num [Integer, nil] Fork identifier for logging (nil in single-process mode)
    # @return [void]
    # @api private
    def run_job_loop(jobs_processed, gc_limit, fork_num: nil)
      connection = Postburner::Connection.new
      timeout = worker_config[:timeout]
      # Preserve the original log prefixes for both modes.
      thread_label = fork_num ? "Fork #{fork_num} thread" : 'Thread'

      watch_queues(connection, config.queue_names)

      until shutdown? || (gc_limit && jobs_processed.value >= gc_limit)
        begin
          job = connection.beanstalk.tubes.reserve(timeout: timeout)

          if job
            logger.debug "[Postburner::Worker] #{thread_label} #{Thread.current.object_id} reserved job #{job.id}"
            execute_job(job)
            jobs_processed.increment
          else
            # A nil reserve means the tube was idle; make sure the
            # scheduler watchdog still exists.
            ensure_scheduler_watchdog!(connection)
          end
        rescue Beaneater::TimedOutError
          ensure_scheduler_watchdog!(connection)
          next
        rescue Beaneater::NotConnected => e
          logger.error "[Postburner::Worker] Thread disconnected: #{e.message}"
          sleep 1
          connection.reconnect!
          watch_queues(connection, config.queue_names)
        rescue => e
          logger.error "[Postburner::Worker] Thread error: #{e.class} - #{e.message}"
          logger.error e.backtrace.join("\n")
          sleep 1
        end
      end
    ensure
      # Best-effort close; the connection may already be dead.
      connection&.close rescue nil
    end

    # Starts worker in forked mode (forks: 1+).
    #
    # Spawns child processes and monitors them for exit. Automatically
    # respawns children that exit with code 99 (GC limit) or unexpectedly.
    # Initiates graceful shutdown when shutdown flag is set.
    #
    # @return [void]
    # @api private
    def start_forked_mode
      logger.info "[Postburner::Worker] Mode: Multi-process (#{worker_config[:forks]} forks)"

      @children = {}

      worker_config[:forks].times do |fork_num|
        spawn_fork(fork_num)
      end

      until shutdown?
        begin
          pid, status = Process.wait2(-1, Process::WNOHANG)

          if pid
            fork_num = @children.delete(pid)
            exit_code = status.exitstatus

            if exit_code == 99
              logger.debug "[Postburner::Worker] Fork #{fork_num} reached GC limit, restarting..."
            else
              logger.error "[Postburner::Worker] Fork #{fork_num} exited unexpectedly (code: #{exit_code})"
            end
            # In both cases the child is respawned unless we are shutting down.
            spawn_fork(fork_num) unless shutdown?
          end

          sleep 0.5
        rescue Errno::ECHILD
          # No children remain; nothing left to monitor.
          break
        rescue => e
          logger.error "[Postburner::Worker] Monitor error: #{e.message}"
          sleep 1
        end
      end

      logger.info "[Postburner::Worker] Shutting down, waiting for children..."
      shutdown_children
      logger.info "[Postburner::Worker] Shutdown complete"
    end

    # Spawns a single forked worker process.
    #
    # @param fork_num [Integer] Fork identifier (0-based index)
    # @return [void]
    # @api private
    def spawn_fork(fork_num)
      pid = fork { run_fork(fork_num) }
      @children[pid] = fork_num
      logger.info "[Postburner::Worker] Spawned fork #{fork_num} (pid: #{pid})"
    end

    # Runs the thread pool worker in a forked process.
    #
    # Creates a thread pool within the forked process and processes jobs
    # until shutdown or GC limit is reached. Exits with code 99 on GC limit,
    # 0 on graceful shutdown, or 1 on error.
    #
    # @param fork_num [Integer] Fork identifier for logging
    # @return [void]
    # @api private
    def run_fork(fork_num)
      thread_count = worker_config[:threads]
      gc_limit = worker_config[:gc_limit]

      logger.info "[Postburner::Worker] Fork #{fork_num}: #{thread_count} threads, GC limit #{gc_limit || 'unlimited'}"

      jobs_processed = Concurrent::AtomicFixnum.new(0)
      pool = Concurrent::FixedThreadPool.new(thread_count)

      thread_count.times do
        pool.post { process_jobs_in_fork(fork_num, jobs_processed, gc_limit) }
      end

      until shutdown? || (gc_limit && jobs_processed.value >= gc_limit)
        sleep 0.5
      end

      pool.shutdown
      pool.wait_for_termination(worker_config[:shutdown_timeout])

      if gc_limit && jobs_processed.value >= gc_limit
        logger.debug "[Postburner::Worker] Fork #{fork_num} reached GC limit (#{jobs_processed.value} jobs), exiting for restart..."
        exit 99
      else
        logger.info "[Postburner::Worker] Fork #{fork_num} shutting down gracefully..."
        exit 0
      end
    rescue => e
      logger.error "[Postburner::Worker] Fork #{fork_num} error: #{e.message}"
      logger.error e.backtrace.join("\n")
      exit 1
    end

    # Gracefully shuts down all child processes.
    #
    # Sends TERM signal to all children and waits up to shutdown_timeout
    # (defaults to default_ttr) for graceful shutdown. Force kills any
    # remaining children with KILL. Jobs killed mid-execution will be
    # re-released by Beanstalkd after their TTR expires.
    #
    # @return [void]
    # @api private
    def shutdown_children
      @children.keys.each do |pid|
        Process.kill('TERM', pid) rescue nil
      end

      deadline = Time.current + worker_config[:shutdown_timeout]
      until @children.empty? || Time.current > deadline
        begin
          pid, _status = Process.wait2(-1, Process::WNOHANG)
          @children.delete(pid) if pid
        rescue Errno::ECHILD
          # Fixed: wait2 raises ECHILD once every child has been reaped
          # (e.g. by the monitor loop); previously this escaped the method.
          @children.clear
          break
        end
        sleep 0.5
      end

      @children.keys.each do |pid|
        begin
          Process.kill('KILL', pid)
          Process.wait(pid)
        rescue Errno::ESRCH
          # Already exited
        end
      end
    end

    # Executes a job from Beanstalkd.
    #
    # Parses the job payload and dispatches to either scheduler watchdog
    # execution or regular job execution based on payload type.
    #
    # @param beanstalk_job [Beaneater::Job] Reserved job from Beanstalkd
    # @return [void]
    # @api private
    def execute_job(beanstalk_job)
      payload = JSON.parse(beanstalk_job.body)

      if payload['scheduler'] == true
        execute_scheduler_job(beanstalk_job, payload)
      else
        execute_regular_job(beanstalk_job, payload)
      end
    rescue => e
      handle_error(beanstalk_job, e)
    end

    # Execute a scheduler watchdog job.
    #
    # Instantiates the Scheduler and runs it to process due schedules.
    # Deletes the job from Beanstalkd after completion.
    #
    # @param beanstalk_job [Beaneater::Job] Reserved scheduler job
    # @param payload [Hash] Parsed job body with 'scheduler' and 'interval' keys
    # @return [void]
    # @see Postburner::Scheduler#perform
    # @api private
    def execute_scheduler_job(beanstalk_job, payload)
      logger.info "[Postburner] Executing scheduler watchdog #{beanstalk_job.id}"

      interval = payload['interval'] || 300
      scheduler = Postburner::Scheduler.new(interval: interval, logger: logger)
      scheduler.perform

      logger.info "[Postburner] Completed scheduler watchdog #{beanstalk_job.id}"
      delete_job!(beanstalk_job)
    end

    # Execute a regular job (ActiveJob or Postburner::Job).
    #
    # Delegates execution to {Postburner::ActiveJob::Execution} which handles
    # both ActiveJob and legacy Postburner::Job formats. Deletes the job
    # from Beanstalkd after successful completion.
    #
    # Instruments with ActiveSupport::Notifications:
    # - perform_start.postburner: Before job execution
    # - perform.postburner: Around job execution (includes duration)
    #
    # @param beanstalk_job [Beaneater::Job] Reserved job from Beanstalkd
    # @param payload [Hash] Parsed job body
    # @return [void]
    # @see Postburner::ActiveJob::Execution.execute
    # @api private
    def execute_regular_job(beanstalk_job, payload)
      job_description = format_job_description(payload)
      instrument_payload = { payload: payload, beanstalk_job_id: beanstalk_job.id }

      logger.info "[Postburner] Executing #{job_description} (bkid: #{beanstalk_job.id})"

      ActiveSupport::Notifications.instrument('perform_start.postburner', instrument_payload)

      ActiveSupport::Notifications.instrument('perform.postburner', instrument_payload) do
        Postburner::ActiveJob::Execution.execute(beanstalk_job.body)
      end

      logger.info "[Postburner] Completed #{job_description} (bkid: #{beanstalk_job.id})"
      delete_job!(beanstalk_job)
    end

    # Format job description for logging.
    #
    # For Postburner::Job subclasses: Shows class name and database ID (e.g., HeartbeatJob#123)
    # For ActiveJob classes: Shows class name only (e.g., SomeActiveJob)
    #
    # @param payload [Hash] Parsed job body
    # @return [String] Human-readable job description
    # @api private
    def format_job_description(payload)
      if Postburner::ActiveJob::Payload.legacy_format?(payload)
        # Legacy Postburner::Job format: { "class" => "HeartbeatJob", "args" => [123] }
        job_class = payload['class'] || 'UnknownJob'
        job_id = payload['args']&.first
        job_id ? "#{job_class}##{job_id}" : job_class
      else
        # ActiveJob format: { "job_class" => "SomeActiveJob", ... }
        payload['job_class'] || 'UnknownJob'
      end
    end

    # Handles job execution errors with retry logic.
    #
    # For tracked jobs and legacy Postburner::Job: Buries the job for inspection.
    # For default ActiveJob: Applies exponential backoff retry with max 5 attempts.
    #
    # @param beanstalk_job [Beaneater::Job] Failed job
    # @param error [Exception] The error that caused the failure
    # @return [void]
    # @api private
    def handle_error(beanstalk_job, error)
      logger.error "[Postburner] Job failed: #{error.class} - #{error.message}"
      logger.error error.backtrace.join("\n")

      begin
        payload = JSON.parse(beanstalk_job.body)

        if payload['tracked'] || Postburner::ActiveJob::Payload.legacy_format?(payload)
          logger.info "[Postburner] Burying tracked/legacy job for inspection"
          beanstalk_job.bury
        else
          handle_default_retry(beanstalk_job, payload, error)
        end
      rescue => retry_error
        # Last resort: bury so the job is not lost, swallowing any
        # secondary failure.
        logger.error "[Postburner] Error handling failure: #{retry_error.message}"
        beanstalk_job.bury rescue nil
      end
    end

    # Handles retry logic for default jobs.
    #
    # Applies exponential backoff (2^retry_count seconds, max 1 hour).
    # After 5 failed attempts, discards the job permanently.
    #
    # Instruments with ActiveSupport::Notifications:
    # - retry.postburner: When job is retried
    # - discard.postburner: When job is discarded after max retries
    #
    # @param beanstalk_job [Beaneater::Job] Failed job to retry
    # @param payload [Hash] Parsed job body (modified with retry_count)
    # @param error [Exception] The error that caused the failure
    # @return [void]
    # @api private
    def handle_default_retry(beanstalk_job, payload, error)
      retry_count = payload['retry_count'] || 0
      max_retries = 5

      if retry_count < max_retries
        payload['retry_count'] = retry_count + 1
        payload['executions'] = (payload['executions'] || 0) + 1

        delay = [2 ** retry_count, 3600].min

        # NOTE(review): the job is deleted before the replacement is put;
        # a failure between the two loses the job. Kept as-is to preserve
        # the original at-most-once semantics.
        beanstalk_job.delete

        Postburner.connected do |conn|
          tube_name = expand_tube_name(payload['queue_name'])
          # Fixed: every other Postburner.connected block in this file
          # accesses tubes via conn.beanstalk.tubes; this one used
          # conn.tubes directly.
          conn.beanstalk.tubes[tube_name].put(
            JSON.generate(payload),
            pri: payload['priority'] || config.default_priority,
            delay: delay,
            ttr: payload['ttr'] || config.default_ttr
          )
        end

        ActiveSupport::Notifications.instrument('retry.postburner', {
          payload: payload,
          beanstalk_job_id: beanstalk_job.id,
          error: error,
          wait: delay,
          attempt: retry_count + 1
        })

        logger.info "[Postburner] Retrying default job #{payload['job_id']}, attempt #{retry_count + 1} in #{delay}s"
      else
        ActiveSupport::Notifications.instrument('discard.postburner', {
          payload: payload,
          beanstalk_job_id: beanstalk_job.id,
          error: error
        })

        logger.error "[Postburner] Discarding default job #{payload['job_id']} after #{retry_count} retries"
        beanstalk_job.delete
      end
    end

    # Watches all configured queues in Beanstalkd.
    #
    # Expands queue names to full tube names and sets up the connection
    # to watch them. Always includes the scheduler tube.
    #
    # @param connection [Postburner::Connection] Beanstalkd connection
    # @param queue_names [Array<String>, nil] Queue names to watch (uses config if nil)
    # @return [void]
    # @api private
    def watch_queues(connection, queue_names = nil)
      # Fixed: the documented nil default previously raised NoMethodError
      # (`nil.map`); treat nil the same as an empty list and fall back to
      # the configured tube names.
      tube_names = (queue_names || []).map { |q| config.expand_tube_name(q) }
      tube_names = config.expanded_tube_names if tube_names.empty?

      # Always watch scheduler tube
      tube_names << config.scheduler_tube_name unless tube_names.include?(config.scheduler_tube_name)

      connection.beanstalk.tubes.watch!(*tube_names)
      logger.debug "[Postburner::Worker] Thread #{Thread.current.object_id} watching: #{tube_names.join(', ')}"
    end

    # Ensure scheduler watchdog exists in Beanstalkd.
    #
    # Called on reserve timeout to verify the watchdog job exists.
    # Creates a new watchdog if none found, ensuring the scheduler
    # continues running even when job volume is low.
    #
    # @param connection [Postburner::Connection] Beanstalkd connection
    # @return [void]
    # @see Postburner::Scheduler.ensure_watchdog!
    # @api private
    def ensure_scheduler_watchdog!(connection)
      Postburner::Scheduler.ensure_watchdog!(connection: connection)
    end

    # Deletes a job from Beanstalkd with one retry on failure.
    #
    # If delete fails (e.g., network blip), waits 1 second and retries once.
    # This reduces the window for duplicate job execution while keeping
    # the implementation simple.
    #
    # @param beanstalk_job [Beaneater::Job] Job to delete
    # @return [void]
    # @api private
    def delete_job!(beanstalk_job)
      beanstalk_job.delete
    rescue => e
      logger.warn "[Postburner] Delete failed (#{e.message}), retrying..."
      sleep 1
      beanstalk_job.delete rescue nil
    end
  end
end