workhorse 1.3.1 → 1.4.0.rc0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +12 -1
- data/CHANGELOG.md +10 -0
- data/Gemfile +1 -1
- data/LICENSE +1 -1
- data/README.md +28 -1
- data/VERSION +1 -1
- data/lib/workhorse/daemon/shell_handler.rb +14 -4
- data/lib/workhorse/daemon.rb +28 -2
- data/lib/workhorse/db_job.rb +1 -1
- data/lib/workhorse/poller.rb +8 -4
- data/lib/workhorse/scoped_env.rb +4 -5
- data/lib/workhorse/worker.rb +82 -3
- data/test/active_job/queue_adapters/workhorse_adapter_test.rb +6 -6
- data/test/lib/test_helper.rb +22 -0
- data/test/workhorse/daemon_test.rb +73 -0
- data/test/workhorse/poller_test.rb +19 -15
- data/test/workhorse/worker_test.rb +91 -9
- data/workhorse.gemspec +4 -4
- metadata +5 -5
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 66d11e1801fa64ed11dbb329dd843eef6436cdd2d999abf5786ffa013f768dbe
|
|
4
|
+
data.tar.gz: d03c50b4d34f32492386ec8c20ba065ad862e7bf9e1c90b03e0641f1f0b8b4db
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a3fa8b847ca0d0e68112a4f5c59c34fd24955010185bd5a4c712571e0e1dd8e3340156873bd7dc61f1e1186b04d4bda9cb9d3bfdad036209e760390a43e6a127
|
|
7
|
+
data.tar.gz: ad309e55cfb166fcd95b907f75cf2a07009b8d2c49335256505a6e0f54172b44375fd7182b972806601bbd32df43b9431977367fcafe24400ae5769658a30a06
|
data/.rubocop.yml
CHANGED
|
@@ -2,7 +2,7 @@ AllCops:
|
|
|
2
2
|
DisplayCopNames: true
|
|
3
3
|
NewCops: enable
|
|
4
4
|
SuggestExtensions: false
|
|
5
|
-
TargetRubyVersion:
|
|
5
|
+
TargetRubyVersion: 3.0
|
|
6
6
|
Exclude:
|
|
7
7
|
- 'local/**/*'
|
|
8
8
|
- 'vendor/**/*'
|
|
@@ -122,9 +122,15 @@ Layout/LineLength:
|
|
|
122
122
|
|
|
123
123
|
# Prefer variable_1 over variable1 for aesthetic reasons. Do not check symbols,
|
|
124
124
|
# as they often need to be another case for use in external places (e.g. :md5).
|
|
125
|
+
# $0 is a Ruby special variable for the process name.
|
|
126
|
+
# Test method names often contain numbers (e.g., test_issue_22).
|
|
125
127
|
Naming/VariableNumber:
|
|
126
128
|
EnforcedStyle: snake_case
|
|
127
129
|
CheckSymbols: false
|
|
130
|
+
AllowedPatterns:
|
|
131
|
+
- '\$0'
|
|
132
|
+
Exclude:
|
|
133
|
+
- 'test/**/*'
|
|
128
134
|
|
|
129
135
|
# Depending on the surrounding code, even simple if/unless clauses may be more
|
|
130
136
|
# descriptive when on multiple lines.
|
|
@@ -187,3 +193,8 @@ Style/HashSyntax:
|
|
|
187
193
|
# Allow rescue 'Exception', necessary for Workhorse
|
|
188
194
|
Lint/RescueException:
|
|
189
195
|
Enabled: false
|
|
196
|
+
|
|
197
|
+
# check_memory returns boolean but is not a pure predicate (has side effects)
|
|
198
|
+
Naming/PredicateMethod:
|
|
199
|
+
AllowedMethods:
|
|
200
|
+
- check_memory
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,15 @@
|
|
|
1
1
|
# Workhorse Changelog
|
|
2
2
|
|
|
3
|
+
## 1.4.0.rc0 - 2026-02-11
|
|
4
|
+
|
|
5
|
+
* Add `soft-restart` daemon command for graceful worker restarts. Sends a
|
|
6
|
+
`USR1` signal to all workers, causing them to stop accepting new jobs and shut
|
|
7
|
+
down after completing any currently running job. The caller is not blocked
|
|
8
|
+
(fire-and-forget). Use in combination with the `watch` command (typically via
|
|
9
|
+
cron) to automatically start fresh workers after shutdown.
|
|
10
|
+
|
|
11
|
+
Sitrox reference: #140281.
|
|
12
|
+
|
|
3
13
|
## 1.3.1 - 2025-11-05
|
|
4
14
|
|
|
5
15
|
* Adapt shell handler locking behavior depending on command:
|
data/Gemfile
CHANGED
data/LICENSE
CHANGED
data/README.md
CHANGED
|
@@ -241,7 +241,7 @@ For this case, the workhorse install routine automatically creates the file
|
|
|
241
241
|
The script can be called as follows:
|
|
242
242
|
|
|
243
243
|
```bash
|
|
244
|
-
RAILS_ENV=production bundle exec bin/workhorse.rb start|stop|kill|status|watch|restart|usage
|
|
244
|
+
RAILS_ENV=production bundle exec bin/workhorse.rb start|stop|kill|status|watch|restart|soft-restart|usage
|
|
245
245
|
```
|
|
246
246
|
|
|
247
247
|
#### Background and customization
|
|
@@ -488,6 +488,33 @@ Workhorse.setup do |config|
|
|
|
488
488
|
end
|
|
489
489
|
```
|
|
490
490
|
|
|
491
|
+
## Soft restart
|
|
492
|
+
|
|
493
|
+
The `soft-restart` command provides a way to gracefully restart all worker
|
|
494
|
+
processes without interrupting jobs that are currently running. It sends a
|
|
495
|
+
`USR1` signal to each worker, which causes the worker to:
|
|
496
|
+
|
|
497
|
+
1. Stop accepting new jobs immediately.
|
|
498
|
+
2. Wait for any currently running job to complete.
|
|
499
|
+
3. Shut down and create a shutdown file (`tmp/pids/workhorse.<pid>.shutdown`).
|
|
500
|
+
|
|
501
|
+
The command returns immediately (fire-and-forget) and does not block the caller.
|
|
502
|
+
|
|
503
|
+
**Important:** The `soft-restart` command only *stops* workers gracefully. To
|
|
504
|
+
start fresh workers after shutdown, you need the `watch` command running
|
|
505
|
+
(typically via cron). Without `watch`, `soft-restart` behaves like a graceful
|
|
506
|
+
stop with no automatic recovery.
|
|
507
|
+
|
|
508
|
+
Example usage:
|
|
509
|
+
|
|
510
|
+
```bash
|
|
511
|
+
# Trigger soft restart
|
|
512
|
+
RAILS_ENV=production bundle exec bin/workhorse.rb soft-restart
|
|
513
|
+
|
|
514
|
+
# The watch command (e.g. via cron) will automatically start fresh workers
|
|
515
|
+
*/1 * * * * cd /my/app && RAILS_ENV=production bundle exec bin/workhorse.rb watch
|
|
516
|
+
```
|
|
517
|
+
|
|
491
518
|
## Load hooks
|
|
492
519
|
|
|
493
520
|
Using the load hook `:workhorse_db_job`, you can inject custom code into the
|
data/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
1.3.1
|
|
1
|
+
1.4.0.rc0
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
module Workhorse
|
|
2
2
|
class Daemon::ShellHandler
|
|
3
3
|
def self.run(**options, &block)
|
|
4
|
-
unless ARGV.
|
|
4
|
+
unless ARGV.one?
|
|
5
5
|
usage
|
|
6
6
|
exit 99
|
|
7
7
|
end
|
|
@@ -34,6 +34,9 @@ module Workhorse
|
|
|
34
34
|
when 'restart-logging'
|
|
35
35
|
lockfile = acquire_lock(lockfile_path, File::LOCK_EX)
|
|
36
36
|
status = daemon.restart_logging
|
|
37
|
+
when 'soft-restart'
|
|
38
|
+
lockfile = acquire_lock(lockfile_path, File::LOCK_EX)
|
|
39
|
+
status = daemon.soft_restart
|
|
37
40
|
when 'usage'
|
|
38
41
|
usage
|
|
39
42
|
status = 0
|
|
@@ -52,7 +55,7 @@ module Workhorse
|
|
|
52
55
|
|
|
53
56
|
def self.usage
|
|
54
57
|
warn <<~USAGE
|
|
55
|
-
Usage: #{$PROGRAM_NAME} start|stop|status|watch|restart|usage
|
|
58
|
+
Usage: #{$PROGRAM_NAME} start|stop|status|watch|restart|soft-restart|usage
|
|
56
59
|
|
|
57
60
|
Options:
|
|
58
61
|
|
|
@@ -80,6 +83,14 @@ module Workhorse
|
|
|
80
83
|
Re-opens log files, useful e.g. after the log files have been moved or
|
|
81
84
|
removed by log rotation.
|
|
82
85
|
|
|
86
|
+
soft-restart
|
|
87
|
+
Signals workers to restart gracefully. Idle workers restart
|
|
88
|
+
immediately; busy workers finish their current job first. Returns
|
|
89
|
+
immediately (fire-and-forget).
|
|
90
|
+
NOTE: Requires 'watch' (typically via cron) to start fresh workers.
|
|
91
|
+
Without 'watch', this behaves like a graceful stop with no automatic
|
|
92
|
+
recovery.
|
|
93
|
+
|
|
83
94
|
usage
|
|
84
95
|
Show this message
|
|
85
96
|
|
|
@@ -91,8 +102,6 @@ module Workhorse
|
|
|
91
102
|
USAGE
|
|
92
103
|
end
|
|
93
104
|
|
|
94
|
-
private
|
|
95
|
-
|
|
96
105
|
def self.acquire_lock(lockfile_path, flags)
|
|
97
106
|
if Workhorse.lock_shell_commands
|
|
98
107
|
lockfile = File.open(lockfile_path, 'a')
|
|
@@ -103,5 +112,6 @@ module Workhorse
|
|
|
103
112
|
|
|
104
113
|
return nil
|
|
105
114
|
end
|
|
115
|
+
private_class_method :acquire_lock
|
|
106
116
|
end
|
|
107
117
|
end
|
data/lib/workhorse/daemon.rb
CHANGED
|
@@ -214,6 +214,32 @@ module Workhorse
|
|
|
214
214
|
return code
|
|
215
215
|
end
|
|
216
216
|
|
|
217
|
+
# Sends USR1 signal to all workers to initiate a soft restart.
|
|
218
|
+
# Workers will finish their current jobs before shutting down.
|
|
219
|
+
# The watch mechanism will then start fresh workers.
|
|
220
|
+
# This method returns immediately (fire-and-forget).
|
|
221
|
+
#
|
|
222
|
+
# @return [Integer] Exit code (0 = success, 2 = some signals failed)
|
|
223
|
+
def soft_restart
|
|
224
|
+
code = 0
|
|
225
|
+
|
|
226
|
+
for_each_worker do |worker|
|
|
227
|
+
_pid_file, pid, active = read_pid(worker)
|
|
228
|
+
|
|
229
|
+
next unless pid && active
|
|
230
|
+
|
|
231
|
+
begin
|
|
232
|
+
Process.kill 'USR1', pid
|
|
233
|
+
puts "Worker (#{worker.name}) ##{worker.id}: Sent soft-restart signal"
|
|
234
|
+
rescue Errno::ESRCH
|
|
235
|
+
warn "Worker (#{worker.name}) ##{worker.id}: Process not found"
|
|
236
|
+
code = 2
|
|
237
|
+
end
|
|
238
|
+
end
|
|
239
|
+
|
|
240
|
+
return code
|
|
241
|
+
end
|
|
242
|
+
|
|
217
243
|
private
|
|
218
244
|
|
|
219
245
|
# Executes the given block for each defined worker.
|
|
@@ -236,8 +262,8 @@ module Workhorse
|
|
|
236
262
|
pid = fork do
|
|
237
263
|
$0 = process_name(worker)
|
|
238
264
|
# Reopen pipes to prevent #107576
|
|
239
|
-
$stdin.reopen File.open(
|
|
240
|
-
null_out = File.open
|
|
265
|
+
$stdin.reopen File.open(File::NULL, 'r')
|
|
266
|
+
null_out = File.open File::NULL, 'w'
|
|
241
267
|
$stdout.reopen null_out
|
|
242
268
|
$stderr.reopen null_out
|
|
243
269
|
|
data/lib/workhorse/db_job.rb
CHANGED
|
@@ -19,7 +19,7 @@ module Workhorse
|
|
|
19
19
|
STATE_SUCCEEDED = :succeeded
|
|
20
20
|
STATE_FAILED = :failed
|
|
21
21
|
|
|
22
|
-
EXP_LOCKED_BY = /^(.*?)\.(\d+?)\.([^.]+)
|
|
22
|
+
EXP_LOCKED_BY = /^(.*?)\.(\d+?)\.([^.]+)$/
|
|
23
23
|
|
|
24
24
|
if respond_to?(:attr_accessible)
|
|
25
25
|
attr_accessible :queue, :priority, :perform_at, :handler, :description
|
data/lib/workhorse/poller.rb
CHANGED
|
@@ -253,9 +253,11 @@ module Workhorse
|
|
|
253
253
|
# @return [void]
|
|
254
254
|
# @private
|
|
255
255
|
def poll
|
|
256
|
+
return unless worker.accepting_jobs?
|
|
257
|
+
|
|
256
258
|
@instant_repoll.make_false
|
|
257
259
|
|
|
258
|
-
timeout =
|
|
260
|
+
timeout = worker.polling_interval.clamp(MIN_LOCK_TIMEOUT, MAX_LOCK_TIMEOUT)
|
|
259
261
|
with_global_lock timeout: timeout do
|
|
260
262
|
job_ids = []
|
|
261
263
|
|
|
@@ -277,8 +279,8 @@ module Workhorse
|
|
|
277
279
|
end
|
|
278
280
|
end
|
|
279
281
|
|
|
280
|
-
unless running?
|
|
281
|
-
worker.log 'Rolling back transaction to unlock jobs, as worker
|
|
282
|
+
unless running? && worker.accepting_jobs?
|
|
283
|
+
worker.log 'Rolling back transaction to unlock jobs, as worker is no longer accepting jobs'
|
|
282
284
|
fail ActiveRecord::Rollback
|
|
283
285
|
end
|
|
284
286
|
end
|
|
@@ -288,7 +290,9 @@ module Workhorse
|
|
|
288
290
|
# non-blocking and thus directly conclude the block and the transaction,
|
|
289
291
|
# there would still be a risk that the transaction is not committed yet
|
|
290
292
|
# when the job starts.
|
|
291
|
-
|
|
293
|
+
# Also check accepting_jobs? to prevent posting if soft restart was requested
|
|
294
|
+
# while we were acquiring the lock or querying jobs.
|
|
295
|
+
job_ids.each { |job_id| worker.perform(job_id) } if running? && worker.accepting_jobs?
|
|
292
296
|
end
|
|
293
297
|
end
|
|
294
298
|
|
data/lib/workhorse/scoped_env.rb
CHANGED
|
@@ -16,16 +16,15 @@ module Workhorse
|
|
|
16
16
|
end
|
|
17
17
|
|
|
18
18
|
# Handles method delegation to the configured objects.
|
|
19
|
+
# Uses argument forwarding to pass all arguments to the delegated method.
|
|
19
20
|
#
|
|
20
21
|
# @param symbol [Symbol] Method name
|
|
21
|
-
# @param args [Array] Method arguments
|
|
22
|
-
# @param block [Proc, nil] Block to pass to the method
|
|
23
22
|
# @return [Object] Result of the delegated method call
|
|
24
|
-
def method_missing(symbol,
|
|
23
|
+
def method_missing(symbol, ...)
|
|
25
24
|
if @methods.include?(symbol)
|
|
26
|
-
@delegation_object.send(symbol,
|
|
25
|
+
@delegation_object.send(symbol, ...)
|
|
27
26
|
elsif @backup_binding.try(:respond_to?, symbol)
|
|
28
|
-
@backup_binding.send(symbol,
|
|
27
|
+
@backup_binding.send(symbol, ...)
|
|
29
28
|
else
|
|
30
29
|
super
|
|
31
30
|
end
|
data/lib/workhorse/worker.rb
CHANGED
|
@@ -21,6 +21,7 @@ module Workhorse
|
|
|
21
21
|
LOG_LEVELS = %i[fatal error warn info debug].freeze
|
|
22
22
|
SHUTDOWN_SIGNALS = %w[TERM INT].freeze
|
|
23
23
|
LOG_REOPEN_SIGNAL = 'HUP'.freeze
|
|
24
|
+
SOFT_RESTART_SIGNAL = 'USR1'.freeze
|
|
24
25
|
|
|
25
26
|
# @return [Array<Symbol>] The queues this worker processes
|
|
26
27
|
attr_reader :queues
|
|
@@ -97,6 +98,7 @@ module Workhorse
|
|
|
97
98
|
@pool = Pool.new(@pool_size)
|
|
98
99
|
@poller = Workhorse::Poller.new(self, proc { check_memory })
|
|
99
100
|
@logger = logger
|
|
101
|
+
@soft_restart_requested = Concurrent::AtomicBoolean.new(false)
|
|
100
102
|
|
|
101
103
|
unless (@polling_interval / 0.1).round(2).modulo(1).zero?
|
|
102
104
|
fail 'Polling interval must be a multiple of 0.1.'
|
|
@@ -158,6 +160,7 @@ module Workhorse
|
|
|
158
160
|
|
|
159
161
|
trap_termination if @auto_terminate
|
|
160
162
|
trap_log_reopen
|
|
163
|
+
trap_soft_restart
|
|
161
164
|
end
|
|
162
165
|
end
|
|
163
166
|
|
|
@@ -213,6 +216,14 @@ module Workhorse
|
|
|
213
216
|
@pool.idle
|
|
214
217
|
end
|
|
215
218
|
|
|
219
|
+
# Returns whether this worker is accepting new jobs.
|
|
220
|
+
# Returns false when a soft restart has been requested.
|
|
221
|
+
#
|
|
222
|
+
# @return [Boolean] True if accepting jobs, false otherwise
|
|
223
|
+
def accepting_jobs?
|
|
224
|
+
@soft_restart_requested.false?
|
|
225
|
+
end
|
|
226
|
+
|
|
216
227
|
# Schedules a job for execution in the thread pool.
|
|
217
228
|
#
|
|
218
229
|
# @param db_job_id [Integer] The ID of the {Workhorse::DbJob} to perform
|
|
@@ -260,8 +271,8 @@ module Workhorse
|
|
|
260
271
|
FileUtils.touch self.class.shutdown_file_for(pid)
|
|
261
272
|
end
|
|
262
273
|
|
|
263
|
-
log "Worker process #{id.inspect} memory consumption (RSS) of #{mem}MB exceeds "\
|
|
264
|
-
"configured per-worker limit of #{max}MB and is now being shut down. Make sure "\
|
|
274
|
+
log "Worker process #{id.inspect} memory consumption (RSS) of #{mem}MB exceeds " \
|
|
275
|
+
"configured per-worker limit of #{max}MB and is now being shut down. Make sure " \
|
|
265
276
|
'that your worker processes are watched (e.g. using the "watch"-command) for ' \
|
|
266
277
|
'this worker to be restarted automatically.'
|
|
267
278
|
|
|
@@ -273,7 +284,7 @@ module Workhorse
|
|
|
273
284
|
# @return [Integer, nil] Memory usage in MB or nil if unable to determine
|
|
274
285
|
# @private
|
|
275
286
|
def current_memory_consumption
|
|
276
|
-
mem = `ps -p #{pid} -o rss
|
|
287
|
+
mem = `ps -p #{pid} -o rss=`.strip
|
|
277
288
|
return nil if mem.blank?
|
|
278
289
|
return mem.to_i / 1024
|
|
279
290
|
end
|
|
@@ -312,5 +323,73 @@ module Workhorse
|
|
|
312
323
|
end
|
|
313
324
|
end
|
|
314
325
|
end
|
|
326
|
+
|
|
327
|
+
# Initiates a soft restart of the worker.
|
|
328
|
+
# Creates a shutdown file for the watch mechanism, then waits for all
|
|
329
|
+
# currently running jobs to complete before shutting down.
|
|
330
|
+
# This method returns immediately; shutdown happens asynchronously.
|
|
331
|
+
#
|
|
332
|
+
# @return [void]
|
|
333
|
+
# @private
|
|
334
|
+
def soft_restart
|
|
335
|
+
return if @state == :shutdown
|
|
336
|
+
|
|
337
|
+
return unless @soft_restart_requested.make_true
|
|
338
|
+
|
|
339
|
+
# Create shutdown file for watch to detect
|
|
340
|
+
shutdown_file = self.class.shutdown_file_for(pid)
|
|
341
|
+
FileUtils.touch(shutdown_file) if shutdown_file
|
|
342
|
+
|
|
343
|
+
# Monitor in a separate thread to avoid blocking the signal handler
|
|
344
|
+
@soft_restart_thread = Thread.new do
|
|
345
|
+
begin
|
|
346
|
+
wait_for_idle_then_shutdown
|
|
347
|
+
rescue Exception => e
|
|
348
|
+
log %(Soft restart error: #{e.message}\n#{e.backtrace.join("\n")}), :error
|
|
349
|
+
Workhorse.on_exception.call(e)
|
|
350
|
+
end
|
|
351
|
+
end
|
|
352
|
+
end
|
|
353
|
+
|
|
354
|
+
# Sets up signal handler for soft restart (USR1 signal).
|
|
355
|
+
#
|
|
356
|
+
# @return [void]
|
|
357
|
+
# @private
|
|
358
|
+
def trap_soft_restart
|
|
359
|
+
Signal.trap(SOFT_RESTART_SIGNAL) do
|
|
360
|
+
# Start a new thread as certain functionality (such as logging) is not
|
|
361
|
+
# available from within a trap context.
|
|
362
|
+
Thread.new do
|
|
363
|
+
begin
|
|
364
|
+
log "\nCaught #{SOFT_RESTART_SIGNAL}, initiating soft restart..."
|
|
365
|
+
soft_restart
|
|
366
|
+
rescue Exception => e
|
|
367
|
+
log %(Soft restart signal handler error: #{e.message}\n#{e.backtrace.join("\n")}), :error
|
|
368
|
+
Workhorse.on_exception.call(e)
|
|
369
|
+
end
|
|
370
|
+
end
|
|
371
|
+
# Note: Unlike trap_termination, we don't join here because soft_restart
|
|
372
|
+
# is designed to be fire-and-forget (it spawns its own monitoring thread).
|
|
373
|
+
end
|
|
374
|
+
end
|
|
375
|
+
|
|
376
|
+
# Waits for all jobs to complete, then shuts down the worker.
|
|
377
|
+
# Called asynchronously from soft_restart.
|
|
378
|
+
#
|
|
379
|
+
# @return [void]
|
|
380
|
+
# @private
|
|
381
|
+
def wait_for_idle_then_shutdown
|
|
382
|
+
loop do
|
|
383
|
+
break if @state == :shutdown
|
|
384
|
+
|
|
385
|
+
if idle == @pool_size
|
|
386
|
+
log 'All jobs completed, shutting down for soft restart'
|
|
387
|
+
shutdown
|
|
388
|
+
break
|
|
389
|
+
end
|
|
390
|
+
|
|
391
|
+
Kernel.sleep 0.2
|
|
392
|
+
end
|
|
393
|
+
end
|
|
315
394
|
end
|
|
316
395
|
end
|
|
@@ -58,22 +58,22 @@ class ActiveJob::QueueAdapters::WorkhorseAdapterTest < WorkhorseTest
|
|
|
58
58
|
end
|
|
59
59
|
|
|
60
60
|
def test_wait
|
|
61
|
-
Job2.set(wait:
|
|
61
|
+
Job2.set(wait: 0.5.seconds).perform_later 'foo'
|
|
62
62
|
|
|
63
|
-
work
|
|
63
|
+
work 0.3, polling_interval: 0.1
|
|
64
64
|
assert_equal 'waiting', Workhorse::DbJob.first.state
|
|
65
65
|
|
|
66
|
-
work
|
|
66
|
+
work 0.5, polling_interval: 0.1
|
|
67
67
|
assert_equal 'succeeded', Workhorse::DbJob.first.reload.state
|
|
68
68
|
end
|
|
69
69
|
|
|
70
70
|
def test_wait_until
|
|
71
|
-
Job2.set(wait_until: (Time.now +
|
|
71
|
+
Job2.set(wait_until: (Time.now + 0.5.seconds)).perform_later 'foo'
|
|
72
72
|
|
|
73
|
-
work 0.
|
|
73
|
+
work 0.3, polling_interval: 0.1
|
|
74
74
|
assert_equal 'waiting', Workhorse::DbJob.first.state
|
|
75
75
|
|
|
76
|
-
work
|
|
76
|
+
work 0.5, polling_interval: 0.1
|
|
77
77
|
assert_equal 'succeeded', Workhorse::DbJob.first.reload.state
|
|
78
78
|
end
|
|
79
79
|
|
data/test/lib/test_helper.rb
CHANGED
|
@@ -79,6 +79,21 @@ class WorkhorseTest < ActiveSupport::TestCase
|
|
|
79
79
|
end
|
|
80
80
|
end
|
|
81
81
|
|
|
82
|
+
def wait_for_process_exit(pid, timeout: 5)
|
|
83
|
+
deadline = Time.now + timeout
|
|
84
|
+
loop do
|
|
85
|
+
Process.getpgid(pid)
|
|
86
|
+
if Time.now > deadline
|
|
87
|
+
fail "Process #{pid} did not exit within #{timeout} seconds"
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
sleep 0.01
|
|
91
|
+
Thread.pass # Give detach threads a chance to reap zombies
|
|
92
|
+
rescue Errno::ESRCH
|
|
93
|
+
return # Process is fully gone from process table
|
|
94
|
+
end
|
|
95
|
+
end
|
|
96
|
+
|
|
82
97
|
def capture_log(level: :debug)
|
|
83
98
|
io = StringIO.new
|
|
84
99
|
logger = Logger.new(io, level: level)
|
|
@@ -146,6 +161,13 @@ class WorkhorseTest < ActiveSupport::TestCase
|
|
|
146
161
|
end
|
|
147
162
|
end
|
|
148
163
|
|
|
164
|
+
def process?(pid)
|
|
165
|
+
Process.kill(0, pid)
|
|
166
|
+
true
|
|
167
|
+
rescue Errno::EPERM, Errno::ESRCH
|
|
168
|
+
false
|
|
169
|
+
end
|
|
170
|
+
|
|
149
171
|
def capture_stderr
|
|
150
172
|
old = $stderr
|
|
151
173
|
$stderr = StringIO.new
|
|
@@ -83,6 +83,79 @@ class Workhorse::DaemonTest < WorkhorseTest
|
|
|
83
83
|
assert_not File.exist?("tmp/pids/workhorse.#{daemon.workers.first.pid}.shutdown")
|
|
84
84
|
end
|
|
85
85
|
|
|
86
|
+
def test_soft_restart_returns_immediately
|
|
87
|
+
with_daemon 2 do
|
|
88
|
+
# Give workers time to fully start and register signal handlers
|
|
89
|
+
sleep 0.5
|
|
90
|
+
|
|
91
|
+
result = nil
|
|
92
|
+
elapsed = Benchmark.measure { result = daemon.soft_restart }.real
|
|
93
|
+
assert_equal 0, result
|
|
94
|
+
assert elapsed < 0.1, "soft_restart should return immediately, took #{elapsed}s"
|
|
95
|
+
|
|
96
|
+
# Wait for shutdown to complete before test cleanup
|
|
97
|
+
daemon.workers.each do |w|
|
|
98
|
+
with_retries(150) { assert_not process?(w.pid) }
|
|
99
|
+
end
|
|
100
|
+
end
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
def test_soft_restart_creates_shutdown_files_and_watch_restarts
|
|
104
|
+
with_daemon 2 do
|
|
105
|
+
old_pids = daemon.workers.map(&:pid)
|
|
106
|
+
|
|
107
|
+
# Give workers time to fully start and register signal handlers
|
|
108
|
+
sleep 0.5
|
|
109
|
+
|
|
110
|
+
daemon.soft_restart
|
|
111
|
+
|
|
112
|
+
# Wait for each worker to create shutdown file and exit
|
|
113
|
+
old_pids.each do |pid|
|
|
114
|
+
with_retries(100) do
|
|
115
|
+
assert File.exist?("tmp/pids/workhorse.#{pid}.shutdown"),
|
|
116
|
+
"Shutdown file for PID #{pid} should exist. Files: #{Dir['tmp/pids/*'].join(', ')}"
|
|
117
|
+
end
|
|
118
|
+
with_retries(100) do
|
|
119
|
+
assert_not process?(pid), "Process #{pid} should have exited"
|
|
120
|
+
end
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
# Watch should restart them and clean up shutdown files
|
|
124
|
+
capture_stderr { daemon.watch }
|
|
125
|
+
|
|
126
|
+
with_retries do
|
|
127
|
+
old_pids.each do |pid|
|
|
128
|
+
assert_not File.exist?("tmp/pids/workhorse.#{pid}.shutdown"),
|
|
129
|
+
"Shutdown file for PID #{pid} should be cleaned up"
|
|
130
|
+
end
|
|
131
|
+
|
|
132
|
+
# Workers should be running again with different PIDs
|
|
133
|
+
assert_equal 0, daemon.status(quiet: true)
|
|
134
|
+
new_pids = daemon.workers.map(&:pid)
|
|
135
|
+
assert_empty(old_pids & new_pids, 'New workers should have different PIDs than old workers')
|
|
136
|
+
end
|
|
137
|
+
end
|
|
138
|
+
end
|
|
139
|
+
|
|
140
|
+
def test_soft_restart_with_dead_worker
|
|
141
|
+
with_daemon 2 do
|
|
142
|
+
# Give workers time to fully start and register signal handlers
|
|
143
|
+
sleep 0.5
|
|
144
|
+
|
|
145
|
+
# Kill first worker so it's dead when we try to soft_restart
|
|
146
|
+
Process.kill 'KILL', daemon.workers.first.pid
|
|
147
|
+
with_retries { assert_not process?(daemon.workers.first.pid) }
|
|
148
|
+
|
|
149
|
+
# soft_restart returns 0 because read_pid detects the dead worker as
|
|
150
|
+
# inactive and skips it (the ESRCH rescue is never reached)
|
|
151
|
+
result = daemon.soft_restart
|
|
152
|
+
assert_equal 0, result
|
|
153
|
+
|
|
154
|
+
# Second worker should still soft-restart successfully
|
|
155
|
+
with_retries(150) { assert_not process?(daemon.workers.second.pid) }
|
|
156
|
+
end
|
|
157
|
+
end
|
|
158
|
+
|
|
86
159
|
private
|
|
87
160
|
|
|
88
161
|
def assert_watch_output(*expected_lines)
|
|
@@ -115,37 +115,37 @@ class Workhorse::PollerTest < WorkhorseTest
|
|
|
115
115
|
end
|
|
116
116
|
|
|
117
117
|
def test_already_locked_issue
|
|
118
|
-
# Create
|
|
119
|
-
|
|
118
|
+
# Create 50 jobs
|
|
119
|
+
50.times do |i|
|
|
120
120
|
Workhorse.enqueue BasicJob.new(some_param: i, sleep_time: 0)
|
|
121
121
|
end
|
|
122
122
|
|
|
123
|
-
# Create
|
|
124
|
-
|
|
123
|
+
# Create 10 worker processes that work for 3s each
|
|
124
|
+
10.times do
|
|
125
125
|
Process.fork do
|
|
126
|
-
work
|
|
126
|
+
work 3, pool_size: 1, polling_interval: 0.1
|
|
127
127
|
end
|
|
128
128
|
end
|
|
129
129
|
|
|
130
|
-
# Create additional
|
|
130
|
+
# Create additional 50 jobs that are scheduled while the workers are
|
|
131
131
|
# already polling (to make sure those are picked up as well)
|
|
132
|
-
|
|
133
|
-
sleep 0.
|
|
132
|
+
50.times do
|
|
133
|
+
sleep 0.02
|
|
134
134
|
Workhorse.enqueue BasicJob.new(sleep_time: 0)
|
|
135
135
|
end
|
|
136
136
|
|
|
137
|
-
# Wait for all forked processes to finish (should take ~
|
|
137
|
+
# Wait for all forked processes to finish (should take ~3s)
|
|
138
138
|
Process.waitall
|
|
139
139
|
|
|
140
140
|
total = Workhorse::DbJob.count
|
|
141
141
|
succeeded = Workhorse::DbJob.succeeded.count
|
|
142
142
|
used_workers = Workhorse::DbJob.lock.pluck(:locked_by).uniq.size
|
|
143
143
|
|
|
144
|
-
# Make sure there are
|
|
144
|
+
# Make sure there are 100 jobs, all jobs have succeeded and that all of the
|
|
145
145
|
# workers have had their turn.
|
|
146
|
-
assert_equal
|
|
147
|
-
assert_equal
|
|
148
|
-
assert_equal
|
|
146
|
+
assert_equal 100, total
|
|
147
|
+
assert_equal 100, succeeded
|
|
148
|
+
assert_equal 10, used_workers
|
|
149
149
|
end
|
|
150
150
|
|
|
151
151
|
def test_connection_loss
|
|
@@ -251,8 +251,12 @@ class Workhorse::PollerTest < WorkhorseTest
|
|
|
251
251
|
private
|
|
252
252
|
|
|
253
253
|
def kill_deamon_workers
|
|
254
|
-
daemon.workers.
|
|
255
|
-
|
|
254
|
+
pids = daemon.workers.map(&:pid)
|
|
255
|
+
pids.each do |pid|
|
|
256
|
+
Process.kill 'KILL', pid
|
|
257
|
+
# Wait for zombies to be reaped by Process.detach threads
|
|
258
|
+
# This is necessary because Process.getpgid succeeds for zombie processes
|
|
259
|
+
wait_for_process_exit(pid)
|
|
256
260
|
end
|
|
257
261
|
end
|
|
258
262
|
|
|
@@ -69,6 +69,95 @@ class Workhorse::WorkerTest < WorkhorseTest
|
|
|
69
69
|
end
|
|
70
70
|
end
|
|
71
71
|
|
|
72
|
+
def test_soft_restart_when_idle
|
|
73
|
+
with_worker(pool_size: 2, polling_interval: 0.2) do |w|
|
|
74
|
+
assert w.accepting_jobs?
|
|
75
|
+
|
|
76
|
+
Process.kill 'USR1', Process.pid
|
|
77
|
+
|
|
78
|
+
with_retries { assert_equal :shutdown, w.state }
|
|
79
|
+
assert File.exist?(Workhorse::Worker.shutdown_file_for(Process.pid))
|
|
80
|
+
end
|
|
81
|
+
ensure
|
|
82
|
+
FileUtils.rm_f Workhorse::Worker.shutdown_file_for(Process.pid)
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
def test_soft_restart_when_busy_waits_for_job
|
|
86
|
+
with_worker(pool_size: 1, polling_interval: 0.2) do |w|
|
|
87
|
+
Workhorse.enqueue BasicJob.new(sleep_time: 0.5)
|
|
88
|
+
with_retries { assert_equal 'started', Workhorse::DbJob.first.state }
|
|
89
|
+
|
|
90
|
+
Process.kill 'USR1', Process.pid
|
|
91
|
+
sleep 0.1
|
|
92
|
+
|
|
93
|
+
# Still running but not accepting jobs
|
|
94
|
+
w.assert_state! :running
|
|
95
|
+
assert_not w.accepting_jobs?
|
|
96
|
+
|
|
97
|
+
# Wait for job to finish and worker to shut down
|
|
98
|
+
with_retries { assert_equal :shutdown, w.state }
|
|
99
|
+
end
|
|
100
|
+
ensure
|
|
101
|
+
FileUtils.rm_f Workhorse::Worker.shutdown_file_for(Process.pid)
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
def test_soft_restart_prevents_new_job_pickup
|
|
105
|
+
with_worker(pool_size: 1, polling_interval: 0.2) do |w|
|
|
106
|
+
Workhorse.enqueue BasicJob.new(sleep_time: 0.4)
|
|
107
|
+
with_retries { assert_equal 'started', Workhorse::DbJob.first.state }
|
|
108
|
+
|
|
109
|
+
Process.kill 'USR1', Process.pid
|
|
110
|
+
sleep 0.1
|
|
111
|
+
|
|
112
|
+
# Enqueue another job while soft restart is pending
|
|
113
|
+
Workhorse.enqueue BasicJob.new(sleep_time: 0.1)
|
|
114
|
+
|
|
115
|
+
# Wait for worker to shut down
|
|
116
|
+
with_retries { assert_equal :shutdown, w.state }
|
|
117
|
+
|
|
118
|
+
jobs = Workhorse::DbJob.order(:id).to_a
|
|
119
|
+
assert_equal 'succeeded', jobs[0].state
|
|
120
|
+
assert_equal 'waiting', jobs[1].state # Not picked up due to soft restart
|
|
121
|
+
end
|
|
122
|
+
ensure
|
|
123
|
+
FileUtils.rm_f Workhorse::Worker.shutdown_file_for(Process.pid)
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
def test_soft_restart_double_signal
|
|
127
|
+
with_worker(pool_size: 1, polling_interval: 0.2) do |w|
|
|
128
|
+
Workhorse.enqueue BasicJob.new(sleep_time: 0.5)
|
|
129
|
+
with_retries { assert_equal 'started', Workhorse::DbJob.first.state }
|
|
130
|
+
|
|
131
|
+
# Send USR1 twice in rapid succession
|
|
132
|
+
Process.kill 'USR1', Process.pid
|
|
133
|
+
Process.kill 'USR1', Process.pid
|
|
134
|
+
sleep 0.1
|
|
135
|
+
|
|
136
|
+
assert_not w.accepting_jobs?
|
|
137
|
+
|
|
138
|
+
# Worker should still shut down cleanly (no double-shutdown crash)
|
|
139
|
+
with_retries { assert_equal :shutdown, w.state }
|
|
140
|
+
assert File.exist?(Workhorse::Worker.shutdown_file_for(Process.pid))
|
|
141
|
+
end
|
|
142
|
+
ensure
|
|
143
|
+
FileUtils.rm_f Workhorse::Worker.shutdown_file_for(Process.pid)
|
|
144
|
+
end
|
|
145
|
+
|
|
146
|
+
def test_soft_restart_ignored_during_shutdown
|
|
147
|
+
with_worker(pool_size: 1, polling_interval: 0.2) do |w|
|
|
148
|
+
Process.kill 'TERM', Process.pid
|
|
149
|
+
with_retries { assert_equal :shutdown, w.state }
|
|
150
|
+
|
|
151
|
+
# Sending USR1 during shutdown should not crash or create shutdown file
|
|
152
|
+
Process.kill 'USR1', Process.pid
|
|
153
|
+
sleep 0.1
|
|
154
|
+
|
|
155
|
+
assert_not File.exist?(Workhorse::Worker.shutdown_file_for(Process.pid))
|
|
156
|
+
end
|
|
157
|
+
ensure
|
|
158
|
+
FileUtils.rm_f Workhorse::Worker.shutdown_file_for(Process.pid)
|
|
159
|
+
end
|
|
160
|
+
|
|
72
161
|
def test_no_queues
|
|
73
162
|
enqueue_in_multiple_queues
|
|
74
163
|
work 0.2, polling_interval: 0.2
|
|
@@ -150,14 +239,14 @@ class Workhorse::WorkerTest < WorkhorseTest
|
|
|
150
239
|
def test_order_with_priorities
|
|
151
240
|
Workhorse.enqueue BasicJob.new(some_param: 6, sleep_time: 0), priority: 4
|
|
152
241
|
Workhorse.enqueue BasicJob.new(some_param: 4, sleep_time: 0), priority: 3
|
|
153
|
-
sleep 1
|
|
242
|
+
sleep 0.1
|
|
154
243
|
Workhorse.enqueue BasicJob.new(some_param: 5, sleep_time: 0), priority: 3
|
|
155
244
|
Workhorse.enqueue BasicJob.new(some_param: 3, sleep_time: 0), priority: 2
|
|
156
245
|
Workhorse.enqueue BasicJob.new(some_param: 2, sleep_time: 0), priority: 1
|
|
157
246
|
Workhorse.enqueue BasicJob.new(some_param: 1, sleep_time: 0), priority: 0
|
|
158
247
|
|
|
159
248
|
BasicJob.results.clear
|
|
160
|
-
work 1
|
|
249
|
+
work 1, pool_size: 1, polling_interval: 0.1
|
|
161
250
|
assert_equal (1..6).to_a, BasicJob.results
|
|
162
251
|
end
|
|
163
252
|
|
|
@@ -227,13 +316,6 @@ class Workhorse::WorkerTest < WorkhorseTest
|
|
|
227
316
|
assert_not process?(pid), "Process #{pid} expected to be stopped"
|
|
228
317
|
end
|
|
229
318
|
|
|
230
|
-
def process?(pid)
|
|
231
|
-
Process.kill(0, pid)
|
|
232
|
-
true
|
|
233
|
-
rescue Errno::EPERM, Errno::ESRCH
|
|
234
|
-
false
|
|
235
|
-
end
|
|
236
|
-
|
|
237
319
|
def enqueue_in_multiple_queues
|
|
238
320
|
Workhorse.enqueue BasicJob.new(some_param: nil)
|
|
239
321
|
Workhorse.enqueue BasicJob.new(some_param: :q1), queue: :q1
|
data/workhorse.gemspec
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
|
2
|
-
# stub: workhorse 1.
|
|
2
|
+
# stub: workhorse 1.4.0.rc0 ruby lib
|
|
3
3
|
|
|
4
4
|
Gem::Specification.new do |s|
|
|
5
5
|
s.name = "workhorse".freeze
|
|
6
|
-
s.version = "1.3.1"
|
|
6
|
+
s.version = "1.4.0.rc0"
|
|
7
7
|
|
|
8
|
-
s.required_rubygems_version = Gem::Requirement.new("
|
|
8
|
+
s.required_rubygems_version = Gem::Requirement.new("> 1.3.1".freeze) if s.respond_to? :required_rubygems_version=
|
|
9
9
|
s.require_paths = ["lib".freeze]
|
|
10
10
|
s.authors = ["Sitrox".freeze]
|
|
11
|
-
s.date = "
|
|
11
|
+
s.date = "2026-02-11"
|
|
12
12
|
s.files = [".github/workflows/ruby.yml".freeze, ".gitignore".freeze, ".releaser_config".freeze, ".rubocop.yml".freeze, "CHANGELOG.md".freeze, "FAQ.md".freeze, "Gemfile".freeze, "LICENSE".freeze, "README.md".freeze, "RUBY_VERSION".freeze, "Rakefile".freeze, "VERSION".freeze, "bin/rubocop".freeze, "lib/active_job/queue_adapters/workhorse_adapter.rb".freeze, "lib/generators/workhorse/install_generator.rb".freeze, "lib/generators/workhorse/templates/bin/workhorse.rb".freeze, "lib/generators/workhorse/templates/config/initializers/workhorse.rb".freeze, "lib/generators/workhorse/templates/create_table_jobs.rb".freeze, "lib/workhorse.rb".freeze, "lib/workhorse/active_job_extension.rb".freeze, "lib/workhorse/daemon.rb".freeze, "lib/workhorse/daemon/shell_handler.rb".freeze, "lib/workhorse/db_job.rb".freeze, "lib/workhorse/enqueuer.rb".freeze, "lib/workhorse/jobs/cleanup_succeeded_jobs.rb".freeze, "lib/workhorse/jobs/detect_stale_jobs_job.rb".freeze, "lib/workhorse/jobs/run_active_job.rb".freeze, "lib/workhorse/jobs/run_rails_op.rb".freeze, "lib/workhorse/performer.rb".freeze, "lib/workhorse/poller.rb".freeze, "lib/workhorse/pool.rb".freeze, "lib/workhorse/scoped_env.rb".freeze, "lib/workhorse/worker.rb".freeze, "test/active_job/queue_adapters/workhorse_adapter_test.rb".freeze, "test/lib/db_schema.rb".freeze, "test/lib/jobs.rb".freeze, "test/lib/test_helper.rb".freeze, "test/workhorse/daemon_test.rb".freeze, "test/workhorse/db_job_test.rb".freeze, "test/workhorse/enqueuer_test.rb".freeze, "test/workhorse/performer_test.rb".freeze, "test/workhorse/poller_test.rb".freeze, "test/workhorse/pool_test.rb".freeze, "test/workhorse/worker_test.rb".freeze, "workhorse.gemspec".freeze]
|
|
13
13
|
s.rubygems_version = "3.4.6".freeze
|
|
14
14
|
s.summary = "Multi-threaded job backend with database queuing for ruby.".freeze
|
metadata
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: workhorse
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.3.1
|
|
4
|
+
version: 1.4.0.rc0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Sitrox
|
|
8
8
|
bindir: bin
|
|
9
9
|
cert_chain: []
|
|
10
|
-
date:
|
|
10
|
+
date: 2026-02-11 00:00:00.000000000 Z
|
|
11
11
|
dependencies:
|
|
12
12
|
- !ruby/object:Gem::Dependency
|
|
13
13
|
name: activesupport
|
|
@@ -112,11 +112,11 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
112
112
|
version: '0'
|
|
113
113
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
114
114
|
requirements:
|
|
115
|
-
- - "
|
|
115
|
+
- - ">"
|
|
116
116
|
- !ruby/object:Gem::Version
|
|
117
|
-
version:
|
|
117
|
+
version: 1.3.1
|
|
118
118
|
requirements: []
|
|
119
|
-
rubygems_version:
|
|
119
|
+
rubygems_version: 4.0.2
|
|
120
120
|
specification_version: 4
|
|
121
121
|
summary: Multi-threaded job backend with database queuing for ruby.
|
|
122
122
|
test_files:
|