workhorse 1.4.1 → 1.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.releaser_config +1 -1
- data/CHANGELOG.md +23 -1
- data/README.md +17 -1
- data/VERSION +1 -1
- data/lib/workhorse/daemon/shell_handler.rb +24 -1
- data/lib/workhorse/daemon.rb +45 -1
- data/lib/workhorse/poller.rb +8 -0
- data/lib/workhorse/worker.rb +58 -4
- data/lib/workhorse.rb +25 -0
- data/workhorse.gemspec +3 -3
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 02c3e3163a1b21b8a983c756e9be5e916b2cc7017401ab3484905c04fe56293f
|
|
4
|
+
data.tar.gz: 52ffb9742bf24ea226e3aa3f7edf2f0a5deecdefa57310c3176a40bec00f227f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: ce5cd0d30660a0bb9829c84957e76589d5e65e9c47795c7ff414101df2b57f8661fbf01cd4b73bb4a69b7901f4fc43796a3a43e6f400c31b5c1a5b6b1eff8e56
|
|
7
|
+
data.tar.gz: 3594464e0fd00d036c9fe163daeea5a17d78d2fcd79095be05dac95753fbce43530812881529052af28917240920f70f706b708f7902d38636caa4e5c6995d3a
|
data/.releaser_config
CHANGED
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,27 @@
|
|
|
1
1
|
# Workhorse Changelog
|
|
2
2
|
|
|
3
|
+
## 1.4.4 - 2026-04-28
|
|
4
|
+
|
|
5
|
+
* Make debug logging (enabled if `config.debug_log_path` is set) more verbose.
|
|
6
|
+
|
|
7
|
+
Sitrox reference: #120574.
|
|
8
|
+
|
|
9
|
+
## 1.4.3 - 2026-04-28
|
|
10
|
+
|
|
11
|
+
* Yanked from RubyGems. Do not use this release.
|
|
12
|
+
|
|
13
|
+
## 1.4.2 - 2026-02-20
|
|
14
|
+
|
|
15
|
+
* Detach forked worker processes into their own session using `Process.setsid`.
|
|
16
|
+
Previously, when the ShellHandler was the session leader (e.g. started via
|
|
17
|
+
cron), its exit would cause the kernel to send `SIGHUP` to all forked workers,
|
|
18
|
+
potentially crashing them during startup before signal handlers were installed.
|
|
19
|
+
|
|
20
|
+
* Add optional debug logging (`config.debug_log_path`) for diagnosing issues
|
|
21
|
+
with signal handling, process lifecycle, log rotation, and daemon commands.
|
|
22
|
+
|
|
23
|
+
Sitrox reference: #120574.
|
|
24
|
+
|
|
3
25
|
## 1.4.1 - 2026-02-18
|
|
4
26
|
|
|
5
27
|
* Close inherited lockfile fd in forked worker processes. Previously the
|
|
@@ -50,7 +72,7 @@
|
|
|
50
72
|
|
|
51
73
|
## 1.3.0.rc4 - 2025-08-27
|
|
52
74
|
|
|
53
|
-
* Fix race-condition in polling mechanism which could result in workers
|
|
75
|
+
* Fix race-condition in polling mechanism which could result in workers
|
|
54
76
|
trying to run a job that is not yet locked.
|
|
55
77
|
|
|
56
78
|
Sitrox reference: #128333.
|
data/README.md
CHANGED
|
@@ -522,7 +522,6 @@ Gem-internal model class `Workhorse::DbJob`, for example:
|
|
|
522
522
|
|
|
523
523
|
```ruby
|
|
524
524
|
# config/initializers/workhorse.rb
|
|
525
|
-
|
|
526
525
|
ActiveSupport.on_load :workhorse_db_job do
|
|
527
526
|
# Code within this block will be run inside of the model class
|
|
528
527
|
# Workhorse::DbJob.
|
|
@@ -530,6 +529,23 @@ ActiveSupport.on_load :workhorse_db_job do
|
|
|
530
529
|
end
|
|
531
530
|
```
|
|
532
531
|
|
|
532
|
+
## Debug logging
|
|
533
|
+
|
|
534
|
+
Workhorse includes an optional debug log for diagnosing issues with signal
|
|
535
|
+
handling, process lifecycle, log rotation, and daemon commands. To enable,
|
|
536
|
+
set `debug_log_path` to a writable file path:
|
|
537
|
+
|
|
538
|
+
```ruby
|
|
539
|
+
# config/initializers/workhorse.rb
|
|
540
|
+
Workhorse.setup do |config|
|
|
541
|
+
config.debug_log_path = Rails.root.join('log', 'workhorse.debug.log')
|
|
542
|
+
end
|
|
543
|
+
```
|
|
544
|
+
|
|
545
|
+
The debug log is designed to be safe for production use: all writes are
|
|
546
|
+
best-effort and silently ignore errors to avoid interfering with normal
|
|
547
|
+
operation. Set `debug_log_path` to `nil` (the default) to disable.
|
|
548
|
+
|
|
533
549
|
## Caveats
|
|
534
550
|
|
|
535
551
|
### Errors during polling / crashed workers
|
data/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
1.4.1
|
|
1
|
+
1.4.4
|
data/lib/workhorse/daemon/shell_handler.rb
CHANGED
|
@@ -16,43 +16,61 @@ module Workhorse
|
|
|
16
16
|
begin
|
|
17
17
|
case ARGV.first
|
|
18
18
|
when 'start'
|
|
19
|
+
Workhorse.debug_log('ShellHandler: start command invoked')
|
|
19
20
|
lockfile = acquire_lock(lockfile_path, File::LOCK_EX)
|
|
21
|
+
Workhorse.debug_log('ShellHandler: lock acquired for start')
|
|
20
22
|
daemon.lockfile = lockfile
|
|
21
23
|
status = daemon.start
|
|
22
24
|
when 'stop'
|
|
25
|
+
Workhorse.debug_log('ShellHandler: stop command invoked')
|
|
23
26
|
lockfile = acquire_lock(lockfile_path, File::LOCK_EX)
|
|
27
|
+
Workhorse.debug_log('ShellHandler: lock acquired for stop')
|
|
24
28
|
daemon.lockfile = lockfile
|
|
25
29
|
status = daemon.stop
|
|
26
30
|
when 'kill'
|
|
31
|
+
Workhorse.debug_log('ShellHandler: kill command invoked')
|
|
27
32
|
begin
|
|
28
33
|
lockfile = acquire_lock(lockfile_path, File::LOCK_EX | File::LOCK_NB)
|
|
34
|
+
Workhorse.debug_log('ShellHandler: lock acquired for kill')
|
|
29
35
|
daemon.lockfile = lockfile
|
|
30
36
|
status = daemon.stop(true)
|
|
31
37
|
rescue LockNotAvailableError
|
|
38
|
+
Workhorse.debug_log('ShellHandler: lock not available for kill')
|
|
32
39
|
status = 1
|
|
33
40
|
end
|
|
34
41
|
when 'status'
|
|
42
|
+
Workhorse.debug_log('ShellHandler: status command invoked')
|
|
35
43
|
lockfile = acquire_lock(lockfile_path, File::LOCK_EX)
|
|
44
|
+
Workhorse.debug_log('ShellHandler: lock acquired for status')
|
|
36
45
|
daemon.lockfile = lockfile
|
|
37
46
|
status = daemon.status
|
|
38
47
|
when 'watch'
|
|
48
|
+
Workhorse.debug_log('ShellHandler: watch command invoked')
|
|
39
49
|
begin
|
|
40
50
|
lockfile = acquire_lock(lockfile_path, File::LOCK_EX | File::LOCK_NB)
|
|
51
|
+
Workhorse.debug_log('ShellHandler: lock acquired for watch')
|
|
41
52
|
daemon.lockfile = lockfile
|
|
42
53
|
status = daemon.watch
|
|
43
54
|
rescue LockNotAvailableError
|
|
55
|
+
Workhorse.debug_log('ShellHandler: lock not available for watch')
|
|
44
56
|
status = 1
|
|
45
57
|
end
|
|
46
58
|
when 'restart'
|
|
59
|
+
Workhorse.debug_log('ShellHandler: restart command invoked')
|
|
47
60
|
lockfile = acquire_lock(lockfile_path, File::LOCK_EX)
|
|
61
|
+
Workhorse.debug_log('ShellHandler: lock acquired for restart')
|
|
48
62
|
daemon.lockfile = lockfile
|
|
49
63
|
status = daemon.restart
|
|
50
64
|
when 'restart-logging'
|
|
65
|
+
Workhorse.debug_log('ShellHandler: restart-logging command invoked')
|
|
51
66
|
lockfile = acquire_lock(lockfile_path, File::LOCK_EX)
|
|
67
|
+
Workhorse.debug_log('ShellHandler: lock acquired for restart-logging')
|
|
52
68
|
daemon.lockfile = lockfile
|
|
53
69
|
status = daemon.restart_logging
|
|
54
70
|
when 'soft-restart'
|
|
71
|
+
Workhorse.debug_log('ShellHandler: soft-restart command invoked')
|
|
55
72
|
lockfile = acquire_lock(lockfile_path, File::LOCK_EX)
|
|
73
|
+
Workhorse.debug_log('ShellHandler: lock acquired for soft-restart')
|
|
56
74
|
daemon.lockfile = lockfile
|
|
57
75
|
status = daemon.soft_restart
|
|
58
76
|
when 'usage'
|
|
@@ -63,10 +81,15 @@ module Workhorse
|
|
|
63
81
|
status = 99
|
|
64
82
|
end
|
|
65
83
|
rescue StandardError => e
|
|
84
|
+
Workhorse.debug_log("ShellHandler: #{ARGV.first} failed with #{e.class}: #{e.message}")
|
|
66
85
|
warn "#{e.message}\n#{e.backtrace.join("\n")}"
|
|
67
86
|
status = 99
|
|
68
87
|
ensure
|
|
69
|
-
lockfile
|
|
88
|
+
if lockfile
|
|
89
|
+
Workhorse.debug_log("ShellHandler: releasing lock for #{ARGV.first}")
|
|
90
|
+
lockfile.flock(File::LOCK_UN)
|
|
91
|
+
end
|
|
92
|
+
Workhorse.debug_log("ShellHandler: exiting with status #{status}")
|
|
70
93
|
exit! status
|
|
71
94
|
end
|
|
72
95
|
end
|
data/lib/workhorse/daemon.rb
CHANGED
|
@@ -82,6 +82,8 @@ module Workhorse
|
|
|
82
82
|
def start(quiet: false)
|
|
83
83
|
code = 0
|
|
84
84
|
|
|
85
|
+
Workhorse.debug_log("Daemon: starting #{@workers.count} worker(s)")
|
|
86
|
+
|
|
85
87
|
# Holds messages in format [[<message>, <severity>]]
|
|
86
88
|
messages = []
|
|
87
89
|
|
|
@@ -89,9 +91,11 @@ module Workhorse
|
|
|
89
91
|
pid_file, pid, active = read_pid(worker)
|
|
90
92
|
|
|
91
93
|
if pid_file && pid && active
|
|
94
|
+
Workhorse.debug_log("Daemon start: worker ##{worker.id} (#{worker.name}) already running (PID #{pid})")
|
|
92
95
|
messages << ["Worker ##{worker.id} (#{worker.name}): Already started (PID #{pid})", 2] unless quiet
|
|
93
96
|
code = 2
|
|
94
97
|
elsif pid_file
|
|
98
|
+
Workhorse.debug_log("Daemon start: worker ##{worker.id} (#{worker.name}) has stale pid file (PID #{pid.inspect}), starting")
|
|
95
99
|
File.delete pid_file
|
|
96
100
|
|
|
97
101
|
shutdown_file = pid ? Workhorse::Worker.shutdown_file_for(pid) : nil
|
|
@@ -101,6 +105,7 @@ module Workhorse
|
|
|
101
105
|
start_worker worker
|
|
102
106
|
FileUtils.rm(shutdown_file) if shutdown_file
|
|
103
107
|
else
|
|
108
|
+
Workhorse.debug_log("Daemon start: worker ##{worker.id} (#{worker.name}) not running, starting")
|
|
104
109
|
messages << ["Worker ##{worker.id} (#{worker.name}): Starting", 1] unless quiet
|
|
105
110
|
start_worker worker
|
|
106
111
|
end
|
|
@@ -115,6 +120,7 @@ module Workhorse
|
|
|
115
120
|
end
|
|
116
121
|
end
|
|
117
122
|
|
|
123
|
+
Workhorse.debug_log("Daemon: start complete, exit code=#{code}")
|
|
118
124
|
return code
|
|
119
125
|
end
|
|
120
126
|
|
|
@@ -126,21 +132,27 @@ module Workhorse
|
|
|
126
132
|
def stop(kill = false, quiet: false)
|
|
127
133
|
code = 0
|
|
128
134
|
|
|
135
|
+
Workhorse.debug_log("Daemon: stopping #{@workers.count} worker(s) (kill=#{kill})")
|
|
136
|
+
|
|
129
137
|
for_each_worker do |worker|
|
|
130
138
|
pid_file, pid, active = read_pid(worker)
|
|
131
139
|
|
|
132
140
|
if pid_file && pid && active
|
|
141
|
+
Workhorse.debug_log("Daemon stop: worker ##{worker.id} (#{worker.name}) running (PID #{pid}), stopping")
|
|
133
142
|
puts "Worker (#{worker.name}) ##{worker.id}: Stopping" unless quiet
|
|
134
143
|
stop_worker pid_file, pid, kill: kill
|
|
135
144
|
elsif pid_file
|
|
145
|
+
Workhorse.debug_log("Daemon stop: worker ##{worker.id} (#{worker.name}) stale pid file (PID #{pid.inspect})")
|
|
136
146
|
File.delete pid_file
|
|
137
147
|
puts "Worker (#{worker.name}) ##{worker.id}: Already stopped (stale PID file)" unless quiet
|
|
138
148
|
else
|
|
149
|
+
Workhorse.debug_log("Daemon stop: worker ##{worker.id} (#{worker.name}) already stopped")
|
|
139
150
|
warn "Worker (#{worker.name}) ##{worker.id}: Already stopped" unless quiet
|
|
140
151
|
code = 2
|
|
141
152
|
end
|
|
142
153
|
end
|
|
143
154
|
|
|
155
|
+
Workhorse.debug_log("Daemon: stop complete, exit code=#{code}")
|
|
144
156
|
return code
|
|
145
157
|
end
|
|
146
158
|
|
|
@@ -155,16 +167,20 @@ module Workhorse
|
|
|
155
167
|
pid_file, pid, active = read_pid(worker)
|
|
156
168
|
|
|
157
169
|
if pid_file && pid && active
|
|
170
|
+
Workhorse.debug_log("Daemon status: worker ##{worker.id} (#{worker.name}) running (PID #{pid})")
|
|
158
171
|
puts "Worker ##{worker.id} (#{worker.name}): Running" unless quiet
|
|
159
172
|
elsif pid_file
|
|
173
|
+
Workhorse.debug_log("Daemon status: worker ##{worker.id} (#{worker.name}) not running (stale PID file, PID #{pid.inspect})")
|
|
160
174
|
warn "Worker ##{worker.id} (#{worker.name}): Not running (stale PID file)" unless quiet
|
|
161
175
|
code = 2
|
|
162
176
|
else
|
|
177
|
+
Workhorse.debug_log("Daemon status: worker ##{worker.id} (#{worker.name}) not running (no pid file)")
|
|
163
178
|
warn "Worker ##{worker.id} (#{worker.name}): Not running" unless quiet
|
|
164
179
|
code = 2
|
|
165
180
|
end
|
|
166
181
|
end
|
|
167
182
|
|
|
183
|
+
Workhorse.debug_log("Daemon: status complete, exit code=#{code}")
|
|
168
184
|
return code
|
|
169
185
|
end
|
|
170
186
|
|
|
@@ -179,9 +195,14 @@ module Workhorse
|
|
|
179
195
|
should_be_running = true
|
|
180
196
|
end
|
|
181
197
|
|
|
182
|
-
|
|
198
|
+
status_code = status(quiet: true)
|
|
199
|
+
Workhorse.debug_log("Daemon watch: should_be_running=#{should_be_running}, status_code=#{status_code}")
|
|
200
|
+
|
|
201
|
+
if should_be_running && status_code != 0
|
|
202
|
+
Workhorse.debug_log('Daemon watch: starting workers')
|
|
183
203
|
return start(quiet: Workhorse.silence_watcher)
|
|
184
204
|
else
|
|
205
|
+
Workhorse.debug_log('Daemon watch: no action needed')
|
|
185
206
|
return 0
|
|
186
207
|
end
|
|
187
208
|
end
|
|
@@ -201,20 +222,27 @@ module Workhorse
|
|
|
201
222
|
def restart_logging
|
|
202
223
|
code = 0
|
|
203
224
|
|
|
225
|
+
Workhorse.debug_log("restart_logging: sending HUP to #{@workers.count} worker(s)")
|
|
226
|
+
|
|
204
227
|
for_each_worker do |worker|
|
|
205
228
|
_pid_file, pid, active = read_pid(worker)
|
|
206
229
|
|
|
230
|
+
Workhorse.debug_log("restart_logging: worker ##{worker.id} (#{worker.name}): pid=#{pid.inspect}, active=#{active.inspect}")
|
|
231
|
+
|
|
207
232
|
next unless pid && active
|
|
208
233
|
|
|
209
234
|
begin
|
|
210
235
|
Process.kill 'HUP', pid
|
|
236
|
+
Workhorse.debug_log("restart_logging: HUP sent successfully to PID #{pid}")
|
|
211
237
|
puts "Worker (#{worker.name}) ##{worker.id}: Sent signal for restart-logging"
|
|
212
238
|
rescue Errno::ESRCH
|
|
239
|
+
Workhorse.debug_log("restart_logging: HUP failed for PID #{pid}: process not found")
|
|
213
240
|
warn "Worker (#{worker.name}) ##{worker.id}: Could not send signal for restart-logging, process not found"
|
|
214
241
|
code = 2
|
|
215
242
|
end
|
|
216
243
|
end
|
|
217
244
|
|
|
245
|
+
Workhorse.debug_log("restart_logging: done, exit code=#{code}")
|
|
218
246
|
return code
|
|
219
247
|
end
|
|
220
248
|
|
|
@@ -227,20 +255,27 @@ module Workhorse
|
|
|
227
255
|
def soft_restart
|
|
228
256
|
code = 0
|
|
229
257
|
|
|
258
|
+
Workhorse.debug_log("Daemon: sending USR1 to #{@workers.count} worker(s)")
|
|
259
|
+
|
|
230
260
|
for_each_worker do |worker|
|
|
231
261
|
_pid_file, pid, active = read_pid(worker)
|
|
232
262
|
|
|
263
|
+
Workhorse.debug_log("Daemon soft_restart: worker ##{worker.id} (#{worker.name}): pid=#{pid.inspect}, active=#{active.inspect}")
|
|
264
|
+
|
|
233
265
|
next unless pid && active
|
|
234
266
|
|
|
235
267
|
begin
|
|
236
268
|
Process.kill 'USR1', pid
|
|
269
|
+
Workhorse.debug_log("Daemon: USR1 sent successfully to PID #{pid}")
|
|
237
270
|
puts "Worker (#{worker.name}) ##{worker.id}: Sent soft-restart signal"
|
|
238
271
|
rescue Errno::ESRCH
|
|
272
|
+
Workhorse.debug_log("Daemon: USR1 failed for PID #{pid}: process not found")
|
|
239
273
|
warn "Worker (#{worker.name}) ##{worker.id}: Process not found"
|
|
240
274
|
code = 2
|
|
241
275
|
end
|
|
242
276
|
end
|
|
243
277
|
|
|
278
|
+
Workhorse.debug_log("Daemon soft_restart: done, exit code=#{code}")
|
|
244
279
|
return code
|
|
245
280
|
end
|
|
246
281
|
|
|
@@ -263,7 +298,13 @@ module Workhorse
|
|
|
263
298
|
def start_worker(worker)
|
|
264
299
|
check_rails_env if defined?(Rails)
|
|
265
300
|
|
|
301
|
+
Workhorse.debug_log("Daemon: forking worker ##{worker.id} (#{worker.name})")
|
|
266
302
|
pid = fork do
|
|
303
|
+
# Detach from the parent's session so that the worker is not killed by
|
|
304
|
+
# SIGHUP when the parent (ShellHandler) exits. Without this, the kernel
|
|
305
|
+
# sends SIGHUP to the foreground process group when the session leader
|
|
306
|
+
# (e.g. a cron- or systemd-started ShellHandler) terminates.
|
|
307
|
+
Process.setsid
|
|
267
308
|
$0 = process_name(worker)
|
|
268
309
|
# Close inherited lockfile fd to prevent holding the flock after parent exits
|
|
269
310
|
@lockfile&.close
|
|
@@ -278,6 +319,7 @@ module Workhorse
|
|
|
278
319
|
worker.pid = pid
|
|
279
320
|
File.write(pid_file_for(worker), pid)
|
|
280
321
|
Process.detach(pid)
|
|
322
|
+
Workhorse.debug_log("Daemon: worker ##{worker.id} (#{worker.name}) forked with PID #{pid}")
|
|
281
323
|
end
|
|
282
324
|
|
|
283
325
|
# Stops a single worker process.
|
|
@@ -290,6 +332,7 @@ module Workhorse
|
|
|
290
332
|
def stop_worker(pid_file, pid, kill: false)
|
|
291
333
|
signals = kill ? %w[KILL] : %w[TERM INT]
|
|
292
334
|
|
|
335
|
+
Workhorse.debug_log("Daemon: stopping PID #{pid} with signals #{signals.join(', ')}")
|
|
293
336
|
loop do
|
|
294
337
|
begin
|
|
295
338
|
signals.each { |signal| Process.kill(signal, pid) }
|
|
@@ -300,6 +343,7 @@ module Workhorse
|
|
|
300
343
|
sleep 1
|
|
301
344
|
end
|
|
302
345
|
|
|
346
|
+
Workhorse.debug_log("Daemon: PID #{pid} stopped")
|
|
303
347
|
File.delete(pid_file)
|
|
304
348
|
end
|
|
305
349
|
|
data/lib/workhorse/poller.rb
CHANGED
|
@@ -49,14 +49,18 @@ module Workhorse
|
|
|
49
49
|
fail 'Poller is already running.' if running?
|
|
50
50
|
@running = true
|
|
51
51
|
|
|
52
|
+
Workhorse.debug_log("[Job worker #{worker.id}] Poller starting")
|
|
53
|
+
|
|
52
54
|
clean_stuck_jobs! if Workhorse.clean_stuck_jobs
|
|
53
55
|
|
|
54
56
|
@thread = Thread.new do
|
|
57
|
+
Workhorse.debug_log("[Job worker #{worker.id}] Poller thread started")
|
|
55
58
|
loop do
|
|
56
59
|
break unless running?
|
|
57
60
|
|
|
58
61
|
begin
|
|
59
62
|
unless @before_poll.call
|
|
63
|
+
Workhorse.debug_log("[Job worker #{worker.id}] before_poll returned false, triggering worker shutdown")
|
|
60
64
|
Thread.new { worker.shutdown }
|
|
61
65
|
sleep
|
|
62
66
|
next
|
|
@@ -65,6 +69,7 @@ module Workhorse
|
|
|
65
69
|
poll
|
|
66
70
|
sleep
|
|
67
71
|
rescue Exception => e
|
|
72
|
+
Workhorse.debug_log("[Job worker #{worker.id}] Poller exception, shutting down: #{e.class}: #{e.message}")
|
|
68
73
|
worker.log %(Poll encountered exception:\n#{e.message}\n#{e.backtrace.join("\n")})
|
|
69
74
|
worker.log 'Worker shutting down...'
|
|
70
75
|
Workhorse.on_exception.call(e) unless Workhorse.silence_poller_exceptions
|
|
@@ -73,6 +78,7 @@ module Workhorse
|
|
|
73
78
|
break
|
|
74
79
|
end
|
|
75
80
|
end
|
|
81
|
+
Workhorse.debug_log("[Job worker #{worker.id}] Poller thread exiting")
|
|
76
82
|
end
|
|
77
83
|
end
|
|
78
84
|
|
|
@@ -82,8 +88,10 @@ module Workhorse
|
|
|
82
88
|
# @raise [RuntimeError] If poller is not running
|
|
83
89
|
def shutdown
|
|
84
90
|
fail 'Poller is not running.' unless running?
|
|
91
|
+
Workhorse.debug_log("[Job worker #{worker.id}] Poller shutting down")
|
|
85
92
|
@running = false
|
|
86
93
|
wait
|
|
94
|
+
Workhorse.debug_log("[Job worker #{worker.id}] Poller shut down")
|
|
87
95
|
end
|
|
88
96
|
|
|
89
97
|
# Waits for the poller thread to complete.
|
data/lib/workhorse/worker.rb
CHANGED
|
@@ -158,6 +158,8 @@ module Workhorse
|
|
|
158
158
|
@poller.start
|
|
159
159
|
log 'Started up'
|
|
160
160
|
|
|
161
|
+
Workhorse.debug_log("[Job worker #{id}] Started: PID=#{pid}, logger=#{describe_logger(logger)}")
|
|
162
|
+
|
|
161
163
|
trap_termination if @auto_terminate
|
|
162
164
|
trap_log_reopen
|
|
163
165
|
trap_soft_restart
|
|
@@ -189,12 +191,14 @@ module Workhorse
|
|
|
189
191
|
mutex.synchronize do
|
|
190
192
|
assert_state! :running
|
|
191
193
|
|
|
194
|
+
Workhorse.debug_log("[Job worker #{id}] Shutdown starting")
|
|
192
195
|
log 'Shutting down'
|
|
193
196
|
@state = :shutdown
|
|
194
197
|
|
|
195
198
|
@poller.shutdown
|
|
196
199
|
@pool.shutdown
|
|
197
200
|
log 'Shut down'
|
|
201
|
+
Workhorse.debug_log("[Job worker #{id}] Shutdown complete")
|
|
198
202
|
end
|
|
199
203
|
end
|
|
200
204
|
|
|
@@ -267,6 +271,8 @@ module Workhorse
|
|
|
267
271
|
|
|
268
272
|
return true unless exceeded
|
|
269
273
|
|
|
274
|
+
Workhorse.debug_log("[Job worker #{id}] Memory limit exceeded: #{mem}MB > #{max}MB, initiating shutdown")
|
|
275
|
+
|
|
270
276
|
if defined?(Rails)
|
|
271
277
|
FileUtils.touch self.class.shutdown_file_for(pid)
|
|
272
278
|
end
|
|
@@ -296,16 +302,19 @@ module Workhorse
|
|
|
296
302
|
def trap_log_reopen
|
|
297
303
|
Signal.trap(LOG_REOPEN_SIGNAL) do
|
|
298
304
|
Thread.new do
|
|
305
|
+
Workhorse.debug_log("[Job worker #{id}] #{LOG_REOPEN_SIGNAL} received, logger state before reopen: #{describe_logger(logger)}")
|
|
306
|
+
|
|
299
307
|
logger&.reopen
|
|
308
|
+
Workhorse.debug_log("[Job worker #{id}] Logger state after reopen: #{describe_logger(logger)}")
|
|
300
309
|
|
|
301
|
-
|
|
302
|
-
ActiveRecord::Base.logger.reopen
|
|
303
|
-
end
|
|
310
|
+
Workhorse.debug_log("[Job worker #{id}] #{LOG_REOPEN_SIGNAL} handling complete")
|
|
304
311
|
rescue Exception => e
|
|
312
|
+
Workhorse.debug_log("[Job worker #{id}] Logger reopen failed: #{e.class}: #{e.message}\n#{e.backtrace.join("\n")}")
|
|
305
313
|
log %(Log reopen signal handler error: #{e.message}\n#{e.backtrace.join("\n")}), :error
|
|
306
314
|
Workhorse.on_exception.call(e)
|
|
307
315
|
end.join
|
|
308
316
|
end
|
|
317
|
+
Workhorse.debug_log("[Job worker #{id}] Signal handler installed: #{LOG_REOPEN_SIGNAL}")
|
|
309
318
|
end
|
|
310
319
|
|
|
311
320
|
# Sets up signal handlers for graceful termination (TERM/INT signals).
|
|
@@ -320,11 +329,13 @@ module Workhorse
|
|
|
320
329
|
# quickly when called multiple times, this does not pose a risk of
|
|
321
330
|
# keeping open a big number of "shutdown threads".
|
|
322
331
|
Thread.new do
|
|
332
|
+
Workhorse.debug_log("[Job worker #{id}] #{signal} received, shutting down")
|
|
323
333
|
log "\nCaught #{signal}, shutting worker down..."
|
|
324
334
|
shutdown
|
|
325
335
|
end.join
|
|
326
336
|
end
|
|
327
337
|
end
|
|
338
|
+
Workhorse.debug_log("[Job worker #{id}] Signal handlers installed: #{SHUTDOWN_SIGNALS.join(', ')}")
|
|
328
339
|
end
|
|
329
340
|
|
|
330
341
|
# Initiates a soft restart of the worker.
|
|
@@ -339,9 +350,14 @@ module Workhorse
|
|
|
339
350
|
|
|
340
351
|
return unless @soft_restart_requested.make_true
|
|
341
352
|
|
|
353
|
+
Workhorse.debug_log("[Job worker #{id}] Soft restart initiated")
|
|
354
|
+
|
|
342
355
|
# Create shutdown file for watch to detect
|
|
343
356
|
shutdown_file = self.class.shutdown_file_for(pid)
|
|
344
|
-
|
|
357
|
+
if shutdown_file
|
|
358
|
+
FileUtils.touch(shutdown_file)
|
|
359
|
+
Workhorse.debug_log("[Job worker #{id}] Shutdown file created: #{shutdown_file}")
|
|
360
|
+
end
|
|
345
361
|
|
|
346
362
|
# Monitor in a separate thread to avoid blocking the signal handler
|
|
347
363
|
@soft_restart_thread = Thread.new do
|
|
@@ -361,6 +377,7 @@ module Workhorse
|
|
|
361
377
|
# Start a new thread as certain functionality (such as logging) is not
|
|
362
378
|
# available from within a trap context.
|
|
363
379
|
Thread.new do
|
|
380
|
+
Workhorse.debug_log("[Job worker #{id}] #{SOFT_RESTART_SIGNAL} received, initiating soft restart")
|
|
364
381
|
log "\nCaught #{SOFT_RESTART_SIGNAL}, initiating soft restart..."
|
|
365
382
|
soft_restart
|
|
366
383
|
rescue Exception => e
|
|
@@ -370,6 +387,41 @@ module Workhorse
|
|
|
370
387
|
# NOTE: Unlike trap_termination, we don't join here because soft_restart
|
|
371
388
|
# is designed to be fire-and-forget (it spawns its own monitoring thread).
|
|
372
389
|
end
|
|
390
|
+
Workhorse.debug_log("[Job worker #{id}] Signal handler installed: #{SOFT_RESTART_SIGNAL}")
|
|
391
|
+
end
|
|
392
|
+
|
|
393
|
+
# Returns a human-readable description of a logger's internal state.
|
|
394
|
+
# Used for debug logging to diagnose log rotation issues.
|
|
395
|
+
#
|
|
396
|
+
# @param lgr [Logger, nil] The logger to describe
|
|
397
|
+
# @return [String] Description of the logger's state
|
|
398
|
+
# @private
|
|
399
|
+
def describe_logger(lgr)
|
|
400
|
+
return 'nil' unless lgr
|
|
401
|
+
|
|
402
|
+
parts = ["class=#{lgr.class}"]
|
|
403
|
+
|
|
404
|
+
logdev = lgr.instance_variable_get(:@logdev)
|
|
405
|
+
if logdev
|
|
406
|
+
parts << "filename=#{logdev.filename.inspect}" if logdev.respond_to?(:filename)
|
|
407
|
+
|
|
408
|
+
dev = logdev.respond_to?(:dev) ? logdev.dev : nil
|
|
409
|
+
if dev
|
|
410
|
+
parts << "closed=#{dev.closed?}"
|
|
411
|
+
unless dev.closed?
|
|
412
|
+
fileno = dev.fileno
|
|
413
|
+
parts << "fd=#{fileno}"
|
|
414
|
+
fd_path = "/proc/self/fd/#{fileno}"
|
|
415
|
+
parts << "fd_target=#{File.readlink(fd_path).inspect}" if File.exist?(fd_path)
|
|
416
|
+
end
|
|
417
|
+
end
|
|
418
|
+
else
|
|
419
|
+
parts << 'logdev=nil'
|
|
420
|
+
end
|
|
421
|
+
|
|
422
|
+
parts.join(', ')
|
|
423
|
+
rescue Exception => e
|
|
424
|
+
"error describing logger: #{e.class}: #{e.message}"
|
|
373
425
|
end
|
|
374
426
|
|
|
375
427
|
# Waits for all jobs to complete, then shuts down the worker.
|
|
@@ -378,10 +430,12 @@ module Workhorse
|
|
|
378
430
|
# @return [void]
|
|
379
431
|
# @private
|
|
380
432
|
def wait_for_idle_then_shutdown
|
|
433
|
+
Workhorse.debug_log("[Job worker #{id}] Waiting for idle before soft restart shutdown (pool_size=#{@pool_size})")
|
|
381
434
|
loop do
|
|
382
435
|
break if @state == :shutdown
|
|
383
436
|
|
|
384
437
|
if idle == @pool_size
|
|
438
|
+
Workhorse.debug_log("[Job worker #{id}] All threads idle, proceeding with soft restart shutdown")
|
|
385
439
|
log 'All jobs completed, shutting down for soft restart'
|
|
386
440
|
shutdown
|
|
387
441
|
break
|
data/lib/workhorse.rb
CHANGED
|
@@ -107,6 +107,31 @@ module Workhorse
|
|
|
107
107
|
mattr_accessor :max_worker_memory_mb
|
|
108
108
|
self.max_worker_memory_mb = 0
|
|
109
109
|
|
|
110
|
+
# Path to a debug log file for diagnosing log rotation and signal handling issues.
|
|
111
|
+
# When set, Workhorse writes timestamped debug entries to this file at key points
|
|
112
|
+
# (worker startup, HUP signal handling, restart-logging command flow).
|
|
113
|
+
# Set to nil to disable (default).
|
|
114
|
+
#
|
|
115
|
+
# @return [String, nil] Path to debug log file
|
|
116
|
+
mattr_accessor :debug_log_path
|
|
117
|
+
self.debug_log_path = nil
|
|
118
|
+
|
|
119
|
+
# Writes a debug message to the debug log file.
|
|
120
|
+
# Does nothing if {.debug_log_path} is nil.
|
|
121
|
+
# Silently ignores all exceptions to avoid interfering with normal operation.
|
|
122
|
+
#
|
|
123
|
+
# @param message [String] The message to log
|
|
124
|
+
# @return [void]
|
|
125
|
+
def self.debug_log(message)
|
|
126
|
+
return unless debug_log_path
|
|
127
|
+
|
|
128
|
+
File.open(debug_log_path, 'a') do |f|
|
|
129
|
+
f.write("[#{Time.now.iso8601(3)}] [PID #{Process.pid}] #{message}\n")
|
|
130
|
+
f.flush
|
|
131
|
+
end
|
|
132
|
+
rescue Exception # rubocop:disable Lint/SuppressedException
|
|
133
|
+
end
|
|
134
|
+
|
|
110
135
|
# Configuration method for setting up Workhorse options.
|
|
111
136
|
#
|
|
112
137
|
# @yield [self] Configuration block
|
data/workhorse.gemspec
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
|
2
|
-
# stub: workhorse 1.4.1 ruby lib
|
|
2
|
+
# stub: workhorse 1.4.4 ruby lib
|
|
3
3
|
|
|
4
4
|
Gem::Specification.new do |s|
|
|
5
5
|
s.name = "workhorse".freeze
|
|
6
|
-
s.version = "1.4.1"
|
|
6
|
+
s.version = "1.4.4"
|
|
7
7
|
|
|
8
8
|
s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
|
|
9
9
|
s.require_paths = ["lib".freeze]
|
|
10
10
|
s.authors = ["Sitrox".freeze]
|
|
11
|
-
s.date = "2026-02-18"
|
|
11
|
+
s.date = "2026-04-28"
|
|
12
12
|
s.files = [".github/workflows/ruby.yml".freeze, ".gitignore".freeze, ".releaser_config".freeze, ".rubocop.yml".freeze, "CHANGELOG.md".freeze, "FAQ.md".freeze, "Gemfile".freeze, "LICENSE".freeze, "README.md".freeze, "RUBY_VERSION".freeze, "Rakefile".freeze, "VERSION".freeze, "bin/rubocop".freeze, "lib/active_job/queue_adapters/workhorse_adapter.rb".freeze, "lib/generators/workhorse/install_generator.rb".freeze, "lib/generators/workhorse/templates/bin/workhorse.rb".freeze, "lib/generators/workhorse/templates/config/initializers/workhorse.rb".freeze, "lib/generators/workhorse/templates/create_table_jobs.rb".freeze, "lib/workhorse.rb".freeze, "lib/workhorse/active_job_extension.rb".freeze, "lib/workhorse/daemon.rb".freeze, "lib/workhorse/daemon/shell_handler.rb".freeze, "lib/workhorse/db_job.rb".freeze, "lib/workhorse/enqueuer.rb".freeze, "lib/workhorse/jobs/cleanup_succeeded_jobs.rb".freeze, "lib/workhorse/jobs/detect_stale_jobs_job.rb".freeze, "lib/workhorse/jobs/run_active_job.rb".freeze, "lib/workhorse/jobs/run_rails_op.rb".freeze, "lib/workhorse/performer.rb".freeze, "lib/workhorse/poller.rb".freeze, "lib/workhorse/pool.rb".freeze, "lib/workhorse/scoped_env.rb".freeze, "lib/workhorse/worker.rb".freeze, "test/active_job/queue_adapters/workhorse_adapter_test.rb".freeze, "test/lib/db_schema.rb".freeze, "test/lib/jobs.rb".freeze, "test/lib/test_helper.rb".freeze, "test/workhorse/daemon_test.rb".freeze, "test/workhorse/db_job_test.rb".freeze, "test/workhorse/enqueuer_test.rb".freeze, "test/workhorse/performer_test.rb".freeze, "test/workhorse/poller_test.rb".freeze, "test/workhorse/pool_test.rb".freeze, "test/workhorse/worker_test.rb".freeze, "workhorse.gemspec".freeze]
|
|
13
13
|
s.homepage = "https://github.com/sitrox/workhorse".freeze
|
|
14
14
|
s.licenses = ["MIT".freeze]
|
metadata
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: workhorse
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.4.1
|
|
4
|
+
version: 1.4.4
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Sitrox
|
|
8
8
|
bindir: bin
|
|
9
9
|
cert_chain: []
|
|
10
|
-
date: 2026-02-18 00:00:00.000000000 Z
|
|
10
|
+
date: 2026-04-28 00:00:00.000000000 Z
|
|
11
11
|
dependencies:
|
|
12
12
|
- !ruby/object:Gem::Dependency
|
|
13
13
|
name: activesupport
|