workhorse 1.4.1 → 1.4.4

This diff shows the changes between publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: df386a01e22eb4d5e05be449ef11a62341d28c3443fdef76b07ac7e8bca3e3d4
4
- data.tar.gz: 0a4f325c0bf2cb08357a195a2297d0302df275abf3cd6d83329942d68419f7c8
3
+ metadata.gz: 02c3e3163a1b21b8a983c756e9be5e916b2cc7017401ab3484905c04fe56293f
4
+ data.tar.gz: 52ffb9742bf24ea226e3aa3f7edf2f0a5deecdefa57310c3176a40bec00f227f
5
5
  SHA512:
6
- metadata.gz: 12b8ec75c276bf6d888e3f60b523a32cc387c6c4a935b20004fce4f63acbc4e7046979315eb7c6651953177d7289a3bf27fabc535da117a3d6a9c7be36f17ccd
7
- data.tar.gz: 1d15f6cf25a9fe2878e9edf45a7b2ce69d0bf0904119efd2d6234e8c15dc7142ce81c427912f22f2459fa27967b58932497424646d5ea9af431d625a1e154a8a
6
+ metadata.gz: ce5cd0d30660a0bb9829c84957e76589d5e65e9c47795c7ff414101df2b57f8661fbf01cd4b73bb4a69b7901f4fc43796a3a43e6f400c31b5c1a5b6b1eff8e56
7
+ data.tar.gz: 3594464e0fd00d036c9fe163daeea5a17d78d2fcd79095be05dac95753fbce43530812881529052af28917240920f70f706b708f7902d38636caa4e5c6995d3a
data/.releaser_config CHANGED
@@ -1,3 +1,3 @@
1
1
  version_file: VERSION
2
- always_from_master: true
2
+ always_from_master: false
3
3
  gem_style: github
data/CHANGELOG.md CHANGED
@@ -1,5 +1,27 @@
1
1
  # Workhorse Changelog
2
2
 
3
+ ## 1.4.4 - 2026-04-28
4
+
5
+ * Make debug logging (enabled if `config.debug_log_path` is set) more verbose.
6
+
7
+ Sitrox reference: #120574.
8
+
9
+ ## 1.4.3 - 2026-04-28
10
+
11
+ * Yanked from RubyGems. Do not use this release.
12
+
13
+ ## 1.4.2 - 2026-02-20
14
+
15
+ * Detach forked worker processes into their own session using `Process.setsid`.
16
+ Previously, when the ShellHandler was the session leader (e.g. started via
17
+ cron), its exit would cause the kernel to send `SIGHUP` to all forked workers,
18
+ potentially crashing them during startup before signal handlers were installed.
19
+
20
+ * Add optional debug logging (`config.debug_log_path`) for diagnosing issues
21
+ with signal handling, process lifecycle, log rotation, and daemon commands.
22
+
23
+ Sitrox reference: #120574.
24
+
3
25
  ## 1.4.1 - 2026-02-18
4
26
 
5
27
  * Close inherited lockfile fd in forked worker processes. Previously the
@@ -50,7 +72,7 @@
50
72
 
51
73
  ## 1.3.0.rc4 - 2025-08-27
52
74
 
53
- * Fix race-condition in polling mechanism which could result in workers
75
+ * Fix race-condition in polling mechanism which could result in workers
54
76
  trying to run a job that is not yet locked.
55
77
 
56
78
  Sitrox reference: #128333.
data/README.md CHANGED
@@ -522,7 +522,6 @@ Gem-internal model class `Workhorse::DbJob`, for example:
522
522
 
523
523
  ```ruby
524
524
  # config/initializers/workhorse.rb
525
-
526
525
  ActiveSupport.on_load :workhorse_db_job do
527
526
  # Code within this block will be run inside of the model class
528
527
  # Workhorse::DbJob.
@@ -530,6 +529,23 @@ ActiveSupport.on_load :workhorse_db_job do
530
529
  end
531
530
  ```
532
531
 
532
+ ## Debug logging
533
+
534
+ Workhorse includes an optional debug log for diagnosing issues with signal
535
+ handling, process lifecycle, log rotation, and daemon commands. To enable,
536
+ set `debug_log_path` to a writable file path:
537
+
538
+ ```ruby
539
+ # config/initializers/workhorse.rb
540
+ Workhorse.setup do |config|
541
+ config.debug_log_path = Rails.root.join('log', 'workhorse.debug.log')
542
+ end
543
+ ```
544
+
545
+ The debug log is designed to be safe for production use: all writes are
546
+ best-effort and silently ignore errors to avoid interfering with normal
547
+ operation. Set `debug_log_path` to `nil` (the default) to disable.
548
+
533
549
  ## Caveats
534
550
 
535
551
  ### Errors during polling / crashed workers
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.4.1
1
+ 1.4.4
@@ -16,43 +16,61 @@ module Workhorse
16
16
  begin
17
17
  case ARGV.first
18
18
  when 'start'
19
+ Workhorse.debug_log('ShellHandler: start command invoked')
19
20
  lockfile = acquire_lock(lockfile_path, File::LOCK_EX)
21
+ Workhorse.debug_log('ShellHandler: lock acquired for start')
20
22
  daemon.lockfile = lockfile
21
23
  status = daemon.start
22
24
  when 'stop'
25
+ Workhorse.debug_log('ShellHandler: stop command invoked')
23
26
  lockfile = acquire_lock(lockfile_path, File::LOCK_EX)
27
+ Workhorse.debug_log('ShellHandler: lock acquired for stop')
24
28
  daemon.lockfile = lockfile
25
29
  status = daemon.stop
26
30
  when 'kill'
31
+ Workhorse.debug_log('ShellHandler: kill command invoked')
27
32
  begin
28
33
  lockfile = acquire_lock(lockfile_path, File::LOCK_EX | File::LOCK_NB)
34
+ Workhorse.debug_log('ShellHandler: lock acquired for kill')
29
35
  daemon.lockfile = lockfile
30
36
  status = daemon.stop(true)
31
37
  rescue LockNotAvailableError
38
+ Workhorse.debug_log('ShellHandler: lock not available for kill')
32
39
  status = 1
33
40
  end
34
41
  when 'status'
42
+ Workhorse.debug_log('ShellHandler: status command invoked')
35
43
  lockfile = acquire_lock(lockfile_path, File::LOCK_EX)
44
+ Workhorse.debug_log('ShellHandler: lock acquired for status')
36
45
  daemon.lockfile = lockfile
37
46
  status = daemon.status
38
47
  when 'watch'
48
+ Workhorse.debug_log('ShellHandler: watch command invoked')
39
49
  begin
40
50
  lockfile = acquire_lock(lockfile_path, File::LOCK_EX | File::LOCK_NB)
51
+ Workhorse.debug_log('ShellHandler: lock acquired for watch')
41
52
  daemon.lockfile = lockfile
42
53
  status = daemon.watch
43
54
  rescue LockNotAvailableError
55
+ Workhorse.debug_log('ShellHandler: lock not available for watch')
44
56
  status = 1
45
57
  end
46
58
  when 'restart'
59
+ Workhorse.debug_log('ShellHandler: restart command invoked')
47
60
  lockfile = acquire_lock(lockfile_path, File::LOCK_EX)
61
+ Workhorse.debug_log('ShellHandler: lock acquired for restart')
48
62
  daemon.lockfile = lockfile
49
63
  status = daemon.restart
50
64
  when 'restart-logging'
65
+ Workhorse.debug_log('ShellHandler: restart-logging command invoked')
51
66
  lockfile = acquire_lock(lockfile_path, File::LOCK_EX)
67
+ Workhorse.debug_log('ShellHandler: lock acquired for restart-logging')
52
68
  daemon.lockfile = lockfile
53
69
  status = daemon.restart_logging
54
70
  when 'soft-restart'
71
+ Workhorse.debug_log('ShellHandler: soft-restart command invoked')
55
72
  lockfile = acquire_lock(lockfile_path, File::LOCK_EX)
73
+ Workhorse.debug_log('ShellHandler: lock acquired for soft-restart')
56
74
  daemon.lockfile = lockfile
57
75
  status = daemon.soft_restart
58
76
  when 'usage'
@@ -63,10 +81,15 @@ module Workhorse
63
81
  status = 99
64
82
  end
65
83
  rescue StandardError => e
84
+ Workhorse.debug_log("ShellHandler: #{ARGV.first} failed with #{e.class}: #{e.message}")
66
85
  warn "#{e.message}\n#{e.backtrace.join("\n")}"
67
86
  status = 99
68
87
  ensure
69
- lockfile&.flock(File::LOCK_UN)
88
+ if lockfile
89
+ Workhorse.debug_log("ShellHandler: releasing lock for #{ARGV.first}")
90
+ lockfile.flock(File::LOCK_UN)
91
+ end
92
+ Workhorse.debug_log("ShellHandler: exiting with status #{status}")
70
93
  exit! status
71
94
  end
72
95
  end
@@ -82,6 +82,8 @@ module Workhorse
82
82
  def start(quiet: false)
83
83
  code = 0
84
84
 
85
+ Workhorse.debug_log("Daemon: starting #{@workers.count} worker(s)")
86
+
85
87
  # Holds messages in format [[<message>, <severity>]]
86
88
  messages = []
87
89
 
@@ -89,9 +91,11 @@ module Workhorse
89
91
  pid_file, pid, active = read_pid(worker)
90
92
 
91
93
  if pid_file && pid && active
94
+ Workhorse.debug_log("Daemon start: worker ##{worker.id} (#{worker.name}) already running (PID #{pid})")
92
95
  messages << ["Worker ##{worker.id} (#{worker.name}): Already started (PID #{pid})", 2] unless quiet
93
96
  code = 2
94
97
  elsif pid_file
98
+ Workhorse.debug_log("Daemon start: worker ##{worker.id} (#{worker.name}) has stale pid file (PID #{pid.inspect}), starting")
95
99
  File.delete pid_file
96
100
 
97
101
  shutdown_file = pid ? Workhorse::Worker.shutdown_file_for(pid) : nil
@@ -101,6 +105,7 @@ module Workhorse
101
105
  start_worker worker
102
106
  FileUtils.rm(shutdown_file) if shutdown_file
103
107
  else
108
+ Workhorse.debug_log("Daemon start: worker ##{worker.id} (#{worker.name}) not running, starting")
104
109
  messages << ["Worker ##{worker.id} (#{worker.name}): Starting", 1] unless quiet
105
110
  start_worker worker
106
111
  end
@@ -115,6 +120,7 @@ module Workhorse
115
120
  end
116
121
  end
117
122
 
123
+ Workhorse.debug_log("Daemon: start complete, exit code=#{code}")
118
124
  return code
119
125
  end
120
126
 
@@ -126,21 +132,27 @@ module Workhorse
126
132
  def stop(kill = false, quiet: false)
127
133
  code = 0
128
134
 
135
+ Workhorse.debug_log("Daemon: stopping #{@workers.count} worker(s) (kill=#{kill})")
136
+
129
137
  for_each_worker do |worker|
130
138
  pid_file, pid, active = read_pid(worker)
131
139
 
132
140
  if pid_file && pid && active
141
+ Workhorse.debug_log("Daemon stop: worker ##{worker.id} (#{worker.name}) running (PID #{pid}), stopping")
133
142
  puts "Worker (#{worker.name}) ##{worker.id}: Stopping" unless quiet
134
143
  stop_worker pid_file, pid, kill: kill
135
144
  elsif pid_file
145
+ Workhorse.debug_log("Daemon stop: worker ##{worker.id} (#{worker.name}) stale pid file (PID #{pid.inspect})")
136
146
  File.delete pid_file
137
147
  puts "Worker (#{worker.name}) ##{worker.id}: Already stopped (stale PID file)" unless quiet
138
148
  else
149
+ Workhorse.debug_log("Daemon stop: worker ##{worker.id} (#{worker.name}) already stopped")
139
150
  warn "Worker (#{worker.name}) ##{worker.id}: Already stopped" unless quiet
140
151
  code = 2
141
152
  end
142
153
  end
143
154
 
155
+ Workhorse.debug_log("Daemon: stop complete, exit code=#{code}")
144
156
  return code
145
157
  end
146
158
 
@@ -155,16 +167,20 @@ module Workhorse
155
167
  pid_file, pid, active = read_pid(worker)
156
168
 
157
169
  if pid_file && pid && active
170
+ Workhorse.debug_log("Daemon status: worker ##{worker.id} (#{worker.name}) running (PID #{pid})")
158
171
  puts "Worker ##{worker.id} (#{worker.name}): Running" unless quiet
159
172
  elsif pid_file
173
+ Workhorse.debug_log("Daemon status: worker ##{worker.id} (#{worker.name}) not running (stale PID file, PID #{pid.inspect})")
160
174
  warn "Worker ##{worker.id} (#{worker.name}): Not running (stale PID file)" unless quiet
161
175
  code = 2
162
176
  else
177
+ Workhorse.debug_log("Daemon status: worker ##{worker.id} (#{worker.name}) not running (no pid file)")
163
178
  warn "Worker ##{worker.id} (#{worker.name}): Not running" unless quiet
164
179
  code = 2
165
180
  end
166
181
  end
167
182
 
183
+ Workhorse.debug_log("Daemon: status complete, exit code=#{code}")
168
184
  return code
169
185
  end
170
186
 
@@ -179,9 +195,14 @@ module Workhorse
179
195
  should_be_running = true
180
196
  end
181
197
 
182
- if should_be_running && status(quiet: true) != 0
198
+ status_code = status(quiet: true)
199
+ Workhorse.debug_log("Daemon watch: should_be_running=#{should_be_running}, status_code=#{status_code}")
200
+
201
+ if should_be_running && status_code != 0
202
+ Workhorse.debug_log('Daemon watch: starting workers')
183
203
  return start(quiet: Workhorse.silence_watcher)
184
204
  else
205
+ Workhorse.debug_log('Daemon watch: no action needed')
185
206
  return 0
186
207
  end
187
208
  end
@@ -201,20 +222,27 @@ module Workhorse
201
222
  def restart_logging
202
223
  code = 0
203
224
 
225
+ Workhorse.debug_log("restart_logging: sending HUP to #{@workers.count} worker(s)")
226
+
204
227
  for_each_worker do |worker|
205
228
  _pid_file, pid, active = read_pid(worker)
206
229
 
230
+ Workhorse.debug_log("restart_logging: worker ##{worker.id} (#{worker.name}): pid=#{pid.inspect}, active=#{active.inspect}")
231
+
207
232
  next unless pid && active
208
233
 
209
234
  begin
210
235
  Process.kill 'HUP', pid
236
+ Workhorse.debug_log("restart_logging: HUP sent successfully to PID #{pid}")
211
237
  puts "Worker (#{worker.name}) ##{worker.id}: Sent signal for restart-logging"
212
238
  rescue Errno::ESRCH
239
+ Workhorse.debug_log("restart_logging: HUP failed for PID #{pid}: process not found")
213
240
  warn "Worker (#{worker.name}) ##{worker.id}: Could not send signal for restart-logging, process not found"
214
241
  code = 2
215
242
  end
216
243
  end
217
244
 
245
+ Workhorse.debug_log("restart_logging: done, exit code=#{code}")
218
246
  return code
219
247
  end
220
248
 
@@ -227,20 +255,27 @@ module Workhorse
227
255
  def soft_restart
228
256
  code = 0
229
257
 
258
+ Workhorse.debug_log("Daemon: sending USR1 to #{@workers.count} worker(s)")
259
+
230
260
  for_each_worker do |worker|
231
261
  _pid_file, pid, active = read_pid(worker)
232
262
 
263
+ Workhorse.debug_log("Daemon soft_restart: worker ##{worker.id} (#{worker.name}): pid=#{pid.inspect}, active=#{active.inspect}")
264
+
233
265
  next unless pid && active
234
266
 
235
267
  begin
236
268
  Process.kill 'USR1', pid
269
+ Workhorse.debug_log("Daemon: USR1 sent successfully to PID #{pid}")
237
270
  puts "Worker (#{worker.name}) ##{worker.id}: Sent soft-restart signal"
238
271
  rescue Errno::ESRCH
272
+ Workhorse.debug_log("Daemon: USR1 failed for PID #{pid}: process not found")
239
273
  warn "Worker (#{worker.name}) ##{worker.id}: Process not found"
240
274
  code = 2
241
275
  end
242
276
  end
243
277
 
278
+ Workhorse.debug_log("Daemon soft_restart: done, exit code=#{code}")
244
279
  return code
245
280
  end
246
281
 
@@ -263,7 +298,13 @@ module Workhorse
263
298
  def start_worker(worker)
264
299
  check_rails_env if defined?(Rails)
265
300
 
301
+ Workhorse.debug_log("Daemon: forking worker ##{worker.id} (#{worker.name})")
266
302
  pid = fork do
303
+ # Detach from the parent's session so that the worker is not killed by
304
+ # SIGHUP when the parent (ShellHandler) exits. Without this, the kernel
305
+ # sends SIGHUP to the foreground process group when the session leader
306
+ # (e.g. a cron- or systemd-started ShellHandler) terminates.
307
+ Process.setsid
267
308
  $0 = process_name(worker)
268
309
  # Close inherited lockfile fd to prevent holding the flock after parent exits
269
310
  @lockfile&.close
@@ -278,6 +319,7 @@ module Workhorse
278
319
  worker.pid = pid
279
320
  File.write(pid_file_for(worker), pid)
280
321
  Process.detach(pid)
322
+ Workhorse.debug_log("Daemon: worker ##{worker.id} (#{worker.name}) forked with PID #{pid}")
281
323
  end
282
324
 
283
325
  # Stops a single worker process.
@@ -290,6 +332,7 @@ module Workhorse
290
332
  def stop_worker(pid_file, pid, kill: false)
291
333
  signals = kill ? %w[KILL] : %w[TERM INT]
292
334
 
335
+ Workhorse.debug_log("Daemon: stopping PID #{pid} with signals #{signals.join(', ')}")
293
336
  loop do
294
337
  begin
295
338
  signals.each { |signal| Process.kill(signal, pid) }
@@ -300,6 +343,7 @@ module Workhorse
300
343
  sleep 1
301
344
  end
302
345
 
346
+ Workhorse.debug_log("Daemon: PID #{pid} stopped")
303
347
  File.delete(pid_file)
304
348
  end
305
349
 
@@ -49,14 +49,18 @@ module Workhorse
49
49
  fail 'Poller is already running.' if running?
50
50
  @running = true
51
51
 
52
+ Workhorse.debug_log("[Job worker #{worker.id}] Poller starting")
53
+
52
54
  clean_stuck_jobs! if Workhorse.clean_stuck_jobs
53
55
 
54
56
  @thread = Thread.new do
57
+ Workhorse.debug_log("[Job worker #{worker.id}] Poller thread started")
55
58
  loop do
56
59
  break unless running?
57
60
 
58
61
  begin
59
62
  unless @before_poll.call
63
+ Workhorse.debug_log("[Job worker #{worker.id}] before_poll returned false, triggering worker shutdown")
60
64
  Thread.new { worker.shutdown }
61
65
  sleep
62
66
  next
@@ -65,6 +69,7 @@ module Workhorse
65
69
  poll
66
70
  sleep
67
71
  rescue Exception => e
72
+ Workhorse.debug_log("[Job worker #{worker.id}] Poller exception, shutting down: #{e.class}: #{e.message}")
68
73
  worker.log %(Poll encountered exception:\n#{e.message}\n#{e.backtrace.join("\n")})
69
74
  worker.log 'Worker shutting down...'
70
75
  Workhorse.on_exception.call(e) unless Workhorse.silence_poller_exceptions
@@ -73,6 +78,7 @@ module Workhorse
73
78
  break
74
79
  end
75
80
  end
81
+ Workhorse.debug_log("[Job worker #{worker.id}] Poller thread exiting")
76
82
  end
77
83
  end
78
84
 
@@ -82,8 +88,10 @@ module Workhorse
82
88
  # @raise [RuntimeError] If poller is not running
83
89
  def shutdown
84
90
  fail 'Poller is not running.' unless running?
91
+ Workhorse.debug_log("[Job worker #{worker.id}] Poller shutting down")
85
92
  @running = false
86
93
  wait
94
+ Workhorse.debug_log("[Job worker #{worker.id}] Poller shut down")
87
95
  end
88
96
 
89
97
  # Waits for the poller thread to complete.
@@ -158,6 +158,8 @@ module Workhorse
158
158
  @poller.start
159
159
  log 'Started up'
160
160
 
161
+ Workhorse.debug_log("[Job worker #{id}] Started: PID=#{pid}, logger=#{describe_logger(logger)}")
162
+
161
163
  trap_termination if @auto_terminate
162
164
  trap_log_reopen
163
165
  trap_soft_restart
@@ -189,12 +191,14 @@ module Workhorse
189
191
  mutex.synchronize do
190
192
  assert_state! :running
191
193
 
194
+ Workhorse.debug_log("[Job worker #{id}] Shutdown starting")
192
195
  log 'Shutting down'
193
196
  @state = :shutdown
194
197
 
195
198
  @poller.shutdown
196
199
  @pool.shutdown
197
200
  log 'Shut down'
201
+ Workhorse.debug_log("[Job worker #{id}] Shutdown complete")
198
202
  end
199
203
  end
200
204
 
@@ -267,6 +271,8 @@ module Workhorse
267
271
 
268
272
  return true unless exceeded
269
273
 
274
+ Workhorse.debug_log("[Job worker #{id}] Memory limit exceeded: #{mem}MB > #{max}MB, initiating shutdown")
275
+
270
276
  if defined?(Rails)
271
277
  FileUtils.touch self.class.shutdown_file_for(pid)
272
278
  end
@@ -296,16 +302,19 @@ module Workhorse
296
302
  def trap_log_reopen
297
303
  Signal.trap(LOG_REOPEN_SIGNAL) do
298
304
  Thread.new do
305
+ Workhorse.debug_log("[Job worker #{id}] #{LOG_REOPEN_SIGNAL} received, logger state before reopen: #{describe_logger(logger)}")
306
+
299
307
  logger&.reopen
308
+ Workhorse.debug_log("[Job worker #{id}] Logger state after reopen: #{describe_logger(logger)}")
300
309
 
301
- if defined?(ActiveRecord::Base) && ActiveRecord::Base.logger && ActiveRecord::Base.logger != logger
302
- ActiveRecord::Base.logger.reopen
303
- end
310
+ Workhorse.debug_log("[Job worker #{id}] #{LOG_REOPEN_SIGNAL} handling complete")
304
311
  rescue Exception => e
312
+ Workhorse.debug_log("[Job worker #{id}] Logger reopen failed: #{e.class}: #{e.message}\n#{e.backtrace.join("\n")}")
305
313
  log %(Log reopen signal handler error: #{e.message}\n#{e.backtrace.join("\n")}), :error
306
314
  Workhorse.on_exception.call(e)
307
315
  end.join
308
316
  end
317
+ Workhorse.debug_log("[Job worker #{id}] Signal handler installed: #{LOG_REOPEN_SIGNAL}")
309
318
  end
310
319
 
311
320
  # Sets up signal handlers for graceful termination (TERM/INT signals).
@@ -320,11 +329,13 @@ module Workhorse
320
329
  # quickly when called multiple times, this does not pose a risk of
321
330
  # keeping open a big number of "shutdown threads".
322
331
  Thread.new do
332
+ Workhorse.debug_log("[Job worker #{id}] #{signal} received, shutting down")
323
333
  log "\nCaught #{signal}, shutting worker down..."
324
334
  shutdown
325
335
  end.join
326
336
  end
327
337
  end
338
+ Workhorse.debug_log("[Job worker #{id}] Signal handlers installed: #{SHUTDOWN_SIGNALS.join(', ')}")
328
339
  end
329
340
 
330
341
  # Initiates a soft restart of the worker.
@@ -339,9 +350,14 @@ module Workhorse
339
350
 
340
351
  return unless @soft_restart_requested.make_true
341
352
 
353
+ Workhorse.debug_log("[Job worker #{id}] Soft restart initiated")
354
+
342
355
  # Create shutdown file for watch to detect
343
356
  shutdown_file = self.class.shutdown_file_for(pid)
344
- FileUtils.touch(shutdown_file) if shutdown_file
357
+ if shutdown_file
358
+ FileUtils.touch(shutdown_file)
359
+ Workhorse.debug_log("[Job worker #{id}] Shutdown file created: #{shutdown_file}")
360
+ end
345
361
 
346
362
  # Monitor in a separate thread to avoid blocking the signal handler
347
363
  @soft_restart_thread = Thread.new do
@@ -361,6 +377,7 @@ module Workhorse
361
377
  # Start a new thread as certain functionality (such as logging) is not
362
378
  # available from within a trap context.
363
379
  Thread.new do
380
+ Workhorse.debug_log("[Job worker #{id}] #{SOFT_RESTART_SIGNAL} received, initiating soft restart")
364
381
  log "\nCaught #{SOFT_RESTART_SIGNAL}, initiating soft restart..."
365
382
  soft_restart
366
383
  rescue Exception => e
@@ -370,6 +387,41 @@ module Workhorse
370
387
  # NOTE: Unlike trap_termination, we don't join here because soft_restart
371
388
  # is designed to be fire-and-forget (it spawns its own monitoring thread).
372
389
  end
390
+ Workhorse.debug_log("[Job worker #{id}] Signal handler installed: #{SOFT_RESTART_SIGNAL}")
391
+ end
392
+
393
+ # Returns a human-readable description of a logger's internal state.
394
+ # Used for debug logging to diagnose log rotation issues.
395
+ #
396
+ # @param lgr [Logger, nil] The logger to describe
397
+ # @return [String] Description of the logger's state
398
+ # @private
399
+ def describe_logger(lgr)
400
+ return 'nil' unless lgr
401
+
402
+ parts = ["class=#{lgr.class}"]
403
+
404
+ logdev = lgr.instance_variable_get(:@logdev)
405
+ if logdev
406
+ parts << "filename=#{logdev.filename.inspect}" if logdev.respond_to?(:filename)
407
+
408
+ dev = logdev.respond_to?(:dev) ? logdev.dev : nil
409
+ if dev
410
+ parts << "closed=#{dev.closed?}"
411
+ unless dev.closed?
412
+ fileno = dev.fileno
413
+ parts << "fd=#{fileno}"
414
+ fd_path = "/proc/self/fd/#{fileno}"
415
+ parts << "fd_target=#{File.readlink(fd_path).inspect}" if File.exist?(fd_path)
416
+ end
417
+ end
418
+ else
419
+ parts << 'logdev=nil'
420
+ end
421
+
422
+ parts.join(', ')
423
+ rescue Exception => e
424
+ "error describing logger: #{e.class}: #{e.message}"
373
425
  end
374
426
 
375
427
  # Waits for all jobs to complete, then shuts down the worker.
@@ -378,10 +430,12 @@ module Workhorse
378
430
  # @return [void]
379
431
  # @private
380
432
  def wait_for_idle_then_shutdown
433
+ Workhorse.debug_log("[Job worker #{id}] Waiting for idle before soft restart shutdown (pool_size=#{@pool_size})")
381
434
  loop do
382
435
  break if @state == :shutdown
383
436
 
384
437
  if idle == @pool_size
438
+ Workhorse.debug_log("[Job worker #{id}] All threads idle, proceeding with soft restart shutdown")
385
439
  log 'All jobs completed, shutting down for soft restart'
386
440
  shutdown
387
441
  break
data/lib/workhorse.rb CHANGED
@@ -107,6 +107,31 @@ module Workhorse
107
107
  mattr_accessor :max_worker_memory_mb
108
108
  self.max_worker_memory_mb = 0
109
109
 
110
+ # Path to a debug log file for diagnosing log rotation and signal handling issues.
111
+ # When set, Workhorse writes timestamped debug entries to this file at key points
112
+ # (worker startup, HUP signal handling, restart-logging command flow).
113
+ # Set to nil to disable (default).
114
+ #
115
+ # @return [String, nil] Path to debug log file
116
+ mattr_accessor :debug_log_path
117
+ self.debug_log_path = nil
118
+
119
+ # Writes a debug message to the debug log file.
120
+ # Does nothing if {.debug_log_path} is nil.
121
+ # Silently ignores all exceptions to avoid interfering with normal operation.
122
+ #
123
+ # @param message [String] The message to log
124
+ # @return [void]
125
+ def self.debug_log(message)
126
+ return unless debug_log_path
127
+
128
+ File.open(debug_log_path, 'a') do |f|
129
+ f.write("[#{Time.now.iso8601(3)}] [PID #{Process.pid}] #{message}\n")
130
+ f.flush
131
+ end
132
+ rescue Exception # rubocop:disable Lint/SuppressedException
133
+ end
134
+
110
135
  # Configuration method for setting up Workhorse options.
111
136
  #
112
137
  # @yield [self] Configuration block
data/workhorse.gemspec CHANGED
@@ -1,14 +1,14 @@
1
1
  # -*- encoding: utf-8 -*-
2
- # stub: workhorse 1.4.1 ruby lib
2
+ # stub: workhorse 1.4.4 ruby lib
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = "workhorse".freeze
6
- s.version = "1.4.1"
6
+ s.version = "1.4.4"
7
7
 
8
8
  s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
9
9
  s.require_paths = ["lib".freeze]
10
10
  s.authors = ["Sitrox".freeze]
11
- s.date = "2026-02-18"
11
+ s.date = "2026-04-28"
12
12
  s.files = [".github/workflows/ruby.yml".freeze, ".gitignore".freeze, ".releaser_config".freeze, ".rubocop.yml".freeze, "CHANGELOG.md".freeze, "FAQ.md".freeze, "Gemfile".freeze, "LICENSE".freeze, "README.md".freeze, "RUBY_VERSION".freeze, "Rakefile".freeze, "VERSION".freeze, "bin/rubocop".freeze, "lib/active_job/queue_adapters/workhorse_adapter.rb".freeze, "lib/generators/workhorse/install_generator.rb".freeze, "lib/generators/workhorse/templates/bin/workhorse.rb".freeze, "lib/generators/workhorse/templates/config/initializers/workhorse.rb".freeze, "lib/generators/workhorse/templates/create_table_jobs.rb".freeze, "lib/workhorse.rb".freeze, "lib/workhorse/active_job_extension.rb".freeze, "lib/workhorse/daemon.rb".freeze, "lib/workhorse/daemon/shell_handler.rb".freeze, "lib/workhorse/db_job.rb".freeze, "lib/workhorse/enqueuer.rb".freeze, "lib/workhorse/jobs/cleanup_succeeded_jobs.rb".freeze, "lib/workhorse/jobs/detect_stale_jobs_job.rb".freeze, "lib/workhorse/jobs/run_active_job.rb".freeze, "lib/workhorse/jobs/run_rails_op.rb".freeze, "lib/workhorse/performer.rb".freeze, "lib/workhorse/poller.rb".freeze, "lib/workhorse/pool.rb".freeze, "lib/workhorse/scoped_env.rb".freeze, "lib/workhorse/worker.rb".freeze, "test/active_job/queue_adapters/workhorse_adapter_test.rb".freeze, "test/lib/db_schema.rb".freeze, "test/lib/jobs.rb".freeze, "test/lib/test_helper.rb".freeze, "test/workhorse/daemon_test.rb".freeze, "test/workhorse/db_job_test.rb".freeze, "test/workhorse/enqueuer_test.rb".freeze, "test/workhorse/performer_test.rb".freeze, "test/workhorse/poller_test.rb".freeze, "test/workhorse/pool_test.rb".freeze, "test/workhorse/worker_test.rb".freeze, "workhorse.gemspec".freeze]
13
13
  s.homepage = "https://github.com/sitrox/workhorse".freeze
14
14
  s.licenses = ["MIT".freeze]
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: workhorse
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.1
4
+ version: 1.4.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sitrox
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2026-02-18 00:00:00.000000000 Z
10
+ date: 2026-04-28 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: activesupport