qless 0.9.3 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (65)
  1. data/Gemfile +9 -3
  2. data/README.md +70 -25
  3. data/Rakefile +125 -9
  4. data/exe/install_phantomjs +21 -0
  5. data/lib/qless.rb +115 -76
  6. data/lib/qless/config.rb +11 -9
  7. data/lib/qless/failure_formatter.rb +43 -0
  8. data/lib/qless/job.rb +201 -102
  9. data/lib/qless/job_reservers/ordered.rb +7 -1
  10. data/lib/qless/job_reservers/round_robin.rb +16 -6
  11. data/lib/qless/job_reservers/shuffled_round_robin.rb +9 -2
  12. data/lib/qless/lua/qless-lib.lua +2463 -0
  13. data/lib/qless/lua/qless.lua +2012 -0
  14. data/lib/qless/lua_script.rb +63 -12
  15. data/lib/qless/middleware/memory_usage_monitor.rb +62 -0
  16. data/lib/qless/middleware/metriks.rb +45 -0
  17. data/lib/qless/middleware/redis_reconnect.rb +6 -3
  18. data/lib/qless/middleware/requeue_exceptions.rb +94 -0
  19. data/lib/qless/middleware/retry_exceptions.rb +38 -9
  20. data/lib/qless/middleware/sentry.rb +3 -7
  21. data/lib/qless/middleware/timeout.rb +64 -0
  22. data/lib/qless/queue.rb +90 -55
  23. data/lib/qless/server.rb +177 -130
  24. data/lib/qless/server/views/_job.erb +33 -15
  25. data/lib/qless/server/views/completed.erb +11 -0
  26. data/lib/qless/server/views/layout.erb +70 -11
  27. data/lib/qless/server/views/overview.erb +93 -53
  28. data/lib/qless/server/views/queue.erb +9 -8
  29. data/lib/qless/server/views/queues.erb +18 -1
  30. data/lib/qless/subscriber.rb +37 -22
  31. data/lib/qless/tasks.rb +5 -10
  32. data/lib/qless/test_helpers/worker_helpers.rb +55 -0
  33. data/lib/qless/version.rb +3 -1
  34. data/lib/qless/worker.rb +4 -413
  35. data/lib/qless/worker/base.rb +247 -0
  36. data/lib/qless/worker/forking.rb +245 -0
  37. data/lib/qless/worker/serial.rb +41 -0
  38. metadata +135 -52
  39. data/lib/qless/qless-core/cancel.lua +0 -101
  40. data/lib/qless/qless-core/complete.lua +0 -233
  41. data/lib/qless/qless-core/config.lua +0 -56
  42. data/lib/qless/qless-core/depends.lua +0 -65
  43. data/lib/qless/qless-core/deregister_workers.lua +0 -12
  44. data/lib/qless/qless-core/fail.lua +0 -117
  45. data/lib/qless/qless-core/failed.lua +0 -83
  46. data/lib/qless/qless-core/get.lua +0 -37
  47. data/lib/qless/qless-core/heartbeat.lua +0 -51
  48. data/lib/qless/qless-core/jobs.lua +0 -41
  49. data/lib/qless/qless-core/pause.lua +0 -18
  50. data/lib/qless/qless-core/peek.lua +0 -165
  51. data/lib/qless/qless-core/pop.lua +0 -314
  52. data/lib/qless/qless-core/priority.lua +0 -32
  53. data/lib/qless/qless-core/put.lua +0 -169
  54. data/lib/qless/qless-core/qless-lib.lua +0 -2354
  55. data/lib/qless/qless-core/qless.lua +0 -1862
  56. data/lib/qless/qless-core/queues.lua +0 -58
  57. data/lib/qless/qless-core/recur.lua +0 -190
  58. data/lib/qless/qless-core/retry.lua +0 -73
  59. data/lib/qless/qless-core/stats.lua +0 -92
  60. data/lib/qless/qless-core/tag.lua +0 -100
  61. data/lib/qless/qless-core/track.lua +0 -79
  62. data/lib/qless/qless-core/unfail.lua +0 -54
  63. data/lib/qless/qless-core/unpause.lua +0 -12
  64. data/lib/qless/qless-core/workers.lua +0 -69
  65. data/lib/qless/wait_until.rb +0 -19
@@ -40,12 +40,13 @@
40
40
 
41
41
  <div class="subnav subnav-fixed">
42
42
  <ul class="nav nav-pills">
43
- <li class="<%= tab == 'stats' ? 'active' : '' %>"><a href="<%= u "/queues/#{queue['name']}/stats" %>">Stats</a></li>
44
- <li class="<%= tab == 'running' ? 'active' : '' %>"><a href="<%= u "/queues/#{queue['name']}/running" %>">Running</a></li>
45
- <li class="<%= tab == 'waiting' ? 'active' : '' %>"><a href="<%= u "/queues/#{queue['name']}/waiting" %>">Waiting</a></li>
46
- <li class="<%= tab == 'scheduled' ? 'active' : '' %>"><a href="<%= u "/queues/#{queue['name']}/scheduled" %>">Scheduled</a></li>
47
- <li class="<%= tab == 'stalled' ? 'active' : '' %>"><a href="<%= u "/queues/#{queue['name']}/stalled" %>">Stalled</a></li>
48
- <li class="<%= tab == 'depends' ? 'active' : '' %>"><a href="<%= u "/queues/#{queue['name']}/depends" %>">Depends</a></li>
43
+ <li class="<%= tab == 'stats' ? 'active' : '' %>"><a href="<%= u "/queues/#{CGI::escape(queue['name'])}/stats" %>">Stats</a></li>
44
+ <li class="<%= tab == 'running' ? 'active' : '' %>"><a href="<%= u "/queues/#{CGI::escape(queue['name'])}/running" %>">Running</a></li>
45
+ <li class="<%= tab == 'waiting' ? 'active' : '' %>"><a href="<%= u "/queues/#{CGI::escape(queue['name'])}/waiting" %>">Waiting</a></li>
46
+ <li class="<%= tab == 'scheduled' ? 'active' : '' %>"><a href="<%= u "/queues/#{CGI::escape(queue['name'])}/scheduled" %>">Scheduled</a></li>
47
+ <li class="<%= tab == 'stalled' ? 'active' : '' %>"><a href="<%= u "/queues/#{CGI::escape(queue['name'])}/stalled" %>">Stalled</a></li>
48
+ <li class="<%= tab == 'depends' ? 'active' : '' %>"><a href="<%= u "/queues/#{CGI::escape(queue['name'])}/depends" %>">Depends</a></li>
49
+ <li class="<%= tab == 'recurring' ? 'active' : '' %>"><a href="<%= u "/queues/#{CGI::escape(queue['name'])}/recurring" %>">Recurring</a></li>
49
50
  </ul>
50
51
  </div>
51
52
 
@@ -53,7 +54,7 @@
53
54
 
54
55
  <div class="row">
55
56
  <div class="span8">
56
- <h2><a href="<%= u "/queues/#{queue['name']}" %>"><%= queue['name'] %></a> |
57
+ <h2><a href="<%= u "/queues/#{CGI::escape(queue['name'])}" %>"><%= queue['name'] %></a> |
57
58
  <%= queue['running'] %> /
58
59
  <%= queue['waiting'] %> /
59
60
  <%= queue['scheduled'] %> /
@@ -73,7 +74,7 @@
73
74
  </div>
74
75
  </div>
75
76
 
76
- <% if ['running', 'waiting', 'scheduled', 'stalled', 'depends'].include?(tab) %>
77
+ <% if ['running', 'waiting', 'scheduled', 'stalled', 'depends', 'recurring'].include?(tab) %>
77
78
  <hr/>
78
79
  <%= erb :_job_list, :locals => { :jobs => jobs, :queues => queues } %>
79
80
  <% else %>
@@ -10,7 +10,24 @@
10
10
  <% queues.each do |queue| %>
11
11
  <div class="row">
12
12
  <div class="span4">
13
- <h3><a href="<%= u "/queues/#{queue['name']}" %>"><%= queue['name'] %></a></h3>
13
+ <h3>
14
+ <% if queue['paused'] %>
15
+ <button
16
+ id="<%= queue['name'] %>-pause"
17
+ title="Unpause"
18
+ class="btn btn-success"
19
+ onclick="unpause('<%= queue['name'] %>')"><i class="icon-play"></i>
20
+ </button>
21
+ <% else %>
22
+ <button
23
+ id="<%= queue['name'] %>-pause"
24
+ title="Pause"
25
+ class="btn btn-warning"
26
+ onclick="pause('<%= queue['name'] %>')"><i class="icon-pause"></i>
27
+ </button>
28
+ <% end %>
29
+ <a href="<%= u "/queues/#{CGI::escape(queue['name'])}" %>"><%= queue['name'] %></a>
30
+ </h3>
14
31
  </div>
15
32
  <div class="span8">
16
33
  <h3> |
@@ -1,48 +1,63 @@
1
+ # Encoding: utf-8
2
+
1
3
  require 'thread'
2
- require 'qless/wait_until'
3
4
 
4
5
  module Qless
6
+ # A class used for subscribing to messages in a thread
5
7
  class Subscriber
6
8
  def self.start(*args, &block)
7
- new(*args, &block).start_pub_sub_listener
9
+ new(*args, &block).tap(&:start)
8
10
  end
9
11
 
10
- attr_reader :client, :channel
12
+ attr_reader :channel, :redis
11
13
 
12
- def initialize(client, channel, &message_received_callback)
13
- @client = client
14
+ def initialize(client, channel, options = {}, &message_received_callback)
14
15
  @channel = channel
15
16
  @message_received_callback = message_received_callback
17
+ @log = options.fetch(:log) { ::Logger.new($stderr) }
16
18
 
17
- # pub/sub blocks the connection so we must use a different redis connection
18
- @client_redis = client.redis
19
+ # pub/sub blocks the connection so we must use a different redis
20
+ # connection
21
+ @client_redis = client.redis
19
22
  @listener_redis = client.new_redis_connection
20
23
 
21
24
  @my_channel = Qless.generate_jid
22
25
  end
23
26
 
24
- def start_pub_sub_listener
25
- @thread = ::Thread.start do
26
- @listener_redis.subscribe(channel, @my_channel) do |on|
27
- on.message do |_channel, message|
28
- if _channel == @my_channel
29
- @listener_redis.unsubscribe(@my_channel)
30
- else
31
- @message_received_callback.call(self, JSON.parse(message))
32
- end
27
+ # Start a thread listening
28
+ def start
29
+ queue = ::Queue.new
30
+
31
+ @thread = Thread.start do
32
+ @listener_redis.subscribe(@channel, @my_channel) do |on|
33
+ on.subscribe do |channel|
34
+ queue.push(:subscribed) if channel == @channel
35
+ end
36
+
37
+ on.message do |channel, message|
38
+ handle_message(channel, message)
33
39
  end
34
40
  end
35
41
  end
36
42
 
37
- wait_until_thread_listening
43
+ queue.pop
38
44
  end
39
45
 
40
- def wait_until_thread_listening
41
- Qless::WaitUntil.wait_until(10) do
42
- @client_redis.publish(@my_channel, 'disconnect') == 1
46
+ def stop
47
+ @client_redis.publish(@my_channel, 'disconnect')
48
+ @thread.join
49
+ end
50
+
51
+ private
52
+
53
+ def handle_message(channel, message)
54
+ if channel == @my_channel
55
+ @listener_redis.unsubscribe(@channel, @my_channel) if message == "disconnect"
56
+ else
57
+ @message_received_callback.call(self, JSON.parse(message))
43
58
  end
59
+ rescue Exception => error
60
+ @log.error("Qless::Subscriber") { error }
44
61
  end
45
62
  end
46
63
  end
47
-
48
-
@@ -1,10 +1,5 @@
1
- namespace :qless do
2
- task :setup # no-op; users should define their own setup
3
-
4
- desc "Start a Qless worker using env vars: QUEUES, JOB_RESERVER, REDIS_URL, INTERVAL, VERBOSE, VVERBOSE"
5
- task :work => :setup do
6
- require 'qless/worker'
7
- Qless::Worker.start
8
- end
9
- end
10
-
1
+ # Encoding: utf-8
2
+ warn "Qless tasks are deprecated (they haven't worked for " \
3
+ "quite some time) and you should start a worker by " \
4
+ "writing a bit of ruby code that instantiates and " \
5
+ "runs a worker instead."
@@ -0,0 +1,55 @@
1
+ module Qless
2
+ module WorkerHelpers
3
+ # Yield with a worker running, and then clean the worker up afterwards
4
+ def run_worker_concurrently_with(worker, &block)
5
+ thread = Thread.start { stop_worker_after(worker, &block) }
6
+ thread.abort_on_exception = true
7
+ worker.run
8
+ ensure
9
+ thread.join(0.1)
10
+ end
11
+
12
+ def stop_worker_after(worker, &block)
13
+ yield
14
+ ensure
15
+ worker.stop!
16
+ end
17
+
18
+ # Run only the given number of jobs, then stop
19
+ def run_jobs(worker, count)
20
+ worker.extend Module.new {
21
+ define_method(:jobs) do
22
+ base_enum = super()
23
+ Enumerator.new do |enum|
24
+ count.times { enum << base_enum.next }
25
+ end
26
+ end
27
+ }
28
+
29
+ thread = Thread.start { yield } if block_given?
30
+ thread.abort_on_exception if thread
31
+ worker.run
32
+ ensure
33
+ thread.join(0.1) if thread
34
+ end
35
+
36
+ # Runs the worker until it has no more jobs to process,
37
+ # effectively draining its queues.
38
+ def drain_worker_queues(worker)
39
+ worker.extend Module.new {
40
+ # For the child: stop as soon as it can't pop more jobs.
41
+ def no_job_available
42
+ shutdown
43
+ end
44
+
45
+ # For the parent: when the child stops,
46
+ # don't try to restart it; shutdown instead.
47
+ def spawn_replacement_child(*)
48
+ shutdown
49
+ end
50
+ }
51
+
52
+ worker.run
53
+ end
54
+ end
55
+ end
@@ -1,3 +1,5 @@
1
+ # Encoding: utf-8
2
+
1
3
  module Qless
2
- VERSION = "0.9.3"
4
+ VERSION = '0.10.0'
3
5
  end
@@ -1,414 +1,5 @@
1
- require 'qless'
2
- require 'time'
3
- require 'qless/job_reservers/ordered'
4
- require 'qless/job_reservers/round_robin'
5
- require 'qless/job_reservers/shuffled_round_robin'
6
- require 'qless/subscriber'
7
- require 'qless/wait_until'
8
-
9
- module Qless
10
- # This is heavily inspired by Resque's excellent worker:
11
- # https://github.com/defunkt/resque/blob/v1.20.0/lib/resque/worker.rb
12
- class Worker
13
- def initialize(job_reserver, options = {})
14
- self.job_reserver = job_reserver
15
- @shutdown = @paused = false
16
-
17
- self.very_verbose = options[:very_verbose]
18
- self.verbose = options[:verbose]
19
- self.run_as_single_process = options[:run_as_single_process]
20
- self.output = options.fetch(:output, $stdout)
21
- self.term_timeout = options.fetch(:term_timeout, 4.0)
22
- @backtrace_replacements = { Dir.pwd => '.' }
23
- @backtrace_replacements[ENV['GEM_HOME']] = '<GEM_HOME>' if ENV.has_key?('GEM_HOME')
24
-
25
- output.puts "\n\n\n" if verbose || very_verbose
26
- log "Instantiated Worker"
27
- end
28
-
29
- # Whether the worker should log basic info to STDOUT
30
- attr_accessor :verbose
31
-
32
- # Whether the worker should log lots of info to STDOUT
33
- attr_accessor :very_verbose
34
-
35
- # Whether the worker should run in a single process
36
- # i.e. not fork a child process to do the work
37
- # This should only be true in a dev/test environment
38
- attr_accessor :run_as_single_process
39
-
40
- # An IO-like object that logging output is sent to.
41
- # Defaults to $stdout.
42
- attr_accessor :output
43
-
44
- # The object responsible for reserving jobs from the Qless server,
45
- # using some reasonable strategy (e.g. round robin or ordered)
46
- attr_accessor :job_reserver
47
-
48
- # How long the child process is given to exit before forcibly killing it.
49
- attr_accessor :term_timeout
50
-
51
- # Starts a worker based on ENV vars. Supported ENV vars:
52
- # - REDIS_URL=redis://host:port/db-num (the redis gem uses this automatically)
53
- # - QUEUES=high,medium,low or QUEUE=blah
54
- # - JOB_RESERVER=Ordered or JOB_RESERVER=RoundRobin
55
- # - INTERVAL=3.2
56
- # - VERBOSE=true (to enable logging)
57
- # - VVERBOSE=true (to enable very verbose logging)
58
- # - RUN_AS_SINGLE_PROCESS=true (false will fork children to do work, true will keep it single process)
59
- # This is designed to be called from a rake task
60
- def self.start
61
- client = Qless::Client.new
62
- queues = (ENV['QUEUES'] || ENV['QUEUE']).to_s.split(',').map { |q| client.queues[q.strip] }
63
- if queues.none?
64
- raise "No queues provided. You must pass QUEUE or QUEUES when starting a worker."
65
- end
66
-
67
- reserver = JobReservers.const_get(ENV.fetch('JOB_RESERVER', 'Ordered')).new(queues)
68
- interval = Float(ENV.fetch('INTERVAL', 5.0))
69
-
70
- options = {}
71
- options[:verbose] = !!ENV['VERBOSE']
72
- options[:very_verbose] = !!ENV['VVERBOSE']
73
- options[:run_as_single_process] = !!ENV['RUN_AS_SINGLE_PROCESS']
74
-
75
- new(reserver, options).work(interval)
76
- end
77
-
78
- def work(interval = 5.0)
79
- procline "Starting #{@job_reserver.description}"
80
- register_parent_signal_handlers
81
- uniq_clients.each { |client| start_parent_pub_sub_listener_for(client) }
82
-
83
- loop do
84
- break if shutdown?
85
- if paused?
86
- sleep interval
87
- next
88
- end
89
-
90
- unless job = reserve_job
91
- break if interval.zero?
92
- procline "Waiting for #{@job_reserver.description}"
93
- log! "Sleeping for #{interval} seconds"
94
- sleep interval
95
- next
96
- end
97
-
98
- perform_job_in_child_process(job)
99
- end
100
- ensure
101
- # make sure the worker deregisters on shutdown
102
- deregister
103
- end
104
-
105
- def perform(job)
106
- around_perform(job)
107
- rescue Exception => error
108
- fail_job(job, error, caller)
109
- else
110
- try_complete(job)
111
- end
112
-
113
- def reserve_job
114
- @job_reserver.reserve
115
- rescue Exception => error
116
- # We want workers to durably stay up, so we don't want errors
117
- # during job reserving (e.g. network timeouts, etc) to kill
118
- # the worker.
119
- log "Got an error while reserving a job: #{error.class}: #{error.message}"
120
- end
121
-
122
- def perform_job_in_child_process(job)
123
- with_job(job) do
124
- @child = fork do
125
- job.reconnect_to_redis
126
- register_child_signal_handlers
127
- start_child_pub_sub_listener_for(job.client)
128
- procline "Processing #{job.description}"
129
- perform(job)
130
- exit! # don't run at_exit hooks
131
- end
132
-
133
- if @child
134
- wait_for_child
135
- else
136
- procline "Single processing #{job.description}"
137
- perform(job)
138
- end
139
- end
140
- end
141
-
142
- def shutdown
143
- @shutdown = true
144
- end
145
-
146
- def shutdown!
147
- shutdown
148
- kill_child unless run_as_single_process
149
- end
150
-
151
- def shutdown?
152
- @shutdown
153
- end
154
-
155
- def paused?
156
- @paused
157
- end
158
-
159
- def pause_processing
160
- log "USR2 received; pausing job processing"
161
- @paused = true
162
- procline "Paused -- #{@job_reserver.description}"
163
- end
164
-
165
- def unpause_processing
166
- log "CONT received; resuming job processing"
167
- @paused = false
168
- end
169
-
170
- private
171
-
172
- def fork
173
- super unless run_as_single_process
174
- end
175
-
176
- def deregister
177
- uniq_clients.each do |client|
178
- client.deregister_workers(Qless.worker_name)
179
- end
180
- end
181
-
182
- def uniq_clients
183
- @uniq_clients ||= @job_reserver.queues.map(&:client).uniq
184
- end
185
-
186
- def try_complete(job)
187
- job.complete unless job.state_changed?
188
- rescue Job::CantCompleteError => e
189
- # There's not much we can do here. Complete fails in a few cases:
190
- # - The job is already failed (i.e. by another worker)
191
- # - The job is being worked on by another worker
192
- # - The job has been cancelled
193
- #
194
- # We don't want to (and aren't able to) fail the job with this error in
195
- # any of these cases, so the best we can do is log the failure.
196
- log "Failed to complete #{job.inspect}: #{e.message}"
197
- end
198
-
199
- # Allow middleware modules to be mixed in and override the
200
- # definition of around_perform while providing a default
201
- # implementation so our code can assume the method is present.
202
- include Module.new {
203
- def around_perform(job)
204
- job.perform
205
- end
206
- }
207
-
208
- def fail_job(job, error, worker_backtrace)
209
- group = "#{job.klass_name}:#{error.class}"
210
- message = "#{truncated_message(error)}\n\n#{format_failure_backtrace(error.backtrace, worker_backtrace)}"
211
- log "Got #{group} failure from #{job.inspect}"
212
- job.fail(group, message)
213
- end
214
-
215
- # TODO: pull this out into a config option.
216
- MAX_ERROR_MESSAGE_SIZE = 10_000
217
- def truncated_message(error)
218
- return error.message if error.message.length <= MAX_ERROR_MESSAGE_SIZE
219
- error.message.slice(0, MAX_ERROR_MESSAGE_SIZE) + "... (truncated due to length)"
220
- end
221
-
222
- def format_failure_backtrace(error_backtrace, worker_backtrace)
223
- (error_backtrace - worker_backtrace).map do |line|
224
- @backtrace_replacements.inject(line) do |line, (original, new)|
225
- line.sub(original, new)
226
- end
227
- end.join("\n")
228
- end
229
-
230
- def procline(value)
231
- $0 = "Qless-#{Qless::VERSION}: #{value} at #{Time.now.iso8601}"
232
- log! $0
233
- end
234
-
235
- def wait_for_child
236
- srand # Reseeding
237
- procline "Forked #{@child} at #{Time.now.to_i}"
238
- begin
239
- Process.waitpid(@child)
240
- rescue SystemCallError
241
- nil
242
- end
243
- end
244
-
245
- # Kills the forked child immediately with minimal remorse. The job it
246
- # is processing will not be completed. Send the child a TERM signal,
247
- # wait up to term_timeout seconds, and then send a KILL signal if it has not quit
248
- def kill_child
249
- return unless @child
250
-
251
- if Process.waitpid(@child, Process::WNOHANG)
252
- log "Child #{@child} already quit."
253
- return
254
- end
255
-
256
- signal_child("TERM", @child)
257
-
258
- signal_child("KILL", @child) unless quit_gracefully?(@child)
259
- rescue SystemCallError
260
- log "Child #{@child} already quit and reaped."
261
- end
262
-
263
- # send a signal to a child, have it logged.
264
- def signal_child(signal, child)
265
- log "Sending #{signal} signal to child #{child}"
266
- Process.kill(signal, child)
267
- end
268
-
269
- # has our child quit gracefully within the timeout limit?
270
- def quit_gracefully?(child)
271
- (term_timeout.to_f * 10).round.times do |i|
272
- sleep(0.1)
273
- return true if Process.waitpid(child, Process::WNOHANG)
274
- end
275
-
276
- false
277
- end
278
-
279
- # This was originally stolen directly from resque... (thanks, @defunkt!)
280
- # Registers the various signal handlers a worker responds to.
281
- #
282
- # TERM: Shutdown immediately, stop processing jobs.
283
- # INT: Shutdown immediately, stop processing jobs.
284
- # QUIT: Shutdown after the current job has finished processing.
285
- # USR1: Kill the forked child immediately, continue processing jobs.
286
- # USR2: Don't process any new jobs; dump the backtrace.
287
- # CONT: Start processing jobs again after a USR2
288
- def register_parent_signal_handlers
289
- trap('TERM') { shutdown! }
290
- trap('INT') { shutdown! }
291
-
292
- begin
293
- trap('QUIT') { shutdown }
294
- trap('USR1') { kill_child }
295
- trap('USR2') do
296
- log "Current backtrace (parent): \n\n#{caller.join("\n")}\n\n"
297
- pause_processing
298
- end
299
-
300
- trap('CONT') { unpause_processing }
301
- rescue ArgumentError
302
- warn "Signals QUIT, USR1, USR2, and/or CONT not supported."
303
- end
304
- end
305
-
306
- def register_child_signal_handlers
307
- trap('TERM') { raise SignalException.new("SIGTERM") }
308
- trap('INT', 'DEFAULT')
309
-
310
- begin
311
- trap('QUIT', 'DEFAULT')
312
- trap('USR1', 'DEFAULT')
313
- trap('USR2', 'DEFAULT')
314
-
315
- trap('USR2') do
316
- log "Current backtrace (child): \n\n#{caller.join("\n")}\n\n"
317
- end
318
- rescue ArgumentError
319
- end
320
- end
321
-
322
- # Log a message to STDOUT if we are verbose or very_verbose.
323
- def log(message)
324
- if verbose
325
- output.puts "*** #{message}"
326
- elsif very_verbose
327
- time = Time.now.strftime('%H:%M:%S %Y-%m-%d')
328
- output.puts "** [#{time}] #$$: #{message}"
329
- end
330
- end
331
-
332
- # Logs a very verbose message to STDOUT.
333
- def log!(message)
334
- log message if very_verbose
335
- end
336
-
337
- def start_parent_pub_sub_listener_for(client)
338
- Subscriber.start(client, "ql:w:#{Qless.worker_name}") do |subscriber, message|
339
- if message["event"] == "lock_lost" && message["jid"] == current_job_jid
340
- fail_job_due_to_timeout
341
- kill_child
342
- end
343
- end
344
- end
345
-
346
- def start_child_pub_sub_listener_for(client)
347
- Subscriber.start(client, "ql:w:#{Qless.worker_name}:#{Process.pid}") do |subscriber, message|
348
- if message["event"] == "notify_backtrace"
349
- notify_parent_of_job_backtrace(client, message.fetch('notify_list'))
350
- end
351
- end
352
- end
353
-
354
- def with_job(job)
355
- @job = job
356
- yield
357
- ensure
358
- @job = nil
359
- end
360
-
361
- # To prevent race conditions (with our listener thread),
362
- # we cannot use a pattern like `use(@job) if @job` because
363
- # the value of `@job` could change between the checking of
364
- # it and the use of it. Here we use a pattern that avoids
365
- # the issue -- get the job into a local, and yield that if
366
- # it is set.
367
- def access_current_job
368
- if job = @job
369
- yield job
370
- end
371
- end
372
-
373
- def current_job_jid
374
- access_current_job &:jid
375
- end
376
-
377
- JobLockLost = Class.new(StandardError)
378
-
379
- def fail_job_due_to_timeout
380
- access_current_job do |job|
381
- error = JobLockLost.new
382
- error.set_backtrace(get_backtrace_from_child(job.client.redis))
383
- fail_job(job, error, caller)
384
- end
385
- end
386
-
387
- def notify_parent_of_job_backtrace(client, list)
388
- job_backtrace = Thread.main.backtrace
389
- client.redis.lpush list, JSON.dump(job_backtrace)
390
- client.redis.pexpire list, BACKTRACE_EXPIRATION_TIMEOUT_MS
391
- end
392
-
393
- WAIT_FOR_CHILD_BACKTRACE_TIMEOUT = 4
394
- BACKTRACE_EXPIRATION_TIMEOUT_MS = 60_000 # timeout after a minute
395
-
396
- def get_backtrace_from_child(child_redis)
397
- notification_list = "ql:child_backtraces:#{Qless.generate_jid}"
398
- request_backtrace = { "event" => "notify_backtrace",
399
- "notify_list" => notification_list }
400
-
401
- if child_redis.publish("ql:w:#{Qless.worker_name}:#{@child}", JSON.dump(request_backtrace)).zero?
402
- return ["Could not obtain child backtrace since it was not listening."]
403
- end
404
-
405
- begin
406
- _, backtrace_json = child_redis.blpop(notification_list, WAIT_FOR_CHILD_BACKTRACE_TIMEOUT)
407
- JSON.parse(backtrace_json)
408
- rescue => e
409
- ["Could not obtain child backtrace: #{e.class}: #{e.message}"] + e.backtrace
410
- end
411
- end
412
- end
413
- end
1
+ # Encoding: utf-8
414
2
 
3
+ require 'qless/worker/base'
4
+ require 'qless/worker/serial'
5
+ require 'qless/worker/forking'