qless 0.9.3 → 0.10.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +9 -3
- data/README.md +70 -25
- data/Rakefile +125 -9
- data/exe/install_phantomjs +21 -0
- data/lib/qless.rb +115 -76
- data/lib/qless/config.rb +11 -9
- data/lib/qless/failure_formatter.rb +43 -0
- data/lib/qless/job.rb +201 -102
- data/lib/qless/job_reservers/ordered.rb +7 -1
- data/lib/qless/job_reservers/round_robin.rb +16 -6
- data/lib/qless/job_reservers/shuffled_round_robin.rb +9 -2
- data/lib/qless/lua/qless-lib.lua +2463 -0
- data/lib/qless/lua/qless.lua +2012 -0
- data/lib/qless/lua_script.rb +63 -12
- data/lib/qless/middleware/memory_usage_monitor.rb +62 -0
- data/lib/qless/middleware/metriks.rb +45 -0
- data/lib/qless/middleware/redis_reconnect.rb +6 -3
- data/lib/qless/middleware/requeue_exceptions.rb +94 -0
- data/lib/qless/middleware/retry_exceptions.rb +38 -9
- data/lib/qless/middleware/sentry.rb +3 -7
- data/lib/qless/middleware/timeout.rb +64 -0
- data/lib/qless/queue.rb +90 -55
- data/lib/qless/server.rb +177 -130
- data/lib/qless/server/views/_job.erb +33 -15
- data/lib/qless/server/views/completed.erb +11 -0
- data/lib/qless/server/views/layout.erb +70 -11
- data/lib/qless/server/views/overview.erb +93 -53
- data/lib/qless/server/views/queue.erb +9 -8
- data/lib/qless/server/views/queues.erb +18 -1
- data/lib/qless/subscriber.rb +37 -22
- data/lib/qless/tasks.rb +5 -10
- data/lib/qless/test_helpers/worker_helpers.rb +55 -0
- data/lib/qless/version.rb +3 -1
- data/lib/qless/worker.rb +4 -413
- data/lib/qless/worker/base.rb +247 -0
- data/lib/qless/worker/forking.rb +245 -0
- data/lib/qless/worker/serial.rb +41 -0
- metadata +135 -52
- data/lib/qless/qless-core/cancel.lua +0 -101
- data/lib/qless/qless-core/complete.lua +0 -233
- data/lib/qless/qless-core/config.lua +0 -56
- data/lib/qless/qless-core/depends.lua +0 -65
- data/lib/qless/qless-core/deregister_workers.lua +0 -12
- data/lib/qless/qless-core/fail.lua +0 -117
- data/lib/qless/qless-core/failed.lua +0 -83
- data/lib/qless/qless-core/get.lua +0 -37
- data/lib/qless/qless-core/heartbeat.lua +0 -51
- data/lib/qless/qless-core/jobs.lua +0 -41
- data/lib/qless/qless-core/pause.lua +0 -18
- data/lib/qless/qless-core/peek.lua +0 -165
- data/lib/qless/qless-core/pop.lua +0 -314
- data/lib/qless/qless-core/priority.lua +0 -32
- data/lib/qless/qless-core/put.lua +0 -169
- data/lib/qless/qless-core/qless-lib.lua +0 -2354
- data/lib/qless/qless-core/qless.lua +0 -1862
- data/lib/qless/qless-core/queues.lua +0 -58
- data/lib/qless/qless-core/recur.lua +0 -190
- data/lib/qless/qless-core/retry.lua +0 -73
- data/lib/qless/qless-core/stats.lua +0 -92
- data/lib/qless/qless-core/tag.lua +0 -100
- data/lib/qless/qless-core/track.lua +0 -79
- data/lib/qless/qless-core/unfail.lua +0 -54
- data/lib/qless/qless-core/unpause.lua +0 -12
- data/lib/qless/qless-core/workers.lua +0 -69
- data/lib/qless/wait_until.rb +0 -19
@@ -40,12 +40,13 @@
|
|
40
40
|
|
41
41
|
<div class="subnav subnav-fixed">
|
42
42
|
<ul class="nav nav-pills">
|
43
|
-
<li class="<%= tab == 'stats' ? 'active' : '' %>"><a href="<%= u "/queues/#{queue['name']}/stats" %>">Stats</a></li>
|
44
|
-
<li class="<%= tab == 'running' ? 'active' : '' %>"><a href="<%= u "/queues/#{queue['name']}/running" %>">Running</a></li>
|
45
|
-
<li class="<%= tab == 'waiting' ? 'active' : '' %>"><a href="<%= u "/queues/#{queue['name']}/waiting" %>">Waiting</a></li>
|
46
|
-
<li class="<%= tab == 'scheduled' ? 'active' : '' %>"><a href="<%= u "/queues/#{queue['name']}/scheduled" %>">Scheduled</a></li>
|
47
|
-
<li class="<%= tab == 'stalled' ? 'active' : '' %>"><a href="<%= u "/queues/#{queue['name']}/stalled" %>">Stalled</a></li>
|
48
|
-
<li class="<%= tab == 'depends' ? 'active' : '' %>"><a href="<%= u "/queues/#{queue['name']}/depends" %>">Depends</a></li>
|
43
|
+
<li class="<%= tab == 'stats' ? 'active' : '' %>"><a href="<%= u "/queues/#{CGI::escape(queue['name'])}/stats" %>">Stats</a></li>
|
44
|
+
<li class="<%= tab == 'running' ? 'active' : '' %>"><a href="<%= u "/queues/#{CGI::escape(queue['name'])}/running" %>">Running</a></li>
|
45
|
+
<li class="<%= tab == 'waiting' ? 'active' : '' %>"><a href="<%= u "/queues/#{CGI::escape(queue['name'])}/waiting" %>">Waiting</a></li>
|
46
|
+
<li class="<%= tab == 'scheduled' ? 'active' : '' %>"><a href="<%= u "/queues/#{CGI::escape(queue['name'])}/scheduled" %>">Scheduled</a></li>
|
47
|
+
<li class="<%= tab == 'stalled' ? 'active' : '' %>"><a href="<%= u "/queues/#{CGI::escape(queue['name'])}/stalled" %>">Stalled</a></li>
|
48
|
+
<li class="<%= tab == 'depends' ? 'active' : '' %>"><a href="<%= u "/queues/#{CGI::escape(queue['name'])}/depends" %>">Depends</a></li>
|
49
|
+
<li class="<%= tab == 'recurring' ? 'active' : '' %>"><a href="<%= u "/queues/#{CGI::escape(queue['name'])}/recurring" %>">Recurring</a></li>
|
49
50
|
</ul>
|
50
51
|
</div>
|
51
52
|
|
@@ -53,7 +54,7 @@
|
|
53
54
|
|
54
55
|
<div class="row">
|
55
56
|
<div class="span8">
|
56
|
-
<h2><a href="<%= u "/queues/#{queue['name']}" %>"><%= queue['name'] %></a> |
|
57
|
+
<h2><a href="<%= u "/queues/#{CGI::escape(queue['name'])}" %>"><%= queue['name'] %></a> |
|
57
58
|
<%= queue['running'] %> /
|
58
59
|
<%= queue['waiting'] %> /
|
59
60
|
<%= queue['scheduled'] %> /
|
@@ -73,7 +74,7 @@
|
|
73
74
|
</div>
|
74
75
|
</div>
|
75
76
|
|
76
|
-
<% if ['running', 'waiting', 'scheduled', 'stalled', 'depends'].include?(tab) %>
|
77
|
+
<% if ['running', 'waiting', 'scheduled', 'stalled', 'depends', 'recurring'].include?(tab) %>
|
77
78
|
<hr/>
|
78
79
|
<%= erb :_job_list, :locals => { :jobs => jobs, :queues => queues } %>
|
79
80
|
<% else %>
|
@@ -10,7 +10,24 @@
|
|
10
10
|
<% queues.each do |queue| %>
|
11
11
|
<div class="row">
|
12
12
|
<div class="span4">
|
13
|
-
<h3
|
13
|
+
<h3>
|
14
|
+
<% if queue['paused'] %>
|
15
|
+
<button
|
16
|
+
id="<%= queue['name'] %>-pause"
|
17
|
+
title="Unpause"
|
18
|
+
class="btn btn-success"
|
19
|
+
onclick="unpause('<%= queue['name'] %>')"><i class="icon-play"></i>
|
20
|
+
</button>
|
21
|
+
<% else %>
|
22
|
+
<button
|
23
|
+
id="<%= queue['name'] %>-pause"
|
24
|
+
title="Pause"
|
25
|
+
class="btn btn-warning"
|
26
|
+
onclick="pause('<%= queue['name'] %>')"><i class="icon-pause"></i>
|
27
|
+
</button>
|
28
|
+
<% end %>
|
29
|
+
<a href="<%= u "/queues/#{CGI::escape(queue['name'])}" %>"><%= queue['name'] %></a>
|
30
|
+
</h3>
|
14
31
|
</div>
|
15
32
|
<div class="span8">
|
16
33
|
<h3> |
|
data/lib/qless/subscriber.rb
CHANGED
@@ -1,48 +1,63 @@
|
|
1
|
+
# Encoding: utf-8
|
2
|
+
|
1
3
|
require 'thread'
|
2
|
-
require 'qless/wait_until'
|
3
4
|
|
4
5
|
module Qless
|
6
|
+
# A class used for subscribing to messages in a thread
|
5
7
|
class Subscriber
|
6
8
|
def self.start(*args, &block)
|
7
|
-
new(*args, &block).
|
9
|
+
new(*args, &block).tap(&:start)
|
8
10
|
end
|
9
11
|
|
10
|
-
attr_reader :
|
12
|
+
attr_reader :channel, :redis
|
11
13
|
|
12
|
-
def initialize(client, channel, &message_received_callback)
|
13
|
-
@client = client
|
14
|
+
def initialize(client, channel, options = {}, &message_received_callback)
|
14
15
|
@channel = channel
|
15
16
|
@message_received_callback = message_received_callback
|
17
|
+
@log = options.fetch(:log) { ::Logger.new($stderr) }
|
16
18
|
|
17
|
-
# pub/sub blocks the connection so we must use a different redis
|
18
|
-
|
19
|
+
# pub/sub blocks the connection so we must use a different redis
|
20
|
+
# connection
|
21
|
+
@client_redis = client.redis
|
19
22
|
@listener_redis = client.new_redis_connection
|
20
23
|
|
21
24
|
@my_channel = Qless.generate_jid
|
22
25
|
end
|
23
26
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
27
|
+
# Start a thread listening
|
28
|
+
def start
|
29
|
+
queue = ::Queue.new
|
30
|
+
|
31
|
+
@thread = Thread.start do
|
32
|
+
@listener_redis.subscribe(@channel, @my_channel) do |on|
|
33
|
+
on.subscribe do |channel|
|
34
|
+
queue.push(:subscribed) if channel == @channel
|
35
|
+
end
|
36
|
+
|
37
|
+
on.message do |channel, message|
|
38
|
+
handle_message(channel, message)
|
33
39
|
end
|
34
40
|
end
|
35
41
|
end
|
36
42
|
|
37
|
-
|
43
|
+
queue.pop
|
38
44
|
end
|
39
45
|
|
40
|
-
def
|
41
|
-
|
42
|
-
|
46
|
+
def stop
|
47
|
+
@client_redis.publish(@my_channel, 'disconnect')
|
48
|
+
@thread.join
|
49
|
+
end
|
50
|
+
|
51
|
+
private
|
52
|
+
|
53
|
+
def handle_message(channel, message)
|
54
|
+
if channel == @my_channel
|
55
|
+
@listener_redis.unsubscribe(@channel, @my_channel) if message == "disconnect"
|
56
|
+
else
|
57
|
+
@message_received_callback.call(self, JSON.parse(message))
|
43
58
|
end
|
59
|
+
rescue Exception => error
|
60
|
+
@log.error("Qless::Subscriber") { error }
|
44
61
|
end
|
45
62
|
end
|
46
63
|
end
|
47
|
-
|
48
|
-
|
data/lib/qless/tasks.rb
CHANGED
@@ -1,10 +1,5 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
require 'qless/worker'
|
7
|
-
Qless::Worker.start
|
8
|
-
end
|
9
|
-
end
|
10
|
-
|
1
|
+
# Encoding: utf-8
|
2
|
+
warn "Qless tasks are deprecated (they haven't worked for " \
|
3
|
+
"quite some time) and you should start a worker by " \
|
4
|
+
"writing a bit of ruby code that instantiates and " \
|
5
|
+
"runs a worker instead."
|
@@ -0,0 +1,55 @@
|
|
1
|
+
module Qless
|
2
|
+
module WorkerHelpers
|
3
|
+
# Yield with a worker running, and then clean the worker up afterwards
|
4
|
+
def run_worker_concurrently_with(worker, &block)
|
5
|
+
thread = Thread.start { stop_worker_after(worker, &block) }
|
6
|
+
thread.abort_on_exception = true
|
7
|
+
worker.run
|
8
|
+
ensure
|
9
|
+
thread.join(0.1)
|
10
|
+
end
|
11
|
+
|
12
|
+
def stop_worker_after(worker, &block)
|
13
|
+
yield
|
14
|
+
ensure
|
15
|
+
worker.stop!
|
16
|
+
end
|
17
|
+
|
18
|
+
# Run only the given number of jobs, then stop
|
19
|
+
def run_jobs(worker, count)
|
20
|
+
worker.extend Module.new {
|
21
|
+
define_method(:jobs) do
|
22
|
+
base_enum = super()
|
23
|
+
Enumerator.new do |enum|
|
24
|
+
count.times { enum << base_enum.next }
|
25
|
+
end
|
26
|
+
end
|
27
|
+
}
|
28
|
+
|
29
|
+
thread = Thread.start { yield } if block_given?
|
30
|
+
thread.abort_on_exception if thread
|
31
|
+
worker.run
|
32
|
+
ensure
|
33
|
+
thread.join(0.1) if thread
|
34
|
+
end
|
35
|
+
|
36
|
+
# Runs the worker until it has no more jobs to process,
|
37
|
+
# effectively draining its queues.
|
38
|
+
def drain_worker_queues(worker)
|
39
|
+
worker.extend Module.new {
|
40
|
+
# For the child: stop as soon as it can't pop more jobs.
|
41
|
+
def no_job_available
|
42
|
+
shutdown
|
43
|
+
end
|
44
|
+
|
45
|
+
# For the parent: when the child stops,
|
46
|
+
# don't try to restart it; shutdown instead.
|
47
|
+
def spawn_replacement_child(*)
|
48
|
+
shutdown
|
49
|
+
end
|
50
|
+
}
|
51
|
+
|
52
|
+
worker.run
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
data/lib/qless/version.rb
CHANGED
data/lib/qless/worker.rb
CHANGED
@@ -1,414 +1,5 @@
|
|
1
|
-
|
2
|
-
require 'time'
|
3
|
-
require 'qless/job_reservers/ordered'
|
4
|
-
require 'qless/job_reservers/round_robin'
|
5
|
-
require 'qless/job_reservers/shuffled_round_robin'
|
6
|
-
require 'qless/subscriber'
|
7
|
-
require 'qless/wait_until'
|
8
|
-
|
9
|
-
module Qless
|
10
|
-
# This is heavily inspired by Resque's excellent worker:
|
11
|
-
# https://github.com/defunkt/resque/blob/v1.20.0/lib/resque/worker.rb
|
12
|
-
class Worker
|
13
|
-
def initialize(job_reserver, options = {})
|
14
|
-
self.job_reserver = job_reserver
|
15
|
-
@shutdown = @paused = false
|
16
|
-
|
17
|
-
self.very_verbose = options[:very_verbose]
|
18
|
-
self.verbose = options[:verbose]
|
19
|
-
self.run_as_single_process = options[:run_as_single_process]
|
20
|
-
self.output = options.fetch(:output, $stdout)
|
21
|
-
self.term_timeout = options.fetch(:term_timeout, 4.0)
|
22
|
-
@backtrace_replacements = { Dir.pwd => '.' }
|
23
|
-
@backtrace_replacements[ENV['GEM_HOME']] = '<GEM_HOME>' if ENV.has_key?('GEM_HOME')
|
24
|
-
|
25
|
-
output.puts "\n\n\n" if verbose || very_verbose
|
26
|
-
log "Instantiated Worker"
|
27
|
-
end
|
28
|
-
|
29
|
-
# Whether the worker should log basic info to STDOUT
|
30
|
-
attr_accessor :verbose
|
31
|
-
|
32
|
-
# Whether the worker should log lots of info to STDOUT
|
33
|
-
attr_accessor :very_verbose
|
34
|
-
|
35
|
-
# Whether the worker should run in a single process
|
36
|
-
# i.e. not fork a child process to do the work
|
37
|
-
# This should only be true in a dev/test environment
|
38
|
-
attr_accessor :run_as_single_process
|
39
|
-
|
40
|
-
# An IO-like object that logging output is sent to.
|
41
|
-
# Defaults to $stdout.
|
42
|
-
attr_accessor :output
|
43
|
-
|
44
|
-
# The object responsible for reserving jobs from the Qless server,
|
45
|
-
# using some reasonable strategy (e.g. round robin or ordered)
|
46
|
-
attr_accessor :job_reserver
|
47
|
-
|
48
|
-
# How long the child process is given to exit before forcibly killing it.
|
49
|
-
attr_accessor :term_timeout
|
50
|
-
|
51
|
-
# Starts a worker based on ENV vars. Supported ENV vars:
|
52
|
-
# - REDIS_URL=redis://host:port/db-num (the redis gem uses this automatically)
|
53
|
-
# - QUEUES=high,medium,low or QUEUE=blah
|
54
|
-
# - JOB_RESERVER=Ordered or JOB_RESERVER=RoundRobin
|
55
|
-
# - INTERVAL=3.2
|
56
|
-
# - VERBOSE=true (to enable logging)
|
57
|
-
# - VVERBOSE=true (to enable very verbose logging)
|
58
|
-
# - RUN_AS_SINGLE_PROCESS=true (false will fork children to do work, true will keep it single process)
|
59
|
-
# This is designed to be called from a rake task
|
60
|
-
def self.start
|
61
|
-
client = Qless::Client.new
|
62
|
-
queues = (ENV['QUEUES'] || ENV['QUEUE']).to_s.split(',').map { |q| client.queues[q.strip] }
|
63
|
-
if queues.none?
|
64
|
-
raise "No queues provided. You must pass QUEUE or QUEUES when starting a worker."
|
65
|
-
end
|
66
|
-
|
67
|
-
reserver = JobReservers.const_get(ENV.fetch('JOB_RESERVER', 'Ordered')).new(queues)
|
68
|
-
interval = Float(ENV.fetch('INTERVAL', 5.0))
|
69
|
-
|
70
|
-
options = {}
|
71
|
-
options[:verbose] = !!ENV['VERBOSE']
|
72
|
-
options[:very_verbose] = !!ENV['VVERBOSE']
|
73
|
-
options[:run_as_single_process] = !!ENV['RUN_AS_SINGLE_PROCESS']
|
74
|
-
|
75
|
-
new(reserver, options).work(interval)
|
76
|
-
end
|
77
|
-
|
78
|
-
def work(interval = 5.0)
|
79
|
-
procline "Starting #{@job_reserver.description}"
|
80
|
-
register_parent_signal_handlers
|
81
|
-
uniq_clients.each { |client| start_parent_pub_sub_listener_for(client) }
|
82
|
-
|
83
|
-
loop do
|
84
|
-
break if shutdown?
|
85
|
-
if paused?
|
86
|
-
sleep interval
|
87
|
-
next
|
88
|
-
end
|
89
|
-
|
90
|
-
unless job = reserve_job
|
91
|
-
break if interval.zero?
|
92
|
-
procline "Waiting for #{@job_reserver.description}"
|
93
|
-
log! "Sleeping for #{interval} seconds"
|
94
|
-
sleep interval
|
95
|
-
next
|
96
|
-
end
|
97
|
-
|
98
|
-
perform_job_in_child_process(job)
|
99
|
-
end
|
100
|
-
ensure
|
101
|
-
# make sure the worker deregisters on shutdown
|
102
|
-
deregister
|
103
|
-
end
|
104
|
-
|
105
|
-
def perform(job)
|
106
|
-
around_perform(job)
|
107
|
-
rescue Exception => error
|
108
|
-
fail_job(job, error, caller)
|
109
|
-
else
|
110
|
-
try_complete(job)
|
111
|
-
end
|
112
|
-
|
113
|
-
def reserve_job
|
114
|
-
@job_reserver.reserve
|
115
|
-
rescue Exception => error
|
116
|
-
# We want workers to durably stay up, so we don't want errors
|
117
|
-
# during job reserving (e.g. network timeouts, etc) to kill
|
118
|
-
# the worker.
|
119
|
-
log "Got an error while reserving a job: #{error.class}: #{error.message}"
|
120
|
-
end
|
121
|
-
|
122
|
-
def perform_job_in_child_process(job)
|
123
|
-
with_job(job) do
|
124
|
-
@child = fork do
|
125
|
-
job.reconnect_to_redis
|
126
|
-
register_child_signal_handlers
|
127
|
-
start_child_pub_sub_listener_for(job.client)
|
128
|
-
procline "Processing #{job.description}"
|
129
|
-
perform(job)
|
130
|
-
exit! # don't run at_exit hooks
|
131
|
-
end
|
132
|
-
|
133
|
-
if @child
|
134
|
-
wait_for_child
|
135
|
-
else
|
136
|
-
procline "Single processing #{job.description}"
|
137
|
-
perform(job)
|
138
|
-
end
|
139
|
-
end
|
140
|
-
end
|
141
|
-
|
142
|
-
def shutdown
|
143
|
-
@shutdown = true
|
144
|
-
end
|
145
|
-
|
146
|
-
def shutdown!
|
147
|
-
shutdown
|
148
|
-
kill_child unless run_as_single_process
|
149
|
-
end
|
150
|
-
|
151
|
-
def shutdown?
|
152
|
-
@shutdown
|
153
|
-
end
|
154
|
-
|
155
|
-
def paused?
|
156
|
-
@paused
|
157
|
-
end
|
158
|
-
|
159
|
-
def pause_processing
|
160
|
-
log "USR2 received; pausing job processing"
|
161
|
-
@paused = true
|
162
|
-
procline "Paused -- #{@job_reserver.description}"
|
163
|
-
end
|
164
|
-
|
165
|
-
def unpause_processing
|
166
|
-
log "CONT received; resuming job processing"
|
167
|
-
@paused = false
|
168
|
-
end
|
169
|
-
|
170
|
-
private
|
171
|
-
|
172
|
-
def fork
|
173
|
-
super unless run_as_single_process
|
174
|
-
end
|
175
|
-
|
176
|
-
def deregister
|
177
|
-
uniq_clients.each do |client|
|
178
|
-
client.deregister_workers(Qless.worker_name)
|
179
|
-
end
|
180
|
-
end
|
181
|
-
|
182
|
-
def uniq_clients
|
183
|
-
@uniq_clients ||= @job_reserver.queues.map(&:client).uniq
|
184
|
-
end
|
185
|
-
|
186
|
-
def try_complete(job)
|
187
|
-
job.complete unless job.state_changed?
|
188
|
-
rescue Job::CantCompleteError => e
|
189
|
-
# There's not much we can do here. Complete fails in a few cases:
|
190
|
-
# - The job is already failed (i.e. by another worker)
|
191
|
-
# - The job is being worked on by another worker
|
192
|
-
# - The job has been cancelled
|
193
|
-
#
|
194
|
-
# We don't want to (or are able to) fail the job with this error in
|
195
|
-
# any of these cases, so the best we can do is log the failure.
|
196
|
-
log "Failed to complete #{job.inspect}: #{e.message}"
|
197
|
-
end
|
198
|
-
|
199
|
-
# Allow middleware modules to be mixed in and override the
|
200
|
-
# definition of around_perform while providing a default
|
201
|
-
# implementation so our code can assume the method is present.
|
202
|
-
include Module.new {
|
203
|
-
def around_perform(job)
|
204
|
-
job.perform
|
205
|
-
end
|
206
|
-
}
|
207
|
-
|
208
|
-
def fail_job(job, error, worker_backtrace)
|
209
|
-
group = "#{job.klass_name}:#{error.class}"
|
210
|
-
message = "#{truncated_message(error)}\n\n#{format_failure_backtrace(error.backtrace, worker_backtrace)}"
|
211
|
-
log "Got #{group} failure from #{job.inspect}"
|
212
|
-
job.fail(group, message)
|
213
|
-
end
|
214
|
-
|
215
|
-
# TODO: pull this out into a config option.
|
216
|
-
MAX_ERROR_MESSAGE_SIZE = 10_000
|
217
|
-
def truncated_message(error)
|
218
|
-
return error.message if error.message.length <= MAX_ERROR_MESSAGE_SIZE
|
219
|
-
error.message.slice(0, MAX_ERROR_MESSAGE_SIZE) + "... (truncated due to length)"
|
220
|
-
end
|
221
|
-
|
222
|
-
def format_failure_backtrace(error_backtrace, worker_backtrace)
|
223
|
-
(error_backtrace - worker_backtrace).map do |line|
|
224
|
-
@backtrace_replacements.inject(line) do |line, (original, new)|
|
225
|
-
line.sub(original, new)
|
226
|
-
end
|
227
|
-
end.join("\n")
|
228
|
-
end
|
229
|
-
|
230
|
-
def procline(value)
|
231
|
-
$0 = "Qless-#{Qless::VERSION}: #{value} at #{Time.now.iso8601}"
|
232
|
-
log! $0
|
233
|
-
end
|
234
|
-
|
235
|
-
def wait_for_child
|
236
|
-
srand # Reseeding
|
237
|
-
procline "Forked #{@child} at #{Time.now.to_i}"
|
238
|
-
begin
|
239
|
-
Process.waitpid(@child)
|
240
|
-
rescue SystemCallError
|
241
|
-
nil
|
242
|
-
end
|
243
|
-
end
|
244
|
-
|
245
|
-
# Kills the forked child immediately with minimal remorse. The job it
|
246
|
-
# is processing will not be completed. Send the child a TERM signal,
|
247
|
-
# wait 5 seconds, and then a KILL signal if it has not quit
|
248
|
-
def kill_child
|
249
|
-
return unless @child
|
250
|
-
|
251
|
-
if Process.waitpid(@child, Process::WNOHANG)
|
252
|
-
log "Child #{@child} already quit."
|
253
|
-
return
|
254
|
-
end
|
255
|
-
|
256
|
-
signal_child("TERM", @child)
|
257
|
-
|
258
|
-
signal_child("KILL", @child) unless quit_gracefully?(@child)
|
259
|
-
rescue SystemCallError
|
260
|
-
log "Child #{@child} already quit and reaped."
|
261
|
-
end
|
262
|
-
|
263
|
-
# send a signal to a child, have it logged.
|
264
|
-
def signal_child(signal, child)
|
265
|
-
log "Sending #{signal} signal to child #{child}"
|
266
|
-
Process.kill(signal, child)
|
267
|
-
end
|
268
|
-
|
269
|
-
# has our child quit gracefully within the timeout limit?
|
270
|
-
def quit_gracefully?(child)
|
271
|
-
(term_timeout.to_f * 10).round.times do |i|
|
272
|
-
sleep(0.1)
|
273
|
-
return true if Process.waitpid(child, Process::WNOHANG)
|
274
|
-
end
|
275
|
-
|
276
|
-
false
|
277
|
-
end
|
278
|
-
|
279
|
-
# This was originally stolen directly from resque... (thanks, @defunkt!)
|
280
|
-
# Registers the various signal handlers a worker responds to.
|
281
|
-
#
|
282
|
-
# TERM: Shutdown immediately, stop processing jobs.
|
283
|
-
# INT: Shutdown immediately, stop processing jobs.
|
284
|
-
# QUIT: Shutdown after the current job has finished processing.
|
285
|
-
# USR1: Kill the forked child immediately, continue processing jobs.
|
286
|
-
# USR2: Don't process any new jobs; dump the backtrace.
|
287
|
-
# CONT: Start processing jobs again after a USR2
|
288
|
-
def register_parent_signal_handlers
|
289
|
-
trap('TERM') { shutdown! }
|
290
|
-
trap('INT') { shutdown! }
|
291
|
-
|
292
|
-
begin
|
293
|
-
trap('QUIT') { shutdown }
|
294
|
-
trap('USR1') { kill_child }
|
295
|
-
trap('USR2') do
|
296
|
-
log "Current backtrace (parent): \n\n#{caller.join("\n")}\n\n"
|
297
|
-
pause_processing
|
298
|
-
end
|
299
|
-
|
300
|
-
trap('CONT') { unpause_processing }
|
301
|
-
rescue ArgumentError
|
302
|
-
warn "Signals QUIT, USR1, USR2, and/or CONT not supported."
|
303
|
-
end
|
304
|
-
end
|
305
|
-
|
306
|
-
def register_child_signal_handlers
|
307
|
-
trap('TERM') { raise SignalException.new("SIGTERM") }
|
308
|
-
trap('INT', 'DEFAULT')
|
309
|
-
|
310
|
-
begin
|
311
|
-
trap('QUIT', 'DEFAULT')
|
312
|
-
trap('USR1', 'DEFAULT')
|
313
|
-
trap('USR2', 'DEFAULT')
|
314
|
-
|
315
|
-
trap('USR2') do
|
316
|
-
log "Current backtrace (child): \n\n#{caller.join("\n")}\n\n"
|
317
|
-
end
|
318
|
-
rescue ArgumentError
|
319
|
-
end
|
320
|
-
end
|
321
|
-
|
322
|
-
# Log a message to STDOUT if we are verbose or very_verbose.
|
323
|
-
def log(message)
|
324
|
-
if verbose
|
325
|
-
output.puts "*** #{message}"
|
326
|
-
elsif very_verbose
|
327
|
-
time = Time.now.strftime('%H:%M:%S %Y-%m-%d')
|
328
|
-
output.puts "** [#{time}] #$$: #{message}"
|
329
|
-
end
|
330
|
-
end
|
331
|
-
|
332
|
-
# Logs a very verbose message to STDOUT.
|
333
|
-
def log!(message)
|
334
|
-
log message if very_verbose
|
335
|
-
end
|
336
|
-
|
337
|
-
def start_parent_pub_sub_listener_for(client)
|
338
|
-
Subscriber.start(client, "ql:w:#{Qless.worker_name}") do |subscriber, message|
|
339
|
-
if message["event"] == "lock_lost" && message["jid"] == current_job_jid
|
340
|
-
fail_job_due_to_timeout
|
341
|
-
kill_child
|
342
|
-
end
|
343
|
-
end
|
344
|
-
end
|
345
|
-
|
346
|
-
def start_child_pub_sub_listener_for(client)
|
347
|
-
Subscriber.start(client, "ql:w:#{Qless.worker_name}:#{Process.pid}") do |subscriber, message|
|
348
|
-
if message["event"] == "notify_backtrace"
|
349
|
-
notify_parent_of_job_backtrace(client, message.fetch('notify_list'))
|
350
|
-
end
|
351
|
-
end
|
352
|
-
end
|
353
|
-
|
354
|
-
def with_job(job)
|
355
|
-
@job = job
|
356
|
-
yield
|
357
|
-
ensure
|
358
|
-
@job = nil
|
359
|
-
end
|
360
|
-
|
361
|
-
# To prevent race conditions (with our listener thread),
|
362
|
-
# we cannot use a pattern like `use(@job) if @job` because
|
363
|
-
# the value of `@job` could change between the checking of
|
364
|
-
# it and the use of it. Here we use a pattern that avoids
|
365
|
-
# the issue -- get the job into a local, and yield that if
|
366
|
-
# it is set.
|
367
|
-
def access_current_job
|
368
|
-
if job = @job
|
369
|
-
yield job
|
370
|
-
end
|
371
|
-
end
|
372
|
-
|
373
|
-
def current_job_jid
|
374
|
-
access_current_job &:jid
|
375
|
-
end
|
376
|
-
|
377
|
-
JobLockLost = Class.new(StandardError)
|
378
|
-
|
379
|
-
def fail_job_due_to_timeout
|
380
|
-
access_current_job do |job|
|
381
|
-
error = JobLockLost.new
|
382
|
-
error.set_backtrace(get_backtrace_from_child(job.client.redis))
|
383
|
-
fail_job(job, error, caller)
|
384
|
-
end
|
385
|
-
end
|
386
|
-
|
387
|
-
def notify_parent_of_job_backtrace(client, list)
|
388
|
-
job_backtrace = Thread.main.backtrace
|
389
|
-
client.redis.lpush list, JSON.dump(job_backtrace)
|
390
|
-
client.redis.pexpire list, BACKTRACE_EXPIRATION_TIMEOUT_MS
|
391
|
-
end
|
392
|
-
|
393
|
-
WAIT_FOR_CHILD_BACKTRACE_TIMEOUT = 4
|
394
|
-
BACKTRACE_EXPIRATION_TIMEOUT_MS = 60_000 # timeout after a minute
|
395
|
-
|
396
|
-
def get_backtrace_from_child(child_redis)
|
397
|
-
notification_list = "ql:child_backtraces:#{Qless.generate_jid}"
|
398
|
-
request_backtrace = { "event" => "notify_backtrace",
|
399
|
-
"notify_list" => notification_list }
|
400
|
-
|
401
|
-
if child_redis.publish("ql:w:#{Qless.worker_name}:#{@child}", JSON.dump(request_backtrace)).zero?
|
402
|
-
return ["Could not obtain child backtrace since it was not listening."]
|
403
|
-
end
|
404
|
-
|
405
|
-
begin
|
406
|
-
_, backtrace_json = child_redis.blpop(notification_list, WAIT_FOR_CHILD_BACKTRACE_TIMEOUT)
|
407
|
-
JSON.parse(backtrace_json)
|
408
|
-
rescue => e
|
409
|
-
["Could not obtain child backtrace: #{e.class}: #{e.message}"] + e.backtrace
|
410
|
-
end
|
411
|
-
end
|
412
|
-
end
|
413
|
-
end
|
1
|
+
# Encoding: utf-8
|
414
2
|
|
3
|
+
require 'qless/worker/base'
|
4
|
+
require 'qless/worker/serial'
|
5
|
+
require 'qless/worker/forking'
|