sidekiq 3.5.4 → 4.0.0.pre1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of sidekiq might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/4.0-Upgrade.md +40 -0
- data/Changes.md +5 -8
- data/Ent-Changes.md +6 -0
- data/Gemfile +2 -1
- data/Pro-3.0-Upgrade.md +46 -0
- data/bin/sidekiqctl +3 -3
- data/bin/sidekiqload +17 -6
- data/lib/sidekiq.rb +24 -11
- data/lib/sidekiq/cli.rb +19 -28
- data/lib/sidekiq/client.rb +0 -5
- data/lib/sidekiq/fetch.rb +35 -111
- data/lib/sidekiq/launcher.rb +105 -46
- data/lib/sidekiq/manager.rb +71 -181
- data/lib/sidekiq/middleware/server/retry_jobs.rb +1 -1
- data/lib/sidekiq/processor.rb +119 -96
- data/lib/sidekiq/redis_connection.rb +22 -4
- data/lib/sidekiq/scheduled.rb +47 -26
- data/lib/sidekiq/util.rb +7 -0
- data/lib/sidekiq/version.rb +1 -1
- data/sidekiq.gemspec +1 -1
- data/test/helper.rb +30 -5
- data/test/test_actors.rb +137 -0
- data/test/test_api.rb +395 -394
- data/test/test_fetch.rb +2 -57
- data/test/test_launcher.rb +80 -0
- data/test/test_manager.rb +13 -132
- data/test/test_middleware.rb +3 -5
- data/test/test_processor.rb +20 -57
- data/test/test_scheduled.rb +2 -2
- metadata +19 -14
- data/lib/sidekiq/actor.rb +0 -39
data/lib/sidekiq/launcher.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
# encoding: utf-8
|
2
2
|
require 'sidekiq/manager'
|
3
3
|
require 'sidekiq/fetch'
|
4
4
|
require 'sidekiq/scheduled'
|
@@ -9,65 +9,116 @@ module Sidekiq
|
|
9
9
|
# If any of these actors die, the Sidekiq process exits
|
10
10
|
# immediately.
|
11
11
|
class Launcher
|
12
|
-
include Actor
|
13
12
|
include Util
|
14
13
|
|
15
|
-
|
16
|
-
|
17
|
-
attr_reader :manager, :poller, :fetcher
|
14
|
+
attr_accessor :manager, :poller, :fetcher
|
18
15
|
|
19
16
|
def initialize(options)
|
20
|
-
@
|
21
|
-
@
|
22
|
-
@poller = Sidekiq::Scheduled::Poller.new_link
|
23
|
-
@fetcher = Sidekiq::Fetcher.new_link(@manager, options)
|
24
|
-
@manager.fetcher = @fetcher
|
17
|
+
@manager = Sidekiq::Manager.new(options)
|
18
|
+
@poller = Sidekiq::Scheduled::Poller.new
|
25
19
|
@done = false
|
26
20
|
@options = options
|
27
21
|
end
|
28
22
|
|
29
|
-
def
|
30
|
-
|
31
|
-
|
23
|
+
def run
|
24
|
+
@thread = safe_thread("heartbeat", &method(:start_heartbeat))
|
25
|
+
@poller.start
|
26
|
+
@manager.start
|
27
|
+
end
|
32
28
|
|
33
|
-
|
34
|
-
|
35
|
-
|
29
|
+
# Stops this instance from processing any more jobs.
|
30
|
+
#
|
31
|
+
def quiet
|
32
|
+
@done = true
|
33
|
+
@manager.quiet
|
34
|
+
@poller.terminate
|
36
35
|
end
|
37
36
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
37
|
+
# Shuts down the process. This method does not
|
38
|
+
# return until all work is complete and cleaned up.
|
39
|
+
# It can take up to the timeout to complete.
|
40
|
+
def stop
|
41
|
+
deadline = Time.now + @options[:timeout]
|
42
42
|
|
43
|
-
|
44
|
-
|
43
|
+
@done = true
|
44
|
+
@manager.quiet
|
45
|
+
@poller.terminate
|
46
|
+
|
47
|
+
@manager.stop(deadline)
|
48
|
+
|
49
|
+
# Requeue everything in case there was a worker who grabbed work while stopped
|
50
|
+
# This call is a no-op in Sidekiq but necessary for Sidekiq Pro.
|
51
|
+
strategy = (@options[:fetch] || Sidekiq::BasicFetch)
|
52
|
+
strategy.bulk_requeue([], @options)
|
53
|
+
|
54
|
+
clear_heartbeat
|
45
55
|
end
|
46
56
|
|
47
|
-
def
|
48
|
-
|
49
|
-
@done = true
|
50
|
-
Sidekiq::Fetcher.done!
|
51
|
-
fetcher.terminate if fetcher.alive?
|
52
|
-
poller.terminate if poller.alive?
|
53
|
-
|
54
|
-
manager.async.stop(:shutdown => true, :timeout => @options[:timeout])
|
55
|
-
fire_event(:shutdown, true)
|
56
|
-
@condvar.wait
|
57
|
-
manager.terminate
|
58
|
-
|
59
|
-
# Requeue everything in case there was a worker who grabbed work while stopped
|
60
|
-
# This call is a no-op in Sidekiq but necessary for Sidekiq Pro.
|
61
|
-
Sidekiq::Fetcher.strategy.bulk_requeue([], @options)
|
62
|
-
|
63
|
-
stop_heartbeat
|
64
|
-
end
|
57
|
+
def stopping?
|
58
|
+
@done
|
65
59
|
end
|
66
60
|
|
67
|
-
private
|
61
|
+
private unless $TESTING
|
62
|
+
|
63
|
+
JVM_RESERVED_SIGNALS = ['USR1', 'USR2'] # Don't Process#kill if we get these signals via the API
|
64
|
+
|
65
|
+
def heartbeat(k, data, json)
|
66
|
+
results = Sidekiq::CLI::PROCTITLES.map {|x| x.(self, data) }
|
67
|
+
results.compact!
|
68
|
+
$0 = results.join(' ')
|
69
|
+
|
70
|
+
❤(k, json)
|
71
|
+
end
|
72
|
+
|
73
|
+
def ❤(key, json)
|
74
|
+
fails = procd = 0
|
75
|
+
begin
|
76
|
+
Processor::FAILURE.update {|curr| fails = curr; 0 }
|
77
|
+
Processor::PROCESSED.update {|curr| procd = curr; 0 }
|
78
|
+
|
79
|
+
workers_key = "#{key}:workers".freeze
|
80
|
+
nowdate = Time.now.utc.strftime("%Y-%m-%d".freeze)
|
81
|
+
Sidekiq.redis do |conn|
|
82
|
+
conn.pipelined do
|
83
|
+
conn.incrby("stat:processed".freeze, procd)
|
84
|
+
conn.incrby("stat:processed:#{nowdate}", procd)
|
85
|
+
conn.incrby("stat:failed".freeze, fails)
|
86
|
+
conn.incrby("stat:failed:#{nowdate}", fails)
|
87
|
+
conn.del(workers_key)
|
88
|
+
Processor::WORKER_STATE.each_pair do |tid, hash|
|
89
|
+
conn.hset(workers_key, tid, Sidekiq.dump_json(hash))
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
93
|
+
fails = procd = 0
|
94
|
+
|
95
|
+
_, _, _, msg = Sidekiq.redis do |conn|
|
96
|
+
conn.pipelined do
|
97
|
+
conn.sadd('processes', key)
|
98
|
+
conn.hmset(key, 'info', json, 'busy', Processor::WORKER_STATE.size, 'beat', Time.now.to_f)
|
99
|
+
conn.expire(key, 60)
|
100
|
+
conn.rpop("#{key}-signals")
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
return unless msg
|
105
|
+
|
106
|
+
if JVM_RESERVED_SIGNALS.include?(msg)
|
107
|
+
Sidekiq::CLI.instance.handle_signal(msg)
|
108
|
+
else
|
109
|
+
::Process.kill(msg, $$)
|
110
|
+
end
|
111
|
+
rescue => e
|
112
|
+
# ignore all redis/network issues
|
113
|
+
logger.error("heartbeat: #{e.message}")
|
114
|
+
# don't lose the counts if there was a network issue
|
115
|
+
PROCESSED.increment(procd)
|
116
|
+
FAILURE.increment(fails)
|
117
|
+
end
|
118
|
+
end
|
68
119
|
|
69
120
|
def start_heartbeat
|
70
|
-
|
121
|
+
k = identity
|
71
122
|
data = {
|
72
123
|
'hostname' => hostname,
|
73
124
|
'started_at' => Time.now.to_f,
|
@@ -75,16 +126,24 @@ module Sidekiq
|
|
75
126
|
'tag' => @options[:tag] || '',
|
76
127
|
'concurrency' => @options[:concurrency],
|
77
128
|
'queues' => @options[:queues].uniq,
|
78
|
-
'labels' =>
|
79
|
-
'identity' =>
|
129
|
+
'labels' => @options[:labels],
|
130
|
+
'identity' => k,
|
80
131
|
}
|
81
132
|
# this data doesn't change so dump it to a string
|
82
133
|
# now so we don't need to dump it every heartbeat.
|
83
134
|
json = Sidekiq.dump_json(data)
|
84
|
-
|
135
|
+
|
136
|
+
while true
|
137
|
+
heartbeat(k, data, json)
|
138
|
+
sleep 5
|
139
|
+
end
|
140
|
+
Sidekiq.logger.info("Heartbeat stopping...")
|
85
141
|
end
|
86
142
|
|
87
|
-
def
|
143
|
+
def clear_heartbeat
|
144
|
+
# Remove record from Redis since we are shutting down.
|
145
|
+
# Note we don't stop the heartbeat thread; if the process
|
146
|
+
# doesn't actually exit, it'll reappear in the Web UI.
|
88
147
|
Sidekiq.redis do |conn|
|
89
148
|
conn.pipelined do
|
90
149
|
conn.srem('processes', identity)
|
data/lib/sidekiq/manager.rb
CHANGED
@@ -1,156 +1,89 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
require 'sidekiq/util'
|
3
|
-
require 'sidekiq/actor'
|
4
3
|
require 'sidekiq/processor'
|
5
4
|
require 'sidekiq/fetch'
|
5
|
+
require 'thread'
|
6
6
|
|
7
7
|
module Sidekiq
|
8
8
|
|
9
9
|
##
|
10
|
-
# The
|
11
|
-
#
|
12
|
-
#
|
10
|
+
# The Manager is the central coordination point in Sidekiq, controlling
|
11
|
+
# the lifecycle of the Processors and feeding them jobs as necessary.
|
12
|
+
#
|
13
|
+
# Tasks:
|
14
|
+
#
|
15
|
+
# 1. start: Spin up Processors.
|
16
|
+
# 2. processor_died: Handle job failure, throw away Processor, create new one.
|
17
|
+
# 3. quiet: Shut down idle Processors.
|
18
|
+
# 4. stop: Hard stop the Processors by deadline.
|
19
|
+
#
|
20
|
+
# Note that only the last task requires its own Thread since it has to monitor
|
21
|
+
# the shutdown process. The other tasks are performed by other threads.
|
13
22
|
#
|
14
23
|
class Manager
|
15
24
|
include Util
|
16
|
-
include Actor
|
17
|
-
trap_exit :processor_died
|
18
25
|
|
19
|
-
attr_reader :
|
20
|
-
attr_reader :
|
21
|
-
attr_accessor :fetcher
|
26
|
+
attr_reader :workers
|
27
|
+
attr_reader :options
|
22
28
|
|
23
|
-
|
24
|
-
JVM_RESERVED_SIGNALS = ['USR1', 'USR2'] # Don't Process#kill if we get these signals via the API
|
25
|
-
|
26
|
-
def initialize(condvar, options={})
|
29
|
+
def initialize(options={})
|
27
30
|
logger.debug { options.inspect }
|
28
31
|
@options = options
|
29
32
|
@count = options[:concurrency] || 25
|
30
33
|
raise ArgumentError, "Concurrency of #{@count} is not supported" if @count < 1
|
31
|
-
@done_callback = nil
|
32
|
-
@finished = condvar
|
33
34
|
|
34
|
-
@in_progress = {}
|
35
|
-
@threads = {}
|
36
35
|
@done = false
|
37
|
-
@
|
38
|
-
@
|
39
|
-
|
40
|
-
p.proxy_id = p.object_id
|
41
|
-
p
|
36
|
+
@workers = Set.new
|
37
|
+
@count.times do
|
38
|
+
@workers << Processor.new(self)
|
42
39
|
end
|
40
|
+
@plock = Mutex.new
|
43
41
|
end
|
44
42
|
|
45
|
-
def
|
46
|
-
|
47
|
-
|
48
|
-
timeout = options[:timeout]
|
49
|
-
|
50
|
-
@done = true
|
51
|
-
|
52
|
-
logger.info { "Terminating #{@ready.size} quiet workers" }
|
53
|
-
@ready.each { |x| x.terminate if x.alive? }
|
54
|
-
@ready.clear
|
55
|
-
|
56
|
-
return if clean_up_for_graceful_shutdown
|
57
|
-
|
58
|
-
hard_shutdown_in timeout if should_shutdown
|
43
|
+
def start
|
44
|
+
@workers.each do |x|
|
45
|
+
x.start
|
59
46
|
end
|
60
47
|
end
|
61
48
|
|
62
|
-
def
|
63
|
-
if @
|
64
|
-
|
65
|
-
return true
|
66
|
-
end
|
49
|
+
def quiet
|
50
|
+
return if @done
|
51
|
+
@done = true
|
67
52
|
|
68
|
-
|
69
|
-
|
53
|
+
logger.info { "Terminating quiet workers" }
|
54
|
+
@workers.each { |x| x.terminate }
|
70
55
|
end
|
71
56
|
|
72
|
-
def
|
73
|
-
|
74
|
-
|
57
|
+
def stop(deadline)
|
58
|
+
quiet
|
59
|
+
return if @workers.empty?
|
75
60
|
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
@done_callback.call(processor) if @done_callback
|
83
|
-
@in_progress.delete(processor.object_id)
|
84
|
-
@threads.delete(processor.object_id)
|
85
|
-
@busy.delete(processor)
|
86
|
-
if stopped?
|
87
|
-
processor.terminate if processor.alive?
|
88
|
-
shutdown if @busy.empty?
|
89
|
-
else
|
90
|
-
@ready << processor if processor.alive?
|
91
|
-
end
|
92
|
-
dispatch
|
61
|
+
logger.info { "Pausing to allow workers to finish..." }
|
62
|
+
remaining = deadline - Time.now
|
63
|
+
while remaining > 0.5
|
64
|
+
return if @workers.empty?
|
65
|
+
sleep 0.5
|
66
|
+
remaining = deadline - Time.now
|
93
67
|
end
|
94
|
-
|
68
|
+
return if @workers.empty?
|
95
69
|
|
96
|
-
|
97
|
-
watchdog("Manager#processor_died died") do
|
98
|
-
@in_progress.delete(processor.object_id)
|
99
|
-
@threads.delete(processor.object_id)
|
100
|
-
@busy.delete(processor)
|
101
|
-
|
102
|
-
unless stopped?
|
103
|
-
p = Processor.new_link(current_actor)
|
104
|
-
p.proxy_id = p.object_id
|
105
|
-
@ready << p
|
106
|
-
dispatch
|
107
|
-
else
|
108
|
-
shutdown if @busy.empty?
|
109
|
-
end
|
110
|
-
end
|
70
|
+
hard_shutdown
|
111
71
|
end
|
112
72
|
|
113
|
-
def
|
114
|
-
|
115
|
-
|
116
|
-
# Race condition between Manager#stop if Fetcher
|
117
|
-
# is blocked on redis and gets a message after
|
118
|
-
# all the ready Processors have been stopped.
|
119
|
-
# Push the message back to redis.
|
120
|
-
work.requeue
|
121
|
-
else
|
122
|
-
processor = @ready.pop
|
123
|
-
@in_progress[processor.object_id] = work
|
124
|
-
@busy << processor
|
125
|
-
processor.async.process(work)
|
126
|
-
end
|
73
|
+
def processor_stopped(processor)
|
74
|
+
@plock.synchronize do
|
75
|
+
@workers.delete(processor)
|
127
76
|
end
|
128
77
|
end
|
129
78
|
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
PROCTITLES = [
|
139
|
-
proc { 'sidekiq'.freeze },
|
140
|
-
proc { Sidekiq::VERSION },
|
141
|
-
proc { |mgr, data| data['tag'] },
|
142
|
-
proc { |mgr, data| "[#{mgr.busy.size} of #{data['concurrency']} busy]" },
|
143
|
-
proc { |mgr, data| "stopping" if mgr.stopped? },
|
144
|
-
]
|
145
|
-
|
146
|
-
def heartbeat(key, data, json)
|
147
|
-
results = PROCTITLES.map {|x| x.(self, data) }
|
148
|
-
results.compact!
|
149
|
-
$0 = results.join(' ')
|
150
|
-
|
151
|
-
❤(key, json)
|
152
|
-
after(5) do
|
153
|
-
heartbeat(key, data, json)
|
79
|
+
def processor_died(processor, reason)
|
80
|
+
@plock.synchronize do
|
81
|
+
@workers.delete(processor)
|
82
|
+
unless @done
|
83
|
+
p = Processor.new(self)
|
84
|
+
@workers << p
|
85
|
+
p.start
|
86
|
+
end
|
154
87
|
end
|
155
88
|
end
|
156
89
|
|
@@ -160,77 +93,34 @@ module Sidekiq
|
|
160
93
|
|
161
94
|
private
|
162
95
|
|
163
|
-
def
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
conn.expire(key, 60)
|
170
|
-
conn.rpop("#{key}-signals")
|
171
|
-
end
|
172
|
-
end
|
173
|
-
|
174
|
-
return unless msg
|
175
|
-
|
176
|
-
if JVM_RESERVED_SIGNALS.include?(msg)
|
177
|
-
Sidekiq::CLI.instance.handle_signal(msg)
|
178
|
-
else
|
179
|
-
::Process.kill(msg, $$)
|
180
|
-
end
|
181
|
-
rescue => e
|
182
|
-
# ignore all redis/network issues
|
183
|
-
logger.error("heartbeat: #{e.message}")
|
96
|
+
def hard_shutdown
|
97
|
+
# We've reached the timeout and we still have busy workers.
|
98
|
+
# They must die but their jobs shall live on.
|
99
|
+
cleanup = nil
|
100
|
+
@plock.synchronize do
|
101
|
+
cleanup = @workers.dup
|
184
102
|
end
|
185
|
-
end
|
186
103
|
|
187
|
-
|
188
|
-
|
104
|
+
if cleanup.size > 0
|
105
|
+
jobs = cleanup.map {|p| p.job }.compact
|
189
106
|
|
190
|
-
|
191
|
-
|
192
|
-
# We've reached the timeout and we still have busy workers.
|
193
|
-
# They must die but their messages shall live on.
|
194
|
-
logger.warn { "Terminating #{@busy.size} busy worker threads" }
|
195
|
-
logger.warn { "Work still in progress #{@in_progress.values.inspect}" }
|
107
|
+
logger.warn { "Terminating #{cleanup.size} busy worker threads" }
|
108
|
+
logger.warn { "Work still in progress #{jobs.inspect}" }
|
196
109
|
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
@finished.signal
|
206
|
-
end
|
110
|
+
# Re-enqueue unfinished jobs
|
111
|
+
# NOTE: You may notice that we may push a job back to redis before
|
112
|
+
# the worker thread is terminated. This is ok because Sidekiq's
|
113
|
+
# contract says that jobs are run AT LEAST once. Process termination
|
114
|
+
# is delayed until we're certain the jobs are back in Redis because
|
115
|
+
# it is worse to lose a job than to run it twice.
|
116
|
+
strategy = (@options[:fetch] || Sidekiq::BasicFetch)
|
117
|
+
strategy.bulk_requeue(jobs, @options)
|
207
118
|
end
|
208
|
-
end
|
209
|
-
|
210
|
-
def dispatch
|
211
|
-
return if stopped?
|
212
|
-
# This is a safety check to ensure we haven't leaked
|
213
|
-
# processors somehow.
|
214
|
-
raise "BUG: No processors, cannot continue!" if @ready.empty? && @busy.empty?
|
215
|
-
raise "No ready processor!?" if @ready.empty?
|
216
119
|
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
def shutdown
|
221
|
-
requeue
|
222
|
-
@finished.signal
|
120
|
+
cleanup.each do |processor|
|
121
|
+
processor.kill
|
122
|
+
end
|
223
123
|
end
|
224
124
|
|
225
|
-
def requeue
|
226
|
-
# Re-enqueue terminated jobs
|
227
|
-
# NOTE: You may notice that we may push a job back to redis before
|
228
|
-
# the worker thread is terminated. This is ok because Sidekiq's
|
229
|
-
# contract says that jobs are run AT LEAST once. Process termination
|
230
|
-
# is delayed until we're certain the jobs are back in Redis because
|
231
|
-
# it is worse to lose a job than to run it twice.
|
232
|
-
Sidekiq::Fetcher.strategy.bulk_requeue(@in_progress.values, @options)
|
233
|
-
@in_progress.clear
|
234
|
-
end
|
235
125
|
end
|
236
126
|
end
|