sidekiq 3.5.4 → 4.0.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of sidekiq might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/4.0-Upgrade.md +50 -0
- data/Changes.md +15 -3
- data/Ent-Changes.md +27 -0
- data/Gemfile +2 -1
- data/Pro-3.0-Upgrade.md +46 -0
- data/Pro-Changes.md +21 -0
- data/README.md +4 -4
- data/bin/sidekiqctl +8 -2
- data/bin/sidekiqload +20 -6
- data/lib/sidekiq.rb +24 -11
- data/lib/sidekiq/api.rb +2 -2
- data/lib/sidekiq/cli.rb +19 -29
- data/lib/sidekiq/client.rb +0 -5
- data/lib/sidekiq/fetch.rb +35 -111
- data/lib/sidekiq/launcher.rb +105 -46
- data/lib/sidekiq/manager.rb +77 -180
- data/lib/sidekiq/middleware/server/retry_jobs.rb +1 -1
- data/lib/sidekiq/processor.rb +119 -96
- data/lib/sidekiq/redis_connection.rb +23 -5
- data/lib/sidekiq/scheduled.rb +47 -26
- data/lib/sidekiq/testing.rb +84 -14
- data/lib/sidekiq/util.rb +7 -0
- data/lib/sidekiq/version.rb +1 -1
- data/lib/sidekiq/web_helpers.rb +8 -1
- data/sidekiq.gemspec +2 -2
- data/test/helper.rb +30 -5
- data/test/test_actors.rb +137 -0
- data/test/test_api.rb +395 -394
- data/test/test_fetch.rb +2 -57
- data/test/test_launcher.rb +80 -0
- data/test/test_manager.rb +13 -132
- data/test/test_middleware.rb +3 -5
- data/test/test_processor.rb +20 -57
- data/test/test_scheduled.rb +2 -2
- data/test/test_testing_fake.rb +64 -1
- data/web/assets/stylesheets/application.css +4 -0
- data/web/views/_footer.erb +2 -7
- metadata +30 -25
- data/lib/sidekiq/actor.rb +0 -39
@@ -96,7 +96,7 @@ module Sidekiq
|
|
96
96
|
|
97
97
|
# App code can stuff all sorts of crazy binary data into the error message
|
98
98
|
# that won't convert to JSON.
|
99
|
-
m = exception.message
|
99
|
+
m = exception.message[0..10_000]
|
100
100
|
if m.respond_to?(:scrub!)
|
101
101
|
m.force_encoding("utf-8")
|
102
102
|
m.scrub!
|
data/lib/sidekiq/processor.rb
CHANGED
@@ -1,59 +1,135 @@
|
|
1
1
|
require 'sidekiq/util'
|
2
|
-
require 'sidekiq/
|
3
|
-
|
4
|
-
require '
|
5
|
-
require '
|
2
|
+
require 'sidekiq/fetch'
|
3
|
+
require 'thread'
|
4
|
+
require 'concurrent/map'
|
5
|
+
require 'concurrent/atomic/atomic_fixnum'
|
6
6
|
|
7
7
|
module Sidekiq
|
8
8
|
##
|
9
|
-
# The Processor
|
10
|
-
#
|
11
|
-
#
|
9
|
+
# The Processor is a standalone thread which:
|
10
|
+
#
|
11
|
+
# 1. fetches a job from Redis
|
12
|
+
# 2. executes the job
|
13
|
+
# a. instantiate the Worker
|
14
|
+
# b. run the middleware chain
|
15
|
+
# c. call #perform
|
16
|
+
#
|
17
|
+
# A Processor can exit due to shutdown (processor_stopped)
|
18
|
+
# or due to an error during job execution (processor_died)
|
19
|
+
#
|
20
|
+
# If an error occurs in the job execution, the
|
21
|
+
# Processor calls the Manager to create a new one
|
22
|
+
# to replace itself and exits.
|
23
|
+
#
|
12
24
|
class Processor
|
13
|
-
# To prevent a memory leak, ensure that stats expire. However, they should take up a minimal amount of storage
|
14
|
-
# so keep them around for a long time
|
15
|
-
STATS_TIMEOUT = 24 * 60 * 60 * 365 * 5
|
16
25
|
|
17
26
|
include Util
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
+
|
28
|
+
attr_reader :thread
|
29
|
+
attr_reader :job
|
30
|
+
|
31
|
+
def initialize(mgr)
|
32
|
+
@mgr = mgr
|
33
|
+
@down = false
|
34
|
+
@done = false
|
35
|
+
@job = nil
|
36
|
+
@thread = nil
|
37
|
+
@strategy = (mgr.options[:fetch] || Sidekiq::BasicFetch).new(mgr.options)
|
38
|
+
end
|
39
|
+
|
40
|
+
def terminate(wait=false)
|
41
|
+
@done = true
|
42
|
+
return if !@thread
|
43
|
+
@thread.value if wait
|
44
|
+
end
|
45
|
+
|
46
|
+
def kill(wait=false)
|
47
|
+
@done = true
|
48
|
+
return if !@thread
|
49
|
+
# unlike the other actors, terminate does not wait
|
50
|
+
# for the thread to finish because we don't know how
|
51
|
+
# long the job will take to finish. Instead we
|
52
|
+
# provide a `kill` method to call after the shutdown
|
53
|
+
# timeout passes.
|
54
|
+
@thread.raise ::Sidekiq::Shutdown
|
55
|
+
@thread.value if wait
|
56
|
+
end
|
57
|
+
|
58
|
+
def start
|
59
|
+
@thread ||= safe_thread("processor", &method(:run))
|
60
|
+
end
|
61
|
+
|
62
|
+
private unless $TESTING
|
63
|
+
|
64
|
+
def run
|
65
|
+
begin
|
66
|
+
while !@done
|
67
|
+
process_one
|
27
68
|
end
|
69
|
+
@mgr.processor_stopped(self)
|
70
|
+
rescue Sidekiq::Shutdown
|
71
|
+
@mgr.processor_stopped(self)
|
72
|
+
rescue Exception => ex
|
73
|
+
@mgr.processor_died(self, ex)
|
28
74
|
end
|
29
75
|
end
|
30
76
|
|
31
|
-
|
77
|
+
def process_one
|
78
|
+
@job = fetch
|
79
|
+
process(@job) if @job
|
80
|
+
@job = nil
|
81
|
+
end
|
82
|
+
|
83
|
+
def get_one
|
84
|
+
begin
|
85
|
+
work = @strategy.retrieve_work
|
86
|
+
(logger.info { "Redis is online, #{Time.now - @down} sec downtime" }; @down = nil) if @down
|
87
|
+
work
|
88
|
+
rescue Sidekiq::Shutdown
|
89
|
+
rescue => ex
|
90
|
+
handle_fetch_exception(ex)
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
94
|
+
def fetch
|
95
|
+
j = get_one
|
96
|
+
if j && @done
|
97
|
+
j.requeue
|
98
|
+
nil
|
99
|
+
else
|
100
|
+
j
|
101
|
+
end
|
102
|
+
end
|
32
103
|
|
33
|
-
def
|
34
|
-
|
104
|
+
def handle_fetch_exception(ex)
|
105
|
+
if !@down
|
106
|
+
@down = Time.now
|
107
|
+
logger.error("Error fetching job: #{ex}")
|
108
|
+
ex.backtrace.each do |bt|
|
109
|
+
logger.error(bt)
|
110
|
+
end
|
111
|
+
end
|
112
|
+
sleep(1)
|
35
113
|
end
|
36
114
|
|
37
115
|
def process(work)
|
38
|
-
|
116
|
+
jobstr = work.job
|
39
117
|
queue = work.queue_name
|
40
118
|
|
41
|
-
@boss.async.real_thread(proxy_id, Thread.current)
|
42
|
-
|
43
119
|
ack = false
|
44
120
|
begin
|
45
|
-
|
46
|
-
klass =
|
121
|
+
job = Sidekiq.load_json(jobstr)
|
122
|
+
klass = job['class'.freeze].constantize
|
47
123
|
worker = klass.new
|
48
|
-
worker.jid =
|
124
|
+
worker.jid = job['jid'.freeze]
|
49
125
|
|
50
|
-
stats(worker,
|
51
|
-
Sidekiq.server_middleware.invoke(worker,
|
126
|
+
stats(worker, job, queue) do
|
127
|
+
Sidekiq.server_middleware.invoke(worker, job, queue) do
|
52
128
|
# Only ack if we either attempted to start this job or
|
53
129
|
# successfully completed it. This prevents us from
|
54
130
|
# losing jobs if a middleware raises an exception before yielding
|
55
131
|
ack = true
|
56
|
-
execute_job(worker, cloned(
|
132
|
+
execute_job(worker, cloned(job['args'.freeze]))
|
57
133
|
end
|
58
134
|
end
|
59
135
|
ack = true
|
@@ -63,99 +139,46 @@ module Sidekiq
|
|
63
139
|
# we didn't properly finish it.
|
64
140
|
ack = false
|
65
141
|
rescue Exception => ex
|
66
|
-
handle_exception(ex,
|
142
|
+
handle_exception(ex, job || { :job => jobstr })
|
67
143
|
raise
|
68
144
|
ensure
|
69
145
|
work.acknowledge if ack
|
70
146
|
end
|
71
|
-
|
72
|
-
@boss.async.processor_done(current_actor)
|
73
|
-
end
|
74
|
-
|
75
|
-
def inspect
|
76
|
-
"<Processor##{object_id.to_s(16)}>"
|
77
147
|
end
|
78
148
|
|
79
149
|
def execute_job(worker, cloned_args)
|
80
150
|
worker.perform(*cloned_args)
|
81
151
|
end
|
82
152
|
|
83
|
-
private
|
84
|
-
|
85
153
|
def thread_identity
|
86
154
|
@str ||= Thread.current.object_id.to_s(36)
|
87
155
|
end
|
88
156
|
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
conn.hmset("#{identity}:workers", thread_identity, hash)
|
97
|
-
conn.expire("#{identity}:workers", 60*60*4)
|
98
|
-
end
|
99
|
-
end
|
100
|
-
end
|
157
|
+
WORKER_STATE = Concurrent::Map.new
|
158
|
+
PROCESSED = Concurrent::AtomicFixnum.new
|
159
|
+
FAILURE = Concurrent::AtomicFixnum.new
|
160
|
+
|
161
|
+
def stats(worker, job, queue)
|
162
|
+
tid = thread_identity
|
163
|
+
WORKER_STATE[tid] = {:queue => queue, :payload => job, :run_at => Time.now.to_i }
|
101
164
|
|
102
|
-
nowdate = Time.now.utc.strftime("%Y-%m-%d".freeze)
|
103
165
|
begin
|
104
166
|
yield
|
105
167
|
rescue Exception
|
106
|
-
|
107
|
-
failed = "stat:failed:#{nowdate}"
|
108
|
-
Sidekiq.redis do |conn|
|
109
|
-
conn.multi do
|
110
|
-
conn.incrby("stat:failed".freeze, 1)
|
111
|
-
conn.incrby(failed, 1)
|
112
|
-
conn.expire(failed, STATS_TIMEOUT)
|
113
|
-
end
|
114
|
-
end
|
115
|
-
end
|
168
|
+
FAILURE.increment
|
116
169
|
raise
|
117
170
|
ensure
|
118
|
-
|
119
|
-
|
120
|
-
Sidekiq.redis do |conn|
|
121
|
-
conn.multi do
|
122
|
-
conn.hdel("#{identity}:workers", thread_identity)
|
123
|
-
conn.incrby("stat:processed".freeze, 1)
|
124
|
-
conn.incrby(processed, 1)
|
125
|
-
conn.expire(processed, STATS_TIMEOUT)
|
126
|
-
end
|
127
|
-
end
|
128
|
-
end
|
171
|
+
WORKER_STATE.delete(tid)
|
172
|
+
PROCESSED.increment
|
129
173
|
end
|
130
174
|
end
|
131
175
|
|
132
176
|
# Deep clone the arguments passed to the worker so that if
|
133
|
-
# the
|
177
|
+
# the job fails, what is pushed back onto Redis hasn't
|
134
178
|
# been mutated by the worker.
|
135
179
|
def cloned(ary)
|
136
180
|
Marshal.load(Marshal.dump(ary))
|
137
181
|
end
|
138
182
|
|
139
|
-
# If an exception occurs in the block passed to this method, that block will be retried up to max_retries times.
|
140
|
-
# All exceptions will be swallowed and logged.
|
141
|
-
def retry_and_suppress_exceptions(max_retries = 5)
|
142
|
-
retry_count = 0
|
143
|
-
begin
|
144
|
-
yield
|
145
|
-
rescue => e
|
146
|
-
retry_count += 1
|
147
|
-
if retry_count <= max_retries
|
148
|
-
Sidekiq.logger.debug {"Suppressing and retrying error: #{e.inspect}"}
|
149
|
-
pause_for_recovery(retry_count)
|
150
|
-
retry
|
151
|
-
else
|
152
|
-
handle_exception(e, { :message => "Exhausted #{max_retries} retries"})
|
153
|
-
end
|
154
|
-
end
|
155
|
-
end
|
156
|
-
|
157
|
-
def pause_for_recovery(retry_count)
|
158
|
-
sleep(retry_count)
|
159
|
-
end
|
160
183
|
end
|
161
184
|
end
|
@@ -9,10 +9,11 @@ module Sidekiq
|
|
9
9
|
def create(options={})
|
10
10
|
options[:url] ||= determine_redis_provider
|
11
11
|
|
12
|
-
|
13
|
-
|
14
|
-
|
12
|
+
size = options[:size] || (Sidekiq.server? ? (Sidekiq.options[:concurrency] + 5) : 5)
|
13
|
+
|
14
|
+
verify_sizing(size, Sidekiq.options[:concurrency]) if Sidekiq.server?
|
15
15
|
|
16
|
+
pool_timeout = options[:pool_timeout] || 1
|
16
17
|
log_info(options)
|
17
18
|
|
18
19
|
ConnectionPool.new(:timeout => pool_timeout, :size => size) do
|
@@ -22,13 +23,30 @@ module Sidekiq
|
|
22
23
|
|
23
24
|
private
|
24
25
|
|
26
|
+
# Sidekiq needs a lot of concurrent Redis connections.
|
27
|
+
#
|
28
|
+
# We need a connection for each Processor.
|
29
|
+
# We need a connection for Pro's real-time change listener
|
30
|
+
# We need a connection to various features to call Redis every few seconds:
|
31
|
+
# - the process heartbeat.
|
32
|
+
# - enterprise's leader election
|
33
|
+
# - enterprise's cron support
|
34
|
+
def verify_sizing(size, concurrency)
|
35
|
+
raise ArgumentError, "Your Redis connection pool is too small for Sidekiq to work, your pool has #{size} connections but really needs to have at least #{concurrency + 2}" if size <= concurrency
|
36
|
+
end
|
37
|
+
|
25
38
|
def build_client(options)
|
26
39
|
namespace = options[:namespace]
|
27
40
|
|
28
41
|
client = Redis.new client_opts(options)
|
29
42
|
if namespace
|
30
|
-
|
31
|
-
|
43
|
+
begin
|
44
|
+
require 'redis/namespace'
|
45
|
+
Redis::Namespace.new(namespace, :redis => client)
|
46
|
+
rescue LoadError
|
47
|
+
Sidekiq.logger.error("redis-namespace gem not included in Gemfile, cannot use namespace '#{namespace}'")
|
48
|
+
exit(-127)
|
49
|
+
end
|
32
50
|
else
|
33
51
|
client
|
34
52
|
end
|
data/lib/sidekiq/scheduled.rb
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
require 'sidekiq'
|
2
2
|
require 'sidekiq/util'
|
3
|
-
require 'sidekiq/actor'
|
4
3
|
require 'sidekiq/api'
|
5
4
|
|
6
5
|
module Sidekiq
|
@@ -17,7 +16,7 @@ module Sidekiq
|
|
17
16
|
# We need to go through the list one at a time to reduce the risk of something
|
18
17
|
# going wrong between the time jobs are popped from the scheduled queue and when
|
19
18
|
# they are pushed onto a work queue and losing the jobs.
|
20
|
-
while job = conn.zrangebyscore(sorted_set, '-inf', now, :limit => [0, 1]).first do
|
19
|
+
while job = conn.zrangebyscore(sorted_set, '-inf'.freeze, now, :limit => [0, 1]).first do
|
21
20
|
|
22
21
|
# Pop item off the queue and add it to the work queue. If the job can't be popped from
|
23
22
|
# the queue, it's because another process already popped it so we can move on to the
|
@@ -39,33 +38,56 @@ module Sidekiq
|
|
39
38
|
# workers can pick it up like any other job.
|
40
39
|
class Poller
|
41
40
|
include Util
|
42
|
-
include Actor
|
43
41
|
|
44
42
|
INITIAL_WAIT = 10
|
45
43
|
|
46
44
|
def initialize
|
47
45
|
@enq = (Sidekiq.options[:scheduled_enq] || Sidekiq::Scheduled::Enq).new
|
46
|
+
@sleeper = ConnectionPool::TimedStack.new
|
47
|
+
@done = false
|
48
48
|
end
|
49
49
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
50
|
+
# Shut down this instance, will pause until the thread is dead.
|
51
|
+
def terminate
|
52
|
+
@done = true
|
53
|
+
if @thread
|
54
|
+
t = @thread
|
55
|
+
@thread = nil
|
56
|
+
@sleeper << 0
|
57
|
+
t.value
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
def start
|
62
|
+
@thread ||= safe_thread("scheduler") do
|
63
|
+
initial_wait
|
64
|
+
|
65
|
+
while !@done
|
66
|
+
enqueue
|
67
|
+
wait
|
61
68
|
end
|
69
|
+
Sidekiq.logger.info("Scheduler exiting...")
|
70
|
+
end
|
71
|
+
end
|
62
72
|
|
63
|
-
|
73
|
+
def enqueue
|
74
|
+
begin
|
75
|
+
@enq.enqueue_jobs
|
76
|
+
rescue => ex
|
77
|
+
# Most likely a problem with redis networking.
|
78
|
+
# Punt and try again at the next interval
|
79
|
+
logger.error ex.message
|
80
|
+
logger.error ex.backtrace.first
|
64
81
|
end
|
65
82
|
end
|
66
83
|
|
67
84
|
private
|
68
85
|
|
86
|
+
def wait
|
87
|
+
@sleeper.pop(random_poll_interval)
|
88
|
+
rescue Timeout::Error
|
89
|
+
end
|
90
|
+
|
69
91
|
# Calculates a random interval that is ±50% the desired average.
|
70
92
|
def random_poll_interval
|
71
93
|
poll_interval_average * rand + poll_interval_average.to_f / 2
|
@@ -83,7 +105,7 @@ module Sidekiq
|
|
83
105
|
# all your Sidekiq processes at the same time will lead to them all polling at
|
84
106
|
# the same time: the thundering herd problem.
|
85
107
|
#
|
86
|
-
# We only do this if
|
108
|
+
# We only do this if poll_interval_average is unset (the default).
|
87
109
|
def poll_interval_average
|
88
110
|
Sidekiq.options[:poll_interval_average] ||= scaled_poll_interval
|
89
111
|
end
|
@@ -98,16 +120,15 @@ module Sidekiq
|
|
98
120
|
end
|
99
121
|
|
100
122
|
def initial_wait
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
end
|
123
|
+
# Have all processes sleep between 5-15 seconds. 10 seconds
|
124
|
+
# to give time for the heartbeat to register (if the poll interval is going to be calculated by the number
|
125
|
+
# of workers), and 5 random seconds to ensure they don't all hit Redis at the same time.
|
126
|
+
total = 0
|
127
|
+
total += INITIAL_WAIT unless Sidekiq.options[:poll_interval_average]
|
128
|
+
total += (5 * rand)
|
129
|
+
|
130
|
+
@sleeper.pop(total)
|
131
|
+
rescue Timeout::Error
|
111
132
|
end
|
112
133
|
|
113
134
|
end
|