sidekiq 3.5.4 → 4.0.0.pre1

@@ -96,7 +96,7 @@ module Sidekiq

       # App code can stuff all sorts of crazy binary data into the error message
       # that won't convert to JSON.
-      m = exception.message.to_s[0, 10_000]
+      m = exception.message[0..10_000]
       if m.respond_to?(:scrub!)
         m.force_encoding("utf-8")
         m.scrub!
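
The new slice keeps roughly the first 10KB of the message before scrubbing, and the `respond_to?` guard exists because `String#scrub!` only arrived in Ruby 2.1. A standalone sketch of the same truncate-and-scrub path (the sample message is invented):

    # Simulate a job raising an error whose message contains invalid UTF-8.
    raw = "failed: \xC3\x28 bad byte".b   # binary junk from app code
    m = raw[0..10_000]                    # truncate before doing anything else
    if m.respond_to?(:scrub!)             # Ruby 2.1+
      m.force_encoding("utf-8")
      m.scrub!                            # invalid bytes become "\uFFFD"
    end
    puts m                                # => "failed: �( bad byte"
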
@@ -1,59 +1,135 @@
 require 'sidekiq/util'
-require 'sidekiq/actor'
-
-require 'sidekiq/middleware/server/retry_jobs'
-require 'sidekiq/middleware/server/logging'
+require 'sidekiq/fetch'
+require 'thread'
+require 'concurrent/map'
+require 'concurrent/atomic/atomic_fixnum'

 module Sidekiq
   ##
-  # The Processor receives a message from the Manager and actually
-  # processes it. It instantiates the worker, runs the middleware
-  # chain and then calls Sidekiq::Worker#perform.
+  # The Processor is a standalone thread which:
+  #
+  # 1. fetches a job from Redis
+  # 2. executes the job
+  #   a. instantiate the Worker
+  #   b. run the middleware chain
+  #   c. call #perform
+  #
+  # A Processor can exit due to shutdown (processor_stopped)
+  # or due to an error during job execution (processor_died)
+  #
+  # If an error occurs in the job execution, the
+  # Processor calls the Manager to create a new one
+  # to replace itself and exits.
+  #
   class Processor
-    # To prevent a memory leak, ensure that stats expire. However, they should take up a minimal amount of storage
-    # so keep them around for a long time
-    STATS_TIMEOUT = 24 * 60 * 60 * 365 * 5

     include Util
-    include Actor
-
-    def self.default_middleware
-      Middleware::Chain.new do |m|
-        m.add Middleware::Server::Logging
-        m.add Middleware::Server::RetryJobs
-        if defined?(::ActiveRecord::Base)
-          require 'sidekiq/middleware/server/active_record'
-          m.add Sidekiq::Middleware::Server::ActiveRecord
+
+    attr_reader :thread
+    attr_reader :job
+
+    def initialize(mgr)
+      @mgr = mgr
+      @down = false
+      @done = false
+      @job = nil
+      @thread = nil
+      @strategy = (mgr.options[:fetch] || Sidekiq::BasicFetch).new(mgr.options)
+    end
+
+    def terminate(wait=false)
+      @done = true
+      return if !@thread
+      @thread.value if wait
+    end
+
+    def kill(wait=false)
+      @done = true
+      return if !@thread
+      # unlike the other actors, terminate does not wait
+      # for the thread to finish because we don't know how
+      # long the job will take to finish. Instead we
+      # provide a `kill` method to call after the shutdown
+      # timeout passes.
+      @thread.raise ::Sidekiq::Shutdown
+      @thread.value if wait
+    end
+
+    def start
+      @thread ||= safe_thread("processor", &method(:run))
+    end
+
+    private unless $TESTING
+
+    def run
+      begin
+        while !@done
+          process_one
         end
+        @mgr.processor_stopped(self)
+      rescue Sidekiq::Shutdown
+        @mgr.processor_stopped(self)
+      rescue Exception => ex
+        @mgr.processor_died(self, ex)
       end
     end

-    attr_accessor :proxy_id
+    def process_one
+      @job = fetch
+      process(@job) if @job
+      @job = nil
+    end
+
+    def get_one
+      begin
+        work = @strategy.retrieve_work
+        (logger.info { "Redis is online, #{Time.now - @down} sec downtime" }; @down = nil) if @down
+        work
+      rescue Sidekiq::Shutdown
+      rescue => ex
+        handle_fetch_exception(ex)
+      end
+    end
+
+    def fetch
+      j = get_one
+      if j && @done
+        j.requeue
+        nil
+      else
+        j
+      end
+    end

-    def initialize(boss)
-      @boss = boss
+    def handle_fetch_exception(ex)
+      if !@down
+        @down = Time.now
+        logger.error("Error fetching job: #{ex}")
+        ex.backtrace.each do |bt|
+          logger.error(bt)
+        end
+      end
+      sleep(1)
     end

     def process(work)
-      msgstr = work.message
+      jobstr = work.job
       queue = work.queue_name

-      @boss.async.real_thread(proxy_id, Thread.current)
-
       ack = false
       begin
-        msg = Sidekiq.load_json(msgstr)
-        klass = msg['class'.freeze].constantize
+        job = Sidekiq.load_json(jobstr)
+        klass = job['class'.freeze].constantize
         worker = klass.new
-        worker.jid = msg['jid'.freeze]
+        worker.jid = job['jid'.freeze]

-        stats(worker, msg, queue) do
-          Sidekiq.server_middleware.invoke(worker, msg, queue) do
+        stats(worker, job, queue) do
+          Sidekiq.server_middleware.invoke(worker, job, queue) do
             # Only ack if we either attempted to start this job or
             # successfully completed it. This prevents us from
             # losing jobs if a middleware raises an exception before yielding
             ack = true
-            execute_job(worker, cloned(msg['args'.freeze]))
+            execute_job(worker, cloned(job['args'.freeze]))
           end
         end
         ack = true
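
The hunk above swaps the Celluloid actor (`include Actor`, `@boss.async` callbacks) for a plain Ruby thread owned by the Manager. A minimal, framework-free sketch of that lifecycle contract, using invented stand-ins (`MiniProcessor`, a `Queue` of callables) rather than Sidekiq's real classes:

    # Hypothetical stand-ins to show the lifecycle, not Sidekiq's real API.
    class Shutdown < Interrupt; end

    class MiniProcessor
      attr_reader :thread

      def initialize(mgr, jobs)
        @mgr, @jobs, @done = mgr, jobs, false
      end

      def start
        @thread ||= Thread.new { run }
      end

      # Cooperative stop: the loop exits once the current job finishes.
      # (Sidekiq's retrieve_work blocks with a short timeout, so @done
      # is rechecked regularly even when no job arrives.)
      def terminate(wait = false)
        @done = true
        @thread.value if wait && @thread
      end

      # Forceful stop: interrupt a job that outlived the shutdown timeout.
      def kill
        @done = true
        @thread.raise(Shutdown) if @thread
      end

      private

      def run
        until @done
          job = @jobs.pop              # stand-in for @strategy.retrieve_work
          job.call if job
        end
        @mgr.processor_stopped(self)
      rescue Shutdown
        @mgr.processor_stopped(self)
      rescue Exception => ex
        @mgr.processor_died(self, ex)  # Manager replaces the dead processor
      end
    end
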
@@ -63,99 +139,46 @@ module Sidekiq
         # we didn't properly finish it.
         ack = false
       rescue Exception => ex
-        handle_exception(ex, msg || { :message => msgstr })
+        handle_exception(ex, job || { :job => jobstr })
         raise
       ensure
         work.acknowledge if ack
       end
-
-      @boss.async.processor_done(current_actor)
-    end
-
-    def inspect
-      "<Processor##{object_id.to_s(16)}>"
     end

     def execute_job(worker, cloned_args)
       worker.perform(*cloned_args)
     end

-    private
-
     def thread_identity
       @str ||= Thread.current.object_id.to_s(36)
     end

-    def stats(worker, msg, queue)
-      # Do not conflate errors from the job with errors caused by updating
-      # stats so calling code can react appropriately
-      retry_and_suppress_exceptions do
-        hash = Sidekiq.dump_json({:queue => queue, :payload => msg, :run_at => Time.now.to_i })
-        Sidekiq.redis do |conn|
-          conn.multi do
-            conn.hmset("#{identity}:workers", thread_identity, hash)
-            conn.expire("#{identity}:workers", 60*60*4)
-          end
-        end
-      end
+    WORKER_STATE = Concurrent::Map.new
+    PROCESSED = Concurrent::AtomicFixnum.new
+    FAILURE = Concurrent::AtomicFixnum.new
+
+    def stats(worker, job, queue)
+      tid = thread_identity
+      WORKER_STATE[tid] = {:queue => queue, :payload => job, :run_at => Time.now.to_i }

-      nowdate = Time.now.utc.strftime("%Y-%m-%d".freeze)
       begin
         yield
       rescue Exception
-        retry_and_suppress_exceptions do
-          failed = "stat:failed:#{nowdate}"
-          Sidekiq.redis do |conn|
-            conn.multi do
-              conn.incrby("stat:failed".freeze, 1)
-              conn.incrby(failed, 1)
-              conn.expire(failed, STATS_TIMEOUT)
-            end
-          end
-        end
+        FAILURE.increment
         raise
       ensure
-        retry_and_suppress_exceptions do
-          processed = "stat:processed:#{nowdate}"
-          Sidekiq.redis do |conn|
-            conn.multi do
-              conn.hdel("#{identity}:workers", thread_identity)
-              conn.incrby("stat:processed".freeze, 1)
-              conn.incrby(processed, 1)
-              conn.expire(processed, STATS_TIMEOUT)
-            end
-          end
-        end
+        WORKER_STATE.delete(tid)
+        PROCESSED.increment
       end
     end

     # Deep clone the arguments passed to the worker so that if
-    # the message fails, what is pushed back onto Redis hasn't
+    # the job fails, what is pushed back onto Redis hasn't
     # been mutated by the worker.
     def cloned(ary)
       Marshal.load(Marshal.dump(ary))
     end

-    # If an exception occurs in the block passed to this method, that block will be retried up to max_retries times.
-    # All exceptions will be swallowed and logged.
-    def retry_and_suppress_exceptions(max_retries = 5)
-      retry_count = 0
-      begin
-        yield
-      rescue => e
-        retry_count += 1
-        if retry_count <= max_retries
-          Sidekiq.logger.debug {"Suppressing and retrying error: #{e.inspect}"}
-          pause_for_recovery(retry_count)
-          retry
-        else
-          handle_exception(e, { :message => "Exhausted #{max_retries} retries"})
-        end
-      end
-    end
-
-    def pause_for_recovery(retry_count)
-      sleep(retry_count)
-    end
   end
 end
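
Job bookkeeping also moves off Redis: instead of per-day `stat:*` keys written in the hot path, with `retry_and_suppress_exceptions` wrapped around every write, each process now tracks state in concurrent-ruby primitives, which can be flushed to Redis in one place (the heartbeat, not shown in this diff). A small standalone sketch of how those primitives behave:

    require 'concurrent/map'
    require 'concurrent/atomic/atomic_fixnum'

    PROCESSED    = Concurrent::AtomicFixnum.new  # lock-free counter, starts at 0
    WORKER_STATE = Concurrent::Map.new           # thread-safe hash

    threads = 10.times.map do
      Thread.new do
        tid = Thread.current.object_id.to_s(36)
        WORKER_STATE[tid] = { :run_at => Time.now.to_i }  # "busy" while working
        begin
          # ... do the job ...
        ensure
          WORKER_STATE.delete(tid)
          PROCESSED.increment
        end
      end
    end
    threads.each(&:join)

    p PROCESSED.value    # => 10
    p WORKER_STATE.size  # => 0
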
@@ -9,10 +9,11 @@ module Sidekiq
       def create(options={})
         options[:url] ||= determine_redis_provider

-        # need a connection for Fetcher and Retry
         size = options[:size] || (Sidekiq.server? ? (Sidekiq.options[:concurrency] + 2) : 5)
-        pool_timeout = options[:pool_timeout] || 1

+        verify_sizing(size, Sidekiq.options[:concurrency]) if Sidekiq.server?
+
+        pool_timeout = options[:pool_timeout] || 1
         log_info(options)

         ConnectionPool.new(:timeout => pool_timeout, :size => size) do
@@ -22,13 +23,30 @@ module Sidekiq

       private

+      # Sidekiq needs a lot of concurrent Redis connections.
+      #
+      # We need a connection for each Processor.
+      # We need a connection for Pro's real-time change listener
+      # We need a connection to various features to call Redis every few seconds:
+      #   - the process heartbeat.
+      #   - enterprise's leader election
+      #   - enterprise's cron support
+      def verify_sizing(size, concurrency)
+        raise ArgumentError, "Your Redis connection pool is too small for Sidekiq to work, your pool has #{size} connections but really needs to have at least #{concurrency + 2}" if size <= concurrency
+      end
+
       def build_client(options)
         namespace = options[:namespace]

         client = Redis.new client_opts(options)
         if namespace
-          require 'redis/namespace'
-          Redis::Namespace.new(namespace, :redis => client)
+          begin
+            require 'redis/namespace'
+            Redis::Namespace.new(namespace, :redis => client)
+          rescue LoadError
+            Sidekiq.logger.error("redis-namespace gem not included in Gemfile, cannot use namespace '#{namespace}'")
+            exit(-127)
+          end
         else
           client
         end
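
The server pool must now be strictly larger than the concurrency, or `create` raises at boot instead of deadlocking under load later. With the default concurrency of 25 the pool defaults to 27 connections, so an explicit `:size` needs the same headroom; a hedged configuration sketch (the URL and size are illustrative):

    Sidekiq.configure_server do |config|
      # concurrency 25 -> pool must be > 25; the default (25 + 2 = 27) is safe
      config.redis = { :url => 'redis://localhost:6379/0', :size => 27 }
    end
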
@@ -1,6 +1,5 @@
 require 'sidekiq'
 require 'sidekiq/util'
-require 'sidekiq/actor'
 require 'sidekiq/api'

 module Sidekiq
@@ -17,7 +16,7 @@ module Sidekiq
             # We need to go through the list one at a time to reduce the risk of something
             # going wrong between the time jobs are popped from the scheduled queue and when
             # they are pushed onto a work queue and losing the jobs.
-            while job = conn.zrangebyscore(sorted_set, '-inf', now, :limit => [0, 1]).first do
+            while job = conn.zrangebyscore(sorted_set, '-inf'.freeze, now, :limit => [0, 1]).first do

               # Pop item off the queue and add it to the work queue. If the job can't be popped from
               # the queue, it's because another process already popped it so we can move on to the
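
The pop itself uses ZREM as the arbiter: only the process whose ZREM actually removes the member gets to enqueue the job, so concurrent pollers never double-enqueue. A hedged sketch of the pattern outside Sidekiq ('schedule' is the standard scheduled set; the push mirrors what Enq does):

    Sidekiq.redis do |conn|
      now = Time.now.to_f.to_s
      while job = conn.zrangebyscore('schedule', '-inf', now, :limit => [0, 1]).first
        # zrem returns true only for the one process that removed the member,
        # so a job seen by several pollers is still enqueued exactly once.
        Sidekiq::Client.push(Sidekiq.load_json(job)) if conn.zrem('schedule', job)
      end
    end
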
@@ -39,33 +38,56 @@ module Sidekiq
   # workers can pick it up like any other job.
   class Poller
     include Util
-    include Actor

     INITIAL_WAIT = 10

     def initialize
       @enq = (Sidekiq.options[:scheduled_enq] || Sidekiq::Scheduled::Enq).new
+      @sleeper = ConnectionPool::TimedStack.new
+      @done = false
     end

-    def poll(first_time=false)
-      watchdog('scheduling poller thread died!') do
-        initial_wait if first_time
-
-        begin
-          @enq.enqueue_jobs
-        rescue => ex
-          # Most likely a problem with redis networking.
-          # Punt and try again at the next interval
-          logger.error ex.message
-          logger.error ex.backtrace.first
+    # Shut down this instance, will pause until the thread is dead.
+    def terminate
+      @done = true
+      if @thread
+        t = @thread
+        @thread = nil
+        @sleeper << 0
+        t.value
+      end
+    end
+
+    def start
+      @thread ||= safe_thread("scheduler") do
+        initial_wait
+
+        while !@done
+          enqueue
+          wait
         end
+        Sidekiq.logger.info("Scheduler exiting...")
+      end
+    end

-        after(random_poll_interval) { poll }
+    def enqueue
+      begin
+        @enq.enqueue_jobs
+      rescue => ex
+        # Most likely a problem with redis networking.
+        # Punt and try again at the next interval
+        logger.error ex.message
+        logger.error ex.backtrace.first
       end
     end

     private

+    def wait
+      @sleeper.pop(random_poll_interval)
+    rescue Timeout::Error
+    end
+
     # Calculates a random interval that is ±50% the desired average.
     def random_poll_interval
       poll_interval_average * rand + poll_interval_average.to_f / 2
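
With a `poll_interval_average` of, say, 15 seconds (the value is configurable and may be scaled by process count), each cycle therefore sleeps a uniform random time in [7.5, 22.5):

    avg = 15.0                        # assume poll_interval_average == 15
    interval = avg * rand + avg / 2   # rand in [0,1) -> interval in [7.5, 22.5)
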
@@ -83,7 +105,7 @@ module Sidekiq
     # all your Sidekiq processes at the same time will lead to them all polling at
     # the same time: the thundering herd problem.
     #
-    # We only do this if poll_interval is unset (the default).
+    # We only do this if poll_interval_average is unset (the default).
     def poll_interval_average
       Sidekiq.options[:poll_interval_average] ||= scaled_poll_interval
     end
@@ -98,16 +120,15 @@ module Sidekiq
     end

     def initial_wait
-      begin
-        # Have all processes sleep between 5-15 seconds. 10 seconds
-        # to give time for the heartbeat to register (if the poll interval is going to be calculated by the number
-        # of workers), and 5 random seconds to ensure they don't all hit Redis at the same time.
-        sleep(INITIAL_WAIT) unless Sidekiq.options[:poll_interval_average]
-        sleep(5 * rand)
-      rescue Celluloid::TaskTerminated
-        # Hit Ctrl-C when Sidekiq is finished booting and we have a chance
-        # to get here.
-      end
+      # Have all processes sleep between 5-15 seconds. 10 seconds
+      # to give time for the heartbeat to register (if the poll interval is going to be calculated by the number
+      # of workers), and 5 random seconds to ensure they don't all hit Redis at the same time.
+      total = 0
+      total += INITIAL_WAIT unless Sidekiq.options[:poll_interval_average]
+      total += (5 * rand)
+
+      @sleeper.pop(total)
+    rescue Timeout::Error
     end

   end
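
`ConnectionPool::TimedStack` doubles here as an interruptible sleep: `pop` blocks for up to the given timeout and raises a `Timeout::Error` if nothing was pushed, while pushing any value wakes the sleeper immediately, which is how `terminate` cuts both `initial_wait` and the poll wait short. A standalone sketch:

    require 'connection_pool'

    sleeper = ConnectionPool::TimedStack.new

    t = Thread.new do
      begin
        sleeper.pop(30)        # "sleep" for up to 30 seconds...
        puts "woken early"
      rescue Timeout::Error
        puts "slept the full 30 seconds"
      end
    end

    sleep 0.1
    sleeper << 0               # ...but wake it immediately, as #terminate does
    t.join                     # prints "woken early"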