sidekiq 3.5.4 → 4.0.0.pre1


@@ -1,4 +1,4 @@
-require 'sidekiq/actor'
+# encoding: utf-8
 require 'sidekiq/manager'
 require 'sidekiq/fetch'
 require 'sidekiq/scheduled'
@@ -9,65 +9,116 @@ module Sidekiq
   # If any of these actors die, the Sidekiq process exits
   # immediately.
   class Launcher
-    include Actor
     include Util
 
-    trap_exit :actor_died
-
-    attr_reader :manager, :poller, :fetcher
+    attr_accessor :manager, :poller, :fetcher
 
     def initialize(options)
-      @condvar = Celluloid::Condition.new
-      @manager = Sidekiq::Manager.new_link(@condvar, options)
-      @poller = Sidekiq::Scheduled::Poller.new_link
-      @fetcher = Sidekiq::Fetcher.new_link(@manager, options)
-      @manager.fetcher = @fetcher
+      @manager = Sidekiq::Manager.new(options)
+      @poller = Sidekiq::Scheduled::Poller.new
       @done = false
      @options = options
     end
 
-    def actor_died(actor, reason)
-      # https://github.com/mperham/sidekiq/issues/2057#issuecomment-66485477
-      return if @done || !reason
+    def run
+      @thread = safe_thread("heartbeat", &method(:start_heartbeat))
+      @poller.start
+      @manager.start
+    end
 
-      Sidekiq.logger.warn("Sidekiq died due to the following error, cannot recover, process exiting")
-      handle_exception(reason)
-      exit(1)
+    # Stops this instance from processing any more jobs,
+    #
+    def quiet
+      @done = true
+      @manager.quiet
+      @poller.terminate
     end
 
-    def run
-      watchdog('Launcher#run') do
-        manager.async.start
-        poller.async.poll(true)
+    # Shuts down the process. This method does not
+    # return until all work is complete and cleaned up.
+    # It can take up to the timeout to complete.
+    def stop
+      deadline = Time.now + @options[:timeout]
 
-        start_heartbeat
-      end
+      @done = true
+      @manager.quiet
+      @poller.terminate
+
+      @manager.stop(deadline)
+
+      # Requeue everything in case there was a worker who grabbed work while stopped
+      # This call is a no-op in Sidekiq but necessary for Sidekiq Pro.
+      strategy = (@options[:fetch] || Sidekiq::BasicFetch)
+      strategy.bulk_requeue([], @options)
+
+      clear_heartbeat
     end
 
-    def stop
-      watchdog('Launcher#stop') do
-        @done = true
-        Sidekiq::Fetcher.done!
-        fetcher.terminate if fetcher.alive?
-        poller.terminate if poller.alive?
-
-        manager.async.stop(:shutdown => true, :timeout => @options[:timeout])
-        fire_event(:shutdown, true)
-        @condvar.wait
-        manager.terminate
-
-        # Requeue everything in case there was a worker who grabbed work while stopped
-        # This call is a no-op in Sidekiq but necessary for Sidekiq Pro.
-        Sidekiq::Fetcher.strategy.bulk_requeue([], @options)
-
-        stop_heartbeat
-      end
+    def stopping?
+      @done
     end
 
-    private
+    private unless $TESTING
+
+    JVM_RESERVED_SIGNALS = ['USR1', 'USR2'] # Don't Process#kill if we get these signals via the API
+
+    def heartbeat(k, data, json)
+      results = Sidekiq::CLI::PROCTITLES.map {|x| x.(self, data) }
+      results.compact!
+      $0 = results.join(' ')
+
+      ❤(k, json)
+    end
+
+    def ❤(key, json)
+      fails = procd = 0
+      begin
+        Processor::FAILURE.update {|curr| fails = curr; 0 }
+        Processor::PROCESSED.update {|curr| procd = curr; 0 }
+
+        workers_key = "#{key}:workers".freeze
+        nowdate = Time.now.utc.strftime("%Y-%m-%d".freeze)
+        Sidekiq.redis do |conn|
+          conn.pipelined do
+            conn.incrby("stat:processed".freeze, procd)
+            conn.incrby("stat:processed:#{nowdate}", procd)
+            conn.incrby("stat:failed".freeze, fails)
+            conn.incrby("stat:failed:#{nowdate}", fails)
+            conn.del(workers_key)
+            Processor::WORKER_STATE.each_pair do |tid, hash|
+              conn.hset(workers_key, tid, Sidekiq.dump_json(hash))
+            end
+          end
+        end
+        fails = procd = 0
+
+        _, _, _, msg = Sidekiq.redis do |conn|
+          conn.pipelined do
+            conn.sadd('processes', key)
+            conn.hmset(key, 'info', json, 'busy', Processor::WORKER_STATE.size, 'beat', Time.now.to_f)
+            conn.expire(key, 60)
+            conn.rpop("#{key}-signals")
+          end
+        end
+
+        return unless msg
+
+        if JVM_RESERVED_SIGNALS.include?(msg)
+          Sidekiq::CLI.instance.handle_signal(msg)
+        else
+          ::Process.kill(msg, $$)
+        end
+      rescue => e
+        # ignore all redis/network issues
+        logger.error("heartbeat: #{e.message}")
+        # don't lose the counts if there was a network issue
+        PROCESSED.increment(procd)
+        FAILURE.increment(fails)
+      end
+    end
 
     def start_heartbeat
-      key = identity
+      k = identity
       data = {
         'hostname' => hostname,
         'started_at' => Time.now.to_f,
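
The ❤ heartbeat added above doubles as a remote-control channel: every beat RPOPs one entry from the "#{identity}-signals" list in Redis and either hands USR1/USR2 to Sidekiq::CLI or re-raises the name locally via Process.kill. A hedged sketch of how another process could feed that list; the identity value and the USR1 signal below are illustrative, not taken from this diff:

    # Hypothetical sketch: ask a running Sidekiq process to quiet itself by
    # pushing a signal name onto its "<identity>-signals" list in Redis.
    # The identity below is made up; the real value is whatever the process
    # registered in the 'processes' set via its heartbeat.
    require 'sidekiq'

    identity = 'app-01:4242:0123abcd'
    Sidekiq.redis do |conn|
      conn.lpush("#{identity}-signals", 'USR1') # next heartbeat delivers Process.kill('USR1', $$)
    end
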
@@ -75,16 +126,24 @@ module Sidekiq
         'tag' => @options[:tag] || '',
         'concurrency' => @options[:concurrency],
         'queues' => @options[:queues].uniq,
-        'labels' => Sidekiq.options[:labels],
-        'identity' => identity,
+        'labels' => @options[:labels],
+        'identity' => k,
       }
       # this data doesn't change so dump it to a string
       # now so we don't need to dump it every heartbeat.
       json = Sidekiq.dump_json(data)
-      manager.heartbeat(key, data, json)
+
+      while true
+        heartbeat(k, data, json)
+        sleep 5
+      end
+      Sidekiq.logger.info("Heartbeat stopping...")
     end
 
-    def stop_heartbeat
+    def clear_heartbeat
+      # Remove record from Redis since we are shutting down.
+      # Note we don't stop the heartbeat thread; if the process
+      # doesn't actually exit, it'll reappear in the Web UI.
       Sidekiq.redis do |conn|
         conn.pipelined do
           conn.srem('processes', identity)
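
Net effect for the Launcher: the Celluloid actor wiring (new_link, trap_exit, watchdog) is gone and it becomes a plain object with a thread-based lifecycle. run spins up the heartbeat thread, the poller and the manager; quiet stops accepting new jobs; stop waits out the configured timeout, requeues leftovers via the fetch strategy and clears the heartbeat record. A rough usage sketch; the options hash is illustrative, in practice Sidekiq::CLI assembles it:

    # Sketch only: drive the new Launcher lifecycle by hand, assuming an
    # options hash similar to what Sidekiq::CLI would normally build.
    require 'sidekiq'
    require 'sidekiq/launcher'

    options  = Sidekiq.options.merge(concurrency: 5, queues: ['default'], timeout: 8)
    launcher = Sidekiq::Launcher.new(options)

    launcher.run    # heartbeat thread + poller + manager
    sleep 10        # ...process jobs (the real CLI blocks on signal handling here)
    launcher.quiet  # stop picking up new work
    launcher.stop   # wait up to options[:timeout], bulk_requeue, clear_heartbeat
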
@@ -1,156 +1,89 @@
 # encoding: utf-8
 require 'sidekiq/util'
-require 'sidekiq/actor'
 require 'sidekiq/processor'
 require 'sidekiq/fetch'
+require 'thread'
 
 module Sidekiq
 
   ##
-  # The main router in the system. This
-  # manages the processor state and accepts messages
-  # from Redis to be dispatched to an idle processor.
+  # The Manager is the central coordination point in Sidekiq, controlling
+  # the lifecycle of the Processors and feeding them jobs as necessary.
+  #
+  # Tasks:
+  #
+  # 1. start: Spin up Processors.
+  # 3. processor_died: Handle job failure, throw away Processor, create new one.
+  # 4. quiet: shutdown idle Processors.
+  # 5. stop: hard stop the Processors by deadline.
+  #
+  # Note that only the last task requires its own Thread since it has to monitor
+  # the shutdown process. The other tasks are performed by other threads.
   #
   class Manager
     include Util
-    include Actor
-    trap_exit :processor_died
 
-    attr_reader :ready
-    attr_reader :busy
-    attr_accessor :fetcher
+    attr_reader :workers
+    attr_reader :options
 
-    SPIN_TIME_FOR_GRACEFUL_SHUTDOWN = 1
-    JVM_RESERVED_SIGNALS = ['USR1', 'USR2'] # Don't Process#kill if we get these signals via the API
-
-    def initialize(condvar, options={})
+    def initialize(options={})
       logger.debug { options.inspect }
       @options = options
       @count = options[:concurrency] || 25
       raise ArgumentError, "Concurrency of #{@count} is not supported" if @count < 1
-      @done_callback = nil
-      @finished = condvar
 
-      @in_progress = {}
-      @threads = {}
       @done = false
-      @busy = []
-      @ready = @count.times.map do
-        p = Processor.new_link(current_actor)
-        p.proxy_id = p.object_id
-        p
+      @workers = Set.new
+      @count.times do
+        @workers << Processor.new(self)
       end
+      @plock = Mutex.new
     end
 
-    def stop(options={})
-      watchdog('Manager#stop died') do
-        should_shutdown = options[:shutdown]
-        timeout = options[:timeout]
-
-        @done = true
-
-        logger.info { "Terminating #{@ready.size} quiet workers" }
-        @ready.each { |x| x.terminate if x.alive? }
-        @ready.clear
-
-        return if clean_up_for_graceful_shutdown
-
-        hard_shutdown_in timeout if should_shutdown
+    def start
+      @workers.each do |x|
+        x.start
       end
     end
 
-    def clean_up_for_graceful_shutdown
-      if @busy.empty?
-        shutdown
-        return true
-      end
+    def quiet
+      return if @done
+      @done = true
 
-      after(SPIN_TIME_FOR_GRACEFUL_SHUTDOWN) { clean_up_for_graceful_shutdown }
-      false
+      logger.info { "Terminating quiet workers" }
+      @workers.each { |x| x.terminate }
     end
 
-    def start
-      @ready.each { dispatch }
-    end
+    def stop(deadline)
+      quiet
+      return if @workers.empty?
 
-    def when_done(&blk)
-      @done_callback = blk
-    end
-
-    def processor_done(processor)
-      watchdog('Manager#processor_done died') do
-        @done_callback.call(processor) if @done_callback
-        @in_progress.delete(processor.object_id)
-        @threads.delete(processor.object_id)
-        @busy.delete(processor)
-        if stopped?
-          processor.terminate if processor.alive?
-          shutdown if @busy.empty?
-        else
-          @ready << processor if processor.alive?
-        end
-        dispatch
+      logger.info { "Pausing to allow workers to finish..." }
+      remaining = deadline - Time.now
+      while remaining > 0.5
+        return if @workers.empty?
+        sleep 0.5
+        remaining = deadline - Time.now
       end
-    end
+      return if @workers.empty?
 
-    def processor_died(processor, reason)
-      watchdog("Manager#processor_died died") do
-        @in_progress.delete(processor.object_id)
-        @threads.delete(processor.object_id)
-        @busy.delete(processor)
-
-        unless stopped?
-          p = Processor.new_link(current_actor)
-          p.proxy_id = p.object_id
-          @ready << p
-          dispatch
-        else
-          shutdown if @busy.empty?
-        end
-      end
+      hard_shutdown
     end
 
-    def assign(work)
-      watchdog("Manager#assign died") do
-        if stopped?
-          # Race condition between Manager#stop if Fetcher
-          # is blocked on redis and gets a message after
-          # all the ready Processors have been stopped.
-          # Push the message back to redis.
-          work.requeue
-        else
-          processor = @ready.pop
-          @in_progress[processor.object_id] = work
-          @busy << processor
-          processor.async.process(work)
-        end
+    def processor_stopped(processor)
+      @plock.synchronize do
+        @workers.delete(processor)
       end
     end
 
-    # A hack worthy of Rube Goldberg. We need to be able
-    # to hard stop a working thread. But there's no way for us to
-    # get handle to the underlying thread performing work for a processor
-    # so we have it call us and tell us.
-    def real_thread(proxy_id, thr)
-      @threads[proxy_id] = thr if thr.alive?
-    end
-
-    PROCTITLES = [
-      proc { 'sidekiq'.freeze },
-      proc { Sidekiq::VERSION },
-      proc { |mgr, data| data['tag'] },
-      proc { |mgr, data| "[#{mgr.busy.size} of #{data['concurrency']} busy]" },
-      proc { |mgr, data| "stopping" if mgr.stopped? },
-    ]
-
-    def heartbeat(key, data, json)
-      results = PROCTITLES.map {|x| x.(self, data) }
-      results.compact!
-      $0 = results.join(' ')
-
-      ❤(key, json)
-      after(5) do
-        heartbeat(key, data, json)
+    def processor_died(processor, reason)
+      @plock.synchronize do
+        @workers.delete(processor)
+        unless @done
+          p = Processor.new(self)
+          @workers << p
+          p.start
+        end
       end
     end
 
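
The Manager follows the same pattern: a Set of Processors guarded by a Mutex replaces the actor registry, processor_died swaps in a fresh Processor under the lock, and stop polls the set against a wall-clock deadline instead of scheduling an after(...) timer. A minimal sketch of the new call sequence, mirroring what Launcher#run and Launcher#stop do above; the options and the 8-second deadline are illustrative:

    # Minimal sketch of the Manager lifecycle introduced above.
    require 'sidekiq'
    require 'sidekiq/manager'

    manager = Sidekiq::Manager.new(concurrency: 10, queues: ['default'])
    manager.start                # each Processor runs in its own thread and starts fetching

    # later, during shutdown:
    manager.quiet                # @done = true; processors finish their current job and exit
    manager.stop(Time.now + 8)   # poll until workers drain, then hard_shutdown the stragglers
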
@@ -160,77 +93,34 @@ module Sidekiq
 
     private
 
-    def ❤(key, json)
-      begin
-        _, _, _, msg = Sidekiq.redis do |conn|
-          conn.multi do
-            conn.sadd('processes', key)
-            conn.hmset(key, 'info', json, 'busy', @busy.size, 'beat', Time.now.to_f)
-            conn.expire(key, 60)
-            conn.rpop("#{key}-signals")
-          end
-        end
-
-        return unless msg
-
-        if JVM_RESERVED_SIGNALS.include?(msg)
-          Sidekiq::CLI.instance.handle_signal(msg)
-        else
-          ::Process.kill(msg, $$)
-        end
-      rescue => e
-        # ignore all redis/network issues
-        logger.error("heartbeat: #{e.message}")
+    def hard_shutdown
+      # We've reached the timeout and we still have busy workers.
+      # They must die but their jobs shall live on.
+      cleanup = nil
+      @plock.synchronize do
+        cleanup = @workers.dup
       end
-    end
 
-    def hard_shutdown_in(delay)
-      logger.info { "Pausing up to #{delay} seconds to allow workers to finish..." }
+      if cleanup.size > 0
+        jobs = cleanup.map {|p| p.job }.compact
 
-      after(delay) do
-        watchdog("Manager#hard_shutdown_in died") do
-          # We've reached the timeout and we still have busy workers.
-          # They must die but their messages shall live on.
-          logger.warn { "Terminating #{@busy.size} busy worker threads" }
-          logger.warn { "Work still in progress #{@in_progress.values.inspect}" }
+        logger.warn { "Terminating #{cleanup.size} busy worker threads" }
+        logger.warn { "Work still in progress #{jobs.inspect}" }
 
-          requeue
-
-          @busy.each do |processor|
-            if processor.alive? && t = @threads.delete(processor.object_id)
-              t.raise Shutdown
-            end
-          end
-
-          @finished.signal
-        end
+        # Re-enqueue unfinished jobs
+        # NOTE: You may notice that we may push a job back to redis before
+        # the worker thread is terminated. This is ok because Sidekiq's
+        # contract says that jobs are run AT LEAST once. Process termination
+        # is delayed until we're certain the jobs are back in Redis because
+        # it is worse to lose a job than to run it twice.
+        strategy = (@options[:fetch] || Sidekiq::BasicFetch)
+        strategy.bulk_requeue(jobs, @options)
       end
-    end
-
-    def dispatch
-      return if stopped?
-      # This is a safety check to ensure we haven't leaked
-      # processors somehow.
-      raise "BUG: No processors, cannot continue!" if @ready.empty? && @busy.empty?
-      raise "No ready processor!?" if @ready.empty?
 
-      @fetcher.async.fetch
-    end
-
-    def shutdown
-      requeue
-      @finished.signal
+      cleanup.each do |processor|
+        processor.kill
+      end
     end
 
-    def requeue
-      # Re-enqueue terminated jobs
-      # NOTE: You may notice that we may push a job back to redis before
-      # the worker thread is terminated. This is ok because Sidekiq's
-      # contract says that jobs are run AT LEAST once. Process termination
-      # is delayed until we're certain the jobs are back in Redis because
-      # it is worse to lose a job than to run it twice.
-      Sidekiq::Fetcher.strategy.bulk_requeue(@in_progress.values, @options)
-      @in_progress.clear
-    end
   end
 end
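
Both Launcher#stop and Manager#hard_shutdown now look the requeue strategy up from options[:fetch], falling back to Sidekiq::BasicFetch, instead of the old Sidekiq::Fetcher.strategy global. A custom fetcher therefore only needs a bulk_requeue class method to take part in shutdown. A hedged sketch follows; LoggingFetch is invented for illustration, and a real strategy also has to implement the fetching side:

    # Invented example of the options[:fetch] hook used above; only the
    # bulk_requeue shutdown path is sketched.
    require 'sidekiq'
    require 'sidekiq/fetch'

    class LoggingFetch < Sidekiq::BasicFetch
      def self.bulk_requeue(in_progress, options)
        Sidekiq.logger.info { "Requeueing #{in_progress.size} unfinished job(s)" }
        super # BasicFetch pushes the jobs back onto their queues
      end
    end

    Sidekiq.options[:fetch] = LoggingFetch
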