sidekiq 2.15.1 → 4.2.10

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of sidekiq might be problematic. Click here for more details.

Files changed (187)
  1. checksums.yaml +7 -0
  2. data/.github/contributing.md +32 -0
  3. data/.github/issue_template.md +9 -0
  4. data/.gitignore +1 -0
  5. data/.travis.yml +16 -17
  6. data/3.0-Upgrade.md +70 -0
  7. data/4.0-Upgrade.md +53 -0
  8. data/COMM-LICENSE +56 -44
  9. data/Changes.md +644 -1
  10. data/Ent-Changes.md +173 -0
  11. data/Gemfile +27 -0
  12. data/LICENSE +1 -1
  13. data/Pro-2.0-Upgrade.md +138 -0
  14. data/Pro-3.0-Upgrade.md +44 -0
  15. data/Pro-Changes.md +457 -3
  16. data/README.md +46 -29
  17. data/Rakefile +6 -3
  18. data/bin/sidekiq +4 -0
  19. data/bin/sidekiqctl +41 -20
  20. data/bin/sidekiqload +154 -0
  21. data/code_of_conduct.md +50 -0
  22. data/lib/generators/sidekiq/templates/worker.rb.erb +9 -0
  23. data/lib/generators/sidekiq/templates/worker_spec.rb.erb +6 -0
  24. data/lib/generators/sidekiq/templates/worker_test.rb.erb +8 -0
  25. data/lib/generators/sidekiq/worker_generator.rb +49 -0
  26. data/lib/sidekiq.rb +141 -29
  27. data/lib/sidekiq/api.rb +540 -106
  28. data/lib/sidekiq/cli.rb +131 -71
  29. data/lib/sidekiq/client.rb +168 -96
  30. data/lib/sidekiq/core_ext.rb +36 -8
  31. data/lib/sidekiq/exception_handler.rb +20 -28
  32. data/lib/sidekiq/extensions/action_mailer.rb +25 -5
  33. data/lib/sidekiq/extensions/active_record.rb +8 -4
  34. data/lib/sidekiq/extensions/class_methods.rb +9 -5
  35. data/lib/sidekiq/extensions/generic_proxy.rb +1 -0
  36. data/lib/sidekiq/fetch.rb +45 -101
  37. data/lib/sidekiq/launcher.rb +144 -30
  38. data/lib/sidekiq/logging.rb +69 -12
  39. data/lib/sidekiq/manager.rb +90 -140
  40. data/lib/sidekiq/middleware/chain.rb +18 -5
  41. data/lib/sidekiq/middleware/i18n.rb +9 -2
  42. data/lib/sidekiq/middleware/server/active_record.rb +1 -1
  43. data/lib/sidekiq/middleware/server/logging.rb +11 -11
  44. data/lib/sidekiq/middleware/server/retry_jobs.rb +98 -44
  45. data/lib/sidekiq/paginator.rb +20 -8
  46. data/lib/sidekiq/processor.rb +157 -96
  47. data/lib/sidekiq/rails.rb +109 -5
  48. data/lib/sidekiq/redis_connection.rb +70 -24
  49. data/lib/sidekiq/scheduled.rb +122 -50
  50. data/lib/sidekiq/testing.rb +171 -31
  51. data/lib/sidekiq/testing/inline.rb +1 -0
  52. data/lib/sidekiq/util.rb +31 -5
  53. data/lib/sidekiq/version.rb +2 -1
  54. data/lib/sidekiq/web.rb +136 -263
  55. data/lib/sidekiq/web/action.rb +93 -0
  56. data/lib/sidekiq/web/application.rb +336 -0
  57. data/lib/sidekiq/web/helpers.rb +278 -0
  58. data/lib/sidekiq/web/router.rb +100 -0
  59. data/lib/sidekiq/worker.rb +40 -7
  60. data/sidekiq.gemspec +18 -14
  61. data/web/assets/images/favicon.ico +0 -0
  62. data/web/assets/images/{status-sd8051fd480.png → status.png} +0 -0
  63. data/web/assets/javascripts/application.js +67 -19
  64. data/web/assets/javascripts/dashboard.js +138 -29
  65. data/web/assets/stylesheets/application.css +267 -406
  66. data/web/assets/stylesheets/bootstrap.css +4 -8
  67. data/web/locales/cs.yml +78 -0
  68. data/web/locales/da.yml +9 -1
  69. data/web/locales/de.yml +18 -9
  70. data/web/locales/el.yml +68 -0
  71. data/web/locales/en.yml +19 -4
  72. data/web/locales/es.yml +10 -1
  73. data/web/locales/fa.yml +79 -0
  74. data/web/locales/fr.yml +50 -32
  75. data/web/locales/hi.yml +75 -0
  76. data/web/locales/it.yml +27 -18
  77. data/web/locales/ja.yml +27 -12
  78. data/web/locales/ko.yml +8 -3
  79. data/web/locales/{no.yml → nb.yml} +19 -5
  80. data/web/locales/nl.yml +8 -3
  81. data/web/locales/pl.yml +0 -1
  82. data/web/locales/pt-br.yml +11 -4
  83. data/web/locales/pt.yml +8 -1
  84. data/web/locales/ru.yml +39 -21
  85. data/web/locales/sv.yml +68 -0
  86. data/web/locales/ta.yml +75 -0
  87. data/web/locales/uk.yml +76 -0
  88. data/web/locales/zh-cn.yml +68 -0
  89. data/web/locales/zh-tw.yml +68 -0
  90. data/web/views/_footer.erb +17 -0
  91. data/web/views/_job_info.erb +72 -60
  92. data/web/views/_nav.erb +58 -25
  93. data/web/views/_paging.erb +5 -5
  94. data/web/views/_poll_link.erb +7 -0
  95. data/web/views/_summary.erb +20 -14
  96. data/web/views/busy.erb +94 -0
  97. data/web/views/dashboard.erb +34 -21
  98. data/web/views/dead.erb +34 -0
  99. data/web/views/layout.erb +8 -30
  100. data/web/views/morgue.erb +75 -0
  101. data/web/views/queue.erb +37 -30
  102. data/web/views/queues.erb +26 -20
  103. data/web/views/retries.erb +60 -47
  104. data/web/views/retry.erb +23 -19
  105. data/web/views/scheduled.erb +39 -35
  106. data/web/views/scheduled_job_info.erb +2 -1
  107. metadata +152 -195
  108. data/Contributing.md +0 -29
  109. data/config.ru +0 -18
  110. data/lib/sidekiq/actor.rb +0 -7
  111. data/lib/sidekiq/capistrano.rb +0 -54
  112. data/lib/sidekiq/yaml_patch.rb +0 -21
  113. data/test/config.yml +0 -11
  114. data/test/env_based_config.yml +0 -11
  115. data/test/fake_env.rb +0 -0
  116. data/test/helper.rb +0 -42
  117. data/test/test_api.rb +0 -341
  118. data/test/test_cli.rb +0 -326
  119. data/test/test_client.rb +0 -211
  120. data/test/test_exception_handler.rb +0 -124
  121. data/test/test_extensions.rb +0 -105
  122. data/test/test_fetch.rb +0 -44
  123. data/test/test_manager.rb +0 -83
  124. data/test/test_middleware.rb +0 -135
  125. data/test/test_processor.rb +0 -160
  126. data/test/test_redis_connection.rb +0 -97
  127. data/test/test_retry.rb +0 -306
  128. data/test/test_scheduled.rb +0 -86
  129. data/test/test_scheduling.rb +0 -47
  130. data/test/test_sidekiq.rb +0 -37
  131. data/test/test_testing.rb +0 -82
  132. data/test/test_testing_fake.rb +0 -265
  133. data/test/test_testing_inline.rb +0 -92
  134. data/test/test_util.rb +0 -18
  135. data/test/test_web.rb +0 -372
  136. data/web/assets/images/bootstrap/glyphicons-halflings-white.png +0 -0
  137. data/web/assets/images/bootstrap/glyphicons-halflings.png +0 -0
  138. data/web/assets/images/status/active.png +0 -0
  139. data/web/assets/images/status/idle.png +0 -0
  140. data/web/assets/javascripts/locales/README.md +0 -27
  141. data/web/assets/javascripts/locales/jquery.timeago.ar.js +0 -96
  142. data/web/assets/javascripts/locales/jquery.timeago.bg.js +0 -18
  143. data/web/assets/javascripts/locales/jquery.timeago.bs.js +0 -49
  144. data/web/assets/javascripts/locales/jquery.timeago.ca.js +0 -18
  145. data/web/assets/javascripts/locales/jquery.timeago.cy.js +0 -20
  146. data/web/assets/javascripts/locales/jquery.timeago.cz.js +0 -18
  147. data/web/assets/javascripts/locales/jquery.timeago.da.js +0 -18
  148. data/web/assets/javascripts/locales/jquery.timeago.de.js +0 -18
  149. data/web/assets/javascripts/locales/jquery.timeago.el.js +0 -18
  150. data/web/assets/javascripts/locales/jquery.timeago.en-short.js +0 -20
  151. data/web/assets/javascripts/locales/jquery.timeago.en.js +0 -20
  152. data/web/assets/javascripts/locales/jquery.timeago.es.js +0 -18
  153. data/web/assets/javascripts/locales/jquery.timeago.et.js +0 -18
  154. data/web/assets/javascripts/locales/jquery.timeago.fa.js +0 -22
  155. data/web/assets/javascripts/locales/jquery.timeago.fi.js +0 -28
  156. data/web/assets/javascripts/locales/jquery.timeago.fr-short.js +0 -16
  157. data/web/assets/javascripts/locales/jquery.timeago.fr.js +0 -17
  158. data/web/assets/javascripts/locales/jquery.timeago.he.js +0 -18
  159. data/web/assets/javascripts/locales/jquery.timeago.hr.js +0 -49
  160. data/web/assets/javascripts/locales/jquery.timeago.hu.js +0 -18
  161. data/web/assets/javascripts/locales/jquery.timeago.hy.js +0 -18
  162. data/web/assets/javascripts/locales/jquery.timeago.id.js +0 -18
  163. data/web/assets/javascripts/locales/jquery.timeago.it.js +0 -16
  164. data/web/assets/javascripts/locales/jquery.timeago.ja.js +0 -19
  165. data/web/assets/javascripts/locales/jquery.timeago.ko.js +0 -17
  166. data/web/assets/javascripts/locales/jquery.timeago.lt.js +0 -20
  167. data/web/assets/javascripts/locales/jquery.timeago.mk.js +0 -20
  168. data/web/assets/javascripts/locales/jquery.timeago.nl.js +0 -20
  169. data/web/assets/javascripts/locales/jquery.timeago.no.js +0 -18
  170. data/web/assets/javascripts/locales/jquery.timeago.pl.js +0 -31
  171. data/web/assets/javascripts/locales/jquery.timeago.pt-br.js +0 -16
  172. data/web/assets/javascripts/locales/jquery.timeago.pt.js +0 -16
  173. data/web/assets/javascripts/locales/jquery.timeago.ro.js +0 -18
  174. data/web/assets/javascripts/locales/jquery.timeago.rs.js +0 -49
  175. data/web/assets/javascripts/locales/jquery.timeago.ru.js +0 -34
  176. data/web/assets/javascripts/locales/jquery.timeago.sk.js +0 -18
  177. data/web/assets/javascripts/locales/jquery.timeago.sl.js +0 -44
  178. data/web/assets/javascripts/locales/jquery.timeago.sv.js +0 -18
  179. data/web/assets/javascripts/locales/jquery.timeago.th.js +0 -20
  180. data/web/assets/javascripts/locales/jquery.timeago.tr.js +0 -16
  181. data/web/assets/javascripts/locales/jquery.timeago.uk.js +0 -34
  182. data/web/assets/javascripts/locales/jquery.timeago.uz.js +0 -19
  183. data/web/assets/javascripts/locales/jquery.timeago.zh-CN.js +0 -20
  184. data/web/assets/javascripts/locales/jquery.timeago.zh-TW.js +0 -20
  185. data/web/views/_poll.erb +0 -14
  186. data/web/views/_workers.erb +0 -29
  187. data/web/views/index.erb +0 -16
@@ -1,4 +1,5 @@
1
- require 'sidekiq/actor'
1
+ # encoding: utf-8
2
+ # frozen_string_literal: true
2
3
  require 'sidekiq/manager'
3
4
  require 'sidekiq/fetch'
4
5
  require 'sidekiq/scheduled'
@@ -9,51 +10,164 @@ module Sidekiq
9
10
  # If any of these actors die, the Sidekiq process exits
10
11
  # immediately.
11
12
  class Launcher
12
- include Actor
13
13
  include Util
14
14
 
15
- trap_exit :actor_died
16
-
17
- attr_reader :manager, :poller, :fetcher
15
+ attr_accessor :manager, :poller, :fetcher
18
16
 
19
17
  def initialize(options)
20
- @manager = Sidekiq::Manager.new_link options
21
- @poller = Sidekiq::Scheduled::Poller.new_link
22
- @fetcher = Sidekiq::Fetcher.new_link @manager, options
23
- @manager.fetcher = @fetcher
18
+ @manager = Sidekiq::Manager.new(options)
19
+ @poller = Sidekiq::Scheduled::Poller.new
24
20
  @done = false
25
21
  @options = options
26
22
  end
27
23
 
28
- def actor_died(actor, reason)
29
- return if @done
30
- Sidekiq.logger.warn("Sidekiq died due to the following error, cannot recover, process exiting")
31
- handle_exception(reason)
32
- exit(1)
24
+ def run
25
+ @thread = safe_thread("heartbeat", &method(:start_heartbeat))
26
+ @poller.start
27
+ @manager.start
33
28
  end
34
29
 
35
- def run
36
- watchdog('Launcher#run') do
37
- manager.async.start
38
- poller.async.poll(true)
39
- end
30
+ # Stops this instance from processing any more jobs,
31
+ #
32
+ def quiet
33
+ @done = true
34
+ @manager.quiet
35
+ @poller.terminate
40
36
  end
41
37
 
38
+ # Shuts down the process. This method does not
39
+ # return until all work is complete and cleaned up.
40
+ # It can take up to the timeout to complete.
42
41
  def stop
43
- watchdog('Launcher#stop') do
44
- @done = true
45
- Sidekiq::Fetcher.done!
46
- fetcher.async.terminate if fetcher.alive?
47
- poller.async.terminate if poller.alive?
48
-
49
- manager.async.stop(:shutdown => true, :timeout => @options[:timeout])
50
- manager.wait(:shutdown)
42
+ deadline = Time.now + @options[:timeout]
43
+
44
+ @done = true
45
+ @manager.quiet
46
+ @poller.terminate
47
+
48
+ @manager.stop(deadline)
49
+
50
+ # Requeue everything in case there was a worker who grabbed work while stopped
51
+ # This call is a no-op in Sidekiq but necessary for Sidekiq Pro.
52
+ strategy = (@options[:fetch] || Sidekiq::BasicFetch)
53
+ strategy.bulk_requeue([], @options)
54
+
55
+ clear_heartbeat
56
+ end
57
+
58
+ def stopping?
59
+ @done
60
+ end
61
+
62
+ private unless $TESTING
63
+
64
+ JVM_RESERVED_SIGNALS = ['USR1', 'USR2'] # Don't Process#kill if we get these signals via the API
65
+
66
+ def heartbeat
67
+ results = Sidekiq::CLI::PROCTITLES.map {|x| x.(self, to_data) }
68
+ results.compact!
69
+ $0 = results.join(' ')
70
+
71
+
72
+ end
73
+
74
+ def ❤
75
+ key = identity
76
+ fails = procd = 0
77
+ begin
78
+ Processor::FAILURE.update {|curr| fails = curr; 0 }
79
+ Processor::PROCESSED.update {|curr| procd = curr; 0 }
80
+
81
+ workers_key = "#{key}:workers".freeze
82
+ nowdate = Time.now.utc.strftime("%Y-%m-%d".freeze)
83
+ Sidekiq.redis do |conn|
84
+ conn.multi do
85
+ conn.incrby("stat:processed".freeze, procd)
86
+ conn.incrby("stat:processed:#{nowdate}", procd)
87
+ conn.incrby("stat:failed".freeze, fails)
88
+ conn.incrby("stat:failed:#{nowdate}", fails)
89
+ conn.del(workers_key)
90
+ Processor::WORKER_STATE.each_pair do |tid, hash|
91
+ conn.hset(workers_key, tid, Sidekiq.dump_json(hash))
92
+ end
93
+ conn.expire(workers_key, 60)
94
+ end
95
+ end
96
+ fails = procd = 0
97
+
98
+ _, exists, _, _, msg = Sidekiq.redis do |conn|
99
+ conn.multi do
100
+ conn.sadd('processes', key)
101
+ conn.exists(key)
102
+ conn.hmset(key, 'info', to_json, 'busy', Processor::WORKER_STATE.size, 'beat', Time.now.to_f, 'quiet', @done)
103
+ conn.expire(key, 60)
104
+ conn.rpop("#{key}-signals")
105
+ end
106
+ end
107
+
108
+ # first heartbeat or recovering from an outage and need to reestablish our heartbeat
109
+ fire_event(:heartbeat) if !exists
110
+
111
+ return unless msg
112
+
113
+ if JVM_RESERVED_SIGNALS.include?(msg)
114
+ Sidekiq::CLI.instance.handle_signal(msg)
115
+ else
116
+ ::Process.kill(msg, $$)
117
+ end
118
+ rescue => e
119
+ # ignore all redis/network issues
120
+ logger.error("heartbeat: #{e.message}")
121
+ # don't lose the counts if there was a network issue
122
+ Processor::PROCESSED.increment(procd)
123
+ Processor::FAILURE.increment(fails)
124
+ end
125
+ end
126
+
127
+ def start_heartbeat
128
+ while true
129
+ heartbeat
130
+ sleep 5
51
131
  end
132
+ Sidekiq.logger.info("Heartbeat stopping...")
52
133
  end
53
134
 
54
- def procline(tag)
55
- $0 = manager.procline(tag)
56
- manager.after(5) { procline(tag) }
135
+ def to_data
136
+ @data ||= begin
137
+ {
138
+ 'hostname' => hostname,
139
+ 'started_at' => Time.now.to_f,
140
+ 'pid' => $$,
141
+ 'tag' => @options[:tag] || '',
142
+ 'concurrency' => @options[:concurrency],
143
+ 'queues' => @options[:queues].uniq,
144
+ 'labels' => @options[:labels],
145
+ 'identity' => identity,
146
+ }
147
+ end
148
+ end
149
+
150
+ def to_json
151
+ @json ||= begin
152
+ # this data changes infrequently so dump it to a string
153
+ # now so we don't need to dump it every heartbeat.
154
+ Sidekiq.dump_json(to_data)
155
+ end
57
156
  end
157
+
158
+ def clear_heartbeat
159
+ # Remove record from Redis since we are shutting down.
160
+ # Note we don't stop the heartbeat thread; if the process
161
+ # doesn't actually exit, it'll reappear in the Web UI.
162
+ Sidekiq.redis do |conn|
163
+ conn.pipelined do
164
+ conn.srem('processes', identity)
165
+ conn.del("#{identity}:workers")
166
+ end
167
+ end
168
+ rescue
169
+ # best effort, ignore network errors
170
+ end
171
+
58
172
  end
59
173
  end
@@ -1,45 +1,102 @@
1
+ # frozen_string_literal: true
1
2
  require 'time'
2
3
  require 'logger'
4
+ require 'fcntl'
3
5
 
4
6
  module Sidekiq
5
7
  module Logging
6
8
 
7
9
  class Pretty < Logger::Formatter
10
+ SPACE = " "
11
+
8
12
  # Provide a call() method that returns the formatted message.
9
13
  def call(severity, time, program_name, message)
10
- "#{time.utc.iso8601} #{Process.pid} TID-#{Thread.current.object_id.to_s(36)}#{context} #{severity}: #{message}\n"
14
+ "#{time.utc.iso8601(3)} #{::Process.pid} TID-#{Thread.current.object_id.to_s(36)}#{context} #{severity}: #{message}\n"
11
15
  end
12
16
 
13
17
  def context
14
18
  c = Thread.current[:sidekiq_context]
15
- c ? " #{c}" : ''
19
+ " #{c.join(SPACE)}" if c && c.any?
16
20
  end
17
21
  end
18
22
 
19
- def self.with_context(msg)
20
- begin
21
- Thread.current[:sidekiq_context] = msg
22
- yield
23
- ensure
24
- Thread.current[:sidekiq_context] = nil
23
+ class WithoutTimestamp < Pretty
24
+ def call(severity, time, program_name, message)
25
+ "#{::Process.pid} TID-#{Thread.current.object_id.to_s(36)}#{context} #{severity}: #{message}\n"
25
26
  end
26
27
  end
27
28
 
29
+ def self.with_context(msg)
30
+ Thread.current[:sidekiq_context] ||= []
31
+ Thread.current[:sidekiq_context] << msg
32
+ yield
33
+ ensure
34
+ Thread.current[:sidekiq_context].pop
35
+ end
36
+
28
37
  def self.initialize_logger(log_target = STDOUT)
29
- oldlogger = @logger
38
+ oldlogger = defined?(@logger) ? @logger : nil
30
39
  @logger = Logger.new(log_target)
31
40
  @logger.level = Logger::INFO
32
- @logger.formatter = Pretty.new
41
+ @logger.formatter = ENV['DYNO'] ? WithoutTimestamp.new : Pretty.new
33
42
  oldlogger.close if oldlogger && !$TESTING # don't want to close testing's STDOUT logging
34
43
  @logger
35
44
  end
36
45
 
37
46
  def self.logger
38
- @logger || initialize_logger
47
+ defined?(@logger) ? @logger : initialize_logger
39
48
  end
40
49
 
41
50
  def self.logger=(log)
42
- @logger = (log ? log : Logger.new('/dev/null'))
51
+ @logger = (log ? log : Logger.new(File::NULL))
52
+ end
53
+
54
+ # This reopens ALL logfiles in the process that have been rotated
55
+ # using logrotate(8) (without copytruncate) or similar tools.
56
+ # A +File+ object is considered for reopening if it is:
57
+ # 1) opened with the O_APPEND and O_WRONLY flags
58
+ # 2) the current open file handle does not match its original open path
59
+ # 3) unbuffered (as far as userspace buffering goes, not O_SYNC)
60
+ # Returns the number of files reopened
61
+ def self.reopen_logs
62
+ to_reopen = []
63
+ append_flags = File::WRONLY | File::APPEND
64
+
65
+ ObjectSpace.each_object(File) do |fp|
66
+ begin
67
+ if !fp.closed? && fp.stat.file? && fp.sync && (fp.fcntl(Fcntl::F_GETFL) & append_flags) == append_flags
68
+ to_reopen << fp
69
+ end
70
+ rescue IOError, Errno::EBADF
71
+ end
72
+ end
73
+
74
+ nr = 0
75
+ to_reopen.each do |fp|
76
+ orig_st = begin
77
+ fp.stat
78
+ rescue IOError, Errno::EBADF
79
+ next
80
+ end
81
+
82
+ begin
83
+ b = File.stat(fp.path)
84
+ next if orig_st.ino == b.ino && orig_st.dev == b.dev
85
+ rescue Errno::ENOENT
86
+ end
87
+
88
+ begin
89
+ File.open(fp.path, 'a') { |tmpfp| fp.reopen(tmpfp) }
90
+ fp.sync = true
91
+ nr += 1
92
+ rescue IOError, Errno::EBADF
93
+ # not much we can do...
94
+ end
95
+ end
96
+ nr
97
+ rescue RuntimeError => ex
98
+ # RuntimeError: ObjectSpace is disabled; each_object will only work with Class, pass -X+O to enable
99
+ puts "Unable to reopen logs: #{ex.message}"
43
100
  end
44
101
 
45
102
  def logger
@@ -1,188 +1,138 @@
1
+ # encoding: utf-8
2
+ # frozen_string_literal: true
1
3
  require 'sidekiq/util'
2
- require 'sidekiq/actor'
3
4
  require 'sidekiq/processor'
4
5
  require 'sidekiq/fetch'
6
+ require 'thread'
7
+ require 'set'
5
8
 
6
9
  module Sidekiq
7
10
 
8
11
  ##
9
- # The main router in the system. This
10
- # manages the processor state and accepts messages
11
- # from Redis to be dispatched to an idle processor.
12
+ # The Manager is the central coordination point in Sidekiq, controlling
13
+ # the lifecycle of the Processors.
14
+ #
15
+ # Tasks:
16
+ #
17
+ # 1. start: Spin up Processors.
18
+ # 3. processor_died: Handle job failure, throw away Processor, create new one.
19
+ # 4. quiet: shutdown idle Processors.
20
+ # 5. stop: hard stop the Processors by deadline.
21
+ #
22
+ # Note that only the last task requires its own Thread since it has to monitor
23
+ # the shutdown process. The other tasks are performed by other threads.
12
24
  #
13
25
  class Manager
14
26
  include Util
15
- include Actor
16
- trap_exit :processor_died
17
27
 
18
- attr_reader :ready
19
- attr_reader :busy
20
- attr_accessor :fetcher
28
+ attr_reader :workers
29
+ attr_reader :options
21
30
 
22
31
  def initialize(options={})
23
32
  logger.debug { options.inspect }
33
+ @options = options
24
34
  @count = options[:concurrency] || 25
25
- @done_callback = nil
35
+ raise ArgumentError, "Concurrency of #{@count} is not supported" if @count < 1
26
36
 
27
- @in_progress = {}
28
- @threads = {}
29
37
  @done = false
30
- @busy = []
31
- @ready = @count.times.map do
32
- p = Processor.new_link(current_actor)
33
- p.proxy_id = p.object_id
34
- p
38
+ @workers = Set.new
39
+ @count.times do
40
+ @workers << Processor.new(self)
35
41
  end
42
+ @plock = Mutex.new
36
43
  end
37
44
 
38
- def stop(options={})
39
- watchdog('Manager#stop died') do
40
- shutdown = options[:shutdown]
41
- timeout = options[:timeout]
42
-
43
- @done = true
44
-
45
- logger.info { "Shutting down #{@ready.size} quiet workers" }
46
- @ready.each { |x| x.terminate if x.alive? }
47
- @ready.clear
48
-
49
- clear_worker_set
50
-
51
- return after(0) { signal(:shutdown) } if @busy.empty?
52
- hard_shutdown_in timeout if shutdown
45
+ def start
46
+ @workers.each do |x|
47
+ x.start
53
48
  end
54
49
  end
55
50
 
56
- def start
57
- @ready.each { dispatch }
58
- end
51
+ def quiet
52
+ return if @done
53
+ @done = true
59
54
 
60
- def when_done(&blk)
61
- @done_callback = blk
55
+ logger.info { "Terminating quiet workers" }
56
+ @workers.each { |x| x.terminate }
57
+ fire_event(:quiet, true)
62
58
  end
63
59
 
64
- def processor_done(processor)
65
- watchdog('Manager#processor_done died') do
66
- @done_callback.call(processor) if @done_callback
67
- @in_progress.delete(processor.object_id)
68
- @threads.delete(processor.object_id)
69
- @busy.delete(processor)
70
- if stopped?
71
- processor.terminate if processor.alive?
72
- signal(:shutdown) if @busy.empty?
73
- else
74
- @ready << processor if processor.alive?
75
- end
76
- dispatch
60
+ # hack for quicker development / testing environment #2774
61
+ PAUSE_TIME = STDOUT.tty? ? 0.1 : 0.5
62
+
63
+ def stop(deadline)
64
+ quiet
65
+ fire_event(:shutdown, true)
66
+
67
+ # some of the shutdown events can be async,
68
+ # we don't have any way to know when they're done but
69
+ # give them a little time to take effect
70
+ sleep PAUSE_TIME
71
+ return if @workers.empty?
72
+
73
+ logger.info { "Pausing to allow workers to finish..." }
74
+ remaining = deadline - Time.now
75
+ while remaining > PAUSE_TIME
76
+ return if @workers.empty?
77
+ sleep PAUSE_TIME
78
+ remaining = deadline - Time.now
77
79
  end
80
+ return if @workers.empty?
81
+
82
+ hard_shutdown
78
83
  end
79
84
 
80
- def processor_died(processor, reason)
81
- watchdog("Manager#processor_died died") do
82
- @in_progress.delete(processor.object_id)
83
- @threads.delete(processor.object_id)
84
- @busy.delete(processor)
85
-
86
- unless stopped?
87
- p = Processor.new_link(current_actor)
88
- p.proxy_id = p.object_id
89
- @ready << p
90
- dispatch
91
- else
92
- signal(:shutdown) if @busy.empty?
93
- end
85
+ def processor_stopped(processor)
86
+ @plock.synchronize do
87
+ @workers.delete(processor)
94
88
  end
95
89
  end
96
90
 
97
- def assign(work)
98
- watchdog("Manager#assign died") do
99
- if stopped?
100
- # Race condition between Manager#stop if Fetcher
101
- # is blocked on redis and gets a message after
102
- # all the ready Processors have been stopped.
103
- # Push the message back to redis.
104
- work.requeue
105
- else
106
- processor = @ready.pop
107
- @in_progress[processor.object_id] = work
108
- @busy << processor
109
- processor.async.process(work)
91
+ def processor_died(processor, reason)
92
+ @plock.synchronize do
93
+ @workers.delete(processor)
94
+ unless @done
95
+ p = Processor.new(self)
96
+ @workers << p
97
+ p.start
110
98
  end
111
99
  end
112
100
  end
113
101
 
114
- # A hack worthy of Rube Goldberg. We need to be able
115
- # to hard stop a working thread. But there's no way for us to
116
- # get handle to the underlying thread performing work for a processor
117
- # so we have it call us and tell us.
118
- def real_thread(proxy_id, thr)
119
- @threads[proxy_id] = thr
120
- end
121
-
122
- def procline(tag)
123
- "sidekiq #{Sidekiq::VERSION} #{tag}[#{@busy.size} of #{@count} busy]#{stopped? ? ' stopping' : ''}"
102
+ def stopped?
103
+ @done
124
104
  end
125
105
 
126
106
  private
127
107
 
128
- def clear_worker_set
129
- # Clearing workers in Redis
130
- # NOTE: we do this before terminating worker threads because the
131
- # process will likely receive a hard shutdown soon anyway, which
132
- # means the threads will killed.
133
- logger.debug { "Clearing workers in redis" }
134
- Sidekiq.redis do |conn|
135
- workers = conn.smembers('workers')
136
- workers_to_remove = workers.select do |worker_name|
137
- worker_name =~ /:#{process_id}-/
138
- end
139
- conn.srem('workers', workers_to_remove) if !workers_to_remove.empty?
108
+ def hard_shutdown
109
+ # We've reached the timeout and we still have busy workers.
110
+ # They must die but their jobs shall live on.
111
+ cleanup = nil
112
+ @plock.synchronize do
113
+ cleanup = @workers.dup
140
114
  end
141
- rescue => ex
142
- Sidekiq.logger.warn("Unable to clear worker set while shutting down: #{ex.message}")
143
- end
144
115
 
145
- def hard_shutdown_in(delay)
146
- logger.info { "Pausing up to #{delay} seconds to allow workers to finish..." }
147
-
148
- after(delay) do
149
- watchdog("Manager#hard_shutdown_in died") do
150
- # We've reached the timeout and we still have busy workers.
151
- # They must die but their messages shall live on.
152
- logger.info("Still waiting for #{@busy.size} busy workers")
153
-
154
- # Re-enqueue terminated jobs
155
- # NOTE: You may notice that we may push a job back to redis before
156
- # the worker thread is terminated. This is ok because Sidekiq's
157
- # contract says that jobs are run AT LEAST once. Process termination
158
- # is delayed until we're certain the jobs are back in Redis because
159
- # it is worse to lose a job than to run it twice.
160
- Sidekiq::Fetcher.strategy.bulk_requeue(@in_progress.values)
161
-
162
- logger.debug { "Terminating #{@busy.size} busy worker threads" }
163
- @busy.each do |processor|
164
- if processor.alive? && t = @threads.delete(processor.object_id)
165
- t.raise Shutdown
166
- end
167
- end
168
-
169
- after(0) { signal(:shutdown) }
170
- end
171
- end
172
- end
116
+ if cleanup.size > 0
117
+ jobs = cleanup.map {|p| p.job }.compact
173
118
 
174
- def dispatch
175
- return if stopped?
176
- # This is a safety check to ensure we haven't leaked
177
- # processors somehow.
178
- raise "BUG: No processors, cannot continue!" if @ready.empty? && @busy.empty?
179
- raise "No ready processor!?" if @ready.empty?
119
+ logger.warn { "Terminating #{cleanup.size} busy worker threads" }
120
+ logger.warn { "Work still in progress #{jobs.inspect}" }
180
121
 
181
- @fetcher.async.fetch
182
- end
122
+ # Re-enqueue unfinished jobs
123
+ # NOTE: You may notice that we may push a job back to redis before
124
+ # the worker thread is terminated. This is ok because Sidekiq's
125
+ # contract says that jobs are run AT LEAST once. Process termination
126
+ # is delayed until we're certain the jobs are back in Redis because
127
+ # it is worse to lose a job than to run it twice.
128
+ strategy = (@options[:fetch] || Sidekiq::BasicFetch)
129
+ strategy.bulk_requeue(jobs, @options)
130
+ end
183
131
 
184
- def stopped?
185
- @done
132
+ cleanup.each do |processor|
133
+ processor.kill
134
+ end
186
135
  end
136
+
187
137
  end
188
138
  end