sidekiq 3.5.4 → 5.2.7

Potentially problematic release: this version of sidekiq might be problematic.

Files changed (175)
  1. checksums.yaml +5 -5
  2. data/.circleci/config.yml +61 -0
  3. data/{Contributing.md → .github/contributing.md} +0 -0
  4. data/.github/issue_template.md +11 -0
  5. data/.gitignore +3 -0
  6. data/.travis.yml +5 -10
  7. data/4.0-Upgrade.md +53 -0
  8. data/5.0-Upgrade.md +56 -0
  9. data/COMM-LICENSE +13 -11
  10. data/Changes.md +376 -1
  11. data/Ent-Changes.md +201 -2
  12. data/Gemfile +14 -18
  13. data/LICENSE +1 -1
  14. data/Pro-3.0-Upgrade.md +44 -0
  15. data/Pro-4.0-Upgrade.md +35 -0
  16. data/Pro-Changes.md +307 -2
  17. data/README.md +34 -22
  18. data/Rakefile +3 -3
  19. data/bin/sidekiq +0 -1
  20. data/bin/sidekiqctl +13 -86
  21. data/bin/sidekiqload +23 -27
  22. data/code_of_conduct.md +50 -0
  23. data/lib/generators/sidekiq/templates/worker_spec.rb.erb +3 -3
  24. data/lib/generators/sidekiq/templates/worker_test.rb.erb +6 -6
  25. data/lib/sidekiq.rb +72 -25
  26. data/lib/sidekiq/api.rb +206 -73
  27. data/lib/sidekiq/cli.rb +145 -101
  28. data/lib/sidekiq/client.rb +42 -36
  29. data/lib/sidekiq/core_ext.rb +1 -105
  30. data/lib/sidekiq/ctl.rb +221 -0
  31. data/lib/sidekiq/delay.rb +42 -0
  32. data/lib/sidekiq/exception_handler.rb +4 -5
  33. data/lib/sidekiq/extensions/action_mailer.rb +1 -0
  34. data/lib/sidekiq/extensions/active_record.rb +1 -0
  35. data/lib/sidekiq/extensions/class_methods.rb +1 -0
  36. data/lib/sidekiq/extensions/generic_proxy.rb +8 -1
  37. data/lib/sidekiq/fetch.rb +36 -111
  38. data/lib/sidekiq/job_logger.rb +25 -0
  39. data/lib/sidekiq/job_retry.rb +262 -0
  40. data/lib/sidekiq/launcher.rb +129 -55
  41. data/lib/sidekiq/logging.rb +21 -3
  42. data/lib/sidekiq/manager.rb +83 -182
  43. data/lib/sidekiq/middleware/chain.rb +1 -0
  44. data/lib/sidekiq/middleware/i18n.rb +1 -0
  45. data/lib/sidekiq/middleware/server/active_record.rb +10 -0
  46. data/lib/sidekiq/paginator.rb +1 -0
  47. data/lib/sidekiq/processor.rb +221 -103
  48. data/lib/sidekiq/rails.rb +47 -27
  49. data/lib/sidekiq/redis_connection.rb +74 -7
  50. data/lib/sidekiq/scheduled.rb +87 -28
  51. data/lib/sidekiq/testing.rb +150 -19
  52. data/lib/sidekiq/testing/inline.rb +1 -0
  53. data/lib/sidekiq/util.rb +15 -17
  54. data/lib/sidekiq/version.rb +2 -1
  55. data/lib/sidekiq/web.rb +120 -184
  56. data/lib/sidekiq/web/action.rb +89 -0
  57. data/lib/sidekiq/web/application.rb +353 -0
  58. data/lib/sidekiq/{web_helpers.rb → web/helpers.rb} +123 -47
  59. data/lib/sidekiq/web/router.rb +100 -0
  60. data/lib/sidekiq/worker.rb +135 -18
  61. data/sidekiq.gemspec +8 -14
  62. data/web/assets/images/{status-sd8051fd480.png → status.png} +0 -0
  63. data/web/assets/javascripts/application.js +24 -20
  64. data/web/assets/javascripts/dashboard.js +33 -18
  65. data/web/assets/stylesheets/application-rtl.css +246 -0
  66. data/web/assets/stylesheets/application.css +401 -7
  67. data/web/assets/stylesheets/bootstrap-rtl.min.css +9 -0
  68. data/web/assets/stylesheets/bootstrap.css +4 -8
  69. data/web/locales/ar.yml +81 -0
  70. data/web/locales/cs.yml +11 -1
  71. data/web/locales/de.yml +1 -1
  72. data/web/locales/en.yml +4 -0
  73. data/web/locales/es.yml +4 -3
  74. data/web/locales/fa.yml +80 -0
  75. data/web/locales/fr.yml +21 -12
  76. data/web/locales/he.yml +79 -0
  77. data/web/locales/ja.yml +24 -13
  78. data/web/locales/ru.yml +3 -0
  79. data/web/locales/ur.yml +80 -0
  80. data/web/views/_footer.erb +7 -9
  81. data/web/views/_job_info.erb +5 -1
  82. data/web/views/_nav.erb +5 -19
  83. data/web/views/_paging.erb +1 -1
  84. data/web/views/busy.erb +18 -9
  85. data/web/views/dashboard.erb +5 -5
  86. data/web/views/dead.erb +1 -1
  87. data/web/views/layout.erb +13 -5
  88. data/web/views/morgue.erb +16 -12
  89. data/web/views/queue.erb +12 -11
  90. data/web/views/queues.erb +5 -3
  91. data/web/views/retries.erb +19 -13
  92. data/web/views/retry.erb +2 -2
  93. data/web/views/scheduled.erb +4 -4
  94. data/web/views/scheduled_job_info.erb +1 -1
  95. metadata +45 -227
  96. data/lib/sidekiq/actor.rb +0 -39
  97. data/lib/sidekiq/middleware/server/logging.rb +0 -40
  98. data/lib/sidekiq/middleware/server/retry_jobs.rb +0 -206
  99. data/test/config.yml +0 -9
  100. data/test/env_based_config.yml +0 -11
  101. data/test/fake_env.rb +0 -0
  102. data/test/fixtures/en.yml +0 -2
  103. data/test/helper.rb +0 -49
  104. data/test/test_api.rb +0 -493
  105. data/test/test_cli.rb +0 -335
  106. data/test/test_client.rb +0 -194
  107. data/test/test_exception_handler.rb +0 -55
  108. data/test/test_extensions.rb +0 -126
  109. data/test/test_fetch.rb +0 -104
  110. data/test/test_logging.rb +0 -34
  111. data/test/test_manager.rb +0 -168
  112. data/test/test_middleware.rb +0 -159
  113. data/test/test_processor.rb +0 -237
  114. data/test/test_rails.rb +0 -21
  115. data/test/test_redis_connection.rb +0 -126
  116. data/test/test_retry.rb +0 -325
  117. data/test/test_scheduled.rb +0 -114
  118. data/test/test_scheduling.rb +0 -49
  119. data/test/test_sidekiq.rb +0 -99
  120. data/test/test_testing.rb +0 -142
  121. data/test/test_testing_fake.rb +0 -268
  122. data/test/test_testing_inline.rb +0 -93
  123. data/test/test_util.rb +0 -16
  124. data/test/test_web.rb +0 -608
  125. data/test/test_web_helpers.rb +0 -53
  126. data/web/assets/images/bootstrap/glyphicons-halflings-white.png +0 -0
  127. data/web/assets/images/bootstrap/glyphicons-halflings.png +0 -0
  128. data/web/assets/images/status/active.png +0 -0
  129. data/web/assets/images/status/idle.png +0 -0
  130. data/web/assets/javascripts/locales/README.md +0 -27
  131. data/web/assets/javascripts/locales/jquery.timeago.ar.js +0 -96
  132. data/web/assets/javascripts/locales/jquery.timeago.bg.js +0 -18
  133. data/web/assets/javascripts/locales/jquery.timeago.bs.js +0 -49
  134. data/web/assets/javascripts/locales/jquery.timeago.ca.js +0 -18
  135. data/web/assets/javascripts/locales/jquery.timeago.cs.js +0 -18
  136. data/web/assets/javascripts/locales/jquery.timeago.cy.js +0 -20
  137. data/web/assets/javascripts/locales/jquery.timeago.da.js +0 -18
  138. data/web/assets/javascripts/locales/jquery.timeago.de.js +0 -18
  139. data/web/assets/javascripts/locales/jquery.timeago.el.js +0 -18
  140. data/web/assets/javascripts/locales/jquery.timeago.en-short.js +0 -20
  141. data/web/assets/javascripts/locales/jquery.timeago.en.js +0 -20
  142. data/web/assets/javascripts/locales/jquery.timeago.es.js +0 -18
  143. data/web/assets/javascripts/locales/jquery.timeago.et.js +0 -18
  144. data/web/assets/javascripts/locales/jquery.timeago.fa.js +0 -22
  145. data/web/assets/javascripts/locales/jquery.timeago.fi.js +0 -28
  146. data/web/assets/javascripts/locales/jquery.timeago.fr-short.js +0 -16
  147. data/web/assets/javascripts/locales/jquery.timeago.fr.js +0 -17
  148. data/web/assets/javascripts/locales/jquery.timeago.he.js +0 -18
  149. data/web/assets/javascripts/locales/jquery.timeago.hr.js +0 -49
  150. data/web/assets/javascripts/locales/jquery.timeago.hu.js +0 -18
  151. data/web/assets/javascripts/locales/jquery.timeago.hy.js +0 -18
  152. data/web/assets/javascripts/locales/jquery.timeago.id.js +0 -18
  153. data/web/assets/javascripts/locales/jquery.timeago.it.js +0 -16
  154. data/web/assets/javascripts/locales/jquery.timeago.ja.js +0 -19
  155. data/web/assets/javascripts/locales/jquery.timeago.ko.js +0 -17
  156. data/web/assets/javascripts/locales/jquery.timeago.lt.js +0 -20
  157. data/web/assets/javascripts/locales/jquery.timeago.mk.js +0 -20
  158. data/web/assets/javascripts/locales/jquery.timeago.nl.js +0 -20
  159. data/web/assets/javascripts/locales/jquery.timeago.no.js +0 -18
  160. data/web/assets/javascripts/locales/jquery.timeago.pl.js +0 -31
  161. data/web/assets/javascripts/locales/jquery.timeago.pt-br.js +0 -16
  162. data/web/assets/javascripts/locales/jquery.timeago.pt.js +0 -16
  163. data/web/assets/javascripts/locales/jquery.timeago.ro.js +0 -18
  164. data/web/assets/javascripts/locales/jquery.timeago.rs.js +0 -49
  165. data/web/assets/javascripts/locales/jquery.timeago.ru.js +0 -34
  166. data/web/assets/javascripts/locales/jquery.timeago.sk.js +0 -18
  167. data/web/assets/javascripts/locales/jquery.timeago.sl.js +0 -44
  168. data/web/assets/javascripts/locales/jquery.timeago.sv.js +0 -18
  169. data/web/assets/javascripts/locales/jquery.timeago.th.js +0 -20
  170. data/web/assets/javascripts/locales/jquery.timeago.tr.js +0 -16
  171. data/web/assets/javascripts/locales/jquery.timeago.uk.js +0 -34
  172. data/web/assets/javascripts/locales/jquery.timeago.uz.js +0 -19
  173. data/web/assets/javascripts/locales/jquery.timeago.zh-cn.js +0 -20
  174. data/web/assets/javascripts/locales/jquery.timeago.zh-tw.js +0 -20
  175. data/web/views/_poll_js.erb +0 -5
data/lib/sidekiq/logging.rb
@@ -1,5 +1,7 @@
+# frozen_string_literal: true
 require 'time'
 require 'logger'
+require 'fcntl'
 
 module Sidekiq
   module Logging
@@ -9,7 +11,7 @@ module Sidekiq
 
       # Provide a call() method that returns the formatted message.
       def call(severity, time, program_name, message)
-        "#{time.utc.iso8601(3)} #{::Process.pid} TID-#{Thread.current.object_id.to_s(36)}#{context} #{severity}: #{message}\n"
+        "#{time.utc.iso8601(3)} #{::Process.pid} TID-#{Sidekiq::Logging.tid}#{context} #{severity}: #{message}\n"
      end
 
      def context
@@ -20,10 +22,26 @@ module Sidekiq
 
    class WithoutTimestamp < Pretty
      def call(severity, time, program_name, message)
-        "#{::Process.pid} TID-#{Thread.current.object_id.to_s(36)}#{context} #{severity}: #{message}\n"
+        "#{::Process.pid} TID-#{Sidekiq::Logging.tid}#{context} #{severity}: #{message}\n"
      end
    end
 
+    def self.tid
+      Thread.current['sidekiq_tid'] ||= (Thread.current.object_id ^ ::Process.pid).to_s(36)
+    end
+
+    def self.job_hash_context(job_hash)
+      # If we're using a wrapper class, like ActiveJob, use the "wrapped"
+      # attribute to expose the underlying thing.
+      klass = job_hash['wrapped'] || job_hash["class"]
+      bid = job_hash['bid']
+      "#{klass} JID-#{job_hash['jid']}#{" BID-#{bid}" if bid}"
+    end
+
+    def self.with_job_hash_context(job_hash, &block)
+      with_context(job_hash_context(job_hash), &block)
+    end
+
    def self.with_context(msg)
      Thread.current[:sidekiq_context] ||= []
      Thread.current[:sidekiq_context] << msg
@@ -46,7 +64,7 @@ module Sidekiq
    end
 
    def self.logger=(log)
-      @logger = (log ? log : Logger.new('/dev/null'))
+      @logger = (log ? log : Logger.new(File::NULL))
    end
 
    # This reopens ALL logfiles in the process that have been rotated
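
For orientation, a minimal standalone sketch (not part of the gem) of what the new Sidekiq::Logging.tid scheme above produces: XOR-ing the thread's object_id with the process pid and encoding the result in base 36 gives a short per-thread tag, memoized in a thread-local.

    require 'time'

    # Illustrative only: mirrors the tid computation added above.
    tid = (Thread.current.object_id ^ ::Process.pid).to_s(36)

    # A Pretty-formatted log line then looks roughly like:
    #   2019-07-01T12:00:00.000Z 4321 TID-ouy7z2art INFO: hello
    puts "#{Time.now.utc.iso8601(3)} #{::Process.pid} TID-#{tid} INFO: hello"
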
data/lib/sidekiq/manager.rb
@@ -1,156 +1,100 @@
-# encoding: utf-8
+# frozen_string_literal: true
 require 'sidekiq/util'
-require 'sidekiq/actor'
 require 'sidekiq/processor'
 require 'sidekiq/fetch'
+require 'thread'
+require 'set'
 
 module Sidekiq
 
   ##
-  # The main router in the system. This
-  # manages the processor state and accepts messages
-  # from Redis to be dispatched to an idle processor.
+  # The Manager is the central coordination point in Sidekiq, controlling
+  # the lifecycle of the Processors.
+  #
+  # Tasks:
+  #
+  # 1. start: Spin up Processors.
+  # 3. processor_died: Handle job failure, throw away Processor, create new one.
+  # 4. quiet: shutdown idle Processors.
+  # 5. stop: hard stop the Processors by deadline.
+  #
+  # Note that only the last task requires its own Thread since it has to monitor
+  # the shutdown process. The other tasks are performed by other threads.
  #
  class Manager
    include Util
-    include Actor
-    trap_exit :processor_died
 
-    attr_reader :ready
-    attr_reader :busy
-    attr_accessor :fetcher
+    attr_reader :workers
+    attr_reader :options
 
-    SPIN_TIME_FOR_GRACEFUL_SHUTDOWN = 1
-    JVM_RESERVED_SIGNALS = ['USR1', 'USR2'] # Don't Process#kill if we get these signals via the API
-
-    def initialize(condvar, options={})
+    def initialize(options={})
      logger.debug { options.inspect }
      @options = options
-      @count = options[:concurrency] || 25
+      @count = options[:concurrency] || 10
      raise ArgumentError, "Concurrency of #{@count} is not supported" if @count < 1
-      @done_callback = nil
-      @finished = condvar
 
-      @in_progress = {}
-      @threads = {}
      @done = false
-      @busy = []
-      @ready = @count.times.map do
-        p = Processor.new_link(current_actor)
-        p.proxy_id = p.object_id
-        p
+      @workers = Set.new
+      @count.times do
+        @workers << Processor.new(self)
      end
+      @plock = Mutex.new
    end
 
-    def stop(options={})
-      watchdog('Manager#stop died') do
-        should_shutdown = options[:shutdown]
-        timeout = options[:timeout]
-
-        @done = true
-
-        logger.info { "Terminating #{@ready.size} quiet workers" }
-        @ready.each { |x| x.terminate if x.alive? }
-        @ready.clear
-
-        return if clean_up_for_graceful_shutdown
-
-        hard_shutdown_in timeout if should_shutdown
+    def start
+      @workers.each do |x|
+        x.start
      end
    end
 
-    def clean_up_for_graceful_shutdown
-      if @busy.empty?
-        shutdown
-        return true
-      end
+    def quiet
+      return if @done
+      @done = true
 
-      after(SPIN_TIME_FOR_GRACEFUL_SHUTDOWN) { clean_up_for_graceful_shutdown }
-      false
+      logger.info { "Terminating quiet workers" }
+      @workers.each { |x| x.terminate }
+      fire_event(:quiet, reverse: true)
    end
 
-    def start
-      @ready.each { dispatch }
-    end
+    # hack for quicker development / testing environment #2774
+    PAUSE_TIME = STDOUT.tty? ? 0.1 : 0.5
 
-    def when_done(&blk)
-      @done_callback = blk
-    end
+    def stop(deadline)
+      quiet
+      fire_event(:shutdown, reverse: true)
 
-    def processor_done(processor)
-      watchdog('Manager#processor_done died') do
-        @done_callback.call(processor) if @done_callback
-        @in_progress.delete(processor.object_id)
-        @threads.delete(processor.object_id)
-        @busy.delete(processor)
-        if stopped?
-          processor.terminate if processor.alive?
-          shutdown if @busy.empty?
-        else
-          @ready << processor if processor.alive?
-        end
-        dispatch
-      end
-    end
+      # some of the shutdown events can be async,
+      # we don't have any way to know when they're done but
+      # give them a little time to take effect
+      sleep PAUSE_TIME
+      return if @workers.empty?
 
-    def processor_died(processor, reason)
-      watchdog("Manager#processor_died died") do
-        @in_progress.delete(processor.object_id)
-        @threads.delete(processor.object_id)
-        @busy.delete(processor)
-
-        unless stopped?
-          p = Processor.new_link(current_actor)
-          p.proxy_id = p.object_id
-          @ready << p
-          dispatch
-        else
-          shutdown if @busy.empty?
-        end
+      logger.info { "Pausing to allow workers to finish..." }
+      remaining = deadline - ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
+      while remaining > PAUSE_TIME
+        return if @workers.empty?
+        sleep PAUSE_TIME
+        remaining = deadline - ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
      end
-    end
+      return if @workers.empty?
 
-    def assign(work)
-      watchdog("Manager#assign died") do
-        if stopped?
-          # Race condition between Manager#stop if Fetcher
-          # is blocked on redis and gets a message after
-          # all the ready Processors have been stopped.
-          # Push the message back to redis.
-          work.requeue
-        else
-          processor = @ready.pop
-          @in_progress[processor.object_id] = work
-          @busy << processor
-          processor.async.process(work)
-        end
-      end
+      hard_shutdown
    end
 
-    # A hack worthy of Rube Goldberg. We need to be able
-    # to hard stop a working thread. But there's no way for us to
-    # get handle to the underlying thread performing work for a processor
-    # so we have it call us and tell us.
-    def real_thread(proxy_id, thr)
-      @threads[proxy_id] = thr if thr.alive?
+    def processor_stopped(processor)
+      @plock.synchronize do
+        @workers.delete(processor)
+      end
    end
 
-    PROCTITLES = [
-      proc { 'sidekiq'.freeze },
-      proc { Sidekiq::VERSION },
-      proc { |mgr, data| data['tag'] },
-      proc { |mgr, data| "[#{mgr.busy.size} of #{data['concurrency']} busy]" },
-      proc { |mgr, data| "stopping" if mgr.stopped? },
-    ]
-
-    def heartbeat(key, data, json)
-      results = PROCTITLES.map {|x| x.(self, data) }
-      results.compact!
-      $0 = results.join(' ')
-
-      ❤(key, json)
-      after(5) do
-        heartbeat(key, data, json)
+    def processor_died(processor, reason)
+      @plock.synchronize do
+        @workers.delete(processor)
+        unless @done
+          p = Processor.new(self)
+          @workers << p
+          p.start
+        end
      end
    end
 
@@ -160,77 +104,34 @@ module Sidekiq
 
    private
 
-    def ❤(key, json)
-      begin
-        _, _, _, msg = Sidekiq.redis do |conn|
-          conn.multi do
-            conn.sadd('processes', key)
-            conn.hmset(key, 'info', json, 'busy', @busy.size, 'beat', Time.now.to_f)
-            conn.expire(key, 60)
-            conn.rpop("#{key}-signals")
-          end
-        end
-
-        return unless msg
-
-        if JVM_RESERVED_SIGNALS.include?(msg)
-          Sidekiq::CLI.instance.handle_signal(msg)
-        else
-          ::Process.kill(msg, $$)
-        end
-      rescue => e
-        # ignore all redis/network issues
-        logger.error("heartbeat: #{e.message}")
+    def hard_shutdown
+      # We've reached the timeout and we still have busy workers.
+      # They must die but their jobs shall live on.
+      cleanup = nil
+      @plock.synchronize do
+        cleanup = @workers.dup
      end
-    end
 
-    def hard_shutdown_in(delay)
-      logger.info { "Pausing up to #{delay} seconds to allow workers to finish..." }
+      if cleanup.size > 0
+        jobs = cleanup.map {|p| p.job }.compact
 
-      after(delay) do
-        watchdog("Manager#hard_shutdown_in died") do
-          # We've reached the timeout and we still have busy workers.
-          # They must die but their messages shall live on.
-          logger.warn { "Terminating #{@busy.size} busy worker threads" }
-          logger.warn { "Work still in progress #{@in_progress.values.inspect}" }
+        logger.warn { "Terminating #{cleanup.size} busy worker threads" }
+        logger.warn { "Work still in progress #{jobs.inspect}" }
 
-          requeue
-
-          @busy.each do |processor|
-            if processor.alive? && t = @threads.delete(processor.object_id)
-              t.raise Shutdown
-            end
-          end
-
-          @finished.signal
-        end
+        # Re-enqueue unfinished jobs
+        # NOTE: You may notice that we may push a job back to redis before
+        # the worker thread is terminated. This is ok because Sidekiq's
+        # contract says that jobs are run AT LEAST once. Process termination
+        # is delayed until we're certain the jobs are back in Redis because
+        # it is worse to lose a job than to run it twice.
+        strategy = (@options[:fetch] || Sidekiq::BasicFetch)
+        strategy.bulk_requeue(jobs, @options)
      end
-    end
-
-    def dispatch
-      return if stopped?
-      # This is a safety check to ensure we haven't leaked
-      # processors somehow.
-      raise "BUG: No processors, cannot continue!" if @ready.empty? && @busy.empty?
-      raise "No ready processor!?" if @ready.empty?
 
-      @fetcher.async.fetch
-    end
-
-    def shutdown
-      requeue
-      @finished.signal
+      cleanup.each do |processor|
+        processor.kill
+      end
    end
 
-    def requeue
-      # Re-enqueue terminated jobs
-      # NOTE: You may notice that we may push a job back to redis before
-      # the worker thread is terminated. This is ok because Sidekiq's
-      # contract says that jobs are run AT LEAST once. Process termination
-      # is delayed until we're certain the jobs are back in Redis because
-      # it is worse to lose a job than to run it twice.
-      Sidekiq::Fetcher.strategy.bulk_requeue(@in_progress.values, @options)
-      @in_progress.clear
-    end
  end
end
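
The rewritten Manager#stop above takes an absolute deadline (a monotonic-clock timestamp) rather than a timeout, and polls until either the deadline passes or all workers have stopped; using CLOCK_MONOTONIC keeps the shutdown window stable even if the system clock is adjusted mid-shutdown. A standalone sketch of that pattern (not from the gem), with a hypothetical work_done? standing in for @workers.empty?:

    PAUSE_TIME = 0.5

    def work_done?
      # hypothetical stand-in for `@workers.empty?`
      true
    end

    # Poll until the condition holds or the monotonic deadline passes.
    def wait_for(deadline)
      remaining = deadline - ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
      while remaining > PAUSE_TIME
        return true if work_done?
        sleep PAUSE_TIME
        remaining = deadline - ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
      end
      work_done?
    end

    # Callers compute the deadline up front, e.g.:
    #   deadline = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC) + 25
    #   wait_for(deadline)
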
data/lib/sidekiq/middleware/chain.rb
@@ -1,3 +1,4 @@
+# frozen_string_literal: true
 module Sidekiq
   # Middleware is code configured to run before/after
   # a message is processed. It is patterned after Rack
data/lib/sidekiq/middleware/i18n.rb
@@ -1,3 +1,4 @@
+# frozen_string_literal: true
 #
 # Simple middleware to save the current locale and restore it when the job executes.
 # Use it by requiring it in your initializer:
data/lib/sidekiq/middleware/server/active_record.rb
@@ -1,7 +1,17 @@
+# frozen_string_literal: true
 module Sidekiq
   module Middleware
     module Server
       class ActiveRecord
+
+        def initialize
+          # With Rails 5+ we must use the Reloader **always**.
+          # The reloader handles code loading and db connection management.
+          if defined?(::Rails) && ::Rails::VERSION::MAJOR >= 5
+            raise ArgumentError, "Rails 5 no longer needs or uses the ActiveRecord middleware."
+          end
+        end
+
        def call(*args)
          yield
        ensure
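
Since the constructor above now raises on Rails 5+, an app that registers this middleware manually would need to guard the registration by Rails version. A hypothetical initializer sketch, not part of this diff:

    require 'sidekiq'

    Sidekiq.configure_server do |config|
      if defined?(::Rails) && ::Rails::VERSION::MAJOR < 5
        require 'sidekiq/middleware/server/active_record'
        config.server_middleware do |chain|
          chain.add Sidekiq::Middleware::Server::ActiveRecord
        end
      end
    end
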
data/lib/sidekiq/paginator.rb
@@ -1,3 +1,4 @@
+# frozen_string_literal: true
 module Sidekiq
   module Paginator
 
data/lib/sidekiq/processor.rb
@@ -1,161 +1,279 @@
+# frozen_string_literal: true
 require 'sidekiq/util'
-require 'sidekiq/actor'
-
-require 'sidekiq/middleware/server/retry_jobs'
-require 'sidekiq/middleware/server/logging'
+require 'sidekiq/fetch'
+require 'sidekiq/job_logger'
+require 'sidekiq/job_retry'
+require 'thread'
 
 module Sidekiq
  ##
-  # The Processor receives a message from the Manager and actually
-  # processes it. It instantiates the worker, runs the middleware
-  # chain and then calls Sidekiq::Worker#perform.
+  # The Processor is a standalone thread which:
+  #
+  # 1. fetches a job from Redis
+  # 2. executes the job
+  #    a. instantiate the Worker
+  #    b. run the middleware chain
+  #    c. call #perform
+  #
+  # A Processor can exit due to shutdown (processor_stopped)
+  # or due to an error during job execution (processor_died)
+  #
+  # If an error occurs in the job execution, the
+  # Processor calls the Manager to create a new one
+  # to replace itself and exits.
+  #
  class Processor
-    # To prevent a memory leak, ensure that stats expire. However, they should take up a minimal amount of storage
-    # so keep them around for a long time
-    STATS_TIMEOUT = 24 * 60 * 60 * 365 * 5
 
    include Util
-    include Actor
-
-    def self.default_middleware
-      Middleware::Chain.new do |m|
-        m.add Middleware::Server::Logging
-        m.add Middleware::Server::RetryJobs
-        if defined?(::ActiveRecord::Base)
-          require 'sidekiq/middleware/server/active_record'
-          m.add Sidekiq::Middleware::Server::ActiveRecord
+
+    attr_reader :thread
+    attr_reader :job
+
+    def initialize(mgr)
+      @mgr = mgr
+      @down = false
+      @done = false
+      @job = nil
+      @thread = nil
+      @strategy = (mgr.options[:fetch] || Sidekiq::BasicFetch).new(mgr.options)
+      @reloader = Sidekiq.options[:reloader]
+      @logging = (mgr.options[:job_logger] || Sidekiq::JobLogger).new
+      @retrier = Sidekiq::JobRetry.new
+    end
+
+    def terminate(wait=false)
+      @done = true
+      return if !@thread
+      @thread.value if wait
+    end
+
+    def kill(wait=false)
+      @done = true
+      return if !@thread
+      # unlike the other actors, terminate does not wait
+      # for the thread to finish because we don't know how
+      # long the job will take to finish. Instead we
+      # provide a `kill` method to call after the shutdown
+      # timeout passes.
+      @thread.raise ::Sidekiq::Shutdown
+      @thread.value if wait
+    end
+
+    def start
+      @thread ||= safe_thread("processor", &method(:run))
+    end
+
+    private unless $TESTING
+
+    def run
+      begin
+        while !@done
+          process_one
        end
+        @mgr.processor_stopped(self)
+      rescue Sidekiq::Shutdown
+        @mgr.processor_stopped(self)
+      rescue Exception => ex
+        @mgr.processor_died(self, ex)
      end
    end
 
-    attr_accessor :proxy_id
+    def process_one
+      @job = fetch
+      process(@job) if @job
+      @job = nil
+    end
 
-    def initialize(boss)
-      @boss = boss
+    def get_one
+      begin
+        work = @strategy.retrieve_work
+        (logger.info { "Redis is online, #{::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - @down} sec downtime" }; @down = nil) if @down
+        work
+      rescue Sidekiq::Shutdown
+      rescue => ex
+        handle_fetch_exception(ex)
+      end
+    end
+
+    def fetch
+      j = get_one
+      if j && @done
+        j.requeue
+        nil
+      else
+        j
+      end
+    end
+
+    def handle_fetch_exception(ex)
+      if !@down
+        @down = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
+        logger.error("Error fetching job: #{ex}")
+        handle_exception(ex)
+      end
+      sleep(1)
+      nil
+    end
+
+    def dispatch(job_hash, queue)
+      # since middleware can mutate the job hash
+      # we clone here so we report the original
+      # job structure to the Web UI
+      pristine = cloned(job_hash)
+
+      Sidekiq::Logging.with_job_hash_context(job_hash) do
+        @retrier.global(pristine, queue) do
+          @logging.call(job_hash, queue) do
+            stats(pristine, queue) do
+              # Rails 5 requires a Reloader to wrap code execution. In order to
+              # constantize the worker and instantiate an instance, we have to call
+              # the Reloader. It handles code loading, db connection management, etc.
+              # Effectively this block denotes a "unit of work" to Rails.
+              @reloader.call do
+                klass = constantize(job_hash['class'])
+                worker = klass.new
+                worker.jid = job_hash['jid']
+                @retrier.local(worker, pristine, queue) do
+                  yield worker
+                end
+              end
+            end
+          end
+        end
+      end
    end
 
    def process(work)
-      msgstr = work.message
+      jobstr = work.job
      queue = work.queue_name
 
-      @boss.async.real_thread(proxy_id, Thread.current)
+      # Treat malformed JSON as a special case: job goes straight to the morgue.
+      job_hash = nil
+      begin
+        job_hash = Sidekiq.load_json(jobstr)
+      rescue => ex
+        handle_exception(ex, { :context => "Invalid JSON for job", :jobstr => jobstr })
+        # we can't notify because the job isn't a valid hash payload.
+        DeadSet.new.kill(jobstr, notify_failure: false)
+        return work.acknowledge
+      end
 
-      ack = false
+      ack = true
      begin
-        msg = Sidekiq.load_json(msgstr)
-        klass = msg['class'.freeze].constantize
-        worker = klass.new
-        worker.jid = msg['jid'.freeze]
-
-        stats(worker, msg, queue) do
-          Sidekiq.server_middleware.invoke(worker, msg, queue) do
-            # Only ack if we either attempted to start this job or
-            # successfully completed it. This prevents us from
-            # losing jobs if a middleware raises an exception before yielding
-            ack = true
-            execute_job(worker, cloned(msg['args'.freeze]))
+        dispatch(job_hash, queue) do |worker|
+          Sidekiq.server_middleware.invoke(worker, job_hash, queue) do
+            execute_job(worker, cloned(job_hash['args']))
          end
        end
-        ack = true
      rescue Sidekiq::Shutdown
        # Had to force kill this job because it didn't finish
        # within the timeout. Don't acknowledge the work since
        # we didn't properly finish it.
        ack = false
+      rescue Sidekiq::JobRetry::Handled => h
+        # this is the common case: job raised error and Sidekiq::JobRetry::Handled
+        # signals that we created a retry successfully. We can acknowlege the job.
+        e = h.cause ? h.cause : h
+        handle_exception(e, { :context => "Job raised exception", :job => job_hash, :jobstr => jobstr })
+        raise e
      rescue Exception => ex
-        handle_exception(ex, msg || { :message => msgstr })
-        raise
+        # Unexpected error! This is very bad and indicates an exception that got past
+        # the retry subsystem (e.g. network partition). We won't acknowledge the job
+        # so it can be rescued when using Sidekiq Pro.
+        ack = false
+        handle_exception(ex, { :context => "Internal exception!", :job => job_hash, :jobstr => jobstr })
+        raise e
      ensure
        work.acknowledge if ack
      end
-
-      @boss.async.processor_done(current_actor)
-    end
-
-    def inspect
-      "<Processor##{object_id.to_s(16)}>"
    end
 
    def execute_job(worker, cloned_args)
      worker.perform(*cloned_args)
    end
 
-    private
+    # Ruby doesn't provide atomic counters out of the box so we'll
+    # implement something simple ourselves.
+    # https://bugs.ruby-lang.org/issues/14706
+    class Counter
+      def initialize
+        @value = 0
+        @lock = Mutex.new
+      end
 
-    def thread_identity
-      @str ||= Thread.current.object_id.to_s(36)
+      def incr(amount=1)
+        @lock.synchronize { @value = @value + amount }
+      end
+
+      def reset
+        @lock.synchronize { val = @value; @value = 0; val }
+      end
    end
 
-    def stats(worker, msg, queue)
-      # Do not conflate errors from the job with errors caused by updating
-      # stats so calling code can react appropriately
-      retry_and_suppress_exceptions do
-        hash = Sidekiq.dump_json({:queue => queue, :payload => msg, :run_at => Time.now.to_i })
-        Sidekiq.redis do |conn|
-          conn.multi do
-            conn.hmset("#{identity}:workers", thread_identity, hash)
-            conn.expire("#{identity}:workers", 60*60*4)
-          end
-        end
+    # jruby's Hash implementation is not threadsafe, so we wrap it in a mutex here
+    class SharedWorkerState
+      def initialize
+        @worker_state = {}
+        @lock = Mutex.new
+      end
+
+      def set(tid, hash)
+        @lock.synchronize { @worker_state[tid] = hash }
+      end
+
+      def delete(tid)
+        @lock.synchronize { @worker_state.delete(tid) }
+      end
+
+      def dup
+        @lock.synchronize { @worker_state.dup }
+      end
+
+      def size
+        @lock.synchronize { @worker_state.size }
+      end
+
+      def clear
+        @lock.synchronize { @worker_state.clear }
      end
+    end
+
+    PROCESSED = Counter.new
+    FAILURE = Counter.new
+    WORKER_STATE = SharedWorkerState.new
+
+    def stats(job_hash, queue)
+      tid = Sidekiq::Logging.tid
+      WORKER_STATE.set(tid, {:queue => queue, :payload => job_hash, :run_at => Time.now.to_i })
 
-      nowdate = Time.now.utc.strftime("%Y-%m-%d".freeze)
      begin
        yield
      rescue Exception
-        retry_and_suppress_exceptions do
-          failed = "stat:failed:#{nowdate}"
-          Sidekiq.redis do |conn|
-            conn.multi do
-              conn.incrby("stat:failed".freeze, 1)
-              conn.incrby(failed, 1)
-              conn.expire(failed, STATS_TIMEOUT)
-            end
-          end
-        end
+        FAILURE.incr
        raise
      ensure
-        retry_and_suppress_exceptions do
-          processed = "stat:processed:#{nowdate}"
-          Sidekiq.redis do |conn|
-            conn.multi do
-              conn.hdel("#{identity}:workers", thread_identity)
-              conn.incrby("stat:processed".freeze, 1)
-              conn.incrby(processed, 1)
-              conn.expire(processed, STATS_TIMEOUT)
-            end
-          end
-        end
+        WORKER_STATE.delete(tid)
+        PROCESSED.incr
      end
    end
 
    # Deep clone the arguments passed to the worker so that if
-    # the message fails, what is pushed back onto Redis hasn't
+    # the job fails, what is pushed back onto Redis hasn't
    # been mutated by the worker.
-    def cloned(ary)
-      Marshal.load(Marshal.dump(ary))
+    def cloned(thing)
+      Marshal.load(Marshal.dump(thing))
    end
 
-    # If an exception occurs in the block passed to this method, that block will be retried up to max_retries times.
-    # All exceptions will be swallowed and logged.
-    def retry_and_suppress_exceptions(max_retries = 5)
-      retry_count = 0
-      begin
-        yield
-      rescue => e
-        retry_count += 1
-        if retry_count <= max_retries
-          Sidekiq.logger.debug {"Suppressing and retrying error: #{e.inspect}"}
-          pause_for_recovery(retry_count)
-          retry
-        else
-          handle_exception(e, { :message => "Exhausted #{max_retries} retries"})
-        end
+    def constantize(str)
+      names = str.split('::')
+      names.shift if names.empty? || names.first.empty?
+
+      names.inject(Object) do |constant, name|
+        # the false flag limits search for name to under the constant namespace
+        # which mimics Rails' behaviour
+        constant.const_defined?(name, false) ? constant.const_get(name, false) : constant.const_missing(name)
      end
    end
 
-    def pause_for_recovery(retry_count)
-      sleep(retry_count)
-    end
  end
end
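
The per-job Redis writes in the old stats method are replaced above by in-process, mutex-guarded counters (PROCESSED, FAILURE) that are read and cleared periodically elsewhere in this release (the heartbeat code, not shown in this section). A standalone sketch of that read-and-clear counter pattern, using illustrative names rather than the gem's constants:

    # Mutex-guarded counter: incr from many threads; reset returns the
    # accumulated value and zeroes it inside one critical section, so no
    # increment racing in between is lost.
    class AtomicCounter
      def initialize
        @value = 0
        @lock = Mutex.new
      end

      def incr(amount = 1)
        @lock.synchronize { @value += amount }
      end

      def reset
        @lock.synchronize { val = @value; @value = 0; val }
      end
    end

    counter = AtomicCounter.new
    threads = 5.times.map { Thread.new { 100.times { counter.incr } } }
    threads.each(&:join)
    puts counter.reset  # => 500
    puts counter.reset  # => 0
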