sidekiq 5.2.8 → 6.2.2

This version of sidekiq has been flagged as potentially problematic.
Files changed (97)
  1. checksums.yaml +4 -4
  2. data/Changes.md +248 -0
  3. data/LICENSE +1 -1
  4. data/README.md +18 -34
  5. data/bin/sidekiq +26 -2
  6. data/bin/sidekiqload +32 -24
  7. data/bin/sidekiqmon +8 -0
  8. data/lib/generators/sidekiq/templates/worker_test.rb.erb +1 -1
  9. data/lib/generators/sidekiq/worker_generator.rb +21 -13
  10. data/lib/sidekiq/api.rb +310 -249
  11. data/lib/sidekiq/cli.rb +144 -180
  12. data/lib/sidekiq/client.rb +64 -48
  13. data/lib/sidekiq/delay.rb +5 -6
  14. data/lib/sidekiq/exception_handler.rb +10 -12
  15. data/lib/sidekiq/extensions/action_mailer.rb +13 -22
  16. data/lib/sidekiq/extensions/active_record.rb +13 -10
  17. data/lib/sidekiq/extensions/class_methods.rb +14 -11
  18. data/lib/sidekiq/extensions/generic_proxy.rb +6 -4
  19. data/lib/sidekiq/fetch.rb +38 -31
  20. data/lib/sidekiq/job.rb +8 -0
  21. data/lib/sidekiq/job_logger.rb +45 -7
  22. data/lib/sidekiq/job_retry.rb +64 -67
  23. data/lib/sidekiq/launcher.rb +146 -60
  24. data/lib/sidekiq/logger.rb +166 -0
  25. data/lib/sidekiq/manager.rb +11 -13
  26. data/lib/sidekiq/middleware/chain.rb +20 -8
  27. data/lib/sidekiq/middleware/i18n.rb +5 -7
  28. data/lib/sidekiq/monitor.rb +133 -0
  29. data/lib/sidekiq/paginator.rb +18 -14
  30. data/lib/sidekiq/processor.rb +71 -70
  31. data/lib/sidekiq/rails.rb +29 -37
  32. data/lib/sidekiq/redis_connection.rb +50 -48
  33. data/lib/sidekiq/scheduled.rb +35 -30
  34. data/lib/sidekiq/sd_notify.rb +149 -0
  35. data/lib/sidekiq/systemd.rb +24 -0
  36. data/lib/sidekiq/testing/inline.rb +2 -1
  37. data/lib/sidekiq/testing.rb +36 -27
  38. data/lib/sidekiq/util.rb +45 -16
  39. data/lib/sidekiq/version.rb +2 -1
  40. data/lib/sidekiq/web/action.rb +15 -11
  41. data/lib/sidekiq/web/application.rb +86 -76
  42. data/lib/sidekiq/web/csrf_protection.rb +180 -0
  43. data/lib/sidekiq/web/helpers.rb +114 -86
  44. data/lib/sidekiq/web/router.rb +23 -19
  45. data/lib/sidekiq/web.rb +61 -105
  46. data/lib/sidekiq/worker.rb +126 -102
  47. data/lib/sidekiq.rb +69 -44
  48. data/sidekiq.gemspec +23 -16
  49. data/web/assets/images/apple-touch-icon.png +0 -0
  50. data/web/assets/javascripts/application.js +25 -27
  51. data/web/assets/javascripts/dashboard.js +4 -23
  52. data/web/assets/stylesheets/application-dark.css +147 -0
  53. data/web/assets/stylesheets/application.css +37 -128
  54. data/web/locales/ar.yml +8 -2
  55. data/web/locales/de.yml +14 -2
  56. data/web/locales/en.yml +5 -0
  57. data/web/locales/es.yml +18 -2
  58. data/web/locales/fr.yml +10 -3
  59. data/web/locales/ja.yml +7 -1
  60. data/web/locales/lt.yml +83 -0
  61. data/web/locales/pl.yml +4 -4
  62. data/web/locales/ru.yml +4 -0
  63. data/web/locales/vi.yml +83 -0
  64. data/web/views/_job_info.erb +3 -2
  65. data/web/views/busy.erb +54 -20
  66. data/web/views/dashboard.erb +14 -6
  67. data/web/views/dead.erb +3 -3
  68. data/web/views/layout.erb +2 -0
  69. data/web/views/morgue.erb +9 -6
  70. data/web/views/queue.erb +11 -2
  71. data/web/views/queues.erb +10 -2
  72. data/web/views/retries.erb +11 -8
  73. data/web/views/retry.erb +3 -3
  74. data/web/views/scheduled.erb +5 -2
  75. metadata +32 -64
  76. data/.circleci/config.yml +0 -61
  77. data/.github/contributing.md +0 -32
  78. data/.github/issue_template.md +0 -11
  79. data/.gitignore +0 -15
  80. data/.travis.yml +0 -11
  81. data/3.0-Upgrade.md +0 -70
  82. data/4.0-Upgrade.md +0 -53
  83. data/5.0-Upgrade.md +0 -56
  84. data/COMM-LICENSE +0 -97
  85. data/Ent-Changes.md +0 -238
  86. data/Gemfile +0 -23
  87. data/Pro-2.0-Upgrade.md +0 -138
  88. data/Pro-3.0-Upgrade.md +0 -44
  89. data/Pro-4.0-Upgrade.md +0 -35
  90. data/Pro-Changes.md +0 -759
  91. data/Rakefile +0 -9
  92. data/bin/sidekiqctl +0 -20
  93. data/code_of_conduct.md +0 -50
  94. data/lib/sidekiq/core_ext.rb +0 -1
  95. data/lib/sidekiq/ctl.rb +0 -221
  96. data/lib/sidekiq/logging.rb +0 -122
  97. data/lib/sidekiq/middleware/server/active_record.rb +0 -23
data/lib/sidekiq/fetch.rb CHANGED
@@ -1,5 +1,6 @@
 # frozen_string_literal: true
-require 'sidekiq'
+
+require "sidekiq"
 
 module Sidekiq
   class BasicFetch
@@ -7,68 +8,60 @@ module Sidekiq
     # can check if the process is shutting down.
     TIMEOUT = 2
 
-    UnitOfWork = Struct.new(:queue, :job) do
+    UnitOfWork = Struct.new(:queue, :job) {
       def acknowledge
         # nothing to do
       end
 
       def queue_name
-        queue.sub(/.*queue:/, '')
+        queue.delete_prefix("queue:")
       end
 
       def requeue
         Sidekiq.redis do |conn|
-          conn.rpush("queue:#{queue_name}", job)
+          conn.rpush(queue, job)
         end
       end
-    end
+    }
 
     def initialize(options)
-      @strictly_ordered_queues = !!options[:strict]
-      @queues = options[:queues].map { |q| "queue:#{q}" }
+      raise ArgumentError, "missing queue list" unless options[:queues]
+      @options = options
+      @strictly_ordered_queues = !!@options[:strict]
+      @queues = @options[:queues].map { |q| "queue:#{q}" }
       if @strictly_ordered_queues
-        @queues = @queues.uniq
+        @queues.uniq!
         @queues << TIMEOUT
       end
     end
 
     def retrieve_work
-      work = Sidekiq.redis { |conn| conn.brpop(*queues_cmd) }
-      UnitOfWork.new(*work) if work
-    end
-
-    # Creating the Redis#brpop command takes into account any
-    # configured queue weights. By default Redis#brpop returns
-    # data from the first queue that has pending elements. We
-    # recreate the queue command each time we invoke Redis#brpop
-    # to honor weights and avoid queue starvation.
-    def queues_cmd
-      if @strictly_ordered_queues
-        @queues
-      else
-        queues = @queues.shuffle.uniq
-        queues << TIMEOUT
-        queues
+      qs = queues_cmd
+      # 4825 Sidekiq Pro with all queues paused will return an
+      # empty set of queues with a trailing TIMEOUT value.
+      if qs.size <= 1
+        sleep(TIMEOUT)
+        return nil
       end
-    end
 
+      work = Sidekiq.redis { |conn| conn.brpop(*qs) }
+      UnitOfWork.new(*work) if work
+    end
 
-    # By leaving this as a class method, it can be pluggable and used by the Manager actor. Making it
-    # an instance method will make it async to the Fetcher actor
-    def self.bulk_requeue(inprogress, options)
+    def bulk_requeue(inprogress, options)
       return if inprogress.empty?
 
       Sidekiq.logger.debug { "Re-queueing terminated jobs" }
       jobs_to_requeue = {}
       inprogress.each do |unit_of_work|
-        jobs_to_requeue[unit_of_work.queue_name] ||= []
-        jobs_to_requeue[unit_of_work.queue_name] << unit_of_work.job
+        jobs_to_requeue[unit_of_work.queue] ||= []
+        jobs_to_requeue[unit_of_work.queue] << unit_of_work.job
      end
 
      Sidekiq.redis do |conn|
        conn.pipelined do
          jobs_to_requeue.each do |queue, jobs|
-            conn.rpush("queue:#{queue}", jobs)
+            conn.rpush(queue, jobs)
          end
        end
      end
@@ -77,5 +70,19 @@ module Sidekiq
      Sidekiq.logger.warn("Failed to requeue #{inprogress.size} jobs: #{ex.message}")
    end
 
+    # Creating the Redis#brpop command takes into account any
+    # configured queue weights. By default Redis#brpop returns
+    # data from the first queue that has pending elements. We
+    # recreate the queue command each time we invoke Redis#brpop
+    # to honor weights and avoid queue starvation.
+    def queues_cmd
+      if @strictly_ordered_queues
+        @queues
+      else
+        queues = @queues.shuffle!.uniq
+        queues << TIMEOUT
+        queues
+      end
+    end
   end
 end
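
Note: BasicFetch now takes the options hash up front (raising ArgumentError without a :queues list) and bulk_requeue is an instance method rather than a class method. A minimal usage sketch, assuming a running Redis and made-up queue names:

require "sidekiq"
require "sidekiq/fetch"

# Build a fetcher for two hypothetical queues; :queues is now mandatory.
fetcher = Sidekiq::BasicFetch.new(queues: %w[critical default], strict: false)

work = fetcher.retrieve_work             # blocks on BRPOP for up to TIMEOUT seconds
if work
  puts "#{work.queue_name}: #{work.job}" # queue_name now strips the "queue:" prefix
  fetcher.bulk_requeue([work], {})       # instance-level requeue of unfinished work
end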

data/lib/sidekiq/job.rb ADDED
@@ -0,0 +1,8 @@
+require "sidekiq/worker"
+
+module Sidekiq
+  # Sidekiq::Job is a new alias for Sidekiq::Worker, coming in 6.3.0.
+  # You can opt into this by requiring 'sidekiq/job' in your initializer
+  # and then using `include Sidekiq::Job` rather than `Sidekiq::Worker`.
+  Job = Worker
+end
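
As the comment above notes, the alias can be opted into ahead of 6.3.0. A short sketch (the worker class is invented for illustration):

require "sidekiq/job"

class HardJob
  include Sidekiq::Job # behaves exactly like `include Sidekiq::Worker`

  def perform(name)
    puts "Hello, #{name}"
  end
end

HardJob.perform_async("world")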

data/lib/sidekiq/job_logger.rb CHANGED
@@ -1,25 +1,63 @@
 # frozen_string_literal: true
+
 module Sidekiq
   class JobLogger
+    def initialize(logger = Sidekiq.logger)
+      @logger = logger
+    end
 
     def call(item, queue)
       start = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
-      logger.info("start")
+      @logger.info("start")
+
       yield
-      logger.info("done: #{elapsed(start)} sec")
+
+      with_elapsed_time_context(start) do
+        @logger.info("done")
+      end
     rescue Exception
-      logger.info("fail: #{elapsed(start)} sec")
+      with_elapsed_time_context(start) do
+        @logger.info("fail")
+      end
+
       raise
     end
 
+    def prepare(job_hash, &block)
+      level = job_hash["log_level"]
+      if level
+        @logger.log_at(level) do
+          Sidekiq::Context.with(job_hash_context(job_hash), &block)
+        end
+      else
+        Sidekiq::Context.with(job_hash_context(job_hash), &block)
+      end
+    end
+
+    def job_hash_context(job_hash)
+      # If we're using a wrapper class, like ActiveJob, use the "wrapped"
+      # attribute to expose the underlying thing.
+      h = {
+        class: job_hash["display_class"] || job_hash["wrapped"] || job_hash["class"],
+        jid: job_hash["jid"]
+      }
+      h[:bid] = job_hash["bid"] if job_hash["bid"]
+      h[:tags] = job_hash["tags"] if job_hash["tags"]
+      h
+    end
+
+    def with_elapsed_time_context(start, &block)
+      Sidekiq::Context.with(elapsed_time_context(start), &block)
+    end
+
+    def elapsed_time_context(start)
+      {elapsed: elapsed(start).to_s}
+    end
+
     private
 
     def elapsed(start)
       (::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - start).round(3)
     end
-
-    def logger
-      Sidekiq.logger
-    end
   end
 end
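
A rough sketch of how the reworked JobLogger is driven per job: prepare installs the jid/class (and optional per-job log level) context for the duration of the block, while call wraps the actual perform and tags the "done"/"fail" line with the elapsed time. The job hash below is hand-built for illustration:

require "sidekiq"

job_logger = Sidekiq::JobLogger.new(Sidekiq.logger)
job = {"class" => "HardJob", "jid" => "abc123def456"}

job_logger.prepare(job) do          # pushes jid/class into the logging context
  job_logger.call(job, "default") do
    sleep 0.1                       # stand-in for worker#perform
  end                               # logs "done" with an elapsed context tag
end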

data/lib/sidekiq/job_retry.rb CHANGED
@@ -1,6 +1,10 @@
 # frozen_string_literal: true
-require 'sidekiq/scheduled'
-require 'sidekiq/api'
+
+require "sidekiq/scheduled"
+require "sidekiq/api"
+
+require "zlib"
+require "base64"
 
 module Sidekiq
   ##
@@ -57,6 +61,7 @@ module Sidekiq
   #
   class JobRetry
     class Handled < ::RuntimeError; end
+
     class Skip < Handled; end
 
     include Sidekiq::Util
@@ -70,7 +75,7 @@ module Sidekiq
     # The global retry handler requires only the barest of data.
     # We want to be able to retry as much as possible so we don't
     # require the worker to be instantiated.
-    def global(msg, queue)
+    def global(jobstr, queue)
       yield
     rescue Handled => ex
       raise ex
@@ -81,22 +86,20 @@ module Sidekiq
       # ignore, will be pushed back onto queue during hard_shutdown
       raise Sidekiq::Shutdown if exception_caused_by_shutdown?(e)
 
-      if msg['retry']
+      msg = Sidekiq.load_json(jobstr)
+      if msg["retry"]
         attempt_retry(nil, msg, queue, e)
       else
         Sidekiq.death_handlers.each do |handler|
-          begin
-            handler.call(msg, e)
-          rescue => handler_ex
-            handle_exception(handler_ex, { context: "Error calling death handler", job: msg })
-          end
+          handler.call(msg, e)
+        rescue => handler_ex
+          handle_exception(handler_ex, {context: "Error calling death handler", job: msg})
         end
       end
 
       raise Handled
     end
 
-
     # The local retry support means that any errors that occur within
     # this block can be associated with the given worker instance.
     # This is required to support the `sidekiq_retries_exhausted` block.
@@ -105,7 +108,7 @@ module Sidekiq
     # exception so the global block does not reprocess the error. The
     # Skip exception is unwrapped within Sidekiq::Processor#process before
     # calling the handle_exception handlers.
-    def local(worker, msg, queue)
+    def local(worker, jobstr, queue)
       yield
     rescue Handled => ex
       raise ex
@@ -116,11 +119,12 @@ module Sidekiq
       # ignore, will be pushed back onto queue during hard_shutdown
       raise Sidekiq::Shutdown if exception_caused_by_shutdown?(e)
 
-      if msg['retry'] == nil
-        msg['retry'] = worker.class.get_sidekiq_options['retry']
+      msg = Sidekiq.load_json(jobstr)
+      if msg["retry"].nil?
+        msg["retry"] = worker.class.get_sidekiq_options["retry"]
       end
 
-      raise e unless msg['retry']
+      raise e unless msg["retry"]
       attempt_retry(worker, msg, queue, e)
       # We've handled this error associated with this job, don't
       # need to handle it at the global level
@@ -133,13 +137,9 @@ module Sidekiq
     # instantiate the worker instance. All access must be guarded and
     # best effort.
     def attempt_retry(worker, msg, queue, exception)
-      max_retry_attempts = retry_attempts_from(msg['retry'], @max_retries)
+      max_retry_attempts = retry_attempts_from(msg["retry"], @max_retries)
 
-      msg['queue'] = if msg['retry_queue']
-        msg['retry_queue']
-      else
-        queue
-      end
+      msg["queue"] = (msg["retry_queue"] || queue)
 
       m = exception_message(exception)
       if m.respond_to?(:scrub!)
@@ -147,32 +147,34 @@ module Sidekiq
        m.scrub!
      end
 
-      msg['error_message'] = m
-      msg['error_class'] = exception.class.name
-      count = if msg['retry_count']
-        msg['retried_at'] = Time.now.to_f
-        msg['retry_count'] += 1
+      msg["error_message"] = m
+      msg["error_class"] = exception.class.name
+      count = if msg["retry_count"]
+        msg["retried_at"] = Time.now.to_f
+        msg["retry_count"] += 1
      else
-        msg['failed_at'] = Time.now.to_f
-        msg['retry_count'] = 0
+        msg["failed_at"] = Time.now.to_f
+        msg["retry_count"] = 0
      end
 
-      if msg['backtrace'] == true
-        msg['error_backtrace'] = exception.backtrace
-      elsif !msg['backtrace']
-        # do nothing
-      elsif msg['backtrace'].to_i != 0
-        msg['error_backtrace'] = exception.backtrace[0...msg['backtrace'].to_i]
+      if msg["backtrace"]
+        lines = if msg["backtrace"] == true
+          exception.backtrace
+        else
+          exception.backtrace[0...msg["backtrace"].to_i]
+        end
+
+        msg["error_backtrace"] = compress_backtrace(lines)
      end
 
      if count < max_retry_attempts
        delay = delay_for(worker, count, exception)
        # Logging here can break retries if the logging device raises ENOSPC #3979
-        #logger.debug { "Failure! Retry #{count} in #{delay} seconds" }
+        # logger.debug { "Failure! Retry #{count} in #{delay} seconds" }
        retry_at = Time.now.to_f + delay
        payload = Sidekiq.dump_json(msg)
        Sidekiq.redis do |conn|
-          conn.zadd('retry', retry_at.to_s, payload)
+          conn.zadd("retry", retry_at.to_s, payload)
        end
      else
        # Goodbye dear message, you (re)tried your best I'm sure.
@@ -182,25 +184,23 @@ module Sidekiq
 
     def retries_exhausted(worker, msg, exception)
       begin
-        block = worker && worker.sidekiq_retries_exhausted_block
-        block.call(msg, exception) if block
+        block = worker&.sidekiq_retries_exhausted_block
+        block&.call(msg, exception)
       rescue => e
-        handle_exception(e, { context: "Error calling retries_exhausted", job: msg })
+        handle_exception(e, {context: "Error calling retries_exhausted", job: msg})
       end
 
+      send_to_morgue(msg) unless msg["dead"] == false
+
       Sidekiq.death_handlers.each do |handler|
-        begin
-          handler.call(msg, exception)
-        rescue => e
-          handle_exception(e, { context: "Error calling death handler", job: msg })
-        end
+        handler.call(msg, exception)
+      rescue => e
+        handle_exception(e, {context: "Error calling death handler", job: msg})
       end
-
-      send_to_morgue(msg) unless msg['dead'] == false
     end
 
     def send_to_morgue(msg)
-      logger.info { "Adding dead #{msg['class']} job #{msg['jid']}" }
+      logger.info { "Adding dead #{msg["class"]} job #{msg["jid"]}" }
       payload = Sidekiq.dump_json(msg)
       DeadSet.new.kill(payload, notify_failure: false)
     end
@@ -214,25 +214,19 @@ module Sidekiq
     end
 
     def delay_for(worker, count, exception)
-      if worker && worker.sidekiq_retry_in_block
+      jitter = rand(10) * (count + 1)
+      if worker&.sidekiq_retry_in_block
         custom_retry_in = retry_in(worker, count, exception).to_i
-        return custom_retry_in if custom_retry_in > 0
+        return custom_retry_in + jitter if custom_retry_in > 0
       end
-      seconds_to_delay(count)
-    end
-
-    # delayed_job uses the same basic formula
-    def seconds_to_delay(count)
-      (count ** 4) + 15 + (rand(30)*(count+1))
+      (count**4) + 15 + jitter
     end
 
     def retry_in(worker, count, exception)
-      begin
-        worker.sidekiq_retry_in_block.call(count, exception)
-      rescue Exception => e
-        handle_exception(e, { context: "Failure scheduling retry using the defined `sidekiq_retry_in` in #{worker.class.name}, falling back to default" })
-        nil
-      end
+      worker.sidekiq_retry_in_block.call(count, exception)
+    rescue Exception => e
+      handle_exception(e, {context: "Failure scheduling retry using the defined `sidekiq_retry_in` in #{worker.class.name}, falling back to default"})
+      nil
     end
 
     def exception_caused_by_shutdown?(e, checked_causes = [])
@@ -249,14 +243,17 @@ module Sidekiq
     # Extract message from exception.
     # Set a default if the message raises an error
     def exception_message(exception)
-      begin
-        # App code can stuff all sorts of crazy binary data into the error message
-        # that won't convert to JSON.
-        exception.message.to_s[0, 10_000]
-      rescue
-        "!!! ERROR MESSAGE THREW AN ERROR !!!".dup
-      end
+      # App code can stuff all sorts of crazy binary data into the error message
+      # that won't convert to JSON.
+      exception.message.to_s[0, 10_000]
+    rescue
+      +"!!! ERROR MESSAGE THREW AN ERROR !!!"
     end
 
+    def compress_backtrace(backtrace)
+      serialized = Sidekiq.dump_json(backtrace)
+      compressed = Zlib::Deflate.deflate(serialized)
+      Base64.encode64(compressed)
+    end
   end
 end
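
Two practical consequences of the changes above: the retry delay is now (count ** 4) + 15 + rand(10) * (count + 1) seconds (roughly 15 s, 16 s, 31 s, 96 s, ... plus jitter), and error_backtrace is stored deflated and Base64-encoded rather than as a raw array. A hedged sketch of that encoding round-trip, assuming Sidekiq.dump_json/load_json are plain JSON:

require "zlib"
require "base64"
require "json"

backtrace = [
  "app/workers/hard_job.rb:12:in `perform'",
  "lib/runner.rb:3:in `call'"
]

# Same scheme as compress_backtrace: JSON -> deflate -> Base64.
encoded = Base64.encode64(Zlib::Deflate.deflate(JSON.generate(backtrace)))

# Reading an error_backtrace back out of a retry/dead record:
decoded = JSON.parse(Zlib::Inflate.inflate(Base64.decode64(encoded)))
raise "round-trip mismatch" unless decoded == backtrace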

data/lib/sidekiq/launcher.rb CHANGED
@@ -1,21 +1,28 @@
 # frozen_string_literal: true
-require 'sidekiq/manager'
-require 'sidekiq/fetch'
-require 'sidekiq/scheduled'
+
+require "sidekiq/manager"
+require "sidekiq/fetch"
+require "sidekiq/scheduled"
 
 module Sidekiq
-  # The Launcher is a very simple Actor whose job is to
-  # start, monitor and stop the core Actors in Sidekiq.
-  # If any of these actors die, the Sidekiq process exits
-  # immediately.
+  # The Launcher starts the Manager and Poller threads and provides the process heartbeat.
   class Launcher
     include Util
 
-    attr_accessor :manager, :poller, :fetcher
+    STATS_TTL = 5 * 365 * 24 * 60 * 60 # 5 years
 
-    STATS_TTL = 5*365*24*60*60
+    PROCTITLES = [
+      proc { "sidekiq" },
+      proc { Sidekiq::VERSION },
+      proc { |me, data| data["tag"] },
+      proc { |me, data| "[#{Processor::WORKER_STATE.size} of #{data["concurrency"]} busy]" },
+      proc { |me, data| "stopping" if me.stopping? }
+    ]
+
+    attr_accessor :manager, :poller, :fetcher
 
     def initialize(options)
+      options[:fetch] ||= BasicFetch.new(options)
       @manager = Sidekiq::Manager.new(options)
       @poller = Sidekiq::Scheduled::Poller.new
       @done = false
@@ -50,7 +57,7 @@ module Sidekiq
 
       # Requeue everything in case there was a worker who grabbed work while stopped
       # This call is a no-op in Sidekiq but necessary for Sidekiq Pro.
-      strategy = (@options[:fetch] || Sidekiq::BasicFetch)
+      strategy = @options[:fetch]
       strategy.bulk_requeue([], @options)
 
       clear_heartbeat
@@ -62,17 +69,64 @@ module Sidekiq
 
     private unless $TESTING
 
+    def start_heartbeat
+      loop do
+        heartbeat
+        sleep 5
+      end
+      Sidekiq.logger.info("Heartbeat stopping...")
+    end
+
+    def clear_heartbeat
+      # Remove record from Redis since we are shutting down.
+      # Note we don't stop the heartbeat thread; if the process
+      # doesn't actually exit, it'll reappear in the Web UI.
+      Sidekiq.redis do |conn|
+        conn.pipelined do
+          conn.srem("processes", identity)
+          conn.unlink("#{identity}:workers")
+        end
+      end
+    rescue
+      # best effort, ignore network errors
+    end
+
     def heartbeat
-      results = Sidekiq::CLI::PROCTITLES.map {|x| x.(self, to_data) }
-      results.compact!
-      $0 = results.join(' ')
+      $0 = PROCTITLES.map { |proc| proc.call(self, to_data) }.compact.join(" ")
 
       ❤
     end
 
+    def self.flush_stats
+      fails = Processor::FAILURE.reset
+      procd = Processor::PROCESSED.reset
+      return if fails + procd == 0
+
+      nowdate = Time.now.utc.strftime("%Y-%m-%d")
+      begin
+        Sidekiq.redis do |conn|
+          conn.pipelined do
+            conn.incrby("stat:processed", procd)
+            conn.incrby("stat:processed:#{nowdate}", procd)
+            conn.expire("stat:processed:#{nowdate}", STATS_TTL)
+
+            conn.incrby("stat:failed", fails)
+            conn.incrby("stat:failed:#{nowdate}", fails)
+            conn.expire("stat:failed:#{nowdate}", STATS_TTL)
+          end
+        end
+      rescue => ex
+        # we're exiting the process, things might be shut down so don't
+        # try to handle the exception
+        Sidekiq.logger.warn("Unable to flush stats: #{ex}")
+      end
+    end
+    at_exit(&method(:flush_stats))
+
     def ❤
       key = identity
       fails = procd = 0
+
       begin
         fails = Processor::FAILURE.reset
         procd = Processor::PROCESSED.reset
@@ -80,6 +134,7 @@ module Sidekiq
 
         workers_key = "#{key}:workers"
         nowdate = Time.now.utc.strftime("%Y-%m-%d")
+
         Sidekiq.redis do |conn|
           conn.multi do
             conn.incrby("stat:processed", procd)
@@ -90,84 +145,115 @@ module Sidekiq
             conn.incrby("stat:failed:#{nowdate}", fails)
             conn.expire("stat:failed:#{nowdate}", STATS_TTL)
 
-            conn.del(workers_key)
+            conn.unlink(workers_key)
             curstate.each_pair do |tid, hash|
               conn.hset(workers_key, tid, Sidekiq.dump_json(hash))
             end
             conn.expire(workers_key, 60)
           end
         end
+
+        rtt = check_rtt
+
         fails = procd = 0
+        kb = memory_usage(::Process.pid)
 
-        _, exists, _, _, msg = Sidekiq.redis do |conn|
-          conn.multi do
-            conn.sadd('processes', key)
-            conn.exists(key)
-            conn.hmset(key, 'info', to_json, 'busy', curstate.size, 'beat', Time.now.to_f, 'quiet', @done)
+        _, exists, _, _, msg = Sidekiq.redis { |conn|
+          conn.multi {
+            conn.sadd("processes", key)
+            conn.exists?(key)
+            conn.hmset(key, "info", to_json,
+              "busy", curstate.size,
+              "beat", Time.now.to_f,
+              "rtt_us", rtt,
+              "quiet", @done,
+              "rss", kb)
             conn.expire(key, 60)
             conn.rpop("#{key}-signals")
-          end
-        end
+          }
+        }
 
         # first heartbeat or recovering from an outage and need to reestablish our heartbeat
-        fire_event(:heartbeat) if !exists
+        fire_event(:heartbeat) unless exists
 
         return unless msg
 
-        ::Process.kill(msg, $$)
+        ::Process.kill(msg, ::Process.pid)
       rescue => e
         # ignore all redis/network issues
-        logger.error("heartbeat: #{e.message}")
+        logger.error("heartbeat: #{e}")
        # don't lose the counts if there was a network issue
        Processor::PROCESSED.incr(procd)
        Processor::FAILURE.incr(fails)
      end
    end
 
-    def start_heartbeat
-      while true
-        heartbeat
-        sleep 5
+    # We run the heartbeat every five seconds.
+    # Capture five samples of RTT, log a warning if each sample
+    # is above our warning threshold.
+    RTT_READINGS = RingBuffer.new(5)
+    RTT_WARNING_LEVEL = 50_000
+
+    def check_rtt
+      a = b = 0
+      Sidekiq.redis do |x|
+        a = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :microsecond)
+        x.ping
+        b = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :microsecond)
      end
-      Sidekiq.logger.info("Heartbeat stopping...")
+      rtt = b - a
+      RTT_READINGS << rtt
+      # Ideal RTT for Redis is < 1000µs
+      # Workable is < 10,000µs
+      # Log a warning if it's a disaster.
+      if RTT_READINGS.all? { |x| x > RTT_WARNING_LEVEL }
+        Sidekiq.logger.warn <<~EOM
+          Your Redis network connection is performing extremely poorly.
+          Last RTT readings were #{RTT_READINGS.buffer.inspect}, ideally these should be < 1000.
+          Ensure Redis is running in the same AZ or datacenter as Sidekiq.
+        EOM
+        RTT_READINGS.reset
+      end
+      rtt
    end
 
-    def to_data
-      @data ||= begin
-        {
-          'hostname' => hostname,
-          'started_at' => Time.now.to_f,
-          'pid' => $$,
-          'tag' => @options[:tag] || '',
-          'concurrency' => @options[:concurrency],
-          'queues' => @options[:queues].uniq,
-          'labels' => @options[:labels],
-          'identity' => identity,
-        }
-      end
+    MEMORY_GRABBER = case RUBY_PLATFORM
+    when /linux/
+      ->(pid) {
+        IO.readlines("/proc/#{$$}/status").each do |line|
+          next unless line.start_with?("VmRSS:")
+          break line.split[1].to_i
+        end
+      }
+    when /darwin|bsd/
+      ->(pid) {
+        `ps -o pid,rss -p #{pid}`.lines.last.split.last.to_i
+      }
+    else
+      ->(pid) { 0 }
    end
 
-    def to_json
-      @json ||= begin
-        # this data changes infrequently so dump it to a string
-        # now so we don't need to dump it every heartbeat.
-        Sidekiq.dump_json(to_data)
-      end
+    def memory_usage(pid)
+      MEMORY_GRABBER.call(pid)
    end
 
-    def clear_heartbeat
-      # Remove record from Redis since we are shutting down.
-      # Note we don't stop the heartbeat thread; if the process
-      # doesn't actually exit, it'll reappear in the Web UI.
-      Sidekiq.redis do |conn|
-        conn.pipelined do
-          conn.srem('processes', identity)
-          conn.del("#{identity}:workers")
-        end
-      end
-    rescue
-      # best effort, ignore network errors
+    def to_data
+      @data ||= {
+        "hostname" => hostname,
+        "started_at" => Time.now.to_f,
+        "pid" => ::Process.pid,
+        "tag" => @options[:tag] || "",
+        "concurrency" => @options[:concurrency],
+        "queues" => @options[:queues].uniq,
+        "labels" => @options[:labels],
+        "identity" => identity
+      }
    end
 
+    def to_json
+      # this data changes infrequently so dump it to a string
+      # now so we don't need to dump it every heartbeat.
+      @json ||= Sidekiq.dump_json(to_data)
+    end
   end
 end
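
The heartbeat now also records Redis round-trip time (rtt_us) and process RSS for the Web UI. A standalone sketch of the same RTT measurement, with the helper name and the five-sample loop invented for illustration; only the 50,000 µs threshold comes from the diff:

require "sidekiq"

def redis_rtt_us
  a = b = 0
  Sidekiq.redis do |conn|
    a = Process.clock_gettime(Process::CLOCK_MONOTONIC, :microsecond)
    conn.ping
    b = Process.clock_gettime(Process::CLOCK_MONOTONIC, :microsecond)
  end
  b - a
end

samples = Array.new(5) { redis_rtt_us }
if samples.all? { |rtt| rtt > 50_000 }
  warn "Redis RTT consistently above 50ms: #{samples.inspect}"
end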