sidekiq 5.2.9 → 6.4.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of sidekiq might be problematic. Click here for more details.

Files changed (106)
  1. checksums.yaml +4 -4
  2. data/Changes.md +318 -1
  3. data/LICENSE +3 -3
  4. data/README.md +23 -34
  5. data/bin/sidekiq +27 -3
  6. data/bin/sidekiqload +67 -61
  7. data/bin/sidekiqmon +8 -0
  8. data/lib/generators/sidekiq/job_generator.rb +57 -0
  9. data/lib/generators/sidekiq/templates/{worker.rb.erb → job.rb.erb} +2 -2
  10. data/lib/generators/sidekiq/templates/{worker_spec.rb.erb → job_spec.rb.erb} +1 -1
  11. data/lib/generators/sidekiq/templates/{worker_test.rb.erb → job_test.rb.erb} +1 -1
  12. data/lib/sidekiq/api.rb +335 -267
  13. data/lib/sidekiq/cli.rb +164 -182
  14. data/lib/sidekiq/client.rb +58 -61
  15. data/lib/sidekiq/delay.rb +7 -6
  16. data/lib/sidekiq/exception_handler.rb +10 -12
  17. data/lib/sidekiq/extensions/action_mailer.rb +13 -22
  18. data/lib/sidekiq/extensions/active_record.rb +13 -10
  19. data/lib/sidekiq/extensions/class_methods.rb +14 -11
  20. data/lib/sidekiq/extensions/generic_proxy.rb +6 -4
  21. data/lib/sidekiq/fetch.rb +40 -32
  22. data/lib/sidekiq/job.rb +13 -0
  23. data/lib/sidekiq/job_logger.rb +33 -7
  24. data/lib/sidekiq/job_retry.rb +70 -71
  25. data/lib/sidekiq/job_util.rb +65 -0
  26. data/lib/sidekiq/launcher.rb +161 -71
  27. data/lib/sidekiq/logger.rb +170 -0
  28. data/lib/sidekiq/manager.rb +17 -21
  29. data/lib/sidekiq/middleware/chain.rb +20 -8
  30. data/lib/sidekiq/middleware/current_attributes.rb +57 -0
  31. data/lib/sidekiq/middleware/i18n.rb +5 -7
  32. data/lib/sidekiq/monitor.rb +133 -0
  33. data/lib/sidekiq/paginator.rb +20 -16
  34. data/lib/sidekiq/processor.rb +71 -70
  35. data/lib/sidekiq/rails.rb +40 -37
  36. data/lib/sidekiq/redis_connection.rb +48 -48
  37. data/lib/sidekiq/scheduled.rb +62 -28
  38. data/lib/sidekiq/sd_notify.rb +149 -0
  39. data/lib/sidekiq/systemd.rb +24 -0
  40. data/lib/sidekiq/testing/inline.rb +2 -1
  41. data/lib/sidekiq/testing.rb +36 -27
  42. data/lib/sidekiq/util.rb +57 -15
  43. data/lib/sidekiq/version.rb +2 -1
  44. data/lib/sidekiq/web/action.rb +15 -11
  45. data/lib/sidekiq/web/application.rb +88 -75
  46. data/lib/sidekiq/web/csrf_protection.rb +180 -0
  47. data/lib/sidekiq/web/helpers.rb +109 -92
  48. data/lib/sidekiq/web/router.rb +23 -19
  49. data/lib/sidekiq/web.rb +61 -105
  50. data/lib/sidekiq/worker.rb +247 -105
  51. data/lib/sidekiq.rb +77 -44
  52. data/sidekiq.gemspec +23 -16
  53. data/web/assets/images/apple-touch-icon.png +0 -0
  54. data/web/assets/javascripts/application.js +83 -64
  55. data/web/assets/javascripts/dashboard.js +54 -73
  56. data/web/assets/stylesheets/application-dark.css +143 -0
  57. data/web/assets/stylesheets/application-rtl.css +0 -4
  58. data/web/assets/stylesheets/application.css +45 -232
  59. data/web/locales/ar.yml +8 -2
  60. data/web/locales/de.yml +14 -2
  61. data/web/locales/en.yml +6 -1
  62. data/web/locales/es.yml +18 -2
  63. data/web/locales/fr.yml +10 -3
  64. data/web/locales/ja.yml +7 -1
  65. data/web/locales/lt.yml +83 -0
  66. data/web/locales/pl.yml +4 -4
  67. data/web/locales/ru.yml +4 -0
  68. data/web/locales/vi.yml +83 -0
  69. data/web/views/_footer.erb +1 -1
  70. data/web/views/_job_info.erb +3 -2
  71. data/web/views/_poll_link.erb +2 -5
  72. data/web/views/_summary.erb +7 -7
  73. data/web/views/busy.erb +54 -20
  74. data/web/views/dashboard.erb +22 -14
  75. data/web/views/dead.erb +3 -3
  76. data/web/views/layout.erb +3 -1
  77. data/web/views/morgue.erb +9 -6
  78. data/web/views/queue.erb +19 -10
  79. data/web/views/queues.erb +10 -2
  80. data/web/views/retries.erb +11 -8
  81. data/web/views/retry.erb +3 -3
  82. data/web/views/scheduled.erb +5 -2
  83. metadata +34 -64
  84. data/.circleci/config.yml +0 -61
  85. data/.github/contributing.md +0 -32
  86. data/.github/issue_template.md +0 -11
  87. data/.gitignore +0 -15
  88. data/.travis.yml +0 -11
  89. data/3.0-Upgrade.md +0 -70
  90. data/4.0-Upgrade.md +0 -53
  91. data/5.0-Upgrade.md +0 -56
  92. data/COMM-LICENSE +0 -97
  93. data/Ent-Changes.md +0 -238
  94. data/Gemfile +0 -23
  95. data/Pro-2.0-Upgrade.md +0 -138
  96. data/Pro-3.0-Upgrade.md +0 -44
  97. data/Pro-4.0-Upgrade.md +0 -35
  98. data/Pro-Changes.md +0 -759
  99. data/Rakefile +0 -9
  100. data/bin/sidekiqctl +0 -20
  101. data/code_of_conduct.md +0 -50
  102. data/lib/generators/sidekiq/worker_generator.rb +0 -49
  103. data/lib/sidekiq/core_ext.rb +0 -1
  104. data/lib/sidekiq/ctl.rb +0 -221
  105. data/lib/sidekiq/logging.rb +0 -122
  106. data/lib/sidekiq/middleware/server/active_record.rb +0 -23
@@ -1,6 +1,10 @@
1
1
  # frozen_string_literal: true
2
- require 'sidekiq/scheduled'
3
- require 'sidekiq/api'
2
+
3
+ require "sidekiq/scheduled"
4
+ require "sidekiq/api"
5
+
6
+ require "zlib"
7
+ require "base64"
4
8
 
5
9
  module Sidekiq
6
10
  ##
@@ -30,9 +34,10 @@ module Sidekiq
30
34
  # The job will be retried this number of times before giving up. (If simply
31
35
  # 'true', Sidekiq retries 25 times)
32
36
  #
33
- # We'll add a bit more data to the job to support retries:
37
+ # Relevant options for job retries:
34
38
  #
35
- # * 'queue' - the queue to use
39
+ # * 'queue' - the queue for the initial job
40
+ # * 'retry_queue' - if job retries should be pushed to a different (e.g. lower priority) queue
36
41
  # * 'retry_count' - number of times we've retried so far.
37
42
  # * 'error_message' - the message from the exception
38
43
  # * 'error_class' - the exception class
@@ -48,15 +53,17 @@ module Sidekiq
48
53
  #
49
54
  # Sidekiq.options[:max_retries] = 7
50
55
  #
51
- # or limit the number of retries for a particular worker with:
56
+ # or limit the number of retries for a particular worker and send retries to
57
+ # a low priority queue with:
52
58
  #
53
59
  # class MyWorker
54
60
  # include Sidekiq::Worker
55
- # sidekiq_options :retry => 10
61
+ # sidekiq_options retry: 10, retry_queue: 'low'
56
62
  # end
57
63
  #
58
64
  class JobRetry
59
65
  class Handled < ::RuntimeError; end
66
+
60
67
  class Skip < Handled; end
61
68
 
62
69
  include Sidekiq::Util
@@ -70,7 +77,7 @@ module Sidekiq
70
77
  # The global retry handler requires only the barest of data.
71
78
  # We want to be able to retry as much as possible so we don't
72
79
  # require the worker to be instantiated.
73
- def global(msg, queue)
80
+ def global(jobstr, queue)
74
81
  yield
75
82
  rescue Handled => ex
76
83
  raise ex
@@ -81,22 +88,20 @@ module Sidekiq
81
88
  # ignore, will be pushed back onto queue during hard_shutdown
82
89
  raise Sidekiq::Shutdown if exception_caused_by_shutdown?(e)
83
90
 
84
- if msg['retry']
91
+ msg = Sidekiq.load_json(jobstr)
92
+ if msg["retry"]
85
93
  attempt_retry(nil, msg, queue, e)
86
94
  else
87
95
  Sidekiq.death_handlers.each do |handler|
88
- begin
89
- handler.call(msg, e)
90
- rescue => handler_ex
91
- handle_exception(handler_ex, { context: "Error calling death handler", job: msg })
92
- end
96
+ handler.call(msg, e)
97
+ rescue => handler_ex
98
+ handle_exception(handler_ex, {context: "Error calling death handler", job: msg})
93
99
  end
94
100
  end
95
101
 
96
102
  raise Handled
97
103
  end
98
104
 
99
-
100
105
  # The local retry support means that any errors that occur within
101
106
  # this block can be associated with the given worker instance.
102
107
  # This is required to support the `sidekiq_retries_exhausted` block.
@@ -105,7 +110,7 @@ module Sidekiq
105
110
  # exception so the global block does not reprocess the error. The
106
111
  # Skip exception is unwrapped within Sidekiq::Processor#process before
107
112
  # calling the handle_exception handlers.
108
- def local(worker, msg, queue)
113
+ def local(worker, jobstr, queue)
109
114
  yield
110
115
  rescue Handled => ex
111
116
  raise ex
@@ -116,11 +121,12 @@ module Sidekiq
116
121
  # ignore, will be pushed back onto queue during hard_shutdown
117
122
  raise Sidekiq::Shutdown if exception_caused_by_shutdown?(e)
118
123
 
119
- if msg['retry'] == nil
120
- msg['retry'] = worker.class.get_sidekiq_options['retry']
124
+ msg = Sidekiq.load_json(jobstr)
125
+ if msg["retry"].nil?
126
+ msg["retry"] = worker.class.get_sidekiq_options["retry"]
121
127
  end
122
128
 
123
- raise e unless msg['retry']
129
+ raise e unless msg["retry"]
124
130
  attempt_retry(worker, msg, queue, e)
125
131
  # We've handled this error associated with this job, don't
126
132
  # need to handle it at the global level
@@ -133,13 +139,9 @@ module Sidekiq
133
139
  # instantiate the worker instance. All access must be guarded and
134
140
  # best effort.
135
141
  def attempt_retry(worker, msg, queue, exception)
136
- max_retry_attempts = retry_attempts_from(msg['retry'], @max_retries)
142
+ max_retry_attempts = retry_attempts_from(msg["retry"], @max_retries)
137
143
 
138
- msg['queue'] = if msg['retry_queue']
139
- msg['retry_queue']
140
- else
141
- queue
142
- end
144
+ msg["queue"] = (msg["retry_queue"] || queue)
143
145
 
144
146
  m = exception_message(exception)
145
147
  if m.respond_to?(:scrub!)
@@ -147,32 +149,34 @@ module Sidekiq
147
149
  m.scrub!
148
150
  end
149
151
 
150
- msg['error_message'] = m
151
- msg['error_class'] = exception.class.name
152
- count = if msg['retry_count']
153
- msg['retried_at'] = Time.now.to_f
154
- msg['retry_count'] += 1
152
+ msg["error_message"] = m
153
+ msg["error_class"] = exception.class.name
154
+ count = if msg["retry_count"]
155
+ msg["retried_at"] = Time.now.to_f
156
+ msg["retry_count"] += 1
155
157
  else
156
- msg['failed_at'] = Time.now.to_f
157
- msg['retry_count'] = 0
158
+ msg["failed_at"] = Time.now.to_f
159
+ msg["retry_count"] = 0
158
160
  end
159
161
 
160
- if msg['backtrace'] == true
161
- msg['error_backtrace'] = exception.backtrace
162
- elsif !msg['backtrace']
163
- # do nothing
164
- elsif msg['backtrace'].to_i != 0
165
- msg['error_backtrace'] = exception.backtrace[0...msg['backtrace'].to_i]
162
+ if msg["backtrace"]
163
+ lines = if msg["backtrace"] == true
164
+ exception.backtrace
165
+ else
166
+ exception.backtrace[0...msg["backtrace"].to_i]
167
+ end
168
+
169
+ msg["error_backtrace"] = compress_backtrace(lines)
166
170
  end
167
171
 
168
172
  if count < max_retry_attempts
169
173
  delay = delay_for(worker, count, exception)
170
174
  # Logging here can break retries if the logging device raises ENOSPC #3979
171
- #logger.debug { "Failure! Retry #{count} in #{delay} seconds" }
175
+ # logger.debug { "Failure! Retry #{count} in #{delay} seconds" }
172
176
  retry_at = Time.now.to_f + delay
173
177
  payload = Sidekiq.dump_json(msg)
174
178
  Sidekiq.redis do |conn|
175
- conn.zadd('retry', retry_at.to_s, payload)
179
+ conn.zadd("retry", retry_at.to_s, payload)
176
180
  end
177
181
  else
178
182
  # Goodbye dear message, you (re)tried your best I'm sure.
@@ -182,25 +186,23 @@ module Sidekiq
182
186
 
183
187
  def retries_exhausted(worker, msg, exception)
184
188
  begin
185
- block = worker && worker.sidekiq_retries_exhausted_block
186
- block.call(msg, exception) if block
189
+ block = worker&.sidekiq_retries_exhausted_block
190
+ block&.call(msg, exception)
187
191
  rescue => e
188
- handle_exception(e, { context: "Error calling retries_exhausted", job: msg })
192
+ handle_exception(e, {context: "Error calling retries_exhausted", job: msg})
189
193
  end
190
194
 
195
+ send_to_morgue(msg) unless msg["dead"] == false
196
+
191
197
  Sidekiq.death_handlers.each do |handler|
192
- begin
193
- handler.call(msg, exception)
194
- rescue => e
195
- handle_exception(e, { context: "Error calling death handler", job: msg })
196
- end
198
+ handler.call(msg, exception)
199
+ rescue => e
200
+ handle_exception(e, {context: "Error calling death handler", job: msg})
197
201
  end
198
-
199
- send_to_morgue(msg) unless msg['dead'] == false
200
202
  end
201
203
 
202
204
  def send_to_morgue(msg)
203
- logger.info { "Adding dead #{msg['class']} job #{msg['jid']}" }
205
+ logger.info { "Adding dead #{msg["class"]} job #{msg["jid"]}" }
204
206
  payload = Sidekiq.dump_json(msg)
205
207
  DeadSet.new.kill(payload, notify_failure: false)
206
208
  end
@@ -214,25 +216,19 @@ module Sidekiq
214
216
  end
215
217
 
216
218
  def delay_for(worker, count, exception)
217
- if worker && worker.sidekiq_retry_in_block
219
+ jitter = rand(10) * (count + 1)
220
+ if worker&.sidekiq_retry_in_block
218
221
  custom_retry_in = retry_in(worker, count, exception).to_i
219
- return custom_retry_in if custom_retry_in > 0
222
+ return custom_retry_in + jitter if custom_retry_in > 0
220
223
  end
221
- seconds_to_delay(count)
222
- end
223
-
224
- # delayed_job uses the same basic formula
225
- def seconds_to_delay(count)
226
- (count ** 4) + 15 + (rand(30)*(count+1))
224
+ (count**4) + 15 + jitter
227
225
  end
228
226
 
229
227
  def retry_in(worker, count, exception)
230
- begin
231
- worker.sidekiq_retry_in_block.call(count, exception)
232
- rescue Exception => e
233
- handle_exception(e, { context: "Failure scheduling retry using the defined `sidekiq_retry_in` in #{worker.class.name}, falling back to default" })
234
- nil
235
- end
228
+ worker.sidekiq_retry_in_block.call(count, exception)
229
+ rescue Exception => e
230
+ handle_exception(e, {context: "Failure scheduling retry using the defined `sidekiq_retry_in` in #{worker.class.name}, falling back to default"})
231
+ nil
236
232
  end
237
233
 
238
234
  def exception_caused_by_shutdown?(e, checked_causes = [])
@@ -249,14 +245,17 @@ module Sidekiq
249
245
  # Extract message from exception.
250
246
  # Set a default if the message raises an error
251
247
  def exception_message(exception)
252
- begin
253
- # App code can stuff all sorts of crazy binary data into the error message
254
- # that won't convert to JSON.
255
- exception.message.to_s[0, 10_000]
256
- rescue
257
- "!!! ERROR MESSAGE THREW AN ERROR !!!".dup
258
- end
248
+ # App code can stuff all sorts of crazy binary data into the error message
249
+ # that won't convert to JSON.
250
+ exception.message.to_s[0, 10_000]
251
+ rescue
252
+ +"!!! ERROR MESSAGE THREW AN ERROR !!!"
259
253
  end
260
254
 
255
+ def compress_backtrace(backtrace)
256
+ serialized = Sidekiq.dump_json(backtrace)
257
+ compressed = Zlib::Deflate.deflate(serialized)
258
+ Base64.encode64(compressed)
259
+ end
261
260
  end
262
261
  end
@@ -0,0 +1,65 @@
1
+ require "securerandom"
2
+ require "time"
3
+
4
+ module Sidekiq
5
+ module JobUtil
6
+ # These functions encapsulate various job utilities.
7
+ # They must be simple and free from side effects.
8
+
9
+ def validate(item)
10
+ raise(ArgumentError, "Job must be a Hash with 'class' and 'args' keys: `#{item}`") unless item.is_a?(Hash) && item.key?("class") && item.key?("args")
11
+ raise(ArgumentError, "Job args must be an Array: `#{item}`") unless item["args"].is_a?(Array)
12
+ raise(ArgumentError, "Job class must be either a Class or String representation of the class name: `#{item}`") unless item["class"].is_a?(Class) || item["class"].is_a?(String)
13
+ raise(ArgumentError, "Job 'at' must be a Numeric timestamp: `#{item}`") if item.key?("at") && !item["at"].is_a?(Numeric)
14
+ raise(ArgumentError, "Job tags must be an Array: `#{item}`") if item["tags"] && !item["tags"].is_a?(Array)
15
+
16
+ if Sidekiq.options[:on_complex_arguments] == :raise
17
+ msg = <<~EOM
18
+ Job arguments to #{item["class"]} must be native JSON types, see https://github.com/mperham/sidekiq/wiki/Best-Practices.
19
+ To disable this error, remove `Sidekiq.strict_args!` from your initializer.
20
+ EOM
21
+ raise(ArgumentError, msg) unless json_safe?(item)
22
+ elsif Sidekiq.options[:on_complex_arguments] == :warn
23
+ Sidekiq.logger.warn <<~EOM unless json_safe?(item)
24
+ Job arguments to #{item["class"]} do not serialize to JSON safely. This will raise an error in
25
+ Sidekiq 7.0. See https://github.com/mperham/sidekiq/wiki/Best-Practices or raise an error today
26
+ by calling `Sidekiq.strict_args!` during Sidekiq initialization.
27
+ EOM
28
+ end
29
+ end
30
+
31
+ def normalize_item(item)
32
+ validate(item)
33
+
34
+ # merge in the default sidekiq_options for the item's class and/or wrapped element
35
+ # this allows ActiveJobs to control sidekiq_options too.
36
+ defaults = normalized_hash(item["class"])
37
+ defaults = defaults.merge(item["wrapped"].get_sidekiq_options) if item["wrapped"].respond_to?(:get_sidekiq_options)
38
+ item = defaults.merge(item)
39
+
40
+ raise(ArgumentError, "Job must include a valid queue name") if item["queue"].nil? || item["queue"] == ""
41
+
42
+ item["class"] = item["class"].to_s
43
+ item["queue"] = item["queue"].to_s
44
+ item["jid"] ||= SecureRandom.hex(12)
45
+ item["created_at"] ||= Time.now.to_f
46
+
47
+ item
48
+ end
49
+
50
+ def normalized_hash(item_class)
51
+ if item_class.is_a?(Class)
52
+ raise(ArgumentError, "Message must include a Sidekiq::Worker class, not class name: #{item_class.ancestors.inspect}") unless item_class.respond_to?(:get_sidekiq_options)
53
+ item_class.get_sidekiq_options
54
+ else
55
+ Sidekiq.default_worker_options
56
+ end
57
+ end
58
+
59
+ private
60
+
61
+ def json_safe?(item)
62
+ JSON.parse(JSON.dump(item["args"])) == item["args"]
63
+ end
64
+ end
65
+ end
@@ -1,21 +1,28 @@
1
1
  # frozen_string_literal: true
2
- require 'sidekiq/manager'
3
- require 'sidekiq/fetch'
4
- require 'sidekiq/scheduled'
2
+
3
+ require "sidekiq/manager"
4
+ require "sidekiq/fetch"
5
+ require "sidekiq/scheduled"
5
6
 
6
7
  module Sidekiq
7
- # The Launcher is a very simple Actor whose job is to
8
- # start, monitor and stop the core Actors in Sidekiq.
9
- # If any of these actors die, the Sidekiq process exits
10
- # immediately.
8
+ # The Launcher starts the Manager and Poller threads and provides the process heartbeat.
11
9
  class Launcher
12
10
  include Util
13
11
 
14
- attr_accessor :manager, :poller, :fetcher
12
+ STATS_TTL = 5 * 365 * 24 * 60 * 60 # 5 years
15
13
 
16
- STATS_TTL = 5*365*24*60*60
14
+ PROCTITLES = [
15
+ proc { "sidekiq" },
16
+ proc { Sidekiq::VERSION },
17
+ proc { |me, data| data["tag"] },
18
+ proc { |me, data| "[#{Processor::WORKER_STATE.size} of #{data["concurrency"]} busy]" },
19
+ proc { |me, data| "stopping" if me.stopping? }
20
+ ]
21
+
22
+ attr_accessor :manager, :poller, :fetcher
17
23
 
18
24
  def initialize(options)
25
+ options[:fetch] ||= BasicFetch.new(options)
19
26
  @manager = Sidekiq::Manager.new(options)
20
27
  @poller = Sidekiq::Scheduled::Poller.new
21
28
  @done = false
@@ -50,7 +57,7 @@ module Sidekiq
50
57
 
51
58
  # Requeue everything in case there was a worker who grabbed work while stopped
52
59
  # This call is a no-op in Sidekiq but necessary for Sidekiq Pro.
53
- strategy = (@options[:fetch] || Sidekiq::BasicFetch)
60
+ strategy = @options[:fetch]
54
61
  strategy.bulk_requeue([], @options)
55
62
 
56
63
  clear_heartbeat
@@ -62,17 +69,66 @@ module Sidekiq
62
69
 
63
70
  private unless $TESTING
64
71
 
72
+ BEAT_PAUSE = 5
73
+
74
+ def start_heartbeat
75
+ loop do
76
+ heartbeat
77
+ sleep BEAT_PAUSE
78
+ end
79
+ Sidekiq.logger.info("Heartbeat stopping...")
80
+ end
81
+
82
+ def clear_heartbeat
83
+ # Remove record from Redis since we are shutting down.
84
+ # Note we don't stop the heartbeat thread; if the process
85
+ # doesn't actually exit, it'll reappear in the Web UI.
86
+ Sidekiq.redis do |conn|
87
+ conn.pipelined do |pipeline|
88
+ pipeline.srem("processes", identity)
89
+ pipeline.unlink("#{identity}:workers")
90
+ end
91
+ end
92
+ rescue
93
+ # best effort, ignore network errors
94
+ end
95
+
65
96
  def heartbeat
66
- results = Sidekiq::CLI::PROCTITLES.map {|x| x.(self, to_data) }
67
- results.compact!
68
- $0 = results.join(' ')
97
+ $0 = PROCTITLES.map { |proc| proc.call(self, to_data) }.compact.join(" ")
69
98
 
70
99
 
71
100
  end
72
101
 
102
+ def self.flush_stats
103
+ fails = Processor::FAILURE.reset
104
+ procd = Processor::PROCESSED.reset
105
+ return if fails + procd == 0
106
+
107
+ nowdate = Time.now.utc.strftime("%Y-%m-%d")
108
+ begin
109
+ Sidekiq.redis do |conn|
110
+ conn.pipelined do |pipeline|
111
+ pipeline.incrby("stat:processed", procd)
112
+ pipeline.incrby("stat:processed:#{nowdate}", procd)
113
+ pipeline.expire("stat:processed:#{nowdate}", STATS_TTL)
114
+
115
+ pipeline.incrby("stat:failed", fails)
116
+ pipeline.incrby("stat:failed:#{nowdate}", fails)
117
+ pipeline.expire("stat:failed:#{nowdate}", STATS_TTL)
118
+ end
119
+ end
120
+ rescue => ex
121
+ # we're exiting the process, things might be shut down so don't
122
+ # try to handle the exception
123
+ Sidekiq.logger.warn("Unable to flush stats: #{ex}")
124
+ end
125
+ end
126
+ at_exit(&method(:flush_stats))
127
+
73
128
  def ❤
74
129
  key = identity
75
130
  fails = procd = 0
131
+
76
132
  begin
77
133
  fails = Processor::FAILURE.reset
78
134
  procd = Processor::PROCESSED.reset
@@ -80,94 +136,128 @@ module Sidekiq
80
136
 
81
137
  workers_key = "#{key}:workers"
82
138
  nowdate = Time.now.utc.strftime("%Y-%m-%d")
139
+
83
140
  Sidekiq.redis do |conn|
84
- conn.multi do
85
- conn.incrby("stat:processed", procd)
86
- conn.incrby("stat:processed:#{nowdate}", procd)
87
- conn.expire("stat:processed:#{nowdate}", STATS_TTL)
141
+ conn.multi do |transaction|
142
+ transaction.incrby("stat:processed", procd)
143
+ transaction.incrby("stat:processed:#{nowdate}", procd)
144
+ transaction.expire("stat:processed:#{nowdate}", STATS_TTL)
88
145
 
89
- conn.incrby("stat:failed", fails)
90
- conn.incrby("stat:failed:#{nowdate}", fails)
91
- conn.expire("stat:failed:#{nowdate}", STATS_TTL)
146
+ transaction.incrby("stat:failed", fails)
147
+ transaction.incrby("stat:failed:#{nowdate}", fails)
148
+ transaction.expire("stat:failed:#{nowdate}", STATS_TTL)
92
149
 
93
- conn.del(workers_key)
150
+ transaction.unlink(workers_key)
94
151
  curstate.each_pair do |tid, hash|
95
- conn.hset(workers_key, tid, Sidekiq.dump_json(hash))
152
+ transaction.hset(workers_key, tid, Sidekiq.dump_json(hash))
96
153
  end
97
- conn.expire(workers_key, 60)
154
+ transaction.expire(workers_key, 60)
98
155
  end
99
156
  end
157
+
158
+ rtt = check_rtt
159
+
100
160
  fails = procd = 0
161
+ kb = memory_usage(::Process.pid)
101
162
 
102
- _, exists, _, _, msg = Sidekiq.redis do |conn|
103
- conn.multi do
104
- conn.sadd('processes', key)
105
- conn.exists(key)
106
- conn.hmset(key, 'info', to_json, 'busy', curstate.size, 'beat', Time.now.to_f, 'quiet', @done)
107
- conn.expire(key, 60)
108
- conn.rpop("#{key}-signals")
109
- end
110
- end
163
+ _, exists, _, _, msg = Sidekiq.redis { |conn|
164
+ conn.multi { |transaction|
165
+ transaction.sadd("processes", key)
166
+ transaction.exists?(key)
167
+ transaction.hmset(key, "info", to_json,
168
+ "busy", curstate.size,
169
+ "beat", Time.now.to_f,
170
+ "rtt_us", rtt,
171
+ "quiet", @done,
172
+ "rss", kb)
173
+ transaction.expire(key, 60)
174
+ transaction.rpop("#{key}-signals")
175
+ }
176
+ }
111
177
 
112
178
  # first heartbeat or recovering from an outage and need to reestablish our heartbeat
113
- fire_event(:heartbeat) if !exists
179
+ fire_event(:heartbeat) unless exists
114
180
 
115
181
  return unless msg
116
182
 
117
- ::Process.kill(msg, $$)
183
+ ::Process.kill(msg, ::Process.pid)
118
184
  rescue => e
119
185
  # ignore all redis/network issues
120
- logger.error("heartbeat: #{e.message}")
186
+ logger.error("heartbeat: #{e}")
121
187
  # don't lose the counts if there was a network issue
122
188
  Processor::PROCESSED.incr(procd)
123
189
  Processor::FAILURE.incr(fails)
124
190
  end
125
191
  end
126
192
 
127
- def start_heartbeat
128
- while true
129
- heartbeat
130
- sleep 5
193
+ # We run the heartbeat every five seconds.
194
+ # Capture five samples of RTT, log a warning if each sample
195
+ # is above our warning threshold.
196
+ RTT_READINGS = RingBuffer.new(5)
197
+ RTT_WARNING_LEVEL = 50_000
198
+
199
+ def check_rtt
200
+ a = b = 0
201
+ Sidekiq.redis do |x|
202
+ a = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :microsecond)
203
+ x.ping
204
+ b = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :microsecond)
131
205
  end
132
- Sidekiq.logger.info("Heartbeat stopping...")
206
+ rtt = b - a
207
+ RTT_READINGS << rtt
208
+ # Ideal RTT for Redis is < 1000µs
209
+ # Workable is < 10,000µs
210
+ # Log a warning if it's a disaster.
211
+ if RTT_READINGS.all? { |x| x > RTT_WARNING_LEVEL }
212
+ Sidekiq.logger.warn <<~EOM
213
+ Your Redis network connection is performing extremely poorly.
214
+ Last RTT readings were #{RTT_READINGS.buffer.inspect}, ideally these should be < 1000.
215
+ Ensure Redis is running in the same AZ or datacenter as Sidekiq.
216
+ If these values are close to 100,000, that means your Sidekiq process may be
217
+ CPU overloaded; see https://github.com/mperham/sidekiq/discussions/5039
218
+ EOM
219
+ RTT_READINGS.reset
220
+ end
221
+ rtt
133
222
  end
134
223
 
135
- def to_data
136
- @data ||= begin
137
- {
138
- 'hostname' => hostname,
139
- 'started_at' => Time.now.to_f,
140
- 'pid' => $$,
141
- 'tag' => @options[:tag] || '',
142
- 'concurrency' => @options[:concurrency],
143
- 'queues' => @options[:queues].uniq,
144
- 'labels' => @options[:labels],
145
- 'identity' => identity,
146
- }
147
- end
224
+ MEMORY_GRABBER = case RUBY_PLATFORM
225
+ when /linux/
226
+ ->(pid) {
227
+ IO.readlines("/proc/#{$$}/status").each do |line|
228
+ next unless line.start_with?("VmRSS:")
229
+ break line.split[1].to_i
230
+ end
231
+ }
232
+ when /darwin|bsd/
233
+ ->(pid) {
234
+ `ps -o pid,rss -p #{pid}`.lines.last.split.last.to_i
235
+ }
236
+ else
237
+ ->(pid) { 0 }
148
238
  end
149
239
 
150
- def to_json
151
- @json ||= begin
152
- # this data changes infrequently so dump it to a string
153
- # now so we don't need to dump it every heartbeat.
154
- Sidekiq.dump_json(to_data)
155
- end
240
+ def memory_usage(pid)
241
+ MEMORY_GRABBER.call(pid)
156
242
  end
157
243
 
158
- def clear_heartbeat
159
- # Remove record from Redis since we are shutting down.
160
- # Note we don't stop the heartbeat thread; if the process
161
- # doesn't actually exit, it'll reappear in the Web UI.
162
- Sidekiq.redis do |conn|
163
- conn.pipelined do
164
- conn.srem('processes', identity)
165
- conn.del("#{identity}:workers")
166
- end
167
- end
168
- rescue
169
- # best effort, ignore network errors
244
+ def to_data
245
+ @data ||= {
246
+ "hostname" => hostname,
247
+ "started_at" => Time.now.to_f,
248
+ "pid" => ::Process.pid,
249
+ "tag" => @options[:tag] || "",
250
+ "concurrency" => @options[:concurrency],
251
+ "queues" => @options[:queues].uniq,
252
+ "labels" => @options[:labels],
253
+ "identity" => identity
254
+ }
170
255
  end
171
256
 
257
+ def to_json
258
+ # this data changes infrequently so dump it to a string
259
+ # now so we don't need to dump it every heartbeat.
260
+ @json ||= Sidekiq.dump_json(to_data)
261
+ end
172
262
  end
173
263
  end