sidekiq 4.2.2 → 6.3.1

Potentially problematic release.

Files changed (138)
  1. checksums.yaml +5 -5
  2. data/Changes.md +516 -0
  3. data/LICENSE +2 -2
  4. data/README.md +23 -36
  5. data/bin/sidekiq +26 -2
  6. data/bin/sidekiqload +28 -38
  7. data/bin/sidekiqmon +8 -0
  8. data/lib/generators/sidekiq/templates/worker_spec.rb.erb +1 -1
  9. data/lib/generators/sidekiq/templates/worker_test.rb.erb +2 -2
  10. data/lib/generators/sidekiq/worker_generator.rb +21 -13
  11. data/lib/sidekiq/api.rb +401 -243
  12. data/lib/sidekiq/cli.rb +228 -212
  13. data/lib/sidekiq/client.rb +76 -53
  14. data/lib/sidekiq/delay.rb +41 -0
  15. data/lib/sidekiq/exception_handler.rb +12 -16
  16. data/lib/sidekiq/extensions/action_mailer.rb +13 -22
  17. data/lib/sidekiq/extensions/active_record.rb +13 -10
  18. data/lib/sidekiq/extensions/class_methods.rb +14 -11
  19. data/lib/sidekiq/extensions/generic_proxy.rb +12 -4
  20. data/lib/sidekiq/fetch.rb +39 -31
  21. data/lib/sidekiq/job.rb +13 -0
  22. data/lib/sidekiq/job_logger.rb +63 -0
  23. data/lib/sidekiq/job_retry.rb +259 -0
  24. data/lib/sidekiq/launcher.rb +170 -71
  25. data/lib/sidekiq/logger.rb +166 -0
  26. data/lib/sidekiq/manager.rb +17 -20
  27. data/lib/sidekiq/middleware/chain.rb +20 -8
  28. data/lib/sidekiq/middleware/current_attributes.rb +52 -0
  29. data/lib/sidekiq/middleware/i18n.rb +5 -7
  30. data/lib/sidekiq/monitor.rb +133 -0
  31. data/lib/sidekiq/paginator.rb +18 -14
  32. data/lib/sidekiq/processor.rb +169 -78
  33. data/lib/sidekiq/rails.rb +41 -36
  34. data/lib/sidekiq/redis_connection.rb +65 -20
  35. data/lib/sidekiq/scheduled.rb +85 -34
  36. data/lib/sidekiq/sd_notify.rb +149 -0
  37. data/lib/sidekiq/systemd.rb +24 -0
  38. data/lib/sidekiq/testing/inline.rb +2 -1
  39. data/lib/sidekiq/testing.rb +52 -26
  40. data/lib/sidekiq/util.rb +48 -15
  41. data/lib/sidekiq/version.rb +2 -1
  42. data/lib/sidekiq/web/action.rb +15 -17
  43. data/lib/sidekiq/web/application.rb +114 -92
  44. data/lib/sidekiq/web/csrf_protection.rb +180 -0
  45. data/lib/sidekiq/web/helpers.rb +151 -83
  46. data/lib/sidekiq/web/router.rb +27 -19
  47. data/lib/sidekiq/web.rb +85 -76
  48. data/lib/sidekiq/worker.rb +233 -43
  49. data/lib/sidekiq.rb +88 -64
  50. data/sidekiq.gemspec +24 -22
  51. data/web/assets/images/apple-touch-icon.png +0 -0
  52. data/web/assets/javascripts/application.js +86 -59
  53. data/web/assets/javascripts/dashboard.js +81 -85
  54. data/web/assets/stylesheets/application-dark.css +147 -0
  55. data/web/assets/stylesheets/application-rtl.css +242 -0
  56. data/web/assets/stylesheets/application.css +319 -141
  57. data/web/assets/stylesheets/bootstrap-rtl.min.css +9 -0
  58. data/web/assets/stylesheets/bootstrap.css +2 -2
  59. data/web/locales/ar.yml +87 -0
  60. data/web/locales/de.yml +14 -2
  61. data/web/locales/en.yml +8 -1
  62. data/web/locales/es.yml +22 -5
  63. data/web/locales/fa.yml +80 -0
  64. data/web/locales/fr.yml +10 -3
  65. data/web/locales/he.yml +79 -0
  66. data/web/locales/ja.yml +12 -4
  67. data/web/locales/lt.yml +83 -0
  68. data/web/locales/pl.yml +4 -4
  69. data/web/locales/ru.yml +4 -0
  70. data/web/locales/ur.yml +80 -0
  71. data/web/locales/vi.yml +83 -0
  72. data/web/views/_footer.erb +5 -2
  73. data/web/views/_job_info.erb +4 -3
  74. data/web/views/_nav.erb +4 -18
  75. data/web/views/_paging.erb +1 -1
  76. data/web/views/_poll_link.erb +2 -5
  77. data/web/views/_summary.erb +7 -7
  78. data/web/views/busy.erb +60 -22
  79. data/web/views/dashboard.erb +23 -15
  80. data/web/views/dead.erb +3 -3
  81. data/web/views/layout.erb +14 -3
  82. data/web/views/morgue.erb +19 -12
  83. data/web/views/queue.erb +24 -14
  84. data/web/views/queues.erb +14 -4
  85. data/web/views/retries.erb +22 -13
  86. data/web/views/retry.erb +4 -4
  87. data/web/views/scheduled.erb +7 -4
  88. metadata +44 -194
  89. data/.github/contributing.md +0 -32
  90. data/.github/issue_template.md +0 -4
  91. data/.gitignore +0 -12
  92. data/.travis.yml +0 -12
  93. data/3.0-Upgrade.md +0 -70
  94. data/4.0-Upgrade.md +0 -53
  95. data/COMM-LICENSE +0 -95
  96. data/Ent-Changes.md +0 -146
  97. data/Gemfile +0 -29
  98. data/Pro-2.0-Upgrade.md +0 -138
  99. data/Pro-3.0-Upgrade.md +0 -44
  100. data/Pro-Changes.md +0 -570
  101. data/Rakefile +0 -9
  102. data/bin/sidekiqctl +0 -99
  103. data/code_of_conduct.md +0 -50
  104. data/lib/sidekiq/core_ext.rb +0 -106
  105. data/lib/sidekiq/logging.rb +0 -106
  106. data/lib/sidekiq/middleware/server/active_record.rb +0 -13
  107. data/lib/sidekiq/middleware/server/logging.rb +0 -40
  108. data/lib/sidekiq/middleware/server/retry_jobs.rb +0 -205
  109. data/test/config.yml +0 -9
  110. data/test/env_based_config.yml +0 -11
  111. data/test/fake_env.rb +0 -1
  112. data/test/fixtures/en.yml +0 -2
  113. data/test/helper.rb +0 -75
  114. data/test/test_actors.rb +0 -138
  115. data/test/test_api.rb +0 -528
  116. data/test/test_cli.rb +0 -418
  117. data/test/test_client.rb +0 -266
  118. data/test/test_exception_handler.rb +0 -56
  119. data/test/test_extensions.rb +0 -127
  120. data/test/test_fetch.rb +0 -50
  121. data/test/test_launcher.rb +0 -95
  122. data/test/test_logging.rb +0 -35
  123. data/test/test_manager.rb +0 -50
  124. data/test/test_middleware.rb +0 -158
  125. data/test/test_processor.rb +0 -201
  126. data/test/test_rails.rb +0 -22
  127. data/test/test_redis_connection.rb +0 -132
  128. data/test/test_retry.rb +0 -326
  129. data/test/test_retry_exhausted.rb +0 -149
  130. data/test/test_scheduled.rb +0 -115
  131. data/test/test_scheduling.rb +0 -50
  132. data/test/test_sidekiq.rb +0 -107
  133. data/test/test_testing.rb +0 -143
  134. data/test/test_testing_fake.rb +0 -357
  135. data/test/test_testing_inline.rb +0 -94
  136. data/test/test_util.rb +0 -13
  137. data/test/test_web.rb +0 -666
  138. data/test/test_web_helpers.rb +0 -54

data/lib/sidekiq/job.rb
@@ -0,0 +1,13 @@
+require "sidekiq/worker"
+
+module Sidekiq
+  # Sidekiq::Job is a new alias for Sidekiq::Worker as of Sidekiq 6.3.0.
+  # Use `include Sidekiq::Job` rather than `include Sidekiq::Worker`.
+  #
+  # The term "worker" is too generic and overly confusing, used in several
+  # different contexts meaning different things. Many people call a Sidekiq
+  # process a "worker". Some people call the thread that executes jobs a
+  # "worker". This change brings Sidekiq closer to ActiveJob where your job
+  # classes extend ApplicationJob.
+  Job = Worker
+end
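
The new alias changes nothing at runtime, since Sidekiq::Job and Sidekiq::Worker are the same module object. A minimal sketch of a job class written against the new name (the class, queue and arguments below are illustrative, not part of the gem):

    require "sidekiq"

    class HardJob
      # Same behavior as `include Sidekiq::Worker`; only the name is new.
      include Sidekiq::Job
      sidekiq_options queue: "default", retry: 5

      def perform(user_id)
        # do the slow work for user_id here
      end
    end

    HardJob.perform_async(123)    # enqueue now
    HardJob.perform_in(300, 123)  # enqueue to run in five minutes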

data/lib/sidekiq/job_logger.rb
@@ -0,0 +1,63 @@
+# frozen_string_literal: true
+
+module Sidekiq
+  class JobLogger
+    def initialize(logger = Sidekiq.logger)
+      @logger = logger
+    end
+
+    def call(item, queue)
+      start = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
+      @logger.info("start")
+
+      yield
+
+      with_elapsed_time_context(start) do
+        @logger.info("done")
+      end
+    rescue Exception
+      with_elapsed_time_context(start) do
+        @logger.info("fail")
+      end
+
+      raise
+    end
+
+    def prepare(job_hash, &block)
+      level = job_hash["log_level"]
+      if level
+        @logger.log_at(level) do
+          Sidekiq::Context.with(job_hash_context(job_hash), &block)
+        end
+      else
+        Sidekiq::Context.with(job_hash_context(job_hash), &block)
+      end
+    end
+
+    def job_hash_context(job_hash)
+      # If we're using a wrapper class, like ActiveJob, use the "wrapped"
+      # attribute to expose the underlying thing.
+      h = {
+        class: job_hash["display_class"] || job_hash["wrapped"] || job_hash["class"],
+        jid: job_hash["jid"]
+      }
+      h[:bid] = job_hash["bid"] if job_hash["bid"]
+      h[:tags] = job_hash["tags"] if job_hash["tags"]
+      h
+    end
+
+    def with_elapsed_time_context(start, &block)
+      Sidekiq::Context.with(elapsed_time_context(start), &block)
+    end
+
+    def elapsed_time_context(start)
+      {elapsed: elapsed(start).to_s}
+    end
+
+    private
+
+    def elapsed(start)
+      (::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - start).round(3)
+    end
+  end
+end
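
JobLogger wraps every job execution: it logs "start" before yielding to the job, then "done" or "fail" with elapsed time measured on a monotonic clock, while `prepare` pushes per-job context (class, jid, optional bid/tags) and an optional per-job log level. A minimal sketch of swapping in a custom subclass, assuming the `:job_logger` server option consulted by the Processor behaves this way in your Sidekiq version (the subclass name is hypothetical, and error handling is left to the default behavior):

    # VerboseJobLogger is a hypothetical subclass, not part of the gem.
    class VerboseJobLogger < Sidekiq::JobLogger
      def call(item, queue)
        start = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
        @logger.info("Starting #{item["class"]} on queue #{queue}")
        yield
        elapsed = (::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - start).round(3)
        @logger.info("Finished #{item["class"]} in #{elapsed}s")
      end
    end

    Sidekiq.configure_server do |config|
      # Assumption: when set, options[:job_logger] is instantiated by the Processor.
      config.options[:job_logger] = VerboseJobLogger
    end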

data/lib/sidekiq/job_retry.rb
@@ -0,0 +1,259 @@
+# frozen_string_literal: true
+
+require "sidekiq/scheduled"
+require "sidekiq/api"
+
+require "zlib"
+require "base64"
+
+module Sidekiq
+  ##
+  # Automatically retry jobs that fail in Sidekiq.
+  # Sidekiq's retry support assumes a typical development lifecycle:
+  #
+  #   0. Push some code changes with a bug in it.
+  #   1. Bug causes job processing to fail, Sidekiq's middleware captures
+  #      the job and pushes it onto a retry queue.
+  #   2. Sidekiq retries jobs in the retry queue multiple times with
+  #      an exponential delay, the job continues to fail.
+  #   3. After a few days, a developer deploys a fix. The job is
+  #      reprocessed successfully.
+  #   4. Once retries are exhausted, Sidekiq will give up and move the
+  #      job to the Dead Job Queue (aka morgue) where it must be dealt with
+  #      manually in the Web UI.
+  #   5. After 6 months on the DJQ, Sidekiq will discard the job.
+  #
+  # A job looks like:
+  #
+  #     { 'class' => 'HardWorker', 'args' => [1, 2, 'foo'], 'retry' => true }
+  #
+  # The 'retry' option also accepts a number (in place of 'true'):
+  #
+  #     { 'class' => 'HardWorker', 'args' => [1, 2, 'foo'], 'retry' => 5 }
+  #
+  # The job will be retried this number of times before giving up. (If simply
+  # 'true', Sidekiq retries 25 times)
+  #
+  # We'll add a bit more data to the job to support retries:
+  #
+  #  * 'queue' - the queue to use
+  #  * 'retry_count' - number of times we've retried so far.
+  #  * 'error_message' - the message from the exception
+  #  * 'error_class' - the exception class
+  #  * 'failed_at' - the first time it failed
+  #  * 'retried_at' - the last time it was retried
+  #  * 'backtrace' - the number of lines of error backtrace to store
+  #
+  # We don't store the backtrace by default as that can add a lot of overhead
+  # to the job and everyone is using an error service, right?
+  #
+  # The default number of retries is 25 which works out to about 3 weeks
+  # You can change the default maximum number of retries in your initializer:
+  #
+  #     Sidekiq.options[:max_retries] = 7
+  #
+  # or limit the number of retries for a particular worker with:
+  #
+  #     class MyWorker
+  #       include Sidekiq::Worker
+  #       sidekiq_options :retry => 10
+  #     end
+  #
+  class JobRetry
+    class Handled < ::RuntimeError; end
+
+    class Skip < Handled; end
+
+    include Sidekiq::Util
+
+    DEFAULT_MAX_RETRY_ATTEMPTS = 25
+
+    def initialize(options = {})
+      @max_retries = Sidekiq.options.merge(options).fetch(:max_retries, DEFAULT_MAX_RETRY_ATTEMPTS)
+    end
+
+    # The global retry handler requires only the barest of data.
+    # We want to be able to retry as much as possible so we don't
+    # require the worker to be instantiated.
+    def global(jobstr, queue)
+      yield
+    rescue Handled => ex
+      raise ex
+    rescue Sidekiq::Shutdown => ey
+      # ignore, will be pushed back onto queue during hard_shutdown
+      raise ey
+    rescue Exception => e
+      # ignore, will be pushed back onto queue during hard_shutdown
+      raise Sidekiq::Shutdown if exception_caused_by_shutdown?(e)
+
+      msg = Sidekiq.load_json(jobstr)
+      if msg["retry"]
+        attempt_retry(nil, msg, queue, e)
+      else
+        Sidekiq.death_handlers.each do |handler|
+          handler.call(msg, e)
+        rescue => handler_ex
+          handle_exception(handler_ex, {context: "Error calling death handler", job: msg})
+        end
+      end
+
+      raise Handled
+    end
+
+    # The local retry support means that any errors that occur within
+    # this block can be associated with the given worker instance.
+    # This is required to support the `sidekiq_retries_exhausted` block.
+    #
+    # Note that any exception from the block is wrapped in the Skip
+    # exception so the global block does not reprocess the error. The
+    # Skip exception is unwrapped within Sidekiq::Processor#process before
+    # calling the handle_exception handlers.
+    def local(worker, jobstr, queue)
+      yield
+    rescue Handled => ex
+      raise ex
+    rescue Sidekiq::Shutdown => ey
+      # ignore, will be pushed back onto queue during hard_shutdown
+      raise ey
+    rescue Exception => e
+      # ignore, will be pushed back onto queue during hard_shutdown
+      raise Sidekiq::Shutdown if exception_caused_by_shutdown?(e)
+
+      msg = Sidekiq.load_json(jobstr)
+      if msg["retry"].nil?
+        msg["retry"] = worker.class.get_sidekiq_options["retry"]
+      end
+
+      raise e unless msg["retry"]
+      attempt_retry(worker, msg, queue, e)
+      # We've handled this error associated with this job, don't
+      # need to handle it at the global level
+      raise Skip
+    end
+
+    private
+
+    # Note that +worker+ can be nil here if an error is raised before we can
+    # instantiate the worker instance. All access must be guarded and
+    # best effort.
+    def attempt_retry(worker, msg, queue, exception)
+      max_retry_attempts = retry_attempts_from(msg["retry"], @max_retries)
+
+      msg["queue"] = (msg["retry_queue"] || queue)
+
+      m = exception_message(exception)
+      if m.respond_to?(:scrub!)
+        m.force_encoding("utf-8")
+        m.scrub!
+      end
+
+      msg["error_message"] = m
+      msg["error_class"] = exception.class.name
+      count = if msg["retry_count"]
+        msg["retried_at"] = Time.now.to_f
+        msg["retry_count"] += 1
+      else
+        msg["failed_at"] = Time.now.to_f
+        msg["retry_count"] = 0
+      end
+
+      if msg["backtrace"]
+        lines = if msg["backtrace"] == true
+          exception.backtrace
+        else
+          exception.backtrace[0...msg["backtrace"].to_i]
+        end
+
+        msg["error_backtrace"] = compress_backtrace(lines)
+      end
+
+      if count < max_retry_attempts
+        delay = delay_for(worker, count, exception)
+        # Logging here can break retries if the logging device raises ENOSPC #3979
+        # logger.debug { "Failure! Retry #{count} in #{delay} seconds" }
+        retry_at = Time.now.to_f + delay
+        payload = Sidekiq.dump_json(msg)
+        Sidekiq.redis do |conn|
+          conn.zadd("retry", retry_at.to_s, payload)
+        end
+      else
+        # Goodbye dear message, you (re)tried your best I'm sure.
+        retries_exhausted(worker, msg, exception)
+      end
+    end
+
+    def retries_exhausted(worker, msg, exception)
+      begin
+        block = worker&.sidekiq_retries_exhausted_block
+        block&.call(msg, exception)
+      rescue => e
+        handle_exception(e, {context: "Error calling retries_exhausted", job: msg})
+      end
+
+      send_to_morgue(msg) unless msg["dead"] == false
+
+      Sidekiq.death_handlers.each do |handler|
+        handler.call(msg, exception)
+      rescue => e
+        handle_exception(e, {context: "Error calling death handler", job: msg})
+      end
+    end
+
+    def send_to_morgue(msg)
+      logger.info { "Adding dead #{msg["class"]} job #{msg["jid"]}" }
+      payload = Sidekiq.dump_json(msg)
+      DeadSet.new.kill(payload, notify_failure: false)
+    end
+
+    def retry_attempts_from(msg_retry, default)
+      if msg_retry.is_a?(Integer)
+        msg_retry
+      else
+        default
+      end
+    end
+
+    def delay_for(worker, count, exception)
+      jitter = rand(10) * (count + 1)
+      if worker&.sidekiq_retry_in_block
+        custom_retry_in = retry_in(worker, count, exception).to_i
+        return custom_retry_in + jitter if custom_retry_in > 0
+      end
+      (count**4) + 15 + jitter
+    end
+
+    def retry_in(worker, count, exception)
+      worker.sidekiq_retry_in_block.call(count, exception)
+    rescue Exception => e
+      handle_exception(e, {context: "Failure scheduling retry using the defined `sidekiq_retry_in` in #{worker.class.name}, falling back to default"})
+      nil
+    end
+
+    def exception_caused_by_shutdown?(e, checked_causes = [])
+      return false unless e.cause
+
+      # Handle circular causes
+      checked_causes << e.object_id
+      return false if checked_causes.include?(e.cause.object_id)
+
+      e.cause.instance_of?(Sidekiq::Shutdown) ||
+        exception_caused_by_shutdown?(e.cause, checked_causes)
+    end
+
+    # Extract message from exception.
+    # Set a default if the message raises an error
+    def exception_message(exception)
+      # App code can stuff all sorts of crazy binary data into the error message
+      # that won't convert to JSON.
+      exception.message.to_s[0, 10_000]
+    rescue
+      +"!!! ERROR MESSAGE THREW AN ERROR !!!"
+    end
+
+    def compress_backtrace(backtrace)
+      serialized = Sidekiq.dump_json(backtrace)
+      compressed = Zlib::Deflate.deflate(serialized)
+      Base64.encode64(compressed)
+    end
+  end
+end
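
The retry schedule and the exhaustion hooks consulted above (`sidekiq_retry_in_block`, `sidekiq_retries_exhausted_block`) are defined per worker through the `sidekiq_retry_in` and `sidekiq_retries_exhausted` class macros; with the default `delay_for`, retry N waits roughly N**4 + 15 seconds plus a small random jitter. A minimal sketch (the worker name and bodies are illustrative, not part of the gem):

    class FlakyApiWorker
      include Sidekiq::Worker
      sidekiq_options retry: 10, backtrace: 20

      # Consulted by JobRetry#delay_for; return seconds, or nil to fall
      # back to the default (count**4) + 15 + jitter schedule.
      sidekiq_retry_in do |count, exception|
        30 * (count + 1)
      end

      # Called by JobRetry#retries_exhausted before the job goes to the Dead set.
      sidekiq_retries_exhausted do |msg, exception|
        Sidekiq.logger.warn("Giving up on #{msg["class"]} #{msg["jid"]}: #{exception.message}")
      end

      def perform(order_id)
        # call the flaky API for order_id here
      end
    end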

data/lib/sidekiq/launcher.rb
@@ -1,20 +1,28 @@
 # frozen_string_literal: true
-# encoding: utf-8
-require 'sidekiq/manager'
-require 'sidekiq/fetch'
-require 'sidekiq/scheduled'
+
+require "sidekiq/manager"
+require "sidekiq/fetch"
+require "sidekiq/scheduled"
 
 module Sidekiq
-  # The Launcher is a very simple Actor whose job is to
-  # start, monitor and stop the core Actors in Sidekiq.
-  # If any of these actors die, the Sidekiq process exits
-  # immediately.
+  # The Launcher starts the Manager and Poller threads and provides the process heartbeat.
   class Launcher
     include Util
 
+    STATS_TTL = 5 * 365 * 24 * 60 * 60 # 5 years
+
+    PROCTITLES = [
+      proc { "sidekiq" },
+      proc { Sidekiq::VERSION },
+      proc { |me, data| data["tag"] },
+      proc { |me, data| "[#{Processor::WORKER_STATE.size} of #{data["concurrency"]} busy]" },
+      proc { |me, data| "stopping" if me.stopping? }
+    ]
+
     attr_accessor :manager, :poller, :fetcher
 
     def initialize(options)
+      options[:fetch] ||= BasicFetch.new(options)
       @manager = Sidekiq::Manager.new(options)
       @poller = Sidekiq::Scheduled::Poller.new
       @done = false
@@ -39,7 +47,7 @@ module Sidekiq
     # return until all work is complete and cleaned up.
     # It can take up to the timeout to complete.
     def stop
-      deadline = Time.now + @options[:timeout]
+      deadline = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC) + @options[:timeout]
 
       @done = true
       @manager.quiet
@@ -49,7 +57,7 @@ module Sidekiq
 
       # Requeue everything in case there was a worker who grabbed work while stopped
       # This call is a no-op in Sidekiq but necessary for Sidekiq Pro.
-      strategy = (@options[:fetch] || Sidekiq::BasicFetch)
+      strategy = @options[:fetch]
       strategy.bulk_requeue([], @options)
 
       clear_heartbeat
@@ -61,104 +69,195 @@ module Sidekiq
 
     private unless $TESTING
 
-    JVM_RESERVED_SIGNALS = ['USR1', 'USR2'] # Don't Process#kill if we get these signals via the API
+    BEAT_PAUSE = 5
+
+    def start_heartbeat
+      loop do
+        heartbeat
+        sleep BEAT_PAUSE
+      end
+      Sidekiq.logger.info("Heartbeat stopping...")
+    end
+
+    def clear_heartbeat
+      # Remove record from Redis since we are shutting down.
+      # Note we don't stop the heartbeat thread; if the process
+      # doesn't actually exit, it'll reappear in the Web UI.
+      Sidekiq.redis do |conn|
+        conn.pipelined do
+          conn.srem("processes", identity)
+          conn.unlink("#{identity}:workers")
+        end
+      end
+    rescue
+      # best effort, ignore network errors
+    end
+
+    def heartbeat
+      $0 = PROCTITLES.map { |proc| proc.call(self, to_data) }.compact.join(" ")
+
+      ❤
+    end
+
+    def self.flush_stats
+      fails = Processor::FAILURE.reset
+      procd = Processor::PROCESSED.reset
+      return if fails + procd == 0
 
-    def heartbeat(k, data, json)
-      results = Sidekiq::CLI::PROCTITLES.map {|x| x.(self, data) }
-      results.compact!
-      $0 = results.join(' ')
+      nowdate = Time.now.utc.strftime("%Y-%m-%d")
+      begin
+        Sidekiq.redis do |conn|
+          conn.pipelined do
+            conn.incrby("stat:processed", procd)
+            conn.incrby("stat:processed:#{nowdate}", procd)
+            conn.expire("stat:processed:#{nowdate}", STATS_TTL)
 
-      ❤(k, json)
+            conn.incrby("stat:failed", fails)
+            conn.incrby("stat:failed:#{nowdate}", fails)
+            conn.expire("stat:failed:#{nowdate}", STATS_TTL)
+          end
+        end
+      rescue => ex
+        # we're exiting the process, things might be shut down so don't
+        # try to handle the exception
+        Sidekiq.logger.warn("Unable to flush stats: #{ex}")
+      end
     end
+    at_exit(&method(:flush_stats))
 
-    def ❤(key, json)
+    def ❤
+      key = identity
       fails = procd = 0
+
       begin
-        Processor::FAILURE.update {|curr| fails = curr; 0 }
-        Processor::PROCESSED.update {|curr| procd = curr; 0 }
+        fails = Processor::FAILURE.reset
+        procd = Processor::PROCESSED.reset
+        curstate = Processor::WORKER_STATE.dup
+
+        workers_key = "#{key}:workers"
+        nowdate = Time.now.utc.strftime("%Y-%m-%d")
 
-        workers_key = "#{key}:workers".freeze
-        nowdate = Time.now.utc.strftime("%Y-%m-%d".freeze)
         Sidekiq.redis do |conn|
           conn.multi do
-            conn.incrby("stat:processed".freeze, procd)
+            conn.incrby("stat:processed", procd)
             conn.incrby("stat:processed:#{nowdate}", procd)
-            conn.incrby("stat:failed".freeze, fails)
+            conn.expire("stat:processed:#{nowdate}", STATS_TTL)
+
+            conn.incrby("stat:failed", fails)
             conn.incrby("stat:failed:#{nowdate}", fails)
-            conn.del(workers_key)
-            Processor::WORKER_STATE.each_pair do |tid, hash|
+            conn.expire("stat:failed:#{nowdate}", STATS_TTL)
+
+            conn.unlink(workers_key)
+            curstate.each_pair do |tid, hash|
              conn.hset(workers_key, tid, Sidekiq.dump_json(hash))
            end
            conn.expire(workers_key, 60)
          end
        end
+
+        rtt = check_rtt
+
        fails = procd = 0
+        kb = memory_usage(::Process.pid)
 
-        _, exists, _, _, msg = Sidekiq.redis do |conn|
-          conn.multi do
-            conn.sadd('processes', key)
-            conn.exists(key)
-            conn.hmset(key, 'info', json, 'busy', Processor::WORKER_STATE.size, 'beat', Time.now.to_f, 'quiet', @done)
+        _, exists, _, _, msg = Sidekiq.redis { |conn|
+          conn.multi {
+            conn.sadd("processes", key)
+            conn.exists?(key)
+            conn.hmset(key, "info", to_json,
+              "busy", curstate.size,
+              "beat", Time.now.to_f,
+              "rtt_us", rtt,
+              "quiet", @done,
+              "rss", kb)
            conn.expire(key, 60)
            conn.rpop("#{key}-signals")
-          end
-        end
+          }
+        }
 
         # first heartbeat or recovering from an outage and need to reestablish our heartbeat
-        fire_event(:heartbeat) if !exists
+        fire_event(:heartbeat) unless exists
 
         return unless msg
 
-        if JVM_RESERVED_SIGNALS.include?(msg)
-          Sidekiq::CLI.instance.handle_signal(msg)
-        else
-          ::Process.kill(msg, $$)
-        end
+        ::Process.kill(msg, ::Process.pid)
       rescue => e
         # ignore all redis/network issues
-        logger.error("heartbeat: #{e.message}")
+        logger.error("heartbeat: #{e}")
        # don't lose the counts if there was a network issue
-        Processor::PROCESSED.increment(procd)
-        Processor::FAILURE.increment(fails)
+        Processor::PROCESSED.incr(procd)
+        Processor::FAILURE.incr(fails)
       end
     end
 
-    def start_heartbeat
-      k = identity
-      data = {
-        'hostname' => hostname,
-        'started_at' => Time.now.to_f,
-        'pid' => $$,
-        'tag' => @options[:tag] || '',
-        'concurrency' => @options[:concurrency],
-        'queues' => @options[:queues].uniq,
-        'labels' => @options[:labels],
-        'identity' => k,
-      }
-      # this data doesn't change so dump it to a string
-      # now so we don't need to dump it every heartbeat.
-      json = Sidekiq.dump_json(data)
+    # We run the heartbeat every five seconds.
+    # Capture five samples of RTT, log a warning if each sample
+    # is above our warning threshold.
+    RTT_READINGS = RingBuffer.new(5)
+    RTT_WARNING_LEVEL = 50_000
 
-      while true
-        heartbeat(k, data, json)
-        sleep 5
+    def check_rtt
+      a = b = 0
+      Sidekiq.redis do |x|
+        a = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :microsecond)
+        x.ping
+        b = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :microsecond)
       end
-      Sidekiq.logger.info("Heartbeat stopping...")
+      rtt = b - a
+      RTT_READINGS << rtt
+      # Ideal RTT for Redis is < 1000µs
+      # Workable is < 10,000µs
+      # Log a warning if it's a disaster.
+      if RTT_READINGS.all? { |x| x > RTT_WARNING_LEVEL }
+        Sidekiq.logger.warn <<~EOM
+          Your Redis network connection is performing extremely poorly.
+          Last RTT readings were #{RTT_READINGS.buffer.inspect}, ideally these should be < 1000.
+          Ensure Redis is running in the same AZ or datacenter as Sidekiq.
+          If these values are close to 100,000, that means your Sidekiq process may be
+          CPU overloaded; see https://github.com/mperham/sidekiq/discussions/5039
+        EOM
+        RTT_READINGS.reset
+      end
+      rtt
     end
 
-    def clear_heartbeat
-      # Remove record from Redis since we are shutting down.
-      # Note we don't stop the heartbeat thread; if the process
-      # doesn't actually exit, it'll reappear in the Web UI.
-      Sidekiq.redis do |conn|
-        conn.pipelined do
-          conn.srem('processes', identity)
-          conn.del("#{identity}:workers")
+    MEMORY_GRABBER = case RUBY_PLATFORM
+    when /linux/
+      ->(pid) {
+        IO.readlines("/proc/#{$$}/status").each do |line|
+          next unless line.start_with?("VmRSS:")
+          break line.split[1].to_i
         end
-      end
-    rescue
-      # best effort, ignore network errors
+      }
+    when /darwin|bsd/
+      ->(pid) {
+        `ps -o pid,rss -p #{pid}`.lines.last.split.last.to_i
+      }
+    else
+      ->(pid) { 0 }
+    end
+
+    def memory_usage(pid)
+      MEMORY_GRABBER.call(pid)
+    end
+
+    def to_data
+      @data ||= {
+        "hostname" => hostname,
+        "started_at" => Time.now.to_f,
+        "pid" => ::Process.pid,
+        "tag" => @options[:tag] || "",
+        "concurrency" => @options[:concurrency],
+        "queues" => @options[:queues].uniq,
+        "labels" => @options[:labels],
+        "identity" => identity
+      }
     end
 
+    def to_json
+      # this data changes infrequently so dump it to a string
+      # now so we don't need to dump it every heartbeat.
+      @json ||= Sidekiq.dump_json(to_data)
+    end
   end
 end
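
Each heartbeat now publishes extra fields alongside busy/beat/quiet: rtt_us (the sampled Redis round-trip time in microseconds) and rss (resident memory in kilobytes, read from /proc on Linux or ps elsewhere). They land in the per-process hash in Redis, so they can be read back through the existing API. A minimal sketch, assuming the Sidekiq::ProcessSet fields shown here are exposed unchanged in your version:

    require "sidekiq/api"

    Sidekiq::ProcessSet.new.each do |process|
      # "identity", "busy", "rss" and "rtt_us" come from the heartbeat hash above.
      puts format("%s busy=%s rss=%sKB rtt=%sus",
        process["identity"], process["busy"], process["rss"], process["rtt_us"])
    end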