sidekiq 6.1.0 → 7.0.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of sidekiq might be problematic. Click here for more details.

Files changed (141) hide show
  1. checksums.yaml +4 -4
  2. data/Changes.md +256 -7
  3. data/LICENSE.txt +9 -0
  4. data/README.md +21 -16
  5. data/bin/sidekiq +4 -9
  6. data/bin/sidekiqload +71 -76
  7. data/bin/sidekiqmon +1 -1
  8. data/lib/generators/sidekiq/job_generator.rb +57 -0
  9. data/lib/generators/sidekiq/templates/{worker.rb.erb → job.rb.erb} +2 -2
  10. data/lib/generators/sidekiq/templates/{worker_spec.rb.erb → job_spec.rb.erb} +1 -1
  11. data/lib/generators/sidekiq/templates/{worker_test.rb.erb → job_test.rb.erb} +1 -1
  12. data/lib/sidekiq/api.rb +352 -229
  13. data/lib/sidekiq/capsule.rb +110 -0
  14. data/lib/sidekiq/cli.rb +109 -89
  15. data/lib/sidekiq/client.rb +75 -86
  16. data/lib/sidekiq/{util.rb → component.rb} +13 -14
  17. data/lib/sidekiq/config.rb +271 -0
  18. data/lib/sidekiq/deploy.rb +62 -0
  19. data/lib/sidekiq/embedded.rb +61 -0
  20. data/lib/sidekiq/fetch.rb +31 -23
  21. data/lib/sidekiq/{worker.rb → job.rb} +162 -28
  22. data/lib/sidekiq/job_logger.rb +17 -29
  23. data/lib/sidekiq/job_retry.rb +80 -60
  24. data/lib/sidekiq/job_util.rb +71 -0
  25. data/lib/sidekiq/launcher.rb +143 -92
  26. data/lib/sidekiq/logger.rb +11 -45
  27. data/lib/sidekiq/manager.rb +40 -41
  28. data/lib/sidekiq/metrics/query.rb +153 -0
  29. data/lib/sidekiq/metrics/shared.rb +95 -0
  30. data/lib/sidekiq/metrics/tracking.rb +134 -0
  31. data/lib/sidekiq/middleware/chain.rb +90 -46
  32. data/lib/sidekiq/middleware/current_attributes.rb +58 -0
  33. data/lib/sidekiq/middleware/i18n.rb +6 -4
  34. data/lib/sidekiq/middleware/modules.rb +21 -0
  35. data/lib/sidekiq/monitor.rb +1 -1
  36. data/lib/sidekiq/paginator.rb +16 -8
  37. data/lib/sidekiq/processor.rb +56 -59
  38. data/lib/sidekiq/rails.rb +17 -5
  39. data/lib/sidekiq/redis_client_adapter.rb +118 -0
  40. data/lib/sidekiq/redis_connection.rb +17 -88
  41. data/lib/sidekiq/ring_buffer.rb +29 -0
  42. data/lib/sidekiq/scheduled.rb +102 -39
  43. data/lib/sidekiq/testing/inline.rb +4 -4
  44. data/lib/sidekiq/testing.rb +42 -71
  45. data/lib/sidekiq/transaction_aware_client.rb +44 -0
  46. data/lib/sidekiq/version.rb +2 -1
  47. data/lib/sidekiq/web/action.rb +3 -3
  48. data/lib/sidekiq/web/application.rb +42 -17
  49. data/lib/sidekiq/web/csrf_protection.rb +33 -6
  50. data/lib/sidekiq/web/helpers.rb +52 -41
  51. data/lib/sidekiq/web/router.rb +4 -1
  52. data/lib/sidekiq/web.rb +26 -81
  53. data/lib/sidekiq/worker_compatibility_alias.rb +13 -0
  54. data/lib/sidekiq.rb +86 -201
  55. data/sidekiq.gemspec +38 -6
  56. data/web/assets/images/apple-touch-icon.png +0 -0
  57. data/web/assets/javascripts/application.js +113 -65
  58. data/web/assets/javascripts/base-charts.js +106 -0
  59. data/web/assets/javascripts/chart.min.js +13 -0
  60. data/web/assets/javascripts/chartjs-plugin-annotation.min.js +7 -0
  61. data/web/assets/javascripts/dashboard-charts.js +166 -0
  62. data/web/assets/javascripts/dashboard.js +36 -273
  63. data/web/assets/javascripts/metrics.js +236 -0
  64. data/web/assets/stylesheets/application-dark.css +61 -51
  65. data/web/assets/stylesheets/application-rtl.css +2 -95
  66. data/web/assets/stylesheets/application.css +98 -532
  67. data/web/locales/ar.yml +71 -65
  68. data/web/locales/cs.yml +62 -62
  69. data/web/locales/da.yml +52 -52
  70. data/web/locales/de.yml +65 -65
  71. data/web/locales/el.yml +43 -24
  72. data/web/locales/en.yml +83 -67
  73. data/web/locales/es.yml +70 -54
  74. data/web/locales/fa.yml +65 -65
  75. data/web/locales/fr.yml +69 -62
  76. data/web/locales/he.yml +65 -64
  77. data/web/locales/hi.yml +59 -59
  78. data/web/locales/it.yml +53 -53
  79. data/web/locales/ja.yml +72 -66
  80. data/web/locales/ko.yml +52 -52
  81. data/web/locales/lt.yml +66 -66
  82. data/web/locales/nb.yml +61 -61
  83. data/web/locales/nl.yml +52 -52
  84. data/web/locales/pl.yml +45 -45
  85. data/web/locales/pt-br.yml +63 -55
  86. data/web/locales/pt.yml +51 -51
  87. data/web/locales/ru.yml +68 -63
  88. data/web/locales/sv.yml +53 -53
  89. data/web/locales/ta.yml +60 -60
  90. data/web/locales/uk.yml +62 -61
  91. data/web/locales/ur.yml +64 -64
  92. data/web/locales/vi.yml +67 -67
  93. data/web/locales/zh-cn.yml +37 -11
  94. data/web/locales/zh-tw.yml +42 -8
  95. data/web/views/_footer.erb +6 -3
  96. data/web/views/_job_info.erb +1 -1
  97. data/web/views/_nav.erb +1 -1
  98. data/web/views/_poll_link.erb +2 -5
  99. data/web/views/_summary.erb +7 -7
  100. data/web/views/busy.erb +57 -21
  101. data/web/views/dashboard.erb +58 -18
  102. data/web/views/dead.erb +1 -1
  103. data/web/views/layout.erb +2 -1
  104. data/web/views/metrics.erb +80 -0
  105. data/web/views/metrics_for_job.erb +69 -0
  106. data/web/views/morgue.erb +6 -6
  107. data/web/views/queue.erb +15 -11
  108. data/web/views/queues.erb +4 -4
  109. data/web/views/retries.erb +7 -7
  110. data/web/views/retry.erb +1 -1
  111. data/web/views/scheduled.erb +1 -1
  112. metadata +87 -52
  113. data/.circleci/config.yml +0 -71
  114. data/.github/contributing.md +0 -32
  115. data/.github/issue_template.md +0 -11
  116. data/.gitignore +0 -13
  117. data/.standard.yml +0 -20
  118. data/3.0-Upgrade.md +0 -70
  119. data/4.0-Upgrade.md +0 -53
  120. data/5.0-Upgrade.md +0 -56
  121. data/6.0-Upgrade.md +0 -72
  122. data/COMM-LICENSE +0 -97
  123. data/Ent-2.0-Upgrade.md +0 -37
  124. data/Ent-Changes.md +0 -269
  125. data/Gemfile +0 -24
  126. data/Gemfile.lock +0 -208
  127. data/LICENSE +0 -9
  128. data/Pro-2.0-Upgrade.md +0 -138
  129. data/Pro-3.0-Upgrade.md +0 -44
  130. data/Pro-4.0-Upgrade.md +0 -35
  131. data/Pro-5.0-Upgrade.md +0 -25
  132. data/Pro-Changes.md +0 -790
  133. data/Rakefile +0 -10
  134. data/code_of_conduct.md +0 -50
  135. data/lib/generators/sidekiq/worker_generator.rb +0 -57
  136. data/lib/sidekiq/delay.rb +0 -41
  137. data/lib/sidekiq/exception_handler.rb +0 -27
  138. data/lib/sidekiq/extensions/action_mailer.rb +0 -47
  139. data/lib/sidekiq/extensions/active_record.rb +0 -43
  140. data/lib/sidekiq/extensions/class_methods.rb +0 -43
  141. data/lib/sidekiq/extensions/generic_proxy.rb +0 -31
@@ -1,10 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "sidekiq/scheduled"
4
- require "sidekiq/api"
5
-
6
3
  require "zlib"
7
4
  require "base64"
5
+ require "sidekiq/component"
8
6
 
9
7
  module Sidekiq
10
8
  ##
@@ -25,18 +23,19 @@ module Sidekiq
25
23
  #
26
24
  # A job looks like:
27
25
  #
28
- # { 'class' => 'HardWorker', 'args' => [1, 2, 'foo'], 'retry' => true }
26
+ # { 'class' => 'HardJob', 'args' => [1, 2, 'foo'], 'retry' => true }
29
27
  #
30
28
  # The 'retry' option also accepts a number (in place of 'true'):
31
29
  #
32
- # { 'class' => 'HardWorker', 'args' => [1, 2, 'foo'], 'retry' => 5 }
30
+ # { 'class' => 'HardJob', 'args' => [1, 2, 'foo'], 'retry' => 5 }
33
31
  #
34
32
  # The job will be retried this number of times before giving up. (If simply
35
33
  # 'true', Sidekiq retries 25 times)
36
34
  #
37
- # We'll add a bit more data to the job to support retries:
35
+ # Relevant options for job retries:
38
36
  #
39
- # * 'queue' - the queue to use
37
+ # * 'queue' - the queue for the initial job
38
+ # * 'retry_queue' - if job retries should be pushed to a different (e.g. lower priority) queue
40
39
  # * 'retry_count' - number of times we've retried so far.
41
40
  # * 'error_message' - the message from the exception
42
41
  # * 'error_class' - the exception class
@@ -52,28 +51,31 @@ module Sidekiq
52
51
  #
53
52
  # Sidekiq.options[:max_retries] = 7
54
53
  #
55
- # or limit the number of retries for a particular worker with:
54
+ # or limit the number of retries for a particular job and send retries to
55
+ # a low priority queue with:
56
56
  #
57
- # class MyWorker
58
- # include Sidekiq::Worker
59
- # sidekiq_options :retry => 10
57
+ # class MyJob
58
+ # include Sidekiq::Job
59
+ # sidekiq_options retry: 10, retry_queue: 'low'
60
60
  # end
61
61
  #
62
62
  class JobRetry
63
63
  class Handled < ::RuntimeError; end
64
+
64
65
  class Skip < Handled; end
65
66
 
66
- include Sidekiq::Util
67
+ include Sidekiq::Component
67
68
 
68
69
  DEFAULT_MAX_RETRY_ATTEMPTS = 25
69
70
 
70
- def initialize(options = {})
71
- @max_retries = Sidekiq.options.merge(options).fetch(:max_retries, DEFAULT_MAX_RETRY_ATTEMPTS)
71
+ def initialize(capsule)
72
+ @config = @capsule = capsule
73
+ @max_retries = Sidekiq.default_configuration[:max_retries] || DEFAULT_MAX_RETRY_ATTEMPTS
72
74
  end
73
75
 
74
76
  # The global retry handler requires only the barest of data.
75
77
  # We want to be able to retry as much as possible so we don't
76
- # require the worker to be instantiated.
78
+ # require the job to be instantiated.
77
79
  def global(jobstr, queue)
78
80
  yield
79
81
  rescue Handled => ex
@@ -87,9 +89,9 @@ module Sidekiq
87
89
 
88
90
  msg = Sidekiq.load_json(jobstr)
89
91
  if msg["retry"]
90
- attempt_retry(nil, msg, queue, e)
92
+ process_retry(nil, msg, queue, e)
91
93
  else
92
- Sidekiq.death_handlers.each do |handler|
94
+ @capsule.config.death_handlers.each do |handler|
93
95
  handler.call(msg, e)
94
96
  rescue => handler_ex
95
97
  handle_exception(handler_ex, {context: "Error calling death handler", job: msg})
@@ -100,14 +102,14 @@ module Sidekiq
100
102
  end
101
103
 
102
104
  # The local retry support means that any errors that occur within
103
- # this block can be associated with the given worker instance.
105
+ # this block can be associated with the given job instance.
104
106
  # This is required to support the `sidekiq_retries_exhausted` block.
105
107
  #
106
108
  # Note that any exception from the block is wrapped in the Skip
107
109
  # exception so the global block does not reprocess the error. The
108
110
  # Skip exception is unwrapped within Sidekiq::Processor#process before
109
111
  # calling the handle_exception handlers.
110
- def local(worker, jobstr, queue)
112
+ def local(jobinst, jobstr, queue)
111
113
  yield
112
114
  rescue Handled => ex
113
115
  raise ex
@@ -120,11 +122,11 @@ module Sidekiq
120
122
 
121
123
  msg = Sidekiq.load_json(jobstr)
122
124
  if msg["retry"].nil?
123
- msg["retry"] = worker.class.get_sidekiq_options["retry"]
125
+ msg["retry"] = jobinst.class.get_sidekiq_options["retry"]
124
126
  end
125
127
 
126
128
  raise e unless msg["retry"]
127
- attempt_retry(worker, msg, queue, e)
129
+ process_retry(jobinst, msg, queue, e)
128
130
  # We've handled this error associated with this job, don't
129
131
  # need to handle it at the global level
130
132
  raise Skip
@@ -132,10 +134,10 @@ module Sidekiq
132
134
 
133
135
  private
134
136
 
135
- # Note that +worker+ can be nil here if an error is raised before we can
136
- # instantiate the worker instance. All access must be guarded and
137
+ # Note that +jobinst+ can be nil here if an error is raised before we can
138
+ # instantiate the job instance. All access must be guarded and
137
139
  # best effort.
138
- def attempt_retry(worker, msg, queue, exception)
140
+ def process_retry(jobinst, msg, queue, exception)
139
141
  max_retry_attempts = retry_attempts_from(msg["retry"], @max_retries)
140
142
 
141
143
  msg["queue"] = (msg["retry_queue"] || queue)
@@ -166,24 +168,54 @@ module Sidekiq
166
168
  msg["error_backtrace"] = compress_backtrace(lines)
167
169
  end
168
170
 
169
- if count < max_retry_attempts
170
- delay = delay_for(worker, count, exception)
171
- # Logging here can break retries if the logging device raises ENOSPC #3979
172
- # logger.debug { "Failure! Retry #{count} in #{delay} seconds" }
173
- retry_at = Time.now.to_f + delay
174
- payload = Sidekiq.dump_json(msg)
175
- Sidekiq.redis do |conn|
176
- conn.zadd("retry", retry_at.to_s, payload)
177
- end
178
- else
179
- # Goodbye dear message, you (re)tried your best I'm sure.
180
- retries_exhausted(worker, msg, exception)
171
+ # Goodbye dear message, you (re)tried your best I'm sure.
172
+ return retries_exhausted(jobinst, msg, exception) if count >= max_retry_attempts
173
+
174
+ strategy, delay = delay_for(jobinst, count, exception)
175
+ case strategy
176
+ when :discard
177
+ return # poof!
178
+ when :kill
179
+ return retries_exhausted(jobinst, msg, exception)
180
+ end
181
+
182
+ # Logging here can break retries if the logging device raises ENOSPC #3979
183
+ # logger.debug { "Failure! Retry #{count} in #{delay} seconds" }
184
+ jitter = rand(10) * (count + 1)
185
+ retry_at = Time.now.to_f + delay + jitter
186
+ payload = Sidekiq.dump_json(msg)
187
+ redis do |conn|
188
+ conn.zadd("retry", retry_at.to_s, payload)
181
189
  end
182
190
  end
183
191
 
184
- def retries_exhausted(worker, msg, exception)
192
+ # returns (strategy, seconds)
193
+ def delay_for(jobinst, count, exception)
194
+ rv = begin
195
+ # sidekiq_retry_in can return two different things:
196
+ # 1. When to retry next, as an integer of seconds
197
+ # 2. A symbol which re-routes the job elsewhere, e.g. :discard, :kill, :default
198
+ jobinst&.sidekiq_retry_in_block&.call(count, exception)
199
+ rescue Exception => e
200
+ handle_exception(e, {context: "Failure scheduling retry using the defined `sidekiq_retry_in` in #{jobinst.class.name}, falling back to default"})
201
+ nil
202
+ end
203
+
204
+ delay = (count**4) + 15
205
+ if Integer === rv && rv > 0
206
+ delay = rv
207
+ elsif rv == :discard
208
+ return [:discard, nil] # do nothing, job goes poof
209
+ elsif rv == :kill
210
+ return [:kill, nil]
211
+ end
212
+
213
+ [:default, delay]
214
+ end
215
+
216
+ def retries_exhausted(jobinst, msg, exception)
185
217
  begin
186
- block = worker&.sidekiq_retries_exhausted_block
218
+ block = jobinst&.sidekiq_retries_exhausted_block
187
219
  block&.call(msg, exception)
188
220
  rescue => e
189
221
  handle_exception(e, {context: "Error calling retries_exhausted", job: msg})
@@ -191,7 +223,7 @@ module Sidekiq
191
223
 
192
224
  send_to_morgue(msg) unless msg["dead"] == false
193
225
 
194
- Sidekiq.death_handlers.each do |handler|
226
+ @capsule.config.death_handlers.each do |handler|
195
227
  handler.call(msg, exception)
196
228
  rescue => e
197
229
  handle_exception(e, {context: "Error calling death handler", job: msg})
@@ -201,7 +233,15 @@ module Sidekiq
201
233
  def send_to_morgue(msg)
202
234
  logger.info { "Adding dead #{msg["class"]} job #{msg["jid"]}" }
203
235
  payload = Sidekiq.dump_json(msg)
204
- DeadSet.new.kill(payload, notify_failure: false)
236
+ now = Time.now.to_f
237
+
238
+ redis do |conn|
239
+ conn.multi do |xa|
240
+ xa.zadd("dead", now.to_s, payload)
241
+ xa.zremrangebyscore("dead", "-inf", now - @capsule.config[:dead_timeout_in_seconds])
242
+ xa.zremrangebyrank("dead", 0, - @capsule.config[:dead_max_jobs])
243
+ end
244
+ end
205
245
  end
206
246
 
207
247
  def retry_attempts_from(msg_retry, default)
@@ -212,26 +252,6 @@ module Sidekiq
212
252
  end
213
253
  end
214
254
 
215
- def delay_for(worker, count, exception)
216
- if worker&.sidekiq_retry_in_block
217
- custom_retry_in = retry_in(worker, count, exception).to_i
218
- return custom_retry_in if custom_retry_in > 0
219
- end
220
- seconds_to_delay(count)
221
- end
222
-
223
- # delayed_job uses the same basic formula
224
- def seconds_to_delay(count)
225
- (count**4) + 15 + (rand(30) * (count + 1))
226
- end
227
-
228
- def retry_in(worker, count, exception)
229
- worker.sidekiq_retry_in_block.call(count, exception)
230
- rescue Exception => e
231
- handle_exception(e, {context: "Failure scheduling retry using the defined `sidekiq_retry_in` in #{worker.class.name}, falling back to default"})
232
- nil
233
- end
234
-
235
255
  def exception_caused_by_shutdown?(e, checked_causes = [])
236
256
  return false unless e.cause
237
257
 
@@ -0,0 +1,71 @@
1
+ require "securerandom"
2
+ require "time"
3
+
4
+ module Sidekiq
5
+ module JobUtil
6
+ # These functions encapsulate various job utilities.
7
+
8
+ TRANSIENT_ATTRIBUTES = %w[]
9
+
10
+ def validate(item)
11
+ raise(ArgumentError, "Job must be a Hash with 'class' and 'args' keys: `#{item}`") unless item.is_a?(Hash) && item.key?("class") && item.key?("args")
12
+ raise(ArgumentError, "Job args must be an Array: `#{item}`") unless item["args"].is_a?(Array)
13
+ raise(ArgumentError, "Job class must be either a Class or String representation of the class name: `#{item}`") unless item["class"].is_a?(Class) || item["class"].is_a?(String)
14
+ raise(ArgumentError, "Job 'at' must be a Numeric timestamp: `#{item}`") if item.key?("at") && !item["at"].is_a?(Numeric)
15
+ raise(ArgumentError, "Job tags must be an Array: `#{item}`") if item["tags"] && !item["tags"].is_a?(Array)
16
+ end
17
+
18
+ def verify_json(item)
19
+ job_class = item["wrapped"] || item["class"]
20
+ if Sidekiq::Config::DEFAULTS[:on_complex_arguments] == :raise
21
+ msg = <<~EOM
22
+ Job arguments to #{job_class} must be native JSON types, see https://github.com/mperham/sidekiq/wiki/Best-Practices.
23
+ To disable this error, add `Sidekiq.strict_args!(false)` to your initializer.
24
+ EOM
25
+ raise(ArgumentError, msg) unless json_safe?(item)
26
+ elsif Sidekiq::Config::DEFAULTS[:on_complex_arguments] == :warn
27
+ warn <<~EOM unless json_safe?(item)
28
+ Job arguments to #{job_class} do not serialize to JSON safely. This will raise an error in
29
+ Sidekiq 7.0. See https://github.com/mperham/sidekiq/wiki/Best-Practices or raise an error today
30
+ by calling `Sidekiq.strict_args!` during Sidekiq initialization.
31
+ EOM
32
+ end
33
+ end
34
+
35
+ def normalize_item(item)
36
+ validate(item)
37
+
38
+ # merge in the default sidekiq_options for the item's class and/or wrapped element
39
+ # this allows ActiveJobs to control sidekiq_options too.
40
+ defaults = normalized_hash(item["class"])
41
+ defaults = defaults.merge(item["wrapped"].get_sidekiq_options) if item["wrapped"].respond_to?(:get_sidekiq_options)
42
+ item = defaults.merge(item)
43
+
44
+ raise(ArgumentError, "Job must include a valid queue name") if item["queue"].nil? || item["queue"] == ""
45
+
46
+ # remove job attributes which aren't necessary to persist into Redis
47
+ TRANSIENT_ATTRIBUTES.each { |key| item.delete(key) }
48
+
49
+ item["jid"] ||= SecureRandom.hex(12)
50
+ item["class"] = item["class"].to_s
51
+ item["queue"] = item["queue"].to_s
52
+ item["created_at"] ||= Time.now.to_f
53
+ item
54
+ end
55
+
56
+ def normalized_hash(item_class)
57
+ if item_class.is_a?(Class)
58
+ raise(ArgumentError, "Message must include a Sidekiq::Job class, not class name: #{item_class.ancestors.inspect}") unless item_class.respond_to?(:get_sidekiq_options)
59
+ item_class.get_sidekiq_options
60
+ else
61
+ Sidekiq.default_job_options
62
+ end
63
+ end
64
+
65
+ private
66
+
67
+ def json_safe?(item)
68
+ JSON.parse(JSON.dump(item["args"])) == item["args"]
69
+ end
70
+ end
71
+ end
@@ -1,13 +1,14 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "sidekiq/manager"
4
- require "sidekiq/fetch"
4
+ require "sidekiq/capsule"
5
5
  require "sidekiq/scheduled"
6
+ require "sidekiq/ring_buffer"
6
7
 
7
8
  module Sidekiq
8
- # The Launcher starts the Manager and Poller threads and provides the process heartbeat.
9
+ # The Launcher starts the Capsule Managers, the Poller thread and provides the process heartbeat.
9
10
  class Launcher
10
- include Util
11
+ include Sidekiq::Component
11
12
 
12
13
  STATS_TTL = 5 * 365 * 24 * 60 * 60 # 5 years
13
14
 
@@ -15,50 +16,53 @@ module Sidekiq
15
16
  proc { "sidekiq" },
16
17
  proc { Sidekiq::VERSION },
17
18
  proc { |me, data| data["tag"] },
18
- proc { |me, data| "[#{Processor::WORKER_STATE.size} of #{data["concurrency"]} busy]" },
19
+ proc { |me, data| "[#{Processor::WORK_STATE.size} of #{me.config.total_concurrency} busy]" },
19
20
  proc { |me, data| "stopping" if me.stopping? }
20
21
  ]
21
22
 
22
- attr_accessor :manager, :poller, :fetcher
23
+ attr_accessor :managers, :poller
23
24
 
24
- def initialize(options)
25
- options[:fetch] ||= BasicFetch.new(options)
26
- @manager = Sidekiq::Manager.new(options)
27
- @poller = Sidekiq::Scheduled::Poller.new
25
+ def initialize(config, embedded: false)
26
+ @config = config
27
+ @embedded = embedded
28
+ @managers = config.capsules.values.map do |cap|
29
+ Sidekiq::Manager.new(cap)
30
+ end
31
+ @poller = Sidekiq::Scheduled::Poller.new(@config)
28
32
  @done = false
29
- @options = options
30
33
  end
31
34
 
32
35
  def run
36
+ Sidekiq.freeze!
33
37
  @thread = safe_thread("heartbeat", &method(:start_heartbeat))
34
38
  @poller.start
35
- @manager.start
39
+ @managers.each(&:start)
36
40
  end
37
41
 
38
42
  # Stops this instance from processing any more jobs,
39
43
  #
40
44
  def quiet
45
+ return if @done
46
+
41
47
  @done = true
42
- @manager.quiet
48
+ @managers.each(&:quiet)
43
49
  @poller.terminate
50
+ fire_event(:quiet, reverse: true)
44
51
  end
45
52
 
46
- # Shuts down the process. This method does not
47
- # return until all work is complete and cleaned up.
48
- # It can take up to the timeout to complete.
53
+ # Shuts down this Sidekiq instance. Waits up to the deadline for all jobs to complete.
49
54
  def stop
50
- deadline = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC) + @options[:timeout]
55
+ deadline = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC) + @config[:timeout]
51
56
 
52
- @done = true
53
- @manager.quiet
54
- @poller.terminate
55
-
56
- @manager.stop(deadline)
57
+ quiet
58
+ stoppers = @managers.map do |mgr|
59
+ Thread.new do
60
+ mgr.stop(deadline)
61
+ end
62
+ end
57
63
 
58
- # Requeue everything in case there was a worker who grabbed work while stopped
59
- # This call is a no-op in Sidekiq but necessary for Sidekiq Pro.
60
- strategy = @options[:fetch]
61
- strategy.bulk_requeue([], @options)
64
+ fire_event(:shutdown, reverse: true)
65
+ stoppers.each(&:join)
62
66
 
63
67
  clear_heartbeat
64
68
  end
@@ -69,22 +73,26 @@ module Sidekiq
69
73
 
70
74
  private unless $TESTING
71
75
 
76
+ BEAT_PAUSE = 10
77
+
72
78
  def start_heartbeat
73
79
  loop do
74
80
  heartbeat
75
- sleep 5
81
+ sleep BEAT_PAUSE
76
82
  end
77
- Sidekiq.logger.info("Heartbeat stopping...")
83
+ logger.info("Heartbeat stopping...")
78
84
  end
79
85
 
80
86
  def clear_heartbeat
87
+ flush_stats
88
+
81
89
  # Remove record from Redis since we are shutting down.
82
90
  # Note we don't stop the heartbeat thread; if the process
83
91
  # doesn't actually exit, it'll reappear in the Web UI.
84
- Sidekiq.redis do |conn|
85
- conn.pipelined do
86
- conn.srem("processes", identity)
87
- conn.unlink("#{identity}:workers")
92
+ redis do |conn|
93
+ conn.pipelined do |pipeline|
94
+ pipeline.srem("processes", [identity])
95
+ pipeline.unlink("#{identity}:work")
88
96
  end
89
97
  end
90
98
  rescue
@@ -92,81 +100,77 @@ module Sidekiq
92
100
  end
93
101
 
94
102
  def heartbeat
95
- $0 = PROCTITLES.map { |proc| proc.call(self, to_data) }.compact.join(" ")
103
+ $0 = PROCTITLES.map { |proc| proc.call(self, to_data) }.compact.join(" ") unless @embedded
96
104
 
97
105
  ❤
98
106
  end
99
107
 
100
- def self.flush_stats
108
+ def flush_stats
101
109
  fails = Processor::FAILURE.reset
102
110
  procd = Processor::PROCESSED.reset
103
111
  return if fails + procd == 0
104
112
 
105
113
  nowdate = Time.now.utc.strftime("%Y-%m-%d")
106
114
  begin
107
- Sidekiq.redis do |conn|
108
- conn.pipelined do
109
- conn.incrby("stat:processed", procd)
110
- conn.incrby("stat:processed:#{nowdate}", procd)
111
- conn.expire("stat:processed:#{nowdate}", STATS_TTL)
112
-
113
- conn.incrby("stat:failed", fails)
114
- conn.incrby("stat:failed:#{nowdate}", fails)
115
- conn.expire("stat:failed:#{nowdate}", STATS_TTL)
115
+ redis do |conn|
116
+ conn.pipelined do |pipeline|
117
+ pipeline.incrby("stat:processed", procd)
118
+ pipeline.incrby("stat:processed:#{nowdate}", procd)
119
+ pipeline.expire("stat:processed:#{nowdate}", STATS_TTL)
120
+
121
+ pipeline.incrby("stat:failed", fails)
122
+ pipeline.incrby("stat:failed:#{nowdate}", fails)
123
+ pipeline.expire("stat:failed:#{nowdate}", STATS_TTL)
116
124
  end
117
125
  end
118
126
  rescue => ex
119
- # we're exiting the process, things might be shut down so don't
120
- # try to handle the exception
121
- Sidekiq.logger.warn("Unable to flush stats: #{ex}")
127
+ logger.warn("Unable to flush stats: #{ex}")
122
128
  end
123
129
  end
124
- at_exit(&method(:flush_stats))
125
130
 
126
131
  def ❤
127
132
  key = identity
128
133
  fails = procd = 0
129
134
 
130
135
  begin
131
- fails = Processor::FAILURE.reset
132
- procd = Processor::PROCESSED.reset
133
- curstate = Processor::WORKER_STATE.dup
134
-
135
- workers_key = "#{key}:workers"
136
- nowdate = Time.now.utc.strftime("%Y-%m-%d")
137
-
138
- Sidekiq.redis do |conn|
139
- conn.multi do
140
- conn.incrby("stat:processed", procd)
141
- conn.incrby("stat:processed:#{nowdate}", procd)
142
- conn.expire("stat:processed:#{nowdate}", STATS_TTL)
143
-
144
- conn.incrby("stat:failed", fails)
145
- conn.incrby("stat:failed:#{nowdate}", fails)
146
- conn.expire("stat:failed:#{nowdate}", STATS_TTL)
147
-
148
- conn.unlink(workers_key)
136
+ flush_stats
137
+
138
+ curstate = Processor::WORK_STATE.dup
139
+ redis do |conn|
140
+ # work is the current set of executing jobs
141
+ work_key = "#{key}:work"
142
+ conn.pipelined do |transaction|
143
+ transaction.unlink(work_key)
149
144
  curstate.each_pair do |tid, hash|
150
- conn.hset(workers_key, tid, Sidekiq.dump_json(hash))
145
+ transaction.hset(work_key, tid, Sidekiq.dump_json(hash))
151
146
  end
152
- conn.expire(workers_key, 60)
147
+ transaction.expire(work_key, 60)
153
148
  end
154
149
  end
155
150
 
156
- fails = procd = 0
151
+ rtt = check_rtt
157
152
 
158
- _, exists, _, _, msg = Sidekiq.redis { |conn|
159
- conn.multi {
160
- conn.sadd("processes", key)
161
- conn.exists?(key)
162
- conn.hmset(key, "info", to_json, "busy", curstate.size, "beat", Time.now.to_f, "quiet", @done)
163
- conn.expire(key, 60)
164
- conn.rpop("#{key}-signals")
153
+ fails = procd = 0
154
+ kb = memory_usage(::Process.pid)
155
+
156
+ _, exists, _, _, msg = redis { |conn|
157
+ conn.multi { |transaction|
158
+ transaction.sadd("processes", [key])
159
+ transaction.exists(key)
160
+ transaction.hmset(key, "info", to_json,
161
+ "busy", curstate.size,
162
+ "beat", Time.now.to_f,
163
+ "rtt_us", rtt,
164
+ "quiet", @done.to_s,
165
+ "rss", kb)
166
+ transaction.expire(key, 60)
167
+ transaction.rpop("#{key}-signals")
165
168
  }
166
169
  }
167
170
 
168
171
  # first heartbeat or recovering from an outage and need to reestablish our heartbeat
169
- fire_event(:heartbeat) unless exists
172
+ fire_event(:heartbeat) unless exists > 0
173
+ fire_event(:beat, oneshot: false)
170
174
 
171
175
  return unless msg
172
176
 
@@ -180,27 +184,74 @@ module Sidekiq
180
184
  end
181
185
  end
182
186
 
183
- def to_data
184
- @data ||= begin
185
- {
186
- "hostname" => hostname,
187
- "started_at" => Time.now.to_f,
188
- "pid" => ::Process.pid,
189
- "tag" => @options[:tag] || "",
190
- "concurrency" => @options[:concurrency],
191
- "queues" => @options[:queues].uniq,
192
- "labels" => @options[:labels],
193
- "identity" => identity
194
- }
187
+ # We run the heartbeat every five seconds.
188
+ # Capture five samples of RTT, log a warning if each sample
189
+ # is above our warning threshold.
190
+ RTT_READINGS = RingBuffer.new(5)
191
+ RTT_WARNING_LEVEL = 50_000
192
+
193
+ def check_rtt
194
+ a = b = 0
195
+ redis do |x|
196
+ a = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :microsecond)
197
+ x.ping
198
+ b = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :microsecond)
199
+ end
200
+ rtt = b - a
201
+ RTT_READINGS << rtt
202
+ # Ideal RTT for Redis is < 1000µs
203
+ # Workable is < 10,000µs
204
+ # Log a warning if it's a disaster.
205
+ if RTT_READINGS.all? { |x| x > RTT_WARNING_LEVEL }
206
+ logger.warn <<~EOM
207
+ Your Redis network connection is performing extremely poorly.
208
+ Last RTT readings were #{RTT_READINGS.buffer.inspect}, ideally these should be < 1000.
209
+ Ensure Redis is running in the same AZ or datacenter as Sidekiq.
210
+ If these values are close to 100,000, that means your Sidekiq process may be
211
+ CPU-saturated; reduce your concurrency and/or see https://github.com/mperham/sidekiq/discussions/5039
212
+ EOM
213
+ RTT_READINGS.reset
195
214
  end
215
+ rtt
216
+ end
217
+
218
+ MEMORY_GRABBER = case RUBY_PLATFORM
219
+ when /linux/
220
+ ->(pid) {
221
+ IO.readlines("/proc/#{$$}/status").each do |line|
222
+ next unless line.start_with?("VmRSS:")
223
+ break line.split[1].to_i
224
+ end
225
+ }
226
+ when /darwin|bsd/
227
+ ->(pid) {
228
+ `ps -o pid,rss -p #{pid}`.lines.last.split.last.to_i
229
+ }
230
+ else
231
+ ->(pid) { 0 }
232
+ end
233
+
234
+ def memory_usage(pid)
235
+ MEMORY_GRABBER.call(pid)
236
+ end
237
+
238
+ def to_data
239
+ @data ||= {
240
+ "hostname" => hostname,
241
+ "started_at" => Time.now.to_f,
242
+ "pid" => ::Process.pid,
243
+ "tag" => @config[:tag] || "",
244
+ "concurrency" => @config.total_concurrency,
245
+ "queues" => @config.capsules.values.map { |cap| cap.queues }.flatten.uniq,
246
+ "labels" => @config[:labels].to_a,
247
+ "identity" => identity
248
+ }
196
249
  end
197
250
 
198
251
  def to_json
199
- @json ||= begin
200
- # this data changes infrequently so dump it to a string
201
- # now so we don't need to dump it every heartbeat.
202
- Sidekiq.dump_json(to_data)
203
- end
252
+ # this data changes infrequently so dump it to a string
253
+ # now so we don't need to dump it every heartbeat.
254
+ @json ||= Sidekiq.dump_json(to_data)
204
255
  end
205
256
  end
206
257
  end