sidekiq 4.2.10 → 7.3.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (158) hide show
  1. checksums.yaml +5 -5
  2. data/Changes.md +859 -7
  3. data/LICENSE.txt +9 -0
  4. data/README.md +49 -50
  5. data/bin/multi_queue_bench +271 -0
  6. data/bin/sidekiq +22 -3
  7. data/bin/sidekiqload +212 -119
  8. data/bin/sidekiqmon +11 -0
  9. data/lib/generators/sidekiq/job_generator.rb +59 -0
  10. data/lib/generators/sidekiq/templates/{worker.rb.erb → job.rb.erb} +2 -2
  11. data/lib/generators/sidekiq/templates/job_spec.rb.erb +6 -0
  12. data/lib/generators/sidekiq/templates/{worker_test.rb.erb → job_test.rb.erb} +1 -1
  13. data/lib/sidekiq/api.rb +680 -315
  14. data/lib/sidekiq/capsule.rb +132 -0
  15. data/lib/sidekiq/cli.rb +268 -248
  16. data/lib/sidekiq/client.rb +136 -101
  17. data/lib/sidekiq/component.rb +68 -0
  18. data/lib/sidekiq/config.rb +293 -0
  19. data/lib/sidekiq/deploy.rb +64 -0
  20. data/lib/sidekiq/embedded.rb +63 -0
  21. data/lib/sidekiq/fetch.rb +49 -42
  22. data/lib/sidekiq/iterable_job.rb +55 -0
  23. data/lib/sidekiq/job/interrupt_handler.rb +24 -0
  24. data/lib/sidekiq/job/iterable/active_record_enumerator.rb +53 -0
  25. data/lib/sidekiq/job/iterable/csv_enumerator.rb +47 -0
  26. data/lib/sidekiq/job/iterable/enumerators.rb +135 -0
  27. data/lib/sidekiq/job/iterable.rb +231 -0
  28. data/lib/sidekiq/job.rb +385 -0
  29. data/lib/sidekiq/job_logger.rb +62 -0
  30. data/lib/sidekiq/job_retry.rb +305 -0
  31. data/lib/sidekiq/job_util.rb +109 -0
  32. data/lib/sidekiq/launcher.rb +208 -108
  33. data/lib/sidekiq/logger.rb +131 -0
  34. data/lib/sidekiq/manager.rb +43 -47
  35. data/lib/sidekiq/metrics/query.rb +158 -0
  36. data/lib/sidekiq/metrics/shared.rb +97 -0
  37. data/lib/sidekiq/metrics/tracking.rb +148 -0
  38. data/lib/sidekiq/middleware/chain.rb +113 -56
  39. data/lib/sidekiq/middleware/current_attributes.rb +113 -0
  40. data/lib/sidekiq/middleware/i18n.rb +7 -7
  41. data/lib/sidekiq/middleware/modules.rb +23 -0
  42. data/lib/sidekiq/monitor.rb +147 -0
  43. data/lib/sidekiq/paginator.rb +28 -16
  44. data/lib/sidekiq/processor.rb +188 -98
  45. data/lib/sidekiq/rails.rb +46 -97
  46. data/lib/sidekiq/redis_client_adapter.rb +114 -0
  47. data/lib/sidekiq/redis_connection.rb +71 -73
  48. data/lib/sidekiq/ring_buffer.rb +31 -0
  49. data/lib/sidekiq/scheduled.rb +140 -51
  50. data/lib/sidekiq/sd_notify.rb +149 -0
  51. data/lib/sidekiq/systemd.rb +26 -0
  52. data/lib/sidekiq/testing/inline.rb +6 -5
  53. data/lib/sidekiq/testing.rb +95 -85
  54. data/lib/sidekiq/transaction_aware_client.rb +51 -0
  55. data/lib/sidekiq/version.rb +3 -1
  56. data/lib/sidekiq/web/action.rb +22 -16
  57. data/lib/sidekiq/web/application.rb +230 -86
  58. data/lib/sidekiq/web/csrf_protection.rb +183 -0
  59. data/lib/sidekiq/web/helpers.rb +241 -104
  60. data/lib/sidekiq/web/router.rb +23 -19
  61. data/lib/sidekiq/web.rb +118 -110
  62. data/lib/sidekiq/worker_compatibility_alias.rb +13 -0
  63. data/lib/sidekiq.rb +96 -185
  64. data/sidekiq.gemspec +26 -27
  65. data/web/assets/images/apple-touch-icon.png +0 -0
  66. data/web/assets/javascripts/application.js +157 -61
  67. data/web/assets/javascripts/base-charts.js +106 -0
  68. data/web/assets/javascripts/chart.min.js +13 -0
  69. data/web/assets/javascripts/chartjs-plugin-annotation.min.js +7 -0
  70. data/web/assets/javascripts/dashboard-charts.js +192 -0
  71. data/web/assets/javascripts/dashboard.js +37 -280
  72. data/web/assets/javascripts/metrics.js +298 -0
  73. data/web/assets/stylesheets/application-dark.css +147 -0
  74. data/web/assets/stylesheets/application-rtl.css +163 -0
  75. data/web/assets/stylesheets/application.css +173 -198
  76. data/web/assets/stylesheets/bootstrap-rtl.min.css +9 -0
  77. data/web/assets/stylesheets/bootstrap.css +2 -2
  78. data/web/locales/ar.yml +87 -0
  79. data/web/locales/cs.yml +62 -62
  80. data/web/locales/da.yml +60 -53
  81. data/web/locales/de.yml +65 -53
  82. data/web/locales/el.yml +43 -24
  83. data/web/locales/en.yml +86 -64
  84. data/web/locales/es.yml +70 -53
  85. data/web/locales/fa.yml +65 -64
  86. data/web/locales/fr.yml +83 -62
  87. data/web/locales/gd.yml +99 -0
  88. data/web/locales/he.yml +80 -0
  89. data/web/locales/hi.yml +59 -59
  90. data/web/locales/it.yml +53 -53
  91. data/web/locales/ja.yml +75 -62
  92. data/web/locales/ko.yml +52 -52
  93. data/web/locales/lt.yml +83 -0
  94. data/web/locales/nb.yml +61 -61
  95. data/web/locales/nl.yml +52 -52
  96. data/web/locales/pl.yml +45 -45
  97. data/web/locales/pt-br.yml +83 -55
  98. data/web/locales/pt.yml +51 -51
  99. data/web/locales/ru.yml +68 -63
  100. data/web/locales/sv.yml +53 -53
  101. data/web/locales/ta.yml +60 -60
  102. data/web/locales/tr.yml +101 -0
  103. data/web/locales/uk.yml +62 -61
  104. data/web/locales/ur.yml +80 -0
  105. data/web/locales/vi.yml +83 -0
  106. data/web/locales/zh-cn.yml +43 -16
  107. data/web/locales/zh-tw.yml +42 -8
  108. data/web/views/_footer.erb +21 -3
  109. data/web/views/_job_info.erb +21 -4
  110. data/web/views/_metrics_period_select.erb +12 -0
  111. data/web/views/_nav.erb +5 -19
  112. data/web/views/_paging.erb +3 -1
  113. data/web/views/_poll_link.erb +3 -6
  114. data/web/views/_summary.erb +7 -7
  115. data/web/views/busy.erb +85 -31
  116. data/web/views/dashboard.erb +50 -20
  117. data/web/views/dead.erb +3 -3
  118. data/web/views/filtering.erb +7 -0
  119. data/web/views/layout.erb +17 -6
  120. data/web/views/metrics.erb +91 -0
  121. data/web/views/metrics_for_job.erb +59 -0
  122. data/web/views/morgue.erb +14 -15
  123. data/web/views/queue.erb +34 -24
  124. data/web/views/queues.erb +20 -4
  125. data/web/views/retries.erb +19 -16
  126. data/web/views/retry.erb +3 -3
  127. data/web/views/scheduled.erb +19 -17
  128. metadata +91 -198
  129. data/.github/contributing.md +0 -32
  130. data/.github/issue_template.md +0 -9
  131. data/.gitignore +0 -12
  132. data/.travis.yml +0 -18
  133. data/3.0-Upgrade.md +0 -70
  134. data/4.0-Upgrade.md +0 -53
  135. data/COMM-LICENSE +0 -95
  136. data/Ent-Changes.md +0 -173
  137. data/Gemfile +0 -29
  138. data/LICENSE +0 -9
  139. data/Pro-2.0-Upgrade.md +0 -138
  140. data/Pro-3.0-Upgrade.md +0 -44
  141. data/Pro-Changes.md +0 -628
  142. data/Rakefile +0 -12
  143. data/bin/sidekiqctl +0 -99
  144. data/code_of_conduct.md +0 -50
  145. data/lib/generators/sidekiq/templates/worker_spec.rb.erb +0 -6
  146. data/lib/generators/sidekiq/worker_generator.rb +0 -49
  147. data/lib/sidekiq/core_ext.rb +0 -119
  148. data/lib/sidekiq/exception_handler.rb +0 -31
  149. data/lib/sidekiq/extensions/action_mailer.rb +0 -57
  150. data/lib/sidekiq/extensions/active_record.rb +0 -40
  151. data/lib/sidekiq/extensions/class_methods.rb +0 -40
  152. data/lib/sidekiq/extensions/generic_proxy.rb +0 -25
  153. data/lib/sidekiq/logging.rb +0 -106
  154. data/lib/sidekiq/middleware/server/active_record.rb +0 -13
  155. data/lib/sidekiq/middleware/server/logging.rb +0 -31
  156. data/lib/sidekiq/middleware/server/retry_jobs.rb +0 -205
  157. data/lib/sidekiq/util.rb +0 -63
  158. data/lib/sidekiq/worker.rb +0 -121
@@ -1,106 +1,104 @@
1
1
  # frozen_string_literal: true
2
- require 'connection_pool'
3
- require 'redis'
4
- require 'uri'
2
+
3
+ require "connection_pool"
4
+ require "uri"
5
+ require "sidekiq/redis_client_adapter"
5
6
 
6
7
  module Sidekiq
7
- class RedisConnection
8
+ module RedisConnection
8
9
  class << self
10
+ def create(options = {})
11
+ symbolized_options = deep_symbolize_keys(options)
12
+ symbolized_options[:url] ||= determine_redis_provider
9
13
 
10
- def create(options={})
11
- options = options.symbolize_keys
14
+ logger = symbolized_options.delete(:logger)
15
+ logger&.info { "Sidekiq #{Sidekiq::VERSION} connecting to Redis with options #{scrub(symbolized_options)}" }
12
16
 
13
- options[:url] ||= determine_redis_provider
17
+ raise "Sidekiq 7+ does not support Redis protocol 2" if symbolized_options[:protocol] == 2
14
18
 
15
- size = options[:size] || (Sidekiq.server? ? (Sidekiq.options[:concurrency] + 5) : 5)
19
+ safe = !!symbolized_options.delete(:cluster_safe)
20
+ raise ":nodes not allowed, Sidekiq is not safe to run on Redis Cluster" if !safe && symbolized_options.key?(:nodes)
16
21
 
17
- verify_sizing(size, Sidekiq.options[:concurrency]) if Sidekiq.server?
22
+ size = symbolized_options.delete(:size) || 5
23
+ pool_timeout = symbolized_options.delete(:pool_timeout) || 1
24
+ pool_name = symbolized_options.delete(:pool_name)
18
25
 
19
- pool_timeout = options[:pool_timeout] || 1
20
- log_info(options)
26
+ # Default timeout in redis-client is 1 second, which can be too aggressive
27
+ # if the Sidekiq process is CPU-bound. With 10-15 threads and a thread quantum of 100ms,
28
+ # it can be easy to get the occasional ReadTimeoutError. You can still provide
29
+ # a smaller timeout explicitly:
30
+ # config.redis = { url: "...", timeout: 1 }
31
+ symbolized_options[:timeout] ||= 3
21
32
 
22
- ConnectionPool.new(:timeout => pool_timeout, :size => size) do
23
- build_client(options)
33
+ redis_config = Sidekiq::RedisClientAdapter.new(symbolized_options)
34
+ ConnectionPool.new(timeout: pool_timeout, size: size, name: pool_name) do
35
+ redis_config.new_client
24
36
  end
25
37
  end
26
38
 
27
39
  private
28
40
 
29
- # Sidekiq needs a lot of concurrent Redis connections.
30
- #
31
- # We need a connection for each Processor.
32
- # We need a connection for Pro's real-time change listener
33
- # We need a connection to various features to call Redis every few seconds:
34
- # - the process heartbeat.
35
- # - enterprise's leader election
36
- # - enterprise's cron support
37
- def verify_sizing(size, concurrency)
38
- raise ArgumentError, "Your Redis connection pool is too small for Sidekiq to work. Your pool has #{size} connections but really needs to have at least #{concurrency + 2}" if size <= concurrency
39
- end
40
-
41
- def build_client(options)
42
- namespace = options[:namespace]
43
-
44
- client = Redis.new client_opts(options)
45
- if namespace
46
- begin
47
- require 'redis/namespace'
48
- Redis::Namespace.new(namespace, :redis => client)
49
- rescue LoadError
50
- Sidekiq.logger.error("Your Redis configuration uses the namespace '#{namespace}' but the redis-namespace gem is not included in the Gemfile." \
51
- "Add the gem to your Gemfile to continue using a namespace. Otherwise, remove the namespace parameter.")
52
- exit(-127)
41
+ def deep_symbolize_keys(object)
42
+ case object
43
+ when Hash
44
+ object.each_with_object({}) do |(key, value), result|
45
+ result[key.to_sym] = deep_symbolize_keys(value)
53
46
  end
47
+ when Array
48
+ object.map { |e| deep_symbolize_keys(e) }
54
49
  else
55
- client
50
+ object
56
51
  end
57
52
  end
58
53
 
59
- def client_opts(options)
60
- opts = options.dup
61
- if opts[:namespace]
62
- opts.delete(:namespace)
63
- end
64
-
65
- if opts[:network_timeout]
66
- opts[:timeout] = opts[:network_timeout]
67
- opts.delete(:network_timeout)
68
- end
69
-
70
- opts[:driver] ||= 'ruby'
71
-
72
- # Issue #3303, redis-rb will silently retry an operation.
73
- # This can lead to duplicate jobs if Sidekiq::Client's LPUSH
74
- # is performed twice but I believe this is much, much rarer
75
- # than the reconnect silently fixing a problem; we keep it
76
- # on by default.
77
- opts[:reconnect_attempts] ||= 1
78
-
79
- opts
80
- end
81
-
82
- def log_info(options)
83
- # Don't log Redis AUTH password
54
+ def scrub(options)
84
55
  redacted = "REDACTED"
85
- scrubbed_options = options.dup
56
+
57
+ # Deep clone so we can muck with these options all we want and exclude
58
+ # params from dump-and-load that may contain objects that Marshal is
59
+ # unable to safely dump.
60
+ keys = options.keys - [:logger, :ssl_params]
61
+ scrubbed_options = Marshal.load(Marshal.dump(options.slice(*keys)))
86
62
  if scrubbed_options[:url] && (uri = URI.parse(scrubbed_options[:url])) && uri.password
87
63
  uri.password = redacted
88
64
  scrubbed_options[:url] = uri.to_s
89
65
  end
90
- if scrubbed_options[:password]
91
- scrubbed_options[:password] = redacted
92
- end
93
- if Sidekiq.server?
94
- Sidekiq.logger.info("Booting Sidekiq #{Sidekiq::VERSION} with redis options #{scrubbed_options}")
95
- else
96
- Sidekiq.logger.debug("#{Sidekiq::NAME} client with redis options #{scrubbed_options}")
66
+ scrubbed_options[:password] = redacted if scrubbed_options[:password]
67
+ scrubbed_options[:sentinel_password] = redacted if scrubbed_options[:sentinel_password]
68
+ scrubbed_options[:sentinels]&.each do |sentinel|
69
+ if sentinel.is_a?(String)
70
+ if (uri = URI(sentinel)) && uri.password
71
+ uri.password = redacted
72
+ sentinel.replace(uri.to_s)
73
+ end
74
+ elsif sentinel[:password]
75
+ sentinel[:password] = redacted
76
+ end
97
77
  end
78
+ scrubbed_options
98
79
  end
99
80
 
100
81
  def determine_redis_provider
101
- ENV[ENV['REDIS_PROVIDER'] || 'REDIS_URL']
102
- end
82
+ # If you have this in your environment:
83
+ # MY_REDIS_URL=redis://hostname.example.com:1238/4
84
+ # then set:
85
+ # REDIS_PROVIDER=MY_REDIS_URL
86
+ # and Sidekiq will find your custom URL variable with no custom
87
+ # initialization code at all.
88
+ #
89
+ p = ENV["REDIS_PROVIDER"]
90
+ if p && p =~ /:/
91
+ raise <<~EOM
92
+ REDIS_PROVIDER should be set to the name of the variable which contains the Redis URL, not a URL itself.
93
+ Platforms like Heroku will sell addons that publish a *_URL variable. You need to tell Sidekiq with REDIS_PROVIDER, e.g.:
94
+
95
+ REDISTOGO_URL=redis://somehost.example.com:6379/4
96
+ REDIS_PROVIDER=REDISTOGO_URL
97
+ EOM
98
+ end
103
99
 
100
+ ENV[p.to_s] || ENV["REDIS_URL"]
101
+ end
104
102
  end
105
103
  end
106
104
  end
@@ -0,0 +1,31 @@
# frozen_string_literal: true

require "forwardable"

module Sidekiq
  # Fixed-capacity circular buffer.
  #
  # The buffer always holds exactly +size+ slots. Every slot starts out as
  # +default+; once +size+ elements have been pushed, each further push
  # overwrites the oldest slot. Reads (+[]+, +each+, +size+ and everything
  # Enumerable builds on +each+) are delegated to the backing Array, so they
  # see slots in storage order, not in insertion order.
  class RingBuffer
    include Enumerable
    extend Forwardable
    def_delegators :@buf, :[], :each, :size

    # @param size [Integer] number of slots; must be greater than zero
    # @param default [Object] value every slot is initialised with
    # @raise [ArgumentError] if +size+ is zero or negative
    def initialize(size, default = 0)
      @size = size
      # Array.new already rejects negative and non-numeric sizes.
      @buf = Array.new(size, default)
      # A zero-slot buffer could never accept an element: `<<` would fail with
      # ZeroDivisionError on `@index % @size`. Reject it up front instead.
      raise ArgumentError, "RingBuffer size must be greater than zero" if @buf.empty?

      @index = 0
    end

    # Stores +element+ in the next slot, wrapping around to the first slot
    # once the end of the buffer has been reached.
    #
    # @return [Object] the element that was stored
    def <<(element)
      @buf[@index % @size] = element
      @index += 1
      element
    end

    # @return [Array] the backing Array itself (not a copy)
    def buffer
      @buf
    end

    # Overwrites every slot with +default+ and rewinds the write cursor, so
    # the buffer behaves exactly like a freshly constructed one.
    #
    # @param default [Object] value to fill every slot with
    # @return [Array] the backing Array
    def reset(default = 0)
      @index = 0
      @buf.fill(default)
    end
  end
end
@@ -1,35 +1,66 @@
1
1
  # frozen_string_literal: true
2
- require 'sidekiq'
3
- require 'sidekiq/util'
4
- require 'sidekiq/api'
2
+
3
+ require "sidekiq"
4
+ require "sidekiq/component"
5
5
 
6
6
  module Sidekiq
7
7
  module Scheduled
8
- SETS = %w(retry schedule)
8
+ SETS = %w[retry schedule]
9
9
 
10
10
  class Enq
11
- def enqueue_jobs(now=Time.now.to_f.to_s, sorted_sets=SETS)
11
+ include Sidekiq::Component
12
+
13
+ LUA_ZPOPBYSCORE = <<~LUA
14
+ local key, now = KEYS[1], ARGV[1]
15
+ local jobs = redis.call("zrange", key, "-inf", now, "byscore", "limit", 0, 1)
16
+ if jobs[1] then
17
+ redis.call("zrem", key, jobs[1])
18
+ return jobs[1]
19
+ end
20
+ LUA
21
+
22
+ def initialize(container)
23
+ @config = container
24
+ @client = Sidekiq::Client.new(config: container)
25
+ @done = false
26
+ @lua_zpopbyscore_sha = nil
27
+ end
28
+
29
+ def enqueue_jobs(sorted_sets = SETS)
12
30
  # A job's "score" in Redis is the time at which it should be processed.
13
31
  # Just check Redis for the set of jobs with a timestamp before now.
14
- Sidekiq.redis do |conn|
32
+ redis do |conn|
15
33
  sorted_sets.each do |sorted_set|
16
- # Get the next item in the queue if it's score (time to execute) is <= now.
34
+ # Get next item in the queue with score (time to execute) <= now.
17
35
  # We need to go through the list one at a time to reduce the risk of something
18
36
  # going wrong between the time jobs are popped from the scheduled queue and when
19
37
  # they are pushed onto a work queue and losing the jobs.
20
- while job = conn.zrangebyscore(sorted_set, '-inf'.freeze, now, :limit => [0, 1]).first do
21
-
22
- # Pop item off the queue and add it to the work queue. If the job can't be popped from
23
- # the queue, it's because another process already popped it so we can move on to the
24
- # next one.
25
- if conn.zrem(sorted_set, job)
26
- Sidekiq::Client.push(Sidekiq.load_json(job))
27
- Sidekiq::Logging.logger.debug { "enqueued #{sorted_set}: #{job}" }
28
- end
38
+ while !@done && (job = zpopbyscore(conn, keys: [sorted_set], argv: [Time.now.to_f.to_s]))
39
+ @client.push(Sidekiq.load_json(job))
40
+ logger.debug { "enqueued #{sorted_set}: #{job}" }
29
41
  end
30
42
  end
31
43
  end
32
44
  end
45
+
46
+ def terminate
47
+ @done = true
48
+ end
49
+
50
+ private
51
+
52
+ def zpopbyscore(conn, keys: nil, argv: nil)
53
+ if @lua_zpopbyscore_sha.nil?
54
+ @lua_zpopbyscore_sha = conn.script(:load, LUA_ZPOPBYSCORE)
55
+ end
56
+
57
+ conn.call("EVALSHA", @lua_zpopbyscore_sha, keys.size, *keys, *argv)
58
+ rescue RedisClient::CommandError => e
59
+ raise unless e.message.start_with?("NOSCRIPT")
60
+
61
+ @lua_zpopbyscore_sha = nil
62
+ retry
63
+ end
33
64
  end
34
65
 
35
66
  ##
@@ -38,51 +69,47 @@ module Sidekiq
38
69
  # just pops the job back onto its original queue so the
39
70
  # workers can pick it up like any other job.
40
71
  class Poller
41
- include Util
72
+ include Sidekiq::Component
42
73
 
43
74
  INITIAL_WAIT = 10
44
75
 
45
- def initialize
46
- @enq = (Sidekiq.options[:scheduled_enq] || Sidekiq::Scheduled::Enq).new
76
+ def initialize(config)
77
+ @config = config
78
+ @enq = (config[:scheduled_enq] || Sidekiq::Scheduled::Enq).new(config)
47
79
  @sleeper = ConnectionPool::TimedStack.new
48
80
  @done = false
49
81
  @thread = nil
82
+ @count_calls = 0
50
83
  end
51
84
 
52
85
  # Shut down this instance, will pause until the thread is dead.
53
86
  def terminate
54
87
  @done = true
55
- if @thread
56
- t = @thread
57
- @thread = nil
58
- @sleeper << 0
59
- t.value
60
- end
88
+ @enq.terminate
89
+
90
+ @sleeper << 0
91
+ @thread&.value
61
92
  end
62
93
 
63
94
  def start
64
- @thread ||= safe_thread("scheduler") do
95
+ @thread ||= safe_thread("scheduler") {
65
96
  initial_wait
66
97
 
67
- while !@done
98
+ until @done
68
99
  enqueue
69
100
  wait
70
101
  end
71
- Sidekiq.logger.info("Scheduler exiting...")
72
- end
102
+ logger.info("Scheduler exiting...")
103
+ }
73
104
  end
74
105
 
75
106
  def enqueue
76
- begin
77
- @enq.enqueue_jobs
78
- rescue => ex
79
- # Most likely a problem with redis networking.
80
- # Punt and try again at the next interval
81
- logger.error ex.message
82
- ex.backtrace.each do |bt|
83
- logger.error(bt)
84
- end
85
- end
107
+ @enq.enqueue_jobs
108
+ rescue => ex
109
+ # Most likely a problem with redis networking.
110
+ # Punt and try again at the next interval
111
+ logger.error ex.message
112
+ handle_exception(ex)
86
113
  end
87
114
 
88
115
  private
@@ -95,13 +122,41 @@ module Sidekiq
95
122
  # if poll_interval_average hasn't been calculated yet, we can
96
123
  # raise an error trying to reach Redis.
97
124
  logger.error ex.message
98
- logger.error ex.backtrace.first
125
+ handle_exception(ex)
99
126
  sleep 5
100
127
  end
101
128
 
102
- # Calculates a random interval that is ±50% the desired average.
103
129
  def random_poll_interval
104
- poll_interval_average * rand + poll_interval_average.to_f / 2
130
+ # We want one Sidekiq process to schedule jobs every N seconds. We have M processes
131
+ # and **don't** want to coordinate.
132
+ #
133
+ # So in N*M second timespan, we want each process to schedule once. The basic loop is:
134
+ #
135
+ # * sleep a random amount within that N*M timespan
136
+ # * wake up and schedule
137
+ #
138
+ # We want to avoid one edge case: imagine a set of 2 processes, scheduling every 5 seconds,
139
+ # so N*M = 10. Each process decides to randomly sleep 8 seconds, now we've failed to meet
140
+ # that 5 second average. Thankfully each schedule cycle will sleep randomly so the next
141
+ # iteration could see each process sleep for 1 second, undercutting our average.
142
+ #
143
+ # So below 10 processes, we special case and ensure the processes sleep closer to the average.
144
+ # In the example above, each process should schedule every 10 seconds on average. We special
145
+ # case smaller clusters to add 50% so they would sleep somewhere between 5 and 15 seconds.
146
+ # As we run more processes, the scheduling interval average will approach an even spread
147
+ # between 0 and poll interval so we don't need this artificial boost.
148
+ #
149
+ count = process_count
150
+ interval = poll_interval_average(count)
151
+
152
+ if count < 10
153
+ # For small clusters, calculate a random interval that is ±50% the desired average.
154
+ interval * rand + interval.to_f / 2
155
+ else
156
+ # With 10+ processes, we should have enough randomness to get decent polling
157
+ # across the entire timespan
158
+ interval * rand
159
+ end
105
160
  end
106
161
 
107
162
  # We do our best to tune the poll interval to the size of the active Sidekiq
@@ -117,31 +172,65 @@ module Sidekiq
117
172
  # the same time: the thundering herd problem.
118
173
  #
119
174
  # We only do this if poll_interval_average is unset (the default).
120
- def poll_interval_average
121
- Sidekiq.options[:poll_interval_average] ||= scaled_poll_interval
175
+ def poll_interval_average(count)
176
+ @config[:poll_interval_average] || scaled_poll_interval(count)
122
177
  end
123
178
 
124
179
  # Calculates an average poll interval based on the number of known Sidekiq processes.
125
180
  # This minimizes a single point of failure by dispersing check-ins but without taxing
126
181
  # Redis if you run many Sidekiq processes.
127
- def scaled_poll_interval
128
- pcount = Sidekiq::ProcessSet.new.size
182
+ def scaled_poll_interval(process_count)
183
+ process_count * @config[:average_scheduled_poll_interval]
184
+ end
185
+
186
+ def process_count
187
+ pcount = Sidekiq.redis { |conn| conn.scard("processes") }
129
188
  pcount = 1 if pcount == 0
130
- pcount * Sidekiq.options[:average_scheduled_poll_interval]
189
+ pcount
190
+ end
191
+
192
+ # A copy of Sidekiq::ProcessSet#cleanup because server
193
+ # should never depend on sidekiq/api.
194
+ def cleanup
195
+ # dont run cleanup more than once per minute
196
+ return 0 unless redis { |conn| conn.set("process_cleanup", "1", "NX", "EX", "60") }
197
+
198
+ count = 0
199
+ redis do |conn|
200
+ procs = conn.sscan("processes").to_a
201
+ heartbeats = conn.pipelined { |pipeline|
202
+ procs.each do |key|
203
+ pipeline.hget(key, "info")
204
+ end
205
+ }
206
+
207
+ # the hash named key has an expiry of 60 seconds.
208
+ # if it's not found, that means the process has not reported
209
+ # in to Redis and probably died.
210
+ to_prune = procs.select.with_index { |proc, i|
211
+ heartbeats[i].nil?
212
+ }
213
+ count = conn.srem("processes", to_prune) unless to_prune.empty?
214
+ end
215
+ count
131
216
  end
132
217
 
133
218
  def initial_wait
134
- # Have all processes sleep between 5-15 seconds. 10 seconds
135
- # to give time for the heartbeat to register (if the poll interval is going to be calculated by the number
219
+ # Have all processes sleep between 5-15 seconds. 10 seconds to give time for
220
+ # the heartbeat to register (if the poll interval is going to be calculated by the number
136
221
  # of workers), and 5 random seconds to ensure they don't all hit Redis at the same time.
137
222
  total = 0
138
- total += INITIAL_WAIT unless Sidekiq.options[:poll_interval_average]
223
+ total += INITIAL_WAIT unless @config[:poll_interval_average]
139
224
  total += (5 * rand)
140
225
 
141
226
  @sleeper.pop(total)
142
227
  rescue Timeout::Error
228
+ ensure
229
+ # periodically clean out the `processes` set in Redis which can collect
230
+ # references to dead processes over time. The process count affects how
231
+ # often we scan for scheduled jobs.
232
+ cleanup
143
233
  end
144
-
145
234
  end
146
235
  end
147
236
  end
@@ -0,0 +1,149 @@
# frozen_string_literal: true

# The MIT License
#
# Copyright (c) 2017, 2018, 2019, 2020 Agis Anastasopoulos
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
# the Software without restriction, including without limitation the rights to
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

# This is based on https://github.com/agis/ruby-sdnotify as of commit a7d52ee.
# Changes relative to that commit:
#   * "rehomed" within the Sidekiq module to avoid namespace collisions
#   * reformatted with standard's code formatting style
#   * NOTIFY_SOCKET values naming a Linux abstract-namespace socket
#     (leading "@") are translated before connecting
#   * WATCHDOG_USEC is parsed without rescuing an exception

require "socket"

module Sidekiq
  # SdNotify is a pure-Ruby implementation of sd_notify(3). It can be used to
  # notify systemd about state changes. Methods of this package are no-op on
  # non-systemd systems (eg. Darwin).
  #
  # The API maps closely to the original implementation of sd_notify(3),
  # therefore be sure to check the official man pages prior to using SdNotify.
  #
  # Every helper takes an optional +unset_env+ flag; when true, NOTIFY_SOCKET
  # is removed from the environment as part of the call.
  #
  # @see https://www.freedesktop.org/software/systemd/man/sd_notify.html
  module SdNotify
    # Exception raised when there's an error writing to the notification socket
    class NotifyError < RuntimeError; end

    READY = "READY=1"
    RELOADING = "RELOADING=1"
    STOPPING = "STOPPING=1"
    STATUS = "STATUS="
    ERRNO = "ERRNO="
    MAINPID = "MAINPID="
    WATCHDOG = "WATCHDOG=1"
    FDSTORE = "FDSTORE=1"

    # Sends READY=1.
    def self.ready(unset_env = false)
      notify(READY, unset_env)
    end

    # Sends RELOADING=1.
    def self.reloading(unset_env = false)
      notify(RELOADING, unset_env)
    end

    # Sends STOPPING=1.
    def self.stopping(unset_env = false)
      notify(STOPPING, unset_env)
    end

    # @param status [String] a custom status string that describes the current
    #   state of the service
    def self.status(status, unset_env = false)
      notify("#{STATUS}#{status}", unset_env)
    end

    # @param errno [Integer]
    def self.errno(errno, unset_env = false)
      notify("#{ERRNO}#{errno}", unset_env)
    end

    # @param pid [Integer]
    def self.mainpid(pid, unset_env = false)
      notify("#{MAINPID}#{pid}", unset_env)
    end

    # Sends WATCHDOG=1 (the watchdog keep-alive).
    def self.watchdog(unset_env = false)
      notify(WATCHDOG, unset_env)
    end

    # Sends FDSTORE=1.
    def self.fdstore(unset_env = false)
      notify(FDSTORE, unset_env)
    end

    # @return [Boolean] true if the service manager expects watchdog keep-alive
    #   notification messages to be sent from this process.
    #
    # That is the case when the $WATCHDOG_USEC environment variable holds a
    # positive integer, and the $WATCHDOG_PID variable is unset or set to the
    # PID of the current process.
    #
    # @note Unlike sd_watchdog_enabled(3), this method does not mutate the
    #   environment.
    def self.watchdog?
      wd_usec = ENV["WATCHDOG_USEC"]
      wd_pid = ENV["WATCHDOG_PID"]

      return false unless wd_usec

      # exception: false yields nil for unparsable input instead of raising.
      wd_usec = Integer(wd_usec, exception: false)
      return false if wd_usec.nil? || wd_usec <= 0

      !wd_pid || wd_pid == $$.to_s
    end

    # Notify systemd with the provided state, via the notification socket, if
    # any.
    #
    # Generally this method will be used indirectly through the other methods
    # of the library.
    #
    # @param state [String]
    # @param unset_env [Boolean]
    #
    # @return [Integer, nil] the number of bytes written to the notification
    #   socket or nil if there was no socket to report to (eg. the program wasn't
    #   started by systemd)
    #
    # @raise [NotifyError] if there was an error communicating with the systemd
    #   socket
    #
    # @see https://www.freedesktop.org/software/systemd/man/sd_notify.html
    def self.notify(state, unset_env = false)
      sock = ENV["NOTIFY_SOCKET"]

      return nil unless sock

      ENV.delete("NOTIFY_SOCKET") if unset_env

      # sd_notify(3): a leading "@" denotes a Linux abstract-namespace socket,
      # whose real address starts with a NUL byte instead.
      address = sock.start_with?("@") ? "\0#{sock[1..-1]}" : sock

      begin
        Addrinfo.unix(address, :DGRAM).connect do |s|
          s.close_on_exec = true
          s.write(state)
        end
      rescue => e
        # Surface every socket failure as a single, library-specific error
        # while keeping the original backtrace.
        raise NotifyError, "#{e.class}: #{e.message}", e.backtrace
      end
    end
  end
end
@@ -0,0 +1,26 @@
# frozen_string_literal: true

#
# Sidekiq's systemd integration allows Sidekiq to inform systemd:
#   1. when it has successfully started
#   2. when it is starting shutdown
#   3. periodically for a liveness check with a watchdog thread
#
module Sidekiq
  # Starts a background thread that sends a watchdog keep-alive via
  # Sidekiq::SdNotify.watchdog at half the interval given by the
  # WATCHDOG_USEC environment variable (microseconds).
  #
  # If the interval is shorter than one second, nothing is started; an error
  # is logged and the logger's return value is returned instead.
  #
  # @return [Thread] the pinging thread when the watchdog was started
  # @raise [TypeError, ArgumentError] from Integer() when WATCHDOG_USEC is
  #   unset or not an integer string
  def self.start_watchdog
    usec = Integer(ENV["WATCHDOG_USEC"])
    # Interpolate: usec is an Integer, so `String + usec` would raise a
    # TypeError here instead of logging the problem.
    return Sidekiq.logger.error("systemd Watchdog too fast: #{usec}") if usec < 1_000_000

    sec_f = usec / 1_000_000.0
    # "It is recommended that a daemon sends a keep-alive notification message
    # to the service manager every half of the time returned here."
    ping_f = sec_f / 2
    Sidekiq.logger.info "Pinging systemd watchdog every #{ping_f.round(1)} sec"
    Thread.new do
      loop do
        sleep ping_f
        Sidekiq::SdNotify.watchdog
      end
    end
  end
end