sidekiq 6.2.2 → 8.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181) hide show
  1. checksums.yaml +4 -4
  2. data/Changes.md +726 -11
  3. data/LICENSE.txt +9 -0
  4. data/README.md +70 -39
  5. data/bin/kiq +17 -0
  6. data/bin/lint-herb +13 -0
  7. data/bin/multi_queue_bench +271 -0
  8. data/bin/sidekiq +4 -9
  9. data/bin/sidekiqload +214 -115
  10. data/bin/sidekiqmon +4 -1
  11. data/bin/webload +69 -0
  12. data/lib/active_job/queue_adapters/sidekiq_adapter.rb +124 -0
  13. data/lib/generators/sidekiq/job_generator.rb +71 -0
  14. data/lib/generators/sidekiq/templates/{worker.rb.erb → job.rb.erb} +3 -3
  15. data/lib/generators/sidekiq/templates/{worker_spec.rb.erb → job_spec.rb.erb} +1 -1
  16. data/lib/generators/sidekiq/templates/{worker_test.rb.erb → job_test.rb.erb} +1 -1
  17. data/lib/sidekiq/api.rb +729 -264
  18. data/lib/sidekiq/capsule.rb +135 -0
  19. data/lib/sidekiq/cli.rb +124 -100
  20. data/lib/sidekiq/client.rb +153 -106
  21. data/lib/sidekiq/component.rb +132 -0
  22. data/lib/sidekiq/config.rb +320 -0
  23. data/lib/sidekiq/deploy.rb +64 -0
  24. data/lib/sidekiq/embedded.rb +64 -0
  25. data/lib/sidekiq/fetch.rb +27 -26
  26. data/lib/sidekiq/iterable_job.rb +56 -0
  27. data/lib/sidekiq/job/interrupt_handler.rb +24 -0
  28. data/lib/sidekiq/job/iterable/active_record_enumerator.rb +53 -0
  29. data/lib/sidekiq/job/iterable/csv_enumerator.rb +47 -0
  30. data/lib/sidekiq/job/iterable/enumerators.rb +135 -0
  31. data/lib/sidekiq/job/iterable.rb +322 -0
  32. data/lib/sidekiq/job.rb +397 -5
  33. data/lib/sidekiq/job_logger.rb +23 -32
  34. data/lib/sidekiq/job_retry.rb +141 -68
  35. data/lib/sidekiq/job_util.rb +113 -0
  36. data/lib/sidekiq/launcher.rb +122 -98
  37. data/lib/sidekiq/loader.rb +57 -0
  38. data/lib/sidekiq/logger.rb +27 -106
  39. data/lib/sidekiq/manager.rb +41 -43
  40. data/lib/sidekiq/metrics/query.rb +184 -0
  41. data/lib/sidekiq/metrics/shared.rb +109 -0
  42. data/lib/sidekiq/metrics/tracking.rb +153 -0
  43. data/lib/sidekiq/middleware/chain.rb +96 -51
  44. data/lib/sidekiq/middleware/current_attributes.rb +120 -0
  45. data/lib/sidekiq/middleware/i18n.rb +8 -4
  46. data/lib/sidekiq/middleware/modules.rb +23 -0
  47. data/lib/sidekiq/monitor.rb +16 -6
  48. data/lib/sidekiq/paginator.rb +37 -10
  49. data/lib/sidekiq/processor.rb +105 -87
  50. data/lib/sidekiq/profiler.rb +73 -0
  51. data/lib/sidekiq/rails.rb +49 -36
  52. data/lib/sidekiq/redis_client_adapter.rb +117 -0
  53. data/lib/sidekiq/redis_connection.rb +55 -86
  54. data/lib/sidekiq/ring_buffer.rb +32 -0
  55. data/lib/sidekiq/scheduled.rb +106 -50
  56. data/lib/sidekiq/systemd.rb +2 -0
  57. data/lib/sidekiq/test_api.rb +331 -0
  58. data/lib/sidekiq/testing/inline.rb +2 -30
  59. data/lib/sidekiq/testing.rb +2 -342
  60. data/lib/sidekiq/transaction_aware_client.rb +59 -0
  61. data/lib/sidekiq/tui/controls.rb +53 -0
  62. data/lib/sidekiq/tui/filtering.rb +53 -0
  63. data/lib/sidekiq/tui/tabs/base_tab.rb +204 -0
  64. data/lib/sidekiq/tui/tabs/busy.rb +118 -0
  65. data/lib/sidekiq/tui/tabs/dead.rb +19 -0
  66. data/lib/sidekiq/tui/tabs/home.rb +144 -0
  67. data/lib/sidekiq/tui/tabs/metrics.rb +131 -0
  68. data/lib/sidekiq/tui/tabs/queues.rb +95 -0
  69. data/lib/sidekiq/tui/tabs/retries.rb +19 -0
  70. data/lib/sidekiq/tui/tabs/scheduled.rb +19 -0
  71. data/lib/sidekiq/tui/tabs/set_tab.rb +96 -0
  72. data/lib/sidekiq/tui/tabs.rb +15 -0
  73. data/lib/sidekiq/tui.rb +382 -0
  74. data/lib/sidekiq/version.rb +6 -1
  75. data/lib/sidekiq/web/action.rb +149 -64
  76. data/lib/sidekiq/web/application.rb +376 -268
  77. data/lib/sidekiq/web/config.rb +117 -0
  78. data/lib/sidekiq/web/helpers.rb +213 -87
  79. data/lib/sidekiq/web/router.rb +61 -74
  80. data/lib/sidekiq/web.rb +71 -100
  81. data/lib/sidekiq/worker_compatibility_alias.rb +13 -0
  82. data/lib/sidekiq.rb +95 -196
  83. data/sidekiq.gemspec +14 -11
  84. data/web/assets/images/logo.png +0 -0
  85. data/web/assets/images/status.png +0 -0
  86. data/web/assets/javascripts/application.js +171 -57
  87. data/web/assets/javascripts/base-charts.js +120 -0
  88. data/web/assets/javascripts/chart.min.js +13 -0
  89. data/web/assets/javascripts/chartjs-adapter-date-fns.min.js +7 -0
  90. data/web/assets/javascripts/chartjs-plugin-annotation.min.js +7 -0
  91. data/web/assets/javascripts/dashboard-charts.js +194 -0
  92. data/web/assets/javascripts/dashboard.js +41 -274
  93. data/web/assets/javascripts/metrics.js +280 -0
  94. data/web/assets/stylesheets/style.css +776 -0
  95. data/web/locales/ar.yml +72 -70
  96. data/web/locales/cs.yml +64 -62
  97. data/web/locales/da.yml +62 -53
  98. data/web/locales/de.yml +67 -65
  99. data/web/locales/el.yml +45 -24
  100. data/web/locales/en.yml +93 -69
  101. data/web/locales/es.yml +91 -68
  102. data/web/locales/fa.yml +67 -65
  103. data/web/locales/fr.yml +82 -67
  104. data/web/locales/gd.yml +110 -0
  105. data/web/locales/he.yml +67 -64
  106. data/web/locales/hi.yml +61 -59
  107. data/web/locales/it.yml +94 -54
  108. data/web/locales/ja.yml +74 -68
  109. data/web/locales/ko.yml +54 -52
  110. data/web/locales/lt.yml +68 -66
  111. data/web/locales/nb.yml +63 -61
  112. data/web/locales/nl.yml +54 -52
  113. data/web/locales/pl.yml +47 -45
  114. data/web/locales/{pt-br.yml → pt-BR.yml} +85 -56
  115. data/web/locales/pt.yml +53 -51
  116. data/web/locales/ru.yml +69 -66
  117. data/web/locales/sv.yml +55 -53
  118. data/web/locales/ta.yml +62 -60
  119. data/web/locales/tr.yml +102 -0
  120. data/web/locales/uk.yml +87 -61
  121. data/web/locales/ur.yml +66 -64
  122. data/web/locales/vi.yml +69 -67
  123. data/web/locales/zh-CN.yml +107 -0
  124. data/web/locales/{zh-tw.yml → zh-TW.yml} +44 -9
  125. data/web/views/_footer.html.erb +32 -0
  126. data/web/views/_job_info.html.erb +115 -0
  127. data/web/views/_metrics_period_select.html.erb +15 -0
  128. data/web/views/_nav.html.erb +45 -0
  129. data/web/views/_paging.html.erb +26 -0
  130. data/web/views/_poll_link.html.erb +4 -0
  131. data/web/views/_summary.html.erb +40 -0
  132. data/web/views/busy.html.erb +151 -0
  133. data/web/views/dashboard.html.erb +104 -0
  134. data/web/views/dead.html.erb +38 -0
  135. data/web/views/filtering.html.erb +6 -0
  136. data/web/views/layout.html.erb +26 -0
  137. data/web/views/metrics.html.erb +85 -0
  138. data/web/views/metrics_for_job.html.erb +58 -0
  139. data/web/views/morgue.html.erb +69 -0
  140. data/web/views/profiles.html.erb +43 -0
  141. data/web/views/queue.html.erb +57 -0
  142. data/web/views/queues.html.erb +46 -0
  143. data/web/views/retries.html.erb +77 -0
  144. data/web/views/retry.html.erb +39 -0
  145. data/web/views/scheduled.html.erb +64 -0
  146. data/web/views/{scheduled_job_info.erb → scheduled_job_info.html.erb} +3 -3
  147. metadata +130 -61
  148. data/LICENSE +0 -9
  149. data/lib/generators/sidekiq/worker_generator.rb +0 -57
  150. data/lib/sidekiq/delay.rb +0 -41
  151. data/lib/sidekiq/exception_handler.rb +0 -27
  152. data/lib/sidekiq/extensions/action_mailer.rb +0 -48
  153. data/lib/sidekiq/extensions/active_record.rb +0 -43
  154. data/lib/sidekiq/extensions/class_methods.rb +0 -43
  155. data/lib/sidekiq/extensions/generic_proxy.rb +0 -33
  156. data/lib/sidekiq/util.rb +0 -95
  157. data/lib/sidekiq/web/csrf_protection.rb +0 -180
  158. data/lib/sidekiq/worker.rb +0 -244
  159. data/web/assets/stylesheets/application-dark.css +0 -147
  160. data/web/assets/stylesheets/application-rtl.css +0 -246
  161. data/web/assets/stylesheets/application.css +0 -1053
  162. data/web/assets/stylesheets/bootstrap-rtl.min.css +0 -9
  163. data/web/assets/stylesheets/bootstrap.css +0 -5
  164. data/web/locales/zh-cn.yml +0 -68
  165. data/web/views/_footer.erb +0 -20
  166. data/web/views/_job_info.erb +0 -89
  167. data/web/views/_nav.erb +0 -52
  168. data/web/views/_paging.erb +0 -23
  169. data/web/views/_poll_link.erb +0 -7
  170. data/web/views/_status.erb +0 -4
  171. data/web/views/_summary.erb +0 -40
  172. data/web/views/busy.erb +0 -132
  173. data/web/views/dashboard.erb +0 -83
  174. data/web/views/dead.erb +0 -34
  175. data/web/views/layout.erb +0 -42
  176. data/web/views/morgue.erb +0 -78
  177. data/web/views/queue.erb +0 -55
  178. data/web/views/queues.erb +0 -38
  179. data/web/views/retries.erb +0 -83
  180. data/web/views/retry.erb +0 -34
  181. data/web/views/scheduled.erb +0 -57
@@ -1,9 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "sidekiq/util"
4
3
  require "sidekiq/processor"
5
- require "sidekiq/fetch"
6
- require "set"
7
4
 
8
5
  module Sidekiq
9
6
  ##
@@ -21,46 +18,38 @@ module Sidekiq
21
18
  # the shutdown process. The other tasks are performed by other threads.
22
19
  #
23
20
  class Manager
24
- include Util
21
+ include Sidekiq::Component
25
22
 
26
23
  attr_reader :workers
27
- attr_reader :options
24
+ attr_reader :capsule
28
25
 
29
- def initialize(options = {})
30
- logger.debug { options.inspect }
31
- @options = options
32
- @count = options[:concurrency] || 10
26
+ def initialize(capsule)
27
+ @config = @capsule = capsule
28
+ @count = capsule.concurrency
33
29
  raise ArgumentError, "Concurrency of #{@count} is not supported" if @count < 1
34
30
 
35
31
  @done = false
36
32
  @workers = Set.new
33
+ @plock = Mutex.new
37
34
  @count.times do
38
- @workers << Processor.new(self, options)
35
+ @workers << Processor.new(@config, &method(:processor_result))
39
36
  end
40
- @plock = Mutex.new
41
37
  end
42
38
 
43
39
  def start
44
- @workers.each do |x|
45
- x.start
46
- end
40
+ @workers.each(&:start)
47
41
  end
48
42
 
49
43
  def quiet
50
44
  return if @done
51
45
  @done = true
52
46
 
53
- logger.info { "Terminating quiet workers" }
54
- @workers.each { |x| x.terminate }
55
- fire_event(:quiet, reverse: true)
47
+ logger.info { "Terminating quiet threads for #{capsule.name} capsule" }
48
+ @workers.each(&:terminate)
56
49
  end
57
50
 
58
- # hack for quicker development / testing environment #2774
59
- PAUSE_TIME = $stdout.tty? ? 0.1 : 0.5
60
-
61
51
  def stop(deadline)
62
52
  quiet
63
- fire_event(:shutdown, reverse: true)
64
53
 
65
54
  # some of the shutdown events can be async,
66
55
  # we don't have any way to know when they're done but
@@ -68,29 +57,20 @@ module Sidekiq
68
57
  sleep PAUSE_TIME
69
58
  return if @workers.empty?
70
59
 
71
- logger.info { "Pausing to allow workers to finish..." }
72
- remaining = deadline - ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
73
- while remaining > PAUSE_TIME
74
- return if @workers.empty?
75
- sleep PAUSE_TIME
76
- remaining = deadline - ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
77
- end
60
+ logger.info { "Pausing to allow jobs to finish..." }
61
+ wait_for(deadline) { @workers.empty? }
78
62
  return if @workers.empty?
79
63
 
80
64
  hard_shutdown
65
+ ensure
66
+ capsule.stop
81
67
  end
82
68
 
83
- def processor_stopped(processor)
69
+ def processor_result(processor, reason = nil)
84
70
  @plock.synchronize do
85
71
  @workers.delete(processor)
86
- end
87
- end
88
-
89
- def processor_died(processor, reason)
90
- @plock.synchronize do
91
- @workers.delete(processor)
92
- unless @done
93
- p = Processor.new(self, options)
72
+ if !@done && @count > @workers.size
73
+ p = Processor.new(@config, &method(:processor_result))
94
74
  @workers << p
95
75
  p.start
96
76
  end
@@ -104,7 +84,7 @@ module Sidekiq
104
84
  private
105
85
 
106
86
  def hard_shutdown
107
- # We've reached the timeout and we still have busy workers.
87
+ # We've reached the timeout and we still have busy threads.
108
88
  # They must die but their jobs shall live on.
109
89
  cleanup = nil
110
90
  @plock.synchronize do
@@ -114,22 +94,40 @@ module Sidekiq
114
94
  if cleanup.size > 0
115
95
  jobs = cleanup.map { |p| p.job }.compact
116
96
 
117
- logger.warn { "Terminating #{cleanup.size} busy worker threads" }
118
- logger.warn { "Work still in progress #{jobs.inspect}" }
97
+ logger.warn { "Terminating #{cleanup.size} busy threads" }
98
+ logger.debug { "Jobs still in progress #{jobs.inspect}" }
119
99
 
120
100
  # Re-enqueue unfinished jobs
121
101
  # NOTE: You may notice that we may push a job back to redis before
122
- # the worker thread is terminated. This is ok because Sidekiq's
102
+ # the thread is terminated. This is ok because Sidekiq's
123
103
  # contract says that jobs are run AT LEAST once. Process termination
124
104
  # is delayed until we're certain the jobs are back in Redis because
125
105
  # it is worse to lose a job than to run it twice.
126
- strategy = @options[:fetch]
127
- strategy.bulk_requeue(jobs, @options)
106
+ capsule.fetcher.bulk_requeue(jobs)
128
107
  end
129
108
 
130
109
  cleanup.each do |processor|
131
110
  processor.kill
132
111
  end
112
+
113
+ # when this method returns, we immediately call `exit` which may not give
114
+ # the remaining threads time to run `ensure` blocks, etc. We pause here up
115
+ # to 3 seconds to give threads a minimal amount of time to run `ensure` blocks.
116
+ deadline = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC) + 3
117
+ wait_for(deadline) { @workers.empty? }
118
+ end
119
+
120
+ # hack for quicker development / testing environment #2774
121
+ PAUSE_TIME = $stdout.tty? ? 0.1 : 0.5
122
+
123
+ # Wait for the condblock to return true or for the deadline to pass.
124
+ def wait_for(deadline, &condblock)
125
+ remaining = deadline - ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
126
+ while remaining > PAUSE_TIME
127
+ return if condblock.call
128
+ sleep PAUSE_TIME
129
+ remaining = deadline - ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
130
+ end
133
131
  end
134
132
  end
135
133
  end
@@ -0,0 +1,184 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "date"
4
+ require "sidekiq"
5
+ require "sidekiq/metrics/shared"
6
+
7
+ module Sidekiq
8
+ module Metrics
9
+ # Allows caller to query for Sidekiq execution metrics within Redis.
10
+ # Caller passes filters as keyword arguments. {#top_jobs} and {#for_job} will call
11
+ # Redis and return a {Result}.
12
+ #
13
+ # NB: all metrics and times/dates are UTC only. We explicitly do not
14
+ # support timezones.
15
+ class Query
16
+ def initialize(pool: nil, now: Time.now)
17
+ @time = now.utc
18
+ @pool = pool || Sidekiq.default_configuration.redis_pool
19
+ @klass = nil
20
+ end
21
+
22
+ ROLLUPS = {
23
+ # minutely aggregates per minute
24
+ minutely: [60, ->(time) { time.strftime("j|%y%m%d|%-H:%M") }],
25
+ # hourly aggregates every 10 minutes so we'll have six data points per hour
26
+ hourly: [600, ->(time) {
27
+ m = time.min
28
+ mins = (m < 10) ? "0" : m.to_s[0]
29
+ time.strftime("j|%y%m%d|%-H:#{mins}")
30
+ }]
31
+ }
32
+
33
+ # Get metric data for all jobs, by default from the last hour
34
+ # +class_filter+: return only results for classes matching filter
35
+ # +minutes+: the number of fine-grained minute buckets to retrieve
36
+ # +hours+: the number of coarser-grained 10-minute buckets to retrieve, in hours
37
+ def top_jobs(class_filter: nil, minutes: nil, hours: nil)
38
+ time = @time
39
+ minutes = 60 unless minutes || hours
40
+
41
+ # DoS protection, sanity check
42
+ minutes = 60 if minutes && minutes > 480
43
+ hours = 72 if hours && hours > 72
44
+
45
+ granularity = hours ? :hourly : :minutely
46
+ result = Result.new(granularity)
47
+ result.ends_at = time
48
+ count = hours ? hours * 6 : minutes
49
+ stride, keyproc = ROLLUPS[granularity]
50
+
51
+ redis_results = @pool.with do |conn|
52
+ conn.pipelined do |pipe|
53
+ count.times do |idx|
54
+ key = keyproc.call(time)
55
+ pipe.hgetall key
56
+ time -= stride
57
+ end
58
+ end
59
+ end
60
+
61
+ result.starts_at = time
62
+ time = @time
63
+ redis_results.each do |hash|
64
+ hash.each do |k, v|
65
+ kls, metric = k.split("|")
66
+ next if class_filter && !class_filter.match?(kls)
67
+ result.job_results[kls].add_metric metric, time, v.to_i
68
+ end
69
+ time -= stride
70
+ end
71
+
72
+ result.marks = fetch_marks(result.starts_at..result.ends_at, granularity)
73
+ result
74
+ end
75
+
76
+ def for_job(klass, minutes: nil, hours: nil)
77
+ time = @time
78
+ minutes = 60 unless minutes || hours
79
+
80
+ # DoS protection, sanity check
81
+ minutes = 60 if minutes && minutes > 480
82
+ hours = 72 if hours && hours > 72
83
+
84
+ granularity = hours ? :hourly : :minutely
85
+ result = Result.new(granularity)
86
+ result.ends_at = time
87
+ count = hours ? hours * 6 : minutes
88
+ stride, keyproc = ROLLUPS[granularity]
89
+
90
+ redis_results = @pool.with do |conn|
91
+ conn.pipelined do |pipe|
92
+ count.times do |idx|
93
+ key = keyproc.call(time)
94
+ pipe.hmget key, "#{klass}|ms", "#{klass}|p", "#{klass}|f"
95
+ time -= stride
96
+ end
97
+ end
98
+ end
99
+
100
+ result.starts_at = time
101
+ time = @time
102
+ @pool.with do |conn|
103
+ redis_results.each do |(ms, p, f)|
104
+ result.job_results[klass].add_metric "ms", time, ms.to_i if ms
105
+ result.job_results[klass].add_metric "p", time, p.to_i if p
106
+ result.job_results[klass].add_metric "f", time, f.to_i if f
107
+ result.job_results[klass].add_hist time, Histogram.new(klass).fetch(conn, time).reverse if minutes
108
+ time -= stride
109
+ end
110
+ end
111
+
112
+ result.marks = fetch_marks(result.starts_at..result.ends_at, granularity)
113
+ result
114
+ end
115
+
116
+ class Result < Struct.new(:granularity, :starts_at, :ends_at, :size, :job_results, :marks)
117
+ def initialize(granularity = :minutely)
118
+ super
119
+ self.granularity = granularity
120
+ self.marks = []
121
+ self.job_results = Hash.new { |h, k| h[k] = JobResult.new(granularity) }
122
+ end
123
+ end
124
+
125
+ class JobResult < Struct.new(:granularity, :series, :hist, :totals)
126
+ def initialize(granularity = :minutely)
127
+ super
128
+ self.granularity = granularity
129
+ self.series = Hash.new { |h, k| h[k] = Hash.new(0) }
130
+ self.hist = Hash.new { |h, k| h[k] = [] }
131
+ self.totals = Hash.new(0)
132
+ end
133
+
134
+ def add_metric(metric, time, value)
135
+ totals[metric] += value
136
+ series[metric][Query.bkt_time_s(time, granularity)] += value
137
+
138
+ # Include timing measurements in seconds for convenience
139
+ add_metric("s", time, value / 1000.0) if metric == "ms"
140
+ end
141
+
142
+ def add_hist(time, hist_result)
143
+ hist[Query.bkt_time_s(time, granularity)] = hist_result
144
+ end
145
+
146
+ def total_avg(metric = "ms")
147
+ completed = totals["p"] - totals["f"]
148
+ return 0 if completed.zero?
149
+ totals[metric].to_f / completed
150
+ end
151
+
152
+ def series_avg(metric = "ms")
153
+ series[metric].each_with_object(Hash.new(0)) do |(bucket, value), result|
154
+ completed = series.dig("p", bucket) - series.dig("f", bucket)
155
+ result[bucket] = (completed == 0) ? 0 : value.to_f / completed
156
+ end
157
+ end
158
+ end
159
+
160
+ MarkResult = Struct.new(:time, :label, :bucket)
161
+
162
+ def self.bkt_time_s(time, granularity)
163
+ # truncate time to ten minutes ("8:40", not "8:43") or one minute
164
+ truncation = (granularity == :hourly) ? 600 : 60
165
+ Time.at(time.to_i - time.to_i % truncation).utc.iso8601
166
+ end
167
+
168
+ private
169
+
170
+ def fetch_marks(time_range, granularity)
171
+ [].tap do |result|
172
+ marks = @pool.with { |c| c.hgetall("#{@time.strftime("%Y%m%d")}-marks") }
173
+
174
+ marks.each do |timestamp, label|
175
+ time = Time.parse(timestamp)
176
+ if time_range.cover? time
177
+ result << MarkResult.new(time, label, Query.bkt_time_s(time, granularity))
178
+ end
179
+ end
180
+ end
181
+ end
182
+ end
183
+ end
184
+ end
@@ -0,0 +1,109 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Sidekiq
4
+ module Metrics
5
+ class Counter
6
+ def initialize
7
+ @value = 0
8
+ @lock = Mutex.new
9
+ end
10
+
11
+ def increment
12
+ @lock.synchronize { @value += 1 }
13
+ end
14
+
15
+ def value
16
+ @lock.synchronize { @value }
17
+ end
18
+ end
19
+
20
+ # Implements space-efficient but statistically useful histogram storage.
21
+ # A precise time histogram stores every time. Instead we break times into a set of
22
+ # known buckets and increment counts of the associated time bucket. Even if we call
23
+ # the histogram a million times, we'll still only store 26 buckets.
24
+ # NB: needs to be thread-safe or resilient to races.
25
+ #
26
+ # To store this data, we use Redis' BITFIELD command to store unsigned 16-bit counters
27
+ # per bucket per klass per minute. It's unlikely that most people will be executing more
28
+ # than 1000 jobs/sec for a full minute of a specific type (i.e. overflow 65,536).
29
+ #
30
+ # Histograms are only stored at the fine-grained level, they are not rolled up
31
+ # for longer-term buckets.
32
+ class Histogram
33
+ include Enumerable
34
+
35
+ # This number represents the maximum milliseconds for this bucket.
36
+ # 20 means all job executions up to 20ms, e.g. if a job takes
37
+ # 280ms, it'll increment bucket[7]. Note we can track job executions
38
+ # up to about 5.5 minutes. After that, it's assumed you're probably
39
+ # not too concerned with its performance.
40
+ BUCKET_INTERVALS = [
41
+ 20, 30, 45, 65, 100,
42
+ 150, 225, 335, 500, 750,
43
+ 1100, 1700, 2500, 3800, 5750,
44
+ 8500, 13000, 20000, 30000, 45000,
45
+ 65000, 100000, 150000, 225000, 335000,
46
+ 1e20 # the "maybe your job is too long" bucket
47
+ ].freeze
48
+ LABELS = [
49
+ "20ms", "30ms", "45ms", "65ms", "100ms",
50
+ "150ms", "225ms", "335ms", "500ms", "750ms",
51
+ "1.1s", "1.7s", "2.5s", "3.8s", "5.75s",
52
+ "8.5s", "13s", "20s", "30s", "45s",
53
+ "65s", "100s", "150s", "225s", "335s",
54
+ "Slow"
55
+ ].freeze
56
+ FETCH = "GET u16 #0 GET u16 #1 GET u16 #2 GET u16 #3 \
57
+ GET u16 #4 GET u16 #5 GET u16 #6 GET u16 #7 \
58
+ GET u16 #8 GET u16 #9 GET u16 #10 GET u16 #11 \
59
+ GET u16 #12 GET u16 #13 GET u16 #14 GET u16 #15 \
60
+ GET u16 #16 GET u16 #17 GET u16 #18 GET u16 #19 \
61
+ GET u16 #20 GET u16 #21 GET u16 #22 GET u16 #23 \
62
+ GET u16 #24 GET u16 #25".split
63
+ HISTOGRAM_TTL = 8 * 60 * 60
64
+
65
+ def each
66
+ buckets.each { |counter| yield counter.value }
67
+ end
68
+
69
+ def label(idx)
70
+ LABELS[idx]
71
+ end
72
+
73
+ attr_reader :buckets
74
+ def initialize(klass)
75
+ @klass = klass
76
+ @buckets = Array.new(BUCKET_INTERVALS.size) { Counter.new }
77
+ end
78
+
79
+ def record_time(ms)
80
+ index_to_use = BUCKET_INTERVALS.each_index do |idx|
81
+ break idx if ms < BUCKET_INTERVALS[idx]
82
+ end
83
+
84
+ @buckets[index_to_use].increment
85
+ end
86
+
87
+ def fetch(conn, now = Time.now)
88
+ window = now.utc.strftime("%-d-%-H:%-M")
89
+ key = "h|#{@klass}-#{window}"
90
+ conn.bitfield_ro(key, *FETCH)
91
+ end
92
+
93
+ def persist(conn, now = Time.now)
94
+ buckets, @buckets = @buckets, []
95
+ window = now.utc.strftime("%-d-%-H:%-M")
96
+ key = "h|#{@klass}-#{window}"
97
+ cmd = [key, "OVERFLOW", "SAT"]
98
+ buckets.each_with_index do |counter, idx|
99
+ val = counter.value
100
+ cmd << "INCRBY" << "u16" << "##{idx}" << val.to_s if val > 0
101
+ end
102
+
103
+ conn.bitfield(*cmd) if cmd.size > 3
104
+ conn.expire(key, HISTOGRAM_TTL)
105
+ key
106
+ end
107
+ end
108
+ end
109
+ end
@@ -0,0 +1,153 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "time"
4
+ require "sidekiq"
5
+ require "sidekiq/metrics/shared"
6
+
7
+ # This file contains the components which track execution metrics within Sidekiq.
8
+ module Sidekiq
9
+ module Metrics
10
+ class ExecutionTracker
11
+ include Sidekiq::Component
12
+
13
+ def initialize(config)
14
+ @config = config
15
+ @jobs = Hash.new(0)
16
+ @totals = Hash.new(0)
17
+ @grams = Hash.new { |hash, key| hash[key] = Histogram.new(key) }
18
+ @lock = Mutex.new
19
+ end
20
+
21
+ def track(queue, klass)
22
+ start = mono_ms
23
+ time_ms = 0
24
+ begin
25
+ begin
26
+ yield
27
+ ensure
28
+ finish = mono_ms
29
+ time_ms = finish - start
30
+ end
31
+ # We don't track time for failed jobs as they can have very unpredictable
32
+ # execution times. It is more important to know the average time for successful jobs so we
33
+ # can better recognize when a perf regression is introduced.
34
+ track_time(klass, time_ms)
35
+ rescue JobRetry::Skip
36
+ # This is raised when an iterable job is interrupted.
37
+ track_time(klass, time_ms)
38
+ raise
39
+ rescue Exception
40
+ @lock.synchronize {
41
+ @jobs["#{klass}|f"] += 1
42
+ @totals["f"] += 1
43
+ }
44
+ raise
45
+ ensure
46
+ @lock.synchronize {
47
+ @jobs["#{klass}|p"] += 1
48
+ @totals["p"] += 1
49
+ }
50
+ end
51
+ end
52
+
53
+ # LONG_TERM = 90 * 24 * 60 * 60
54
+ MID_TERM = 3 * 24 * 60 * 60
55
+ SHORT_TERM = 8 * 60 * 60
56
+
57
+ def flush(time = Time.now)
58
+ totals, jobs, grams = reset
59
+ procd = totals["p"]
60
+ fails = totals["f"]
61
+ return if procd == 0 && fails == 0
62
+
63
+ now = time.utc
64
+ # nowdate = now.strftime("%Y%m%d")
65
+ # "250214|8:4" is the 10 minute bucket for Feb 14 2025, 08:43
66
+ nowmid = now.strftime("%y%m%d|%-H:%M")[0..-2]
67
+ # "250214|8:43" is the 1 minute bucket for Feb 14 2025, 08:43
68
+ nowshort = now.strftime("%y%m%d|%-H:%M")
69
+ count = 0
70
+
71
+ redis do |conn|
72
+ # persist fine-grained histogram data
73
+ if grams.size > 0
74
+ conn.pipelined do |pipe|
75
+ grams.each do |_, gram|
76
+ gram.persist(pipe, now)
77
+ end
78
+ end
79
+ end
80
+
81
+ # persist coarse grained execution count + execution millis.
82
+ # note as of today we don't use or do anything with the
83
+ # daily or hourly rollups.
84
+ [
85
+ # ["j", jobs, nowdate, LONG_TERM],
86
+ ["j", jobs, nowmid, MID_TERM],
87
+ ["j", jobs, nowshort, SHORT_TERM]
88
+ ].each do |prefix, data, bucket, ttl|
89
+ conn.pipelined do |xa|
90
+ stats = "#{prefix}|#{bucket}"
91
+ data.each_pair do |key, value|
92
+ xa.hincrby stats, key, value
93
+ count += 1
94
+ end
95
+ xa.expire(stats, ttl)
96
+ end
97
+ end
98
+ logger.debug "Flushed #{count} metrics"
99
+ count
100
+ end
101
+ end
102
+
103
+ private
104
+
105
+ def track_time(klass, time_ms)
106
+ @lock.synchronize {
107
+ @grams[klass].record_time(time_ms)
108
+ @jobs["#{klass}|ms"] += time_ms
109
+ @totals["ms"] += time_ms
110
+ }
111
+ end
112
+
113
+ def reset
114
+ @lock.synchronize {
115
+ array = [@totals, @jobs, @grams]
116
+ reset_instance_variables
117
+ array
118
+ }
119
+ end
120
+
121
+ def reset_instance_variables
122
+ @totals = Hash.new(0)
123
+ @jobs = Hash.new(0)
124
+ @grams = Hash.new { |hash, key| hash[key] = Histogram.new(key) }
125
+ end
126
+ end
127
+
128
+ class Middleware
129
+ include Sidekiq::ServerMiddleware
130
+
131
+ def initialize(options)
132
+ @exec = options
133
+ end
134
+
135
+ def call(_instance, hash, queue, &block)
136
+ @exec.track(queue, hash["wrapped"] || hash["class"], &block)
137
+ end
138
+ end
139
+ end
140
+ end
141
+
142
+ Sidekiq.configure_server do |config|
143
+ exec = Sidekiq::Metrics::ExecutionTracker.new(config)
144
+ config.server_middleware do |chain|
145
+ chain.add Sidekiq::Metrics::Middleware, exec
146
+ end
147
+ config.on(:beat) do
148
+ exec.flush
149
+ end
150
+ config.on(:exit) do
151
+ exec.flush
152
+ end
153
+ end