sidekiq-iteration 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,267 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "throttling"
4
+ require_relative "enumerators"
5
+
6
+ module SidekiqIteration
7
+ module Iteration
8
+ include Enumerators
9
+
10
# Mixin hook run when a job class includes Iteration. Extends the class with
# ClassMethods and Throttling, then registers the two built-in throttle
# conditions (both with a zero backoff).
#
# @private
# @param base [Class] the class including this module
def self.included(base)
  base.extend(ClassMethods)
  base.extend(Throttling)

  base.class_eval do
    # Interrupt once the current run has lasted longer than `max_job_runtime`.
    throttle_on(backoff: 0) do |job|
      runtime_limit = job.class.max_job_runtime
      runtime_limit &&
        job.start_time &&
        (Time.now.utc - job.start_time) > runtime_limit
    end

    # Interrupt when the Sidekiq launcher reports that it is stopping.
    # Only evaluated when Sidekiq::CLI is loaded.
    throttle_on(backoff: 0) do
      defined?(Sidekiq::CLI) && Sidekiq::CLI.instance.launcher.stopping?
    end
  end

  super
end
30
+
31
# Class-level behaviour added to every class that includes Iteration.
#
# @private
module ClassMethods
  # Per-class override of `SidekiqIteration.max_job_runtime`.
  attr_writer :max_job_runtime

  # Hands each subclass its own copy of the parent's throttle conditions.
  def inherited(base)
    base.throttle_conditions = throttle_conditions.dup
    super
  end

  # Forbids redefining #perform, which Iteration implements itself.
  #
  # @raise [RuntimeError] when a method named `perform` is added
  def method_added(method_name)
    raise "Job that is using Iteration cannot redefine #perform" if method_name == :perform

    super
  end

  # @return [Object, nil] the class-level value when one was assigned
  #   (even nil), otherwise the global `SidekiqIteration.max_job_runtime`
  def max_job_runtime
    return @max_job_runtime if defined?(@max_job_runtime)

    SidekiqIteration.max_job_runtime
  end
end
56
+
57
# Read-only state for the current run and the totals carried over from
# previous runs of the same job.
attr_reader(*%i[executions cursor_position start_time times_interrupted total_time current_run_iterations])
63
+
64
# Resets the per-run bookkeeping after the superclass initializer has run.
#
# @private
def initialize
  super
  @current_run_iterations = 0
  @needs_reenqueue = false
  @job_iteration_retry_backoff = nil
  @arguments = nil
end
72
+
73
# Job entry point. Strips the iteration metadata of previous runs from the
# argument list, remembers the remaining arguments for a possible re-enqueue
# and starts the interruptible iteration.
#
# @private
def perform(*arguments)
  extract_previous_runs_metadata(arguments)
  @arguments = arguments
  interruptible_perform(*arguments)
end
79
+
80
# Hook for subclasses: invoked before iterating on the very first run of the
# job (first execution, never interrupted before).
def on_start; end

# Hook for subclasses: invoked before iterating on every run that is not the
# first one.
def on_resume; end

# Hook for subclasses: invoked at the end of every run once iteration stops,
# whether the run was interrupted (throttling, `max_job_runtime`, sidekiq
# restarting) or the enumerator was exhausted.
def on_shutdown; end

# Hook for subclasses: invoked when the job finished iterating.
def on_complete; end
98
+
99
# Builds the enumerator the job iterates over. Must be overridden.
#
# @return [Enumerator]
#
# @raise [NotImplementedError] always, unless overridden by the including class
#
def build_enumerator(*)
  message = "#{self.class.name} must implement a 'build_enumerator' method"
  raise NotImplementedError, message
end
109
+
110
# Processes a single item produced by the enumerator. Must be overridden.
#
# @return [void]
#
# @raise [NotImplementedError] always, unless overridden by the including class
#
def each_iteration(*)
  message = "#{self.class.name} must implement an 'each_iteration' method"
  raise NotImplementedError, message
end
120
+
121
+ private
122
# Restores the counters saved by a previous run and removes that metadata
# from the job arguments.
#
# The metadata lives under the "sidekiq_iteration" key of the trailing Hash
# argument. Only that key is stripped: when the trailing Hash also carries
# other keys (it is one of the job's own arguments that metadata was merged
# into), the rest of the Hash is kept as an argument instead of being dropped.
# The original Hash object is never mutated, because it may be shared with the
# job payload used to schedule a retry.
#
# @param arguments [Array] job arguments; modified in place
# @return [void]
def extract_previous_runs_metadata(arguments)
  trailing = arguments.last
  options = {}

  if trailing.is_a?(Hash) && trailing.key?("sidekiq_iteration")
    options = trailing["sidekiq_iteration"] || {}

    if trailing.size == 1
      # Dedicated metadata hash: remove it altogether.
      arguments.pop
    else
      # A job argument with metadata merged in: keep everything else.
      arguments[-1] = trailing.reject { |key, _value| key == "sidekiq_iteration" }
    end
  end

  @executions = options["executions"] || 0
  @cursor_position = options["cursor_position"]
  @times_interrupted = options["times_interrupted"] || 0
  @total_time = options["total_time"] || 0
end
135
+
136
# Runs one execution of the job: builds the enumerator, fires the start or
# resume hook, iterates until exhausted or interrupted, then either
# re-enqueues the job or reports completion.
#
# @param arguments [Array] job arguments without iteration metadata
def interruptible_perform(*arguments)
  @executions += 1
  @start_time = Time.now.utc

  enumerator = build_enumerator(*arguments, cursor: cursor_position)
  unless enumerator
    SidekiqIteration.logger.info("[SidekiqIteration::Iteration] `build_enumerator` returned nil. Skipping the job.")
    return
  end

  assert_enumerator!(enumerator)

  first_run = executions == 1 && times_interrupted == 0
  first_run ? on_start : on_resume

  # `throw :abort` from inside the iteration lands here; the thrown value is
  # interpreted by `handle_completed`.
  thrown_or_result = catch(:abort) { iterate_with_enumerator(enumerator, arguments) }

  on_shutdown
  finished = handle_completed(thrown_or_result)

  return reenqueue_iteration_job if @needs_reenqueue
  return unless finished

  on_complete
  output_interrupt_summary
end
168
+
169
# Feeds every item of the enumerator to `each_iteration`, recording the
# cursor after each processed item, and stops early as soon as a throttle
# condition is met.
#
# @param enumerator [Enumerator] yields `[item, cursor]` pairs
# @param arguments [Array] job arguments forwarded to `each_iteration`
# @return [Boolean] true when the enumerator was exhausted, false when the
#   run was interrupted and the job needs to be re-enqueued
def iterate_with_enumerator(enumerator, arguments)
  @needs_reenqueue = false
  yielded_anything = false

  enumerator.each do |item, cursor|
    yielded_anything = true
    each_iteration(item, *arguments)
    @cursor_position = cursor
    @current_run_iterations += 1

    condition = find_throttle_condition
    next unless condition

    @job_iteration_retry_backoff = condition.backoff
    @needs_reenqueue = true
    return false
  end

  unless yielded_anything
    SidekiqIteration.logger.info(
      "[SidekiqIteration::Iteration] Enumerator found nothing to iterate! " \
      "times_interrupted=#{times_interrupted} cursor_position=#{cursor_position}",
    )
  end

  adjust_total_time
  true
end
197
+
198
# Schedules the next run of an interrupted job. The run counters are appended
# to the job arguments under the "sidekiq_iteration" key so the next run can
# resume from the recorded cursor.
def reenqueue_iteration_job
  SidekiqIteration.logger.info("[SidekiqIteration::Iteration] Interrupting and re-enqueueing the job cursor_position=#{cursor_position}")

  adjust_total_time
  @times_interrupted += 1

  metadata = {
    "executions" => executions,
    "cursor_position" => cursor_position,
    "times_interrupted" => times_interrupted,
    "total_time" => total_time,
  }
  @arguments.push("sidekiq_iteration" => metadata)

  self.class.perform_in(@job_iteration_retry_backoff, *@arguments)
end
215
+
216
# Adds the duration of the current run (seconds, rounded to 6 decimal places)
# to the running total.
def adjust_total_time
  elapsed = Time.now.utc.to_f - start_time.to_f
  @total_time += elapsed.round(6)
end
219
+
220
# Verifies that `build_enumerator` returned an Enumerator.
#
# @param enum [Object] the value returned by `build_enumerator`
# @raise [ArgumentError] when `enum` is not an Enumerator
def assert_enumerator!(enum)
  unless enum.is_a?(Enumerator)
    raise ArgumentError, <<~MSG
      #build_enumerator is expected to return Enumerator object, but returned #{enum.class}.
      Example:
      def build_enumerator(params, cursor:)
      enumerator_builder.active_record_on_records(
      Shop.find(params[:shop_id]).products,
      cursor: cursor
      )
      end
    MSG
  end
end
234
+
235
# Logs how many times the job was interrupted and how long it iterated in
# total.
def output_interrupt_summary
  summary = format(
    "[SidekiqIteration::Iteration] Completed iterating. times_interrupted=%d total_time=%.3f",
    times_interrupted,
    total_time,
  )
  SidekiqIteration.logger.info(summary)
end
240
+
241
# @return [Object, nil] the first of the class's throttle conditions whose
#   `valid?` is truthy for this job, or nil when none applies
def find_throttle_condition
  conditions = self.class.throttle_conditions
  conditions.find { |condition| condition.valid?(self) }
end
246
+
247
# Translates the outcome of the iteration (its return value, or the value
# passed to `throw :abort`) into a "completed" flag.
#
# @param completed [nil, Boolean, Symbol, Array]
#   - nil or true: treat the job as completed (nil is what a bare
#     `throw :abort` produces)
#   - false or :skip_complete_callback: not completed
#   - [:retry, backoff]: not completed; re-enqueue after `backoff`
# @return [Boolean] whether the completion callback should run
# @raise [RuntimeError] for an unknown Array reason or any other value
def handle_completed(completed)
  case completed
  when nil, true
    true
  when false, :skip_complete_callback
    false
  when Array
    reason, backoff = completed
    raise "Unknown reason: #{reason}" unless reason == :retry

    @job_iteration_retry_backoff = backoff
    @needs_reenqueue = true
    false
  else
    raise "Unexpected thrown value: #{completed.inspect}"
  end
end
266
+ end
267
+ end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "sidekiq/job_retry"
4
+
5
module SidekiqIteration
  # Prepended to Sidekiq::JobRetry so that a failing Iteration job is retried
  # with its progress (cursor and counters) attached to the job arguments.
  #
  # @private
  module JobRetryPatch
    private

    # Stores the iteration metadata of `jobinst` in the job payload before
    # delegating to the original retry logic.
    #
    # The metadata is kept in a dedicated trailing hash. It is never merged
    # into a trailing Hash that is one of the job's own arguments, since
    # Iteration#perform removes the metadata hash before running the job and
    # would take that argument away with it. A trailing hash that already
    # carries the "sidekiq_iteration" key (left by a previous retry or
    # re-enqueue) is updated in place so metadata hashes do not pile up.
    def process_retry(jobinst, msg, queue, exception)
      if jobinst.is_a?(Iteration)
        metadata = {
          "executions" => jobinst.executions,
          "cursor_position" => jobinst.cursor_position,
          "times_interrupted" => jobinst.times_interrupted,
          "total_time" => jobinst.total_time,
        }

        args = msg["args"]
        trailing = args.last
        if trailing.is_a?(Hash) && trailing.key?("sidekiq_iteration")
          trailing["sidekiq_iteration"] = metadata
        else
          args.push("sidekiq_iteration" => metadata)
        end
      end

      super
    end
  end
end
27
+
28
# Install the patch. `instance_method` raises NameError when Sidekiq::JobRetry
# has no `process_retry`, so the patch is never installed silently against an
# incompatible Sidekiq.
Sidekiq::JobRetry.prepend(SidekiqIteration::JobRetryPatch) if Sidekiq::JobRetry.instance_method(:process_retry)
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
module SidekiqIteration
  # Chains several cursor-aware enumerators so that each item of an outer
  # enumerator drives a fresh inner enumerator. Yields the innermost item
  # together with a composite cursor holding one entry per level.
  #
  # Cursors are treated as exclusive: an enumerator built with cursor `c` is
  # expected to start right after the item that produced `c`. That matches
  # how Iteration records the cursor only after an item has been processed.
  #
  # @private
  class NestedEnumerator
    # @param enums [Array<Proc>] one builder per level. Each is called with
    #   the current items of all outer levels followed by the cursor to resume
    #   from, and must return an object whose `each` yields `[item, cursor]`.
    # @param cursor [Array, nil] composite cursor to resume from
    # @raise [ArgumentError] when `enums` holds non-procs or `cursor` does not
    #   have one entry per enum
    def initialize(enums, cursor: nil)
      raise ArgumentError, "enums must contain only procs/lambdas" unless enums.all?(Proc)
      raise ArgumentError, "cursor should have one item per enum" if cursor && enums.size != cursor.size

      @enums = enums
      @cursor = cursor || Array.new(enums.size)
    end

    # @yieldparam item [Object] item of the innermost enumerator
    # @yieldparam cursor [Array] composite cursor to resume after `item`
    # @return [Enumerator] when no block is given
    def each(&block)
      return to_enum unless block_given?

      # Work on a copy: the resume cursor is consumed while iterating and
      # `each` must give the same result every time it is called.
      iterate([], [], 0, @cursor.dup, &block)
    end

    private

    def iterate(current_items, current_cursor, index, resume_cursor, &block)
      cursor = resume_cursor[index]
      enum = @enums[index].call(*current_items, cursor)
      innermost = index == @enums.size - 1

      enum.each do |item, cursor_value|
        if innermost
          yield item, current_cursor + [cursor_value]
        else
          # For outer levels record the cursor that leads *to* the current
          # item, not the item's own cursor: resuming is exclusive, so the
          # item's own cursor would skip its remaining inner records.
          iterate(current_items + [item], current_cursor + [cursor], index + 1, resume_cursor, &block)

          # The saved inner cursors belong to the outer item just finished;
          # every following outer item starts its inner levels from scratch.
          resume_cursor.fill(nil, index + 1)
          cursor = cursor_value
        end
      end
    end
  end
end
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
module SidekiqIteration
  # Class-level DSL for declaring the conditions under which an iterating job
  # interrupts itself and is re-enqueued later.
  module Throttling
    # A throttle predicate paired with the delay to apply when it is met.
    #
    # @private
    class ThrottleCondition
      # @param condition [#call] receives the job; truthy means "throttle"
      # @param backoff [Numeric, #call] delay, or a callable producing it
      def initialize(condition, backoff)
        @condition = condition
        @backoff = backoff
      end

      # @param job [Object] the running job instance
      # @return [Object] truthy when the job should be throttled
      def valid?(job)
        @condition.call(job)
      end

      # @return [Object] the backoff; any callable (not only a Proc) is
      #   invoked to compute it, as the `throttle_on` documentation promises
      def backoff
        @backoff.respond_to?(:call) ? @backoff.call : @backoff
      end
    end

    # @private
    attr_writer :throttle_conditions

    # @private
    def throttle_conditions
      @throttle_conditions ||= []
    end

    # Add a condition under which this job will be throttled.
    #
    # @param backoff [Numeric, #call] (30) a custom backoff (in seconds).
    #   This is the time to wait before retrying the job.
    # @yieldparam job [Sidekiq::Job] current sidekiq job that is yielded to `condition` proc
    # @yieldreturn [Boolean] whether the throttle condition is being met,
    #   indicating that the job should throttle.
    # @raise [ArgumentError] when called without a block
    #
    def throttle_on(backoff: 30, &condition)
      # Fail at declaration time rather than with a NoMethodError on nil while
      # the job is iterating.
      raise ArgumentError, "throttle_on requires a block" unless condition

      throttle_conditions << ThrottleCondition.new(condition, backoff)
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module SidekiqIteration
  # Version of the gem.
  VERSION = "0.1.0"
end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "sidekiq"
4
+ require_relative "sidekiq_iteration/version"
5
+
6
module SidekiqIteration
  class << self
    # Maximum time a job may keep iterating before it is interrupted and
    # re-enqueued. The value is compared against an elapsed time in seconds.
    # Defaults to nil, which means that jobs will not be interrupted except
    # on termination signal.
    #
    # @example Global setting
    #   SidekiqIteration.max_job_runtime = 5.minutes
    #
    # @example Per-job setting
    #   class MyJob
    #     # ...
    #     self.max_job_runtime = 1.minute
    #     # ...
    #   end
    #
    attr_accessor :max_job_runtime

    # Set a custom logger for sidekiq-iteration.
    #
    # @example
    #   SidekiqIteration.logger = Logger.new("log/sidekiq-iteration.log")
    #
    attr_writer :logger
  end

  # @return [Object] the configured logger; falls back to (and memoizes)
  #   `Sidekiq.logger` when none was set
  def self.logger
    @logger ||= Sidekiq.logger
  end
end
38
+
39
+ require_relative "sidekiq_iteration/iteration"
40
+ require_relative "sidekiq_iteration/job_retry_patch"
metadata ADDED
@@ -0,0 +1,80 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: sidekiq-iteration
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - fatkodima
8
+ - Shopify
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2022-11-02 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: sidekiq
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - ">="
19
+ - !ruby/object:Gem::Version
20
+ version: '6.0'
21
+ type: :runtime
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ version: '6.0'
28
+ description:
29
+ email:
30
+ - fatkodima123@gmail.com
31
+ executables: []
32
+ extensions: []
33
+ extra_rdoc_files: []
34
+ files:
35
+ - CHANGELOG.md
36
+ - LICENSE.txt
37
+ - README.md
38
+ - guides/best-practices.md
39
+ - guides/custom-enumerator.md
40
+ - guides/iteration-how-it-works.md
41
+ - guides/throttling.md
42
+ - lib/sidekiq-iteration.rb
43
+ - lib/sidekiq_iteration.rb
44
+ - lib/sidekiq_iteration/active_record_batch_enumerator.rb
45
+ - lib/sidekiq_iteration/active_record_cursor.rb
46
+ - lib/sidekiq_iteration/active_record_enumerator.rb
47
+ - lib/sidekiq_iteration/csv_enumerator.rb
48
+ - lib/sidekiq_iteration/enumerators.rb
49
+ - lib/sidekiq_iteration/iteration.rb
50
+ - lib/sidekiq_iteration/job_retry_patch.rb
51
+ - lib/sidekiq_iteration/nested_enumerator.rb
52
+ - lib/sidekiq_iteration/throttling.rb
53
+ - lib/sidekiq_iteration/version.rb
54
+ homepage: https://github.com/fatkodima/sidekiq-iteration
55
+ licenses:
56
+ - MIT
57
+ metadata:
58
+ homepage_uri: https://github.com/fatkodima/sidekiq-iteration
59
+ source_code_uri: https://github.com/fatkodima/sidekiq-iteration
60
+ changelog_uri: https://github.com/fatkodima/sidekiq-iteration/blob/master/CHANGELOG.md
61
+ post_install_message:
62
+ rdoc_options: []
63
+ require_paths:
64
+ - lib
65
+ required_ruby_version: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: 2.7.0
70
+ required_rubygems_version: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: '0'
75
+ requirements: []
76
+ rubygems_version: 3.1.6
77
+ signing_key:
78
+ specification_version: 4
79
+ summary: Makes your sidekiq jobs interruptible and resumable.
80
+ test_files: []