sidekiq-iteration 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,267 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "throttling"
4
+ require_relative "enumerators"
5
+
6
+ module SidekiqIteration
7
+ module Iteration
8
+ include Enumerators
9
+
10
+ # @private
11
# Hook run when a job class includes Iteration.
#
# Extends the class with ClassMethods and the Throttling DSL, then registers
# two built-in throttle conditions (both with a backoff of 0):
#   1. the current run has lasted longer than the class's `max_job_runtime`;
#   2. the Sidekiq process is stopping.
#
# @param base [Class] the class including this module
def self.included(base)
  base.extend(ClassMethods)
  base.extend(Throttling)

  base.class_eval do
    # Interrupt once this run exceeds the configured maximum runtime.
    # Both the limit and the start time may be unset, hence the guards.
    throttle_on(backoff: 0) do |job|
      job.class.max_job_runtime &&
        job.start_time &&
        (Time.now.utc - job.start_time) > job.class.max_job_runtime
    end

    # Interrupt when Sidekiq is shutting down.
    # `Sidekiq::CLI` can be loaded without a launcher having been assigned
    # (e.g. the CLI file is required but the server was never booted), so
    # use safe navigation instead of raising NoMethodError on nil.
    throttle_on(backoff: 0) do
      defined?(Sidekiq::CLI) &&
        Sidekiq::CLI.instance.launcher&.stopping?
    end
  end

  super
end
30
+
31
+ # @private
32
# Class-level behaviour added to every job class that includes Iteration.
module ClassMethods
  # Per-class override of the global `SidekiqIteration.max_job_runtime`.
  attr_writer :max_job_runtime

  # Gives each subclass its own copy of the parent's throttle conditions.
  def inherited(subclass)
    subclass.throttle_conditions = throttle_conditions.dup
    super
  end

  # Prevents the job class from defining its own `#perform`.
  #
  # @raise [RuntimeError] when `perform` is defined on the class
  def method_added(name)
    raise "Job that is using Iteration cannot redefine #perform" if name == :perform

    super
  end

  # @return the value assigned on this class (even if it is nil),
  #   otherwise the global `SidekiqIteration.max_job_runtime`
  def max_job_runtime
    return @max_job_runtime if defined?(@max_job_runtime)

    SidekiqIteration.max_job_runtime
  end
end
56
+
57
+ attr_reader :executions,
58
+ :cursor_position,
59
+ :start_time,
60
+ :times_interrupted,
61
+ :total_time,
62
+ :current_run_iterations
63
+
64
+ # @private
65
# Sets up the per-run bookkeeping after delegating to the superclass initializer.
def initialize
  super

  @arguments = nil                   # job arguments, assigned in #perform
  @job_iteration_retry_backoff = nil # delay to use when re-enqueueing
  @needs_reenqueue = false           # set when the run must be re-enqueued
  @current_run_iterations = 0        # items processed during this run
end
72
+
73
+ # @private
74
# Sidekiq entry point.
#
# Strips the trailing iteration-metadata hash (if present) from the arguments,
# remembers the remaining arguments for a possible re-enqueue, and starts the
# interruptible run with them.
def perform(*args)
  extract_previous_runs_metadata(args) # pops the metadata hash off `args` in place
  @arguments = args
  interruptible_perform(*args)
end
79
+
80
# Hook to override. Called right before iterating on the job's very first run
# (first execution, never interrupted before); later runs call `on_resume` instead.
# Does nothing by default.
def on_start; end
84
+
85
# Hook to override. Called right before iterating on every run except the very
# first one. Does nothing by default.
def on_resume; end
88
+
89
# Hook to override. Called each time a run stops iterating: when it is interrupted
# (throttling, the `max_job_runtime` limit, sidekiq restarting) and also when the
# enumerator is exhausted, in which case it runs before `on_complete`.
# Does nothing by default.
def on_shutdown; end
94
+
95
# Hook to override. Called once the job has finished iterating and does not need
# to be re-enqueued. Does nothing by default.
def on_complete; end
98
+
99
# Builds the enumerator the job iterates over. Must be overridden by the job class.
#
# It is called with the job arguments plus a `cursor:` keyword holding the last
# saved cursor position. Returning a falsy value makes the job skip iterating.
#
# @return [Enumerator]
#
# @raise [NotImplementedError] always, unless the job class overrides this method.
#
def build_enumerator(*)
  message = "#{self.class.name} must implement a 'build_enumerator' method"
  raise NotImplementedError, message
end
109
+
110
# Processes a single item yielded by the enumerator. Must be overridden by the job class.
#
# It is called with the item followed by the job arguments.
#
# @return [void]
#
# @raise [NotImplementedError] always, unless the job class overrides this method.
#
def each_iteration(*)
  message = "#{self.class.name} must implement an 'each_iteration' method"
  raise NotImplementedError, message
end
120
+
121
+ private
122
# Restores the progress counters saved by a previous run.
#
# When the last argument is a Hash containing the "sidekiq_iteration" key, that
# whole Hash is removed from `arguments` (in place) and its metadata is used;
# otherwise every counter starts from its default.
#
# @param arguments [Array] job arguments; may be mutated
def extract_previous_runs_metadata(arguments)
  trailing = arguments.last
  has_metadata = trailing.is_a?(Hash) && trailing.key?("sidekiq_iteration")
  metadata = has_metadata ? arguments.pop["sidekiq_iteration"] : {}

  @executions = metadata["executions"] || 0
  @cursor_position = metadata["cursor_position"]
  @times_interrupted = metadata["times_interrupted"] || 0
  @total_time = metadata["total_time"] || 0
end
135
+
136
# Runs one (possibly partial) pass over the job's enumerator.
#
# Steps: count the execution and stamp the start time, build and validate the
# enumerator, fire `on_start` or `on_resume`, iterate until the enumerator is
# exhausted, a throttle condition matches, or something throws `:abort`, fire
# `on_shutdown`, and finally either re-enqueue the job or fire `on_complete`.
#
# @param arguments [Array] job arguments (without the iteration metadata)
def interruptible_perform(*arguments)
  @executions += 1
  @start_time = Time.now.utc

  enumerator = build_enumerator(*arguments, cursor: cursor_position)
  if enumerator
    assert_enumerator!(enumerator)
  else
    SidekiqIteration.logger.info("[SidekiqIteration::Iteration] `build_enumerator` returned nil. Skipping the job.")
    return
  end

  first_run = executions == 1 && times_interrupted == 0
  first_run ? on_start : on_resume

  # `throw :abort, value` from inside the iteration lands here; without a throw
  # the result is whatever iterate_with_enumerator returned.
  outcome = catch(:abort) { iterate_with_enumerator(enumerator, arguments) }

  on_shutdown
  finished = handle_completed(outcome)

  if @needs_reenqueue
    reenqueue_iteration_job
  elsif finished
    on_complete
    output_interrupt_summary
  end
end
168
+
169
# Feeds every `[item, cursor]` pair from the enumerator to `each_iteration`,
# saving the cursor after each item and checking the throttle conditions.
#
# @param enumerator [Enumerator] yields `[item, cursor]` pairs
# @param arguments [Array] job arguments forwarded to `each_iteration`
# @return [Boolean] true when the enumerator was exhausted, false when a throttle
#   condition matched (the re-enqueue flag and backoff are then set)
def iterate_with_enumerator(enumerator, arguments)
  @needs_reenqueue = false
  yielded_anything = false

  enumerator.each do |item, cursor|
    yielded_anything = true
    each_iteration(item, *arguments)
    @cursor_position = cursor
    @current_run_iterations += 1

    condition = find_throttle_condition
    next unless condition

    # A throttle condition matched: remember its backoff and stop this run.
    @job_iteration_retry_backoff = condition.backoff
    @needs_reenqueue = true
    return false
  end

  unless yielded_anything
    SidekiqIteration.logger.info(
      "[SidekiqIteration::Iteration] Enumerator found nothing to iterate! " \
      "times_interrupted=#{times_interrupted} cursor_position=#{cursor_position}",
    )
  end

  adjust_total_time
  true
end
197
+
198
# Schedules the job again so that a later run continues from the saved cursor.
#
# Adds this run's duration to the total, counts the interruption, appends a
# `{ "sidekiq_iteration" => progress }` hash to the stored job arguments, and
# enqueues the job via `perform_in` with the recorded backoff.
def reenqueue_iteration_job
  SidekiqIteration.logger.info("[SidekiqIteration::Iteration] Interrupting and re-enqueueing the job cursor_position=#{cursor_position}")

  adjust_total_time
  @times_interrupted += 1

  progress = {
    "executions" => executions,
    "cursor_position" => cursor_position,
    "times_interrupted" => times_interrupted,
    "total_time" => total_time,
  }

  # The metadata travels as one extra trailing Hash argument.
  @arguments.push("sidekiq_iteration" => progress)
  self.class.perform_in(@job_iteration_retry_backoff, *@arguments)
end
215
+
216
# Adds the time elapsed since `start_time` (seconds, rounded to 6 decimals)
# to the accumulated `@total_time`.
def adjust_total_time
  elapsed = Time.now.utc.to_f - start_time.to_f
  @total_time += elapsed.round(6)
end
219
+
220
# Verifies that `build_enumerator` returned an Enumerator.
#
# @param enum [Object] the value returned by `build_enumerator`
# @return [nil]
# @raise [ArgumentError] with a usage example when `enum` is not an Enumerator
def assert_enumerator!(enum)
  unless enum.is_a?(Enumerator)
    raise ArgumentError, <<~MSG
      #build_enumerator is expected to return Enumerator object, but returned #{enum.class}.
      Example:
      def build_enumerator(params, cursor:)
      enumerator_builder.active_record_on_records(
      Shop.find(params[:shop_id]).products,
      cursor: cursor
      )
      end
    MSG
  end
end
234
+
235
# Logs a one-line summary (interruption count and total time) once iteration has completed.
def output_interrupt_summary
  summary = format(
    "[SidekiqIteration::Iteration] Completed iterating. times_interrupted=%d total_time=%.3f",
    times_interrupted,
    total_time,
  )
  SidekiqIteration.logger.info(summary)
end
240
+
241
# Returns the first throttle condition of the job class that currently applies
# to this job, or nil when none does.
def find_throttle_condition
  self.class.throttle_conditions.find { |condition| condition.valid?(self) }
end
246
+
247
# Translates the outcome of the iteration (a normal return value or a value
# thrown with `:abort`) into "should on_complete run?".
#
# @param completed [nil, Boolean, Symbol, Array] outcome of the iteration
# @return [Boolean] whether the job counts as completed
# @raise [RuntimeError] for an Array whose reason is not `:retry`, or for any
#   other unexpected value
def handle_completed(completed)
  case completed
  when nil, true
    # nil: someone aborted the job but still wants the on_complete callback.
    true
  when false, :skip_complete_callback
    false
  when Array
    # `[:retry, backoff]` can be thrown to stop early and re-enqueue the job.
    reason, backoff = completed
    raise "Unknown reason: #{reason}" unless reason == :retry

    @job_iteration_retry_backoff = backoff
    @needs_reenqueue = true
    false
  else
    raise "Unexpected thrown value: #{completed.inspect}"
  end
end
266
+ end
267
+ end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "sidekiq/job_retry"
4
+
5
module SidekiqIteration
  # @private
  #
  # Prepended to Sidekiq's retry handler so that a failing Iteration job keeps
  # its progress: before the retry is processed, the job's current counters and
  # cursor are stored in the message arguments under the "sidekiq_iteration" key.
  module JobRetryPatch
    private

    # @param jobinst [Object] the job instance that failed
    # @param msg [Hash] the job payload; `msg["args"]` is updated in place
    def process_retry(jobinst, msg, queue, exception)
      if jobinst.is_a?(Iteration)
        args = msg["args"]
        carrier = args.last

        # Only reuse the trailing Hash when it is a metadata Hash left by a
        # previous run. A trailing Hash that is a real job argument must not be
        # touched: Iteration pops the whole metadata-carrying Hash off the
        # arguments on the next run, which would drop that argument.
        unless carrier.is_a?(Hash) && carrier.key?("sidekiq_iteration")
          carrier = {}
          args.push(carrier)
        end

        carrier["sidekiq_iteration"] = {
          "executions" => jobinst.executions,
          "cursor_position" => jobinst.cursor_position,
          "times_interrupted" => jobinst.times_interrupted,
          "total_time" => jobinst.total_time,
        }
      end

      super
    end
  end
end
27
+
28
+ if Sidekiq::JobRetry.instance_method(:process_retry)
29
+ Sidekiq::JobRetry.prepend(SidekiqIteration::JobRetryPatch)
30
+ end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
module SidekiqIteration
  # @private
  #
  # Combines several enumerator-building procs into one nested iteration.
  #
  # The proc at level N is called with the items currently selected at levels
  # 0...N followed by the cursor stored for level N, and must return an object
  # whose `each` yields `[item, cursor_value]` pairs. Only items of the innermost
  # level are yielded, each paired with the array of cursor values of every level.
  class NestedEnumerator
    # @param enums [Array<Proc>] one proc per nesting level, outermost first
    # @param cursor [Array, nil] one cursor per level, or nil to start from scratch
    # @raise [ArgumentError] when `enums` is empty, contains a non-proc, or the
    #   cursor size does not match the number of levels
    def initialize(enums, cursor: nil)
      # Without this check an empty list would only fail later, inside #each,
      # with a NoMethodError on nil.
      if enums.empty?
        raise ArgumentError, "enums must contain at least one proc/lambda"
      end

      unless enums.all?(Proc)
        raise ArgumentError, "enums must contain only procs/lambdas"
      end

      if cursor && enums.size != cursor.size
        raise ArgumentError, "cursor should have one item per enum"
      end

      @enums = enums
      @cursor = cursor || Array.new(enums.size)
    end

    # Yields `item, cursors` for every innermost item.
    #
    # @return [Enumerator] when called without a block
    def each(&block)
      return to_enum unless block_given?

      iterate([], [], 0, &block)
    end

    private

    # Walks level `index`, recursing into the next level for every item until
    # the innermost level is reached.
    #
    # NOTE(review): the cursor stored for a level is passed to every enumerator
    # built at that level, including those built for later parent items —
    # confirm this is the intended resume behaviour.
    def iterate(current_items, current_cursor, index, &block)
      cursor = @cursor[index]
      enum = @enums[index].call(*current_items, cursor)
      innermost = index == @cursor.size - 1

      enum.each do |item, cursor_value|
        if innermost
          yield item, current_cursor + [cursor_value]
        else
          iterate(current_items + [item], current_cursor + [cursor_value], index + 1, &block)
        end
      end
    end
  end
end
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
module SidekiqIteration
  # Class-level DSL for declaring conditions under which an iterating job is
  # interrupted and re-enqueued after a backoff.
  module Throttling
    # @private
    #
    # Pairs a condition block with the backoff to use when the condition matches.
    class ThrottleCondition
      # @param condition [#call] called with the job; truthy means "throttle"
      # @param backoff [Numeric, #call] seconds to wait, or a callable returning them
      def initialize(condition, backoff)
        @condition = condition
        @backoff = backoff
      end

      # @param job [Object] the running job
      # @return the result of the condition block for this job
      def valid?(job)
        @condition.call(job)
      end

      # @return the backoff; any object responding to `call` (not only a Proc)
      #   is invoked to compute it, as documented on `throttle_on`
      def backoff
        if @backoff.respond_to?(:call)
          @backoff.call
        else
          @backoff
        end
      end
    end

    # @private
    attr_writer :throttle_conditions

    # @private
    def throttle_conditions
      @throttle_conditions ||= []
    end

    # Add a condition under which this job will be throttled.
    #
    # @param backoff [Numeric, #call] (30) a custom backoff (in seconds).
    #   This is the time to wait before retrying the job.
    # @yieldparam job [Sidekiq::Job] current sidekiq job that is yielded to `condition` proc
    # @yieldreturn [Boolean] whether the throttle condition is being met,
    #   indicating that the job should throttle.
    # @raise [ArgumentError] when no block is given
    #
    def throttle_on(backoff: 30, &condition)
      # Fail at declaration time instead of with a NoMethodError on nil while
      # the job is running.
      raise ArgumentError, "throttle_on requires a condition block" unless condition

      throttle_conditions << ThrottleCondition.new(condition, backoff)
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module SidekiqIteration
  # Version of the sidekiq-iteration gem.
  VERSION = "0.1.0"
end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "sidekiq"
4
+ require_relative "sidekiq_iteration/version"
5
+
6
module SidekiqIteration
  class << self
    # Maximum time (in seconds) a job may keep iterating before it is interrupted
    # and re-enqueued.
    #
    # Defaults to nil, which means that jobs are not interrupted because of their
    # runtime (only on a termination signal or another throttle condition).
    #
    # @example Global setting: interrupt jobs after 5 minutes of iterating
    #   SidekiqIteration.max_job_runtime = 5.minutes
    #
    # @example Per-job setting
    #   class MyJob
    #     # ...
    #     self.max_job_runtime = 1.minute
    #     # ...
    #   end
    #
    attr_accessor :max_job_runtime

    # Set a custom logger for sidekiq-iteration.
    # Defaults to `Sidekiq.logger`.
    #
    # @example
    #   SidekiqIteration.logger = Logger.new("log/sidekiq-iteration.log")
    #
    attr_writer :logger

    # @return the configured logger; falls back to (and memoizes) `Sidekiq.logger`
    def logger
      return @logger if @logger

      @logger = Sidekiq.logger
    end
  end
end
38
+
39
+ require_relative "sidekiq_iteration/iteration"
40
+ require_relative "sidekiq_iteration/job_retry_patch"
metadata ADDED
@@ -0,0 +1,80 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: sidekiq-iteration
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - fatkodima
8
+ - Shopify
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2022-11-02 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: sidekiq
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - ">="
19
+ - !ruby/object:Gem::Version
20
+ version: '6.0'
21
+ type: :runtime
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ version: '6.0'
28
+ description:
29
+ email:
30
+ - fatkodima123@gmail.com
31
+ executables: []
32
+ extensions: []
33
+ extra_rdoc_files: []
34
+ files:
35
+ - CHANGELOG.md
36
+ - LICENSE.txt
37
+ - README.md
38
+ - guides/best-practices.md
39
+ - guides/custom-enumerator.md
40
+ - guides/iteration-how-it-works.md
41
+ - guides/throttling.md
42
+ - lib/sidekiq-iteration.rb
43
+ - lib/sidekiq_iteration.rb
44
+ - lib/sidekiq_iteration/active_record_batch_enumerator.rb
45
+ - lib/sidekiq_iteration/active_record_cursor.rb
46
+ - lib/sidekiq_iteration/active_record_enumerator.rb
47
+ - lib/sidekiq_iteration/csv_enumerator.rb
48
+ - lib/sidekiq_iteration/enumerators.rb
49
+ - lib/sidekiq_iteration/iteration.rb
50
+ - lib/sidekiq_iteration/job_retry_patch.rb
51
+ - lib/sidekiq_iteration/nested_enumerator.rb
52
+ - lib/sidekiq_iteration/throttling.rb
53
+ - lib/sidekiq_iteration/version.rb
54
+ homepage: https://github.com/fatkodima/sidekiq-iteration
55
+ licenses:
56
+ - MIT
57
+ metadata:
58
+ homepage_uri: https://github.com/fatkodima/sidekiq-iteration
59
+ source_code_uri: https://github.com/fatkodima/sidekiq-iteration
60
+ changelog_uri: https://github.com/fatkodima/sidekiq-iteration/blob/master/CHANGELOG.md
61
+ post_install_message:
62
+ rdoc_options: []
63
+ require_paths:
64
+ - lib
65
+ required_ruby_version: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: 2.7.0
70
+ required_rubygems_version: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: '0'
75
+ requirements: []
76
+ rubygems_version: 3.1.6
77
+ signing_key:
78
+ specification_version: 4
79
+ summary: Makes your sidekiq jobs interruptible and resumable.
80
+ test_files: []