sidekiq-iteration 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +5 -0
- data/LICENSE.txt +21 -0
- data/README.md +265 -0
- data/guides/best-practices.md +71 -0
- data/guides/custom-enumerator.md +98 -0
- data/guides/iteration-how-it-works.md +71 -0
- data/guides/throttling.md +42 -0
- data/lib/sidekiq-iteration.rb +3 -0
- data/lib/sidekiq_iteration/active_record_batch_enumerator.rb +127 -0
- data/lib/sidekiq_iteration/active_record_cursor.rb +89 -0
- data/lib/sidekiq_iteration/active_record_enumerator.rb +69 -0
- data/lib/sidekiq_iteration/csv_enumerator.rb +85 -0
- data/lib/sidekiq_iteration/enumerators.rb +187 -0
- data/lib/sidekiq_iteration/iteration.rb +267 -0
- data/lib/sidekiq_iteration/job_retry_patch.rb +30 -0
- data/lib/sidekiq_iteration/nested_enumerator.rb +39 -0
- data/lib/sidekiq_iteration/throttling.rb +45 -0
- data/lib/sidekiq_iteration/version.rb +5 -0
- data/lib/sidekiq_iteration.rb +40 -0
- metadata +80 -0
@@ -0,0 +1,267 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "throttling"
|
4
|
+
require_relative "enumerators"
|
5
|
+
|
6
|
+
module SidekiqIteration
  # Mixin that turns a job class into an interruptible, resumable iteration.
  #
  # Including classes implement {#build_enumerator} and {#each_iteration}.
  # This module supplies {#perform}, which walks the enumerator, evaluates the
  # class's throttle conditions after every item and, when one matches,
  # re-enqueues the job together with the cursor it stopped at.
  module Iteration
    include Enumerators

    # Extends the including class with the class-level API and registers the
    # two built-in throttle conditions (maximum runtime and process shutdown).
    #
    # @private
    def self.included(base)
      base.extend(ClassMethods)
      base.extend(Throttling)

      base.class_eval do
        # Interrupt once the current run has exceeded `max_job_runtime`.
        throttle_on(backoff: 0) do |job|
          job.class.max_job_runtime &&
            job.start_time &&
            (Time.now.utc - job.start_time) > job.class.max_job_runtime
        end

        # Interrupt when the Sidekiq process is stopping.
        # NOTE(review): `launcher` is presumably nil when Sidekiq::CLI is loaded
        # but the server was never started; `&.` keeps that case from raising.
        throttle_on(backoff: 0) do
          defined?(Sidekiq::CLI) &&
            Sidekiq::CLI.instance.launcher&.stopping?
        end
      end

      super
    end

    # @private
    module ClassMethods
      # Gives every subclass its own copy of the throttle conditions so that
      # conditions added to a subclass do not leak into its parent.
      def inherited(base)
        base.throttle_conditions = throttle_conditions.dup
        super
      end

      # Guards against jobs overriding `perform`, which is owned by Iteration.
      #
      # @raise [RuntimeError] when `perform` is defined on the job class
      def method_added(method_name)
        if method_name == :perform
          raise "Job that is using Iteration cannot redefine #perform"
        end

        super
      end

      attr_writer :max_job_runtime

      # @return [Numeric, nil] the per-class limit when one was assigned,
      #   otherwise the global `SidekiqIteration.max_job_runtime`
      def max_job_runtime
        if defined?(@max_job_runtime)
          @max_job_runtime
        else
          SidekiqIteration.max_job_runtime
        end
      end
    end

    attr_reader :executions,
                :cursor_position,
                :start_time,
                :times_interrupted,
                :total_time,
                :current_run_iterations

    # @private
    def initialize
      super
      @arguments = nil
      @job_iteration_retry_backoff = nil
      @needs_reenqueue = false
      @current_run_iterations = 0
    end

    # Entry point invoked with the job arguments. Strips the iteration
    # metadata appended by a previous run (if any) and starts iterating.
    #
    # @private
    def perform(*arguments)
      extract_previous_runs_metadata(arguments)
      @arguments = arguments
      interruptible_perform(*arguments)
    end

    # A hook to override that will be called when the job starts iterating.
    # Is called only once, for the first time.
    def on_start
    end

    # A hook to override that will be called when the job resumes iterating.
    def on_resume
    end

    # A hook to override that will be called each time the job is interrupted.
    # This can be due to throttling (throttle enumerator), `max_job_runtime` configuration,
    # or sidekiq restarting.
    def on_shutdown
    end

    # A hook to override that will be called when the job finished iterating.
    def on_complete
    end

    # The enumerator to be iterated over.
    #
    # @return [Enumerator]
    #
    # @raise [NotImplementedError] with a message advising subclasses to
    #   implement an override for this method.
    #
    def build_enumerator(*)
      raise NotImplementedError, "#{self.class.name} must implement a 'build_enumerator' method"
    end

    # The action to be performed on each item from the enumerator.
    #
    # @return [void]
    #
    # @raise [NotImplementedError] with a message advising subclasses to
    #   implement an override for this method.
    #
    def each_iteration(*)
      raise NotImplementedError, "#{self.class.name} must implement an 'each_iteration' method"
    end

    private

    # Removes the "sidekiq_iteration" metadata from the end of `arguments`
    # (mutating the array in place) and loads it into the instance state.
    #
    # @param arguments [Array] the raw job arguments
    def extract_previous_runs_metadata(arguments)
      options = {}
      trailing = arguments.last

      if trailing.is_a?(Hash) && trailing.key?("sidekiq_iteration")
        arguments.pop
        options = trailing["sidekiq_iteration"] || {}

        # The metadata can arrive merged into a hash that is a genuine job
        # argument. Hand the remaining keys back instead of dropping the
        # whole argument; a copy is used so the caller's hash is untouched.
        remainder = trailing.reject { |key, _value| key == "sidekiq_iteration" }
        arguments.push(remainder) unless remainder.empty?
      end

      @executions = options["executions"] || 0
      @cursor_position = options["cursor_position"]
      @times_interrupted = options["times_interrupted"] || 0
      @total_time = options["total_time"] || 0
    end

    # Builds the enumerator, fires the lifecycle hooks and drives the
    # iteration, re-enqueueing the job when it was interrupted.
    def interruptible_perform(*arguments)
      @executions += 1
      @start_time = Time.now.utc

      enumerator = build_enumerator(*arguments, cursor: cursor_position)
      unless enumerator
        SidekiqIteration.logger.info("[SidekiqIteration::Iteration] `build_enumerator` returned nil. Skipping the job.")
        return
      end

      assert_enumerator!(enumerator)

      if executions == 1 && times_interrupted == 0
        on_start
      else
        on_resume
      end

      # `throw :abort, value` from inside the iteration lands here; the thrown
      # value is interpreted by #handle_completed.
      completed = catch(:abort) do
        iterate_with_enumerator(enumerator, arguments)
      end

      on_shutdown
      completed = handle_completed(completed)

      if @needs_reenqueue
        reenqueue_iteration_job
      elsif completed
        on_complete
        output_interrupt_summary
      end
    end

    # Yields every item of `enumerator` to #each_iteration, recording the
    # cursor after each item and stopping early when a throttle condition
    # matches.
    #
    # @return [Boolean] true when the enumerator was exhausted, false when
    #   the run was throttled and needs to be re-enqueued
    def iterate_with_enumerator(enumerator, arguments)
      found_record = false
      @needs_reenqueue = false

      enumerator.each do |object_from_enumerator, index|
        found_record = true
        each_iteration(object_from_enumerator, *arguments)
        # Only advance the cursor once the item was processed successfully.
        @cursor_position = index
        @current_run_iterations += 1

        throttle_condition = find_throttle_condition
        if throttle_condition
          @job_iteration_retry_backoff = throttle_condition.backoff
          @needs_reenqueue = true
          return false
        end
      end

      unless found_record
        SidekiqIteration.logger.info(
          "[SidekiqIteration::Iteration] Enumerator found nothing to iterate! " \
          "times_interrupted=#{times_interrupted} cursor_position=#{cursor_position}",
        )
      end

      true
    ensure
      # Account for this run exactly once, however it ends: exhausted,
      # throttled, aborted via `throw :abort`, or interrupted by an exception.
      adjust_total_time
    end

    # Schedules a new run of this job carrying the iteration metadata as an
    # extra trailing hash argument.
    def reenqueue_iteration_job
      SidekiqIteration.logger.info("[SidekiqIteration::Iteration] Interrupting and re-enqueueing the job cursor_position=#{cursor_position}")

      @times_interrupted += 1

      metadata = {
        "sidekiq_iteration" => {
          "executions" => executions,
          "cursor_position" => cursor_position,
          "times_interrupted" => times_interrupted,
          "total_time" => total_time,
        },
      }

      # Build a new array rather than pushing onto @arguments, so the stored
      # job arguments never contain the metadata hash.
      self.class.perform_in(@job_iteration_retry_backoff, *@arguments, metadata)
    end

    # Adds the time elapsed since `start_time` to the accumulated total.
    def adjust_total_time
      @total_time += (Time.now.utc.to_f - start_time.to_f).round(6)
    end

    # @raise [ArgumentError] unless `enum` is an Enumerator
    def assert_enumerator!(enum)
      return if enum.is_a?(Enumerator)

      raise ArgumentError, <<~MSG
        #build_enumerator is expected to return Enumerator object, but returned #{enum.class}.
        Example:
          def build_enumerator(params, cursor:)
            enumerator_builder.active_record_on_records(
              Shop.find(params[:shop_id]).products,
              cursor: cursor
            )
          end
      MSG
    end

    def output_interrupt_summary
      SidekiqIteration.logger.info(
        format("[SidekiqIteration::Iteration] Completed iterating. times_interrupted=%d total_time=%.3f", times_interrupted, total_time),
      )
    end

    # @return [Throttling::ThrottleCondition, nil] the first condition of the
    #   job class that currently matches this job, if any
    def find_throttle_condition
      self.class.throttle_conditions.find do |throttle_condition|
        throttle_condition.valid?(self)
      end
    end

    # Translates the value produced by the `catch(:abort)` block into a
    # "completed" flag, scheduling a re-enqueue for `[:retry, backoff]`.
    #
    # @return [Boolean] whether the completion callback should run
    # @raise [RuntimeError] for an unknown array reason or any other value
    def handle_completed(completed)
      case completed
      when nil # someone aborted the job but wants to call the on_complete callback
        true
      when true # rubocop:disable Lint/DuplicateBranch
        true
      when false, :skip_complete_callback
        false
      when Array # can be used to return early from the enumerator
        reason, backoff = completed
        raise "Unknown reason: #{reason}" unless reason == :retry

        @job_iteration_retry_backoff = backoff
        @needs_reenqueue = true
        false
      else
        raise "Unexpected thrown value: #{completed.inspect}"
      end
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "sidekiq/job_retry"
|
4
|
+
|
5
|
+
module SidekiqIteration
  # Prepended onto the retry handler so that a job which fails mid-iteration
  # is retried with its iteration state (cursor, counters, timings).
  #
  # @private
  module JobRetryPatch
    private

    # Stores the job's iteration metadata in the payload arguments before
    # delegating to the original implementation.
    #
    # The metadata lives in its own trailing hash: an existing trailing hash
    # is reused only when it already carries a "sidekiq_iteration" entry.
    # Any other trailing hash is a regular job argument and is left untouched,
    # so it is not mistaken for metadata when the job runs again.
    #
    # @param jobinst [Object, nil] the job instance that failed
    # @param msg [Hash] the job payload; `msg["args"]` is updated in place
    def process_retry(jobinst, msg, queue, exception)
      if jobinst.is_a?(Iteration)
        args = msg["args"]
        trailing = args.last

        unless trailing.is_a?(Hash) && trailing.key?("sidekiq_iteration")
          args.push({})
        end

        args.last["sidekiq_iteration"] = {
          "executions" => jobinst.executions,
          "cursor_position" => jobinst.cursor_position,
          "times_interrupted" => jobinst.times_interrupted,
          "total_time" => jobinst.total_time,
        }
      end

      super
    end
  end
end
|
27
|
+
|
28
|
+
# Install the patch only when Sidekiq::JobRetry actually defines
# #process_retry (public or private). `Module#instance_method` cannot serve as
# the condition here: it raises NameError for a missing method instead of
# returning a falsy value. A missing method still fails with NameError, but
# with a message that names the incompatibility.
if Sidekiq::JobRetry.method_defined?(:process_retry) ||
   Sidekiq::JobRetry.private_method_defined?(:process_retry)
  Sidekiq::JobRetry.prepend(SidekiqIteration::JobRetryPatch)
else
  raise NameError,
        "Sidekiq::JobRetry does not define #process_retry; " \
        "sidekiq-iteration cannot preserve the iteration state of retried jobs"
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SidekiqIteration
  # Chains several enumerator factories into one nested iteration.
  #
  # Each factory is called with the items currently selected by the outer
  # levels followed by the cursor to resume from, and must return an object
  # whose `each` yields `[item, cursor_value]` pairs. The innermost items are
  # yielded together with a composite cursor holding one entry per level.
  #
  # @private
  class NestedEnumerator
    # @param enums [Array<Proc>] one factory per nesting level, outermost first
    # @param cursor [Array, nil] composite cursor to resume from
    # @raise [ArgumentError] when `enums` contains a non-proc or when `cursor`
    #   does not have exactly one entry per enum
    def initialize(enums, cursor: nil)
      unless enums.all?(Proc)
        raise ArgumentError, "enums must contain only procs/lambdas"
      end

      if cursor && enums.size != cursor.size
        raise ArgumentError, "cursor should have one item per enum"
      end

      @enums = enums
      # Copy so the caller's array is never shared with this enumerator.
      @cursor = cursor ? cursor.dup : Array.new(enums.size)
    end

    # Yields `item, composite_cursor` for every innermost item.
    #
    # @return [Enumerator] when called without a block
    def each(&block)
      return to_enum unless block_given?

      # Work on a per-call copy: positions are cleared while iterating, and
      # every call to #each has to start from the same resume point.
      @resume_from = @cursor.dup
      iterate([], [], 0, &block)
    end

    private

    # Walks level `index`, recursing into deeper levels for every item.
    #
    # The composite cursor is handed back to the factories on resume, so it
    # must describe work that is completely done. For a non-innermost level
    # that is the position *before* the item whose children are still being
    # iterated; reporting the item itself would make a resumed run skip the
    # rest of its children.
    def iterate(current_items, current_cursor, index, &block)
      cursor = @resume_from[index]
      enum = @enums[index].call(*current_items, cursor)
      innermost = index == @resume_from.size - 1

      enum.each do |item, cursor_value|
        if innermost
          yield item, current_cursor + [cursor_value]
        else
          iterate(current_items + [item], current_cursor + [cursor], index + 1, &block)
          # The resume position of the deeper level applies only to the first
          # outer item; following items must start from their beginning.
          @resume_from[index + 1] = nil
          # This item is now fully processed and may serve as a resume point.
          cursor = cursor_value
        end
      end
    end
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SidekiqIteration
  # Class-level DSL for declaring the conditions under which an iterating job
  # should stop and be retried later.
  module Throttling
    # Pairs a throttle predicate with the backoff to apply when it matches.
    #
    # @private
    class ThrottleCondition
      # @param condition [#call] predicate that receives the job
      # @param backoff [Numeric, #call] seconds to wait, or a callable
      #   returning that value
      def initialize(condition, backoff)
        @condition = condition
        @backoff = backoff
      end

      # @param job [Object] the job the predicate is evaluated against
      # @return [Object] the predicate's result; truthy means "throttle"
      def valid?(job)
        @condition.call(job)
      end

      # @return [Object] the configured backoff; anything responding to
      #   `call` (not only a Proc) is invoked to compute it
      def backoff
        @backoff.respond_to?(:call) ? @backoff.call : @backoff
      end
    end

    # @private
    attr_writer :throttle_conditions

    # @private
    def throttle_conditions
      @throttle_conditions ||= []
    end

    # Add a condition under which this job will be throttled.
    #
    # @param backoff [Numeric, #call] (30) a custom backoff (in seconds).
    #   This is the time to wait before retrying the job.
    # @yieldparam job [Sidekiq::Job] current sidekiq job that is yielded to `condition` proc
    # @yieldreturn [Boolean] whether the throttle condition is being met,
    #   indicating that the job should throttle.
    # @raise [ArgumentError] when called without a block
    #
    def throttle_on(backoff: 30, &condition)
      # Without a block the stored predicate would be nil and only fail later,
      # when the condition is evaluated; fail at declaration time instead.
      raise ArgumentError, "throttle_on requires a block" unless condition

      throttle_conditions << ThrottleCondition.new(condition, backoff)
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "sidekiq"
|
4
|
+
require_relative "sidekiq_iteration/version"
|
5
|
+
|
6
|
+
module SidekiqIteration
  class << self
    # Upper bound, in seconds, on how long a single run of a job may iterate
    # before it is interrupted. Applies to every job unless the job class
    # assigns its own value.
    #
    # Defaults to nil, in which case jobs are only interrupted on a
    # termination signal.
    #
    # @example Global setting
    #   SidekiqIteration.max_job_runtime = 5.minutes
    #
    # @example Per-job setting
    #   class MyJob
    #     # ...
    #     self.max_job_runtime = 1.minute
    #     # ...
    #   end
    #
    attr_accessor :max_job_runtime

    # Assigns the logger used by sidekiq-iteration.
    #
    # @example
    #   SidekiqIteration.logger = Logger.new("log/sidekiq-iteration.log")
    #
    attr_writer :logger

    # @return [Object] the assigned logger; when none is set, `Sidekiq.logger`
    #   is stored and returned
    def logger
      @logger = Sidekiq.logger unless defined?(@logger) && @logger
      @logger
    end
  end
end
|
38
|
+
|
39
|
+
require_relative "sidekiq_iteration/iteration"
|
40
|
+
require_relative "sidekiq_iteration/job_retry_patch"
|
metadata
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: sidekiq-iteration
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- fatkodima
|
8
|
+
- Shopify
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2022-11-02 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: sidekiq
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - ">="
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: '6.0'
|
21
|
+
type: :runtime
|
22
|
+
prerelease: false
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: '6.0'
|
28
|
+
description:
|
29
|
+
email:
|
30
|
+
- fatkodima123@gmail.com
|
31
|
+
executables: []
|
32
|
+
extensions: []
|
33
|
+
extra_rdoc_files: []
|
34
|
+
files:
|
35
|
+
- CHANGELOG.md
|
36
|
+
- LICENSE.txt
|
37
|
+
- README.md
|
38
|
+
- guides/best-practices.md
|
39
|
+
- guides/custom-enumerator.md
|
40
|
+
- guides/iteration-how-it-works.md
|
41
|
+
- guides/throttling.md
|
42
|
+
- lib/sidekiq-iteration.rb
|
43
|
+
- lib/sidekiq_iteration.rb
|
44
|
+
- lib/sidekiq_iteration/active_record_batch_enumerator.rb
|
45
|
+
- lib/sidekiq_iteration/active_record_cursor.rb
|
46
|
+
- lib/sidekiq_iteration/active_record_enumerator.rb
|
47
|
+
- lib/sidekiq_iteration/csv_enumerator.rb
|
48
|
+
- lib/sidekiq_iteration/enumerators.rb
|
49
|
+
- lib/sidekiq_iteration/iteration.rb
|
50
|
+
- lib/sidekiq_iteration/job_retry_patch.rb
|
51
|
+
- lib/sidekiq_iteration/nested_enumerator.rb
|
52
|
+
- lib/sidekiq_iteration/throttling.rb
|
53
|
+
- lib/sidekiq_iteration/version.rb
|
54
|
+
homepage: https://github.com/fatkodima/sidekiq-iteration
|
55
|
+
licenses:
|
56
|
+
- MIT
|
57
|
+
metadata:
|
58
|
+
homepage_uri: https://github.com/fatkodima/sidekiq-iteration
|
59
|
+
source_code_uri: https://github.com/fatkodima/sidekiq-iteration
|
60
|
+
changelog_uri: https://github.com/fatkodima/sidekiq-iteration/blob/master/CHANGELOG.md
|
61
|
+
post_install_message:
|
62
|
+
rdoc_options: []
|
63
|
+
require_paths:
|
64
|
+
- lib
|
65
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: 2.7.0
|
70
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
71
|
+
requirements:
|
72
|
+
- - ">="
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: '0'
|
75
|
+
requirements: []
|
76
|
+
rubygems_version: 3.1.6
|
77
|
+
signing_key:
|
78
|
+
specification_version: 4
|
79
|
+
summary: Makes your sidekiq jobs interruptible and resumable.
|
80
|
+
test_files: []
|