job-iteration 1.3.6 → 1.4.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +42 -22
- data/.github/workflows/cla.yml +22 -0
- data/.rubocop.yml +3 -3
- data/CHANGELOG.md +27 -1
- data/Gemfile +0 -1
- data/Gemfile.lock +64 -67
- data/README.md +26 -8
- data/dev.yml +2 -2
- data/guides/argument-semantics.md +128 -0
- data/guides/best-practices.md +72 -32
- data/guides/custom-enumerator.md +76 -28
- data/guides/iteration-how-it-works.md +2 -18
- data/{railgun.yml → isogun.yml} +0 -4
- data/lib/job-iteration/active_record_batch_enumerator.rb +1 -1
- data/lib/job-iteration/active_record_cursor.rb +6 -3
- data/lib/job-iteration/active_record_enumerator.rb +5 -1
- data/lib/job-iteration/csv_enumerator.rb +1 -1
- data/lib/job-iteration/enumerator_builder.rb +47 -9
- data/lib/job-iteration/iteration.rb +64 -35
- data/lib/job-iteration/log_subscriber.rb +38 -0
- data/lib/job-iteration/nested_enumerator.rb +48 -0
- data/lib/job-iteration/version.rb +1 -1
- data/lib/job-iteration.rb +29 -0
- metadata +8 -4
@@ -8,11 +8,16 @@ module JobIteration
|
|
8
8
|
|
9
9
|
attr_accessor(
|
10
10
|
:cursor_position,
|
11
|
-
:start_time,
|
12
11
|
:times_interrupted,
|
13
|
-
:total_time,
|
14
12
|
)
|
15
13
|
|
14
|
+
# The time when the job starts running. If the job is interrupted and runs again, the value is updated.
|
15
|
+
attr_accessor :start_time
|
16
|
+
|
17
|
+
# The total time the job has been running, including multiple iterations.
|
18
|
+
# The time isn't reset if the job is interrupted.
|
19
|
+
attr_accessor :total_time
|
20
|
+
|
16
21
|
class CursorError < ArgumentError
|
17
22
|
attr_reader :cursor
|
18
23
|
|
@@ -39,6 +44,33 @@ module JobIteration
|
|
39
44
|
define_callbacks :start
|
40
45
|
define_callbacks :shutdown
|
41
46
|
define_callbacks :complete
|
47
|
+
|
48
|
+
class_attribute(
|
49
|
+
:job_iteration_max_job_runtime,
|
50
|
+
instance_writer: false,
|
51
|
+
instance_predicate: false,
|
52
|
+
default: JobIteration.max_job_runtime,
|
53
|
+
)
|
54
|
+
|
55
|
+
singleton_class.prepend(PrependedClassMethods)
|
56
|
+
end
|
57
|
+
|
58
|
+
module PrependedClassMethods
|
59
|
+
def job_iteration_max_job_runtime=(new)
|
60
|
+
existing = job_iteration_max_job_runtime
|
61
|
+
|
62
|
+
if existing && (!new || new > existing)
|
63
|
+
existing_label = existing.inspect
|
64
|
+
new_label = new ? new.inspect : "#{new.inspect} (no limit)"
|
65
|
+
raise(
|
66
|
+
ArgumentError,
|
67
|
+
"job_iteration_max_job_runtime may only decrease; " \
|
68
|
+
"#{self} tried to increase it from #{existing_label} to #{new_label}",
|
69
|
+
)
|
70
|
+
end
|
71
|
+
|
72
|
+
super
|
73
|
+
end
|
42
74
|
end
|
43
75
|
|
44
76
|
module ClassMethods
|
@@ -68,7 +100,7 @@ module JobIteration
|
|
68
100
|
|
69
101
|
def initialize(*arguments)
|
70
102
|
super
|
71
|
-
@job_iteration_retry_backoff =
|
103
|
+
@job_iteration_retry_backoff = JobIteration.default_retry_backoff
|
72
104
|
@needs_reenqueue = false
|
73
105
|
self.times_interrupted = 0
|
74
106
|
self.total_time = 0.0
|
@@ -87,12 +119,14 @@ module JobIteration
|
|
87
119
|
def deserialize(job_data) # @private
|
88
120
|
super
|
89
121
|
self.cursor_position = job_data["cursor_position"]
|
90
|
-
self.times_interrupted = job_data["times_interrupted"] || 0
|
91
|
-
self.total_time = job_data["total_time"] || 0
|
122
|
+
self.times_interrupted = Integer(job_data["times_interrupted"] || 0)
|
123
|
+
self.total_time = Float(job_data["total_time"] || 0.0)
|
92
124
|
end
|
93
125
|
|
94
126
|
def perform(*params) # @private
|
95
127
|
interruptible_perform(*params)
|
128
|
+
|
129
|
+
nil
|
96
130
|
end
|
97
131
|
|
98
132
|
def retry_job(*, **)
|
@@ -110,13 +144,12 @@ module JobIteration
|
|
110
144
|
self.start_time = Time.now.utc
|
111
145
|
|
112
146
|
enumerator = nil
|
113
|
-
ActiveSupport::Notifications.instrument("build_enumerator.iteration",
|
147
|
+
ActiveSupport::Notifications.instrument("build_enumerator.iteration", instrumentation_tags) do
|
114
148
|
enumerator = build_enumerator(*arguments, cursor: cursor_position)
|
115
149
|
end
|
116
150
|
|
117
151
|
unless enumerator
|
118
|
-
|
119
|
-
"Skipping the job.")
|
152
|
+
ActiveSupport::Notifications.instrument("nil_enumerator.iteration", instrumentation_tags)
|
120
153
|
return
|
121
154
|
end
|
122
155
|
|
@@ -125,7 +158,10 @@ module JobIteration
|
|
125
158
|
if executions == 1 && times_interrupted == 0
|
126
159
|
run_callbacks(:start)
|
127
160
|
else
|
128
|
-
ActiveSupport::Notifications.instrument(
|
161
|
+
ActiveSupport::Notifications.instrument(
|
162
|
+
"resumed.iteration",
|
163
|
+
instrumentation_tags.merge(times_interrupted: times_interrupted, total_time: total_time),
|
164
|
+
)
|
129
165
|
end
|
130
166
|
|
131
167
|
completed = catch(:abort) do
|
@@ -139,7 +175,10 @@ module JobIteration
|
|
139
175
|
reenqueue_iteration_job
|
140
176
|
elsif completed
|
141
177
|
run_callbacks(:complete)
|
142
|
-
|
178
|
+
ActiveSupport::Notifications.instrument(
|
179
|
+
"completed.iteration",
|
180
|
+
instrumentation_tags.merge(times_interrupted: times_interrupted, total_time: total_time),
|
181
|
+
)
|
143
182
|
end
|
144
183
|
end
|
145
184
|
|
@@ -148,14 +187,15 @@ module JobIteration
|
|
148
187
|
found_record = false
|
149
188
|
@needs_reenqueue = false
|
150
189
|
|
151
|
-
enumerator.each do |object_from_enumerator,
|
190
|
+
enumerator.each do |object_from_enumerator, cursor_from_enumerator|
|
152
191
|
# Deferred until 2.0.0
|
153
|
-
# assert_valid_cursor!(
|
192
|
+
# assert_valid_cursor!(cursor_from_enumerator)
|
154
193
|
|
155
|
-
|
194
|
+
tags = instrumentation_tags.merge(cursor_position: cursor_from_enumerator)
|
195
|
+
ActiveSupport::Notifications.instrument("each_iteration.iteration", tags) do
|
156
196
|
found_record = true
|
157
197
|
each_iteration(object_from_enumerator, *arguments)
|
158
|
-
self.cursor_position =
|
198
|
+
self.cursor_position = cursor_from_enumerator
|
159
199
|
end
|
160
200
|
|
161
201
|
next unless job_should_exit?
|
@@ -165,25 +205,19 @@ module JobIteration
|
|
165
205
|
return false
|
166
206
|
end
|
167
207
|
|
168
|
-
|
169
|
-
"
|
170
|
-
|
208
|
+
ActiveSupport::Notifications.instrument(
|
209
|
+
"not_found.iteration",
|
210
|
+
instrumentation_tags.merge(times_interrupted: times_interrupted),
|
171
211
|
) unless found_record
|
172
212
|
|
173
|
-
adjust_total_time
|
174
|
-
|
175
213
|
true
|
176
|
-
|
177
|
-
|
178
|
-
def record_unit_of_work(&block)
|
179
|
-
ActiveSupport::Notifications.instrument("each_iteration.iteration", iteration_instrumentation_tags, &block)
|
214
|
+
ensure
|
215
|
+
adjust_total_time
|
180
216
|
end
|
181
217
|
|
182
218
|
def reenqueue_iteration_job
|
183
|
-
ActiveSupport::Notifications.instrument("interrupted.iteration",
|
184
|
-
logger.info("[JobIteration::Iteration] Interrupting and re-enqueueing the job cursor_position=#{cursor_position}")
|
219
|
+
ActiveSupport::Notifications.instrument("interrupted.iteration", instrumentation_tags)
|
185
220
|
|
186
|
-
adjust_total_time
|
187
221
|
self.times_interrupted += 1
|
188
222
|
|
189
223
|
self.already_in_queue = true if respond_to?(:already_in_queue=)
|
@@ -225,7 +259,7 @@ module JobIteration
|
|
225
259
|
unless respond_to?(:each_iteration, true)
|
226
260
|
raise(
|
227
261
|
ArgumentError,
|
228
|
-
"Iteration job (#{self.class}) must implement #each_iteration method"
|
262
|
+
"Iteration job (#{self.class}) must implement #each_iteration method",
|
229
263
|
)
|
230
264
|
end
|
231
265
|
|
@@ -252,17 +286,12 @@ module JobIteration
|
|
252
286
|
method.parameters
|
253
287
|
end
|
254
288
|
|
255
|
-
def iteration_instrumentation_tags
|
256
|
-
{ job_class: self.class.name }
|
257
|
-
end
|
258
|
-
|
259
|
-
def output_interrupt_summary
|
260
|
-
message = "[JobIteration::Iteration] Completed iterating. times_interrupted=%d total_time=%.3f"
|
261
|
-
logger.info(Kernel.format(message, times_interrupted, total_time))
|
289
|
+
def instrumentation_tags
|
290
|
+
{ job_class: self.class.name, cursor_position: cursor_position }
|
262
291
|
end
|
263
292
|
|
264
293
|
def job_should_exit?
|
265
|
-
if
|
294
|
+
if job_iteration_max_job_runtime && start_time && (Time.now.utc - start_time) > job_iteration_max_job_runtime
|
266
295
|
return true
|
267
296
|
end
|
268
297
|
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module JobIteration
|
4
|
+
class LogSubscriber < ActiveSupport::LogSubscriber
|
5
|
+
def logger
|
6
|
+
JobIteration.logger
|
7
|
+
end
|
8
|
+
|
9
|
+
def nil_enumerator(event)
|
10
|
+
info do
|
11
|
+
"[JobIteration::Iteration] `build_enumerator` returned nil. Skipping the job."
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
def not_found(event)
|
16
|
+
info do
|
17
|
+
"[JobIteration::Iteration] Enumerator found nothing to iterate! " \
|
18
|
+
"times_interrupted=#{event.payload[:times_interrupted]} cursor_position=#{event.payload[:cursor_position]}"
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
def interrupted(event)
|
23
|
+
info do
|
24
|
+
"[JobIteration::Iteration] Interrupting and re-enqueueing the job " \
|
25
|
+
"cursor_position=#{event.payload[:cursor_position]}"
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
def completed(event)
|
30
|
+
info do
|
31
|
+
message = "[JobIteration::Iteration] Completed iterating. times_interrupted=%d total_time=%.3f"
|
32
|
+
Kernel.format(message, event.payload[:times_interrupted], event.payload[:total_time])
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
JobIteration::LogSubscriber.attach_to(:iteration)
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module JobIteration
|
4
|
+
# @private
|
5
|
+
class NestedEnumerator
|
6
|
+
def initialize(enums, cursor: nil)
|
7
|
+
unless enums.all?(Proc)
|
8
|
+
raise ArgumentError, "enums must contain only procs/lambdas"
|
9
|
+
end
|
10
|
+
|
11
|
+
if cursor && enums.size != cursor.size
|
12
|
+
raise ArgumentError, "cursor should have one object per enum"
|
13
|
+
end
|
14
|
+
|
15
|
+
@enums = enums
|
16
|
+
@cursors = cursor || Array.new(enums.size)
|
17
|
+
end
|
18
|
+
|
19
|
+
def each(&block)
|
20
|
+
return to_enum unless block_given?
|
21
|
+
|
22
|
+
iterate([], 0, &block)
|
23
|
+
end
|
24
|
+
|
25
|
+
private
|
26
|
+
|
27
|
+
def iterate(current_objects, index, &block)
|
28
|
+
enumerator = @enums[index].call(*current_objects, @cursors[index])
|
29
|
+
|
30
|
+
enumerator.each do |object_from_enumerator, cursor_from_enumerator|
|
31
|
+
if index == @cursors.size - 1
|
32
|
+
# we've reached the innermost enumerator, yield for `iterate_with_enumerator`
|
33
|
+
updated_cursor = @cursors.dup
|
34
|
+
updated_cursor[index] = cursor_from_enumerator
|
35
|
+
yield object_from_enumerator, updated_cursor
|
36
|
+
else
|
37
|
+
# we need to go deeper
|
38
|
+
next_index = index + 1
|
39
|
+
iterate(current_objects + [object_from_enumerator], next_index, &block)
|
40
|
+
# reset cursor at the index of the nested enumerator that just finished, so we don't skip items when that
|
41
|
+
# index is reused in the next nested iteration
|
42
|
+
@cursors[next_index] = nil
|
43
|
+
@cursors[index] = cursor_from_enumerator
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
data/lib/job-iteration.rb
CHANGED
@@ -1,8 +1,10 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require "active_job"
|
3
4
|
require_relative "./job-iteration/version"
|
4
5
|
require_relative "./job-iteration/enumerator_builder"
|
5
6
|
require_relative "./job-iteration/iteration"
|
7
|
+
require_relative "./job-iteration/log_subscriber"
|
6
8
|
|
7
9
|
module JobIteration
|
8
10
|
IntegrationLoadError = Class.new(StandardError)
|
@@ -11,6 +13,14 @@ module JobIteration
|
|
11
13
|
|
12
14
|
extend self
|
13
15
|
|
16
|
+
attr_writer :logger
|
17
|
+
|
18
|
+
class << self
|
19
|
+
def logger
|
20
|
+
@logger || ActiveJob::Base.logger
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
14
24
|
# Use this to _always_ interrupt the job after it's been running for more than N seconds.
|
15
25
|
# @example
|
16
26
|
#
|
@@ -18,8 +28,27 @@ module JobIteration
|
|
18
28
|
#
|
19
29
|
# This setting will make it to always interrupt a job after it's been iterating for 5 minutes.
|
20
30
|
# Defaults to nil which means that jobs will not be interrupted except on termination signal.
|
31
|
+
#
|
32
|
+
# This setting can be further reduced (but not increased) by using the inheritable per-class
|
33
|
+
# job_iteration_max_job_runtime setting.
|
34
|
+
# @example
|
35
|
+
#
|
36
|
+
# class MyJob < ActiveJob::Base
|
37
|
+
# include JobIteration::Iteration
|
38
|
+
# self.job_iteration_max_job_runtime = 1.minute
|
39
|
+
# # ...
|
21
40
|
attr_accessor :max_job_runtime
|
22
41
|
|
42
|
+
# Configures a delay duration to wait before resuming an interrupted job.
|
43
|
+
# @example
|
44
|
+
#
|
45
|
+
# JobIteration.default_retry_backoff = 10.seconds
|
46
|
+
#
|
47
|
+
# Defaults to nil which means interrupted jobs will be retried immediately.
|
48
|
+
# This value will be ignored when an interruption is raised by a throttle enumerator,
|
49
|
+
# where the throttle backoff value will take precedence over this setting.
|
50
|
+
attr_accessor :default_retry_backoff
|
51
|
+
|
23
52
|
# Used internally for hooking into job processing frameworks like Sidekiq and Resque.
|
24
53
|
attr_accessor :interruption_adapter
|
25
54
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: job-iteration
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.6
|
4
|
+
version: 1.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shopify
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-09-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|
@@ -47,6 +47,7 @@ extra_rdoc_files: []
|
|
47
47
|
files:
|
48
48
|
- ".github/dependabot.yml"
|
49
49
|
- ".github/workflows/ci.yml"
|
50
|
+
- ".github/workflows/cla.yml"
|
50
51
|
- ".gitignore"
|
51
52
|
- ".rubocop.yml"
|
52
53
|
- ".yardopts"
|
@@ -65,10 +66,12 @@ files:
|
|
65
66
|
- gemfiles/rails_6_1.gemfile
|
66
67
|
- gemfiles/rails_7_0.gemfile
|
67
68
|
- gemfiles/rails_edge.gemfile
|
69
|
+
- guides/argument-semantics.md
|
68
70
|
- guides/best-practices.md
|
69
71
|
- guides/custom-enumerator.md
|
70
72
|
- guides/iteration-how-it-works.md
|
71
73
|
- guides/throttling.md
|
74
|
+
- isogun.yml
|
72
75
|
- job-iteration.gemspec
|
73
76
|
- lib/job-iteration.rb
|
74
77
|
- lib/job-iteration/active_record_batch_enumerator.rb
|
@@ -79,10 +82,11 @@ files:
|
|
79
82
|
- lib/job-iteration/integrations/resque.rb
|
80
83
|
- lib/job-iteration/integrations/sidekiq.rb
|
81
84
|
- lib/job-iteration/iteration.rb
|
85
|
+
- lib/job-iteration/log_subscriber.rb
|
86
|
+
- lib/job-iteration/nested_enumerator.rb
|
82
87
|
- lib/job-iteration/test_helper.rb
|
83
88
|
- lib/job-iteration/throttle_enumerator.rb
|
84
89
|
- lib/job-iteration/version.rb
|
85
|
-
- railgun.yml
|
86
90
|
homepage: https://github.com/shopify/job-iteration
|
87
91
|
licenses:
|
88
92
|
- MIT
|
@@ -104,7 +108,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
104
108
|
- !ruby/object:Gem::Version
|
105
109
|
version: '0'
|
106
110
|
requirements: []
|
107
|
-
rubygems_version: 3.
|
111
|
+
rubygems_version: 3.4.19
|
108
112
|
signing_key:
|
109
113
|
specification_version: 4
|
110
114
|
summary: Makes your background jobs interruptible and resumable.
|