job-iteration 1.3.6 → 1.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +42 -22
- data/.github/workflows/cla.yml +22 -0
- data/.rubocop.yml +3 -3
- data/CHANGELOG.md +27 -1
- data/Gemfile +0 -1
- data/Gemfile.lock +64 -67
- data/README.md +26 -8
- data/dev.yml +2 -2
- data/guides/argument-semantics.md +128 -0
- data/guides/best-practices.md +72 -32
- data/guides/custom-enumerator.md +76 -28
- data/guides/iteration-how-it-works.md +2 -18
- data/{railgun.yml → isogun.yml} +0 -4
- data/lib/job-iteration/active_record_batch_enumerator.rb +1 -1
- data/lib/job-iteration/active_record_cursor.rb +6 -3
- data/lib/job-iteration/active_record_enumerator.rb +5 -1
- data/lib/job-iteration/csv_enumerator.rb +1 -1
- data/lib/job-iteration/enumerator_builder.rb +47 -9
- data/lib/job-iteration/iteration.rb +64 -35
- data/lib/job-iteration/log_subscriber.rb +38 -0
- data/lib/job-iteration/nested_enumerator.rb +48 -0
- data/lib/job-iteration/version.rb +1 -1
- data/lib/job-iteration.rb +29 -0
- metadata +8 -4
@@ -8,11 +8,16 @@ module JobIteration
|
|
8
8
|
|
9
9
|
attr_accessor(
|
10
10
|
:cursor_position,
|
11
|
-
:start_time,
|
12
11
|
:times_interrupted,
|
13
|
-
:total_time,
|
14
12
|
)
|
15
13
|
|
14
|
+
# The time when the job starts running. If the job is interrupted and runs again, the value is updated.
|
15
|
+
attr_accessor :start_time
|
16
|
+
|
17
|
+
# The total time the job has been running, including multiple iterations.
|
18
|
+
# The time isn't reset if the job is interrupted.
|
19
|
+
attr_accessor :total_time
|
20
|
+
|
16
21
|
class CursorError < ArgumentError
|
17
22
|
attr_reader :cursor
|
18
23
|
|
@@ -39,6 +44,33 @@ module JobIteration
|
|
39
44
|
define_callbacks :start
|
40
45
|
define_callbacks :shutdown
|
41
46
|
define_callbacks :complete
|
47
|
+
|
48
|
+
class_attribute(
|
49
|
+
:job_iteration_max_job_runtime,
|
50
|
+
instance_writer: false,
|
51
|
+
instance_predicate: false,
|
52
|
+
default: JobIteration.max_job_runtime,
|
53
|
+
)
|
54
|
+
|
55
|
+
singleton_class.prepend(PrependedClassMethods)
|
56
|
+
end
|
57
|
+
|
58
|
+
module PrependedClassMethods
|
59
|
+
def job_iteration_max_job_runtime=(new)
|
60
|
+
existing = job_iteration_max_job_runtime
|
61
|
+
|
62
|
+
if existing && (!new || new > existing)
|
63
|
+
existing_label = existing.inspect
|
64
|
+
new_label = new ? new.inspect : "#{new.inspect} (no limit)"
|
65
|
+
raise(
|
66
|
+
ArgumentError,
|
67
|
+
"job_iteration_max_job_runtime may only decrease; " \
|
68
|
+
"#{self} tried to increase it from #{existing_label} to #{new_label}",
|
69
|
+
)
|
70
|
+
end
|
71
|
+
|
72
|
+
super
|
73
|
+
end
|
42
74
|
end
|
43
75
|
|
44
76
|
module ClassMethods
|
@@ -68,7 +100,7 @@ module JobIteration
|
|
68
100
|
|
69
101
|
def initialize(*arguments)
|
70
102
|
super
|
71
|
-
@job_iteration_retry_backoff = nil
|
103
|
+
@job_iteration_retry_backoff = JobIteration.default_retry_backoff
|
72
104
|
@needs_reenqueue = false
|
73
105
|
self.times_interrupted = 0
|
74
106
|
self.total_time = 0.0
|
@@ -87,12 +119,14 @@ module JobIteration
|
|
87
119
|
def deserialize(job_data) # @private
|
88
120
|
super
|
89
121
|
self.cursor_position = job_data["cursor_position"]
|
90
|
-
self.times_interrupted = job_data["times_interrupted"] || 0
|
91
|
-
self.total_time = job_data["total_time"] || 0
|
122
|
+
self.times_interrupted = Integer(job_data["times_interrupted"] || 0)
|
123
|
+
self.total_time = Float(job_data["total_time"] || 0.0)
|
92
124
|
end
|
93
125
|
|
94
126
|
def perform(*params) # @private
|
95
127
|
interruptible_perform(*params)
|
128
|
+
|
129
|
+
nil
|
96
130
|
end
|
97
131
|
|
98
132
|
def retry_job(*, **)
|
@@ -110,13 +144,12 @@ module JobIteration
|
|
110
144
|
self.start_time = Time.now.utc
|
111
145
|
|
112
146
|
enumerator = nil
|
113
|
-
ActiveSupport::Notifications.instrument("build_enumerator.iteration", iteration_instrumentation_tags) do
|
147
|
+
ActiveSupport::Notifications.instrument("build_enumerator.iteration", instrumentation_tags) do
|
114
148
|
enumerator = build_enumerator(*arguments, cursor: cursor_position)
|
115
149
|
end
|
116
150
|
|
117
151
|
unless enumerator
|
118
|
-
|
119
|
-
"Skipping the job.")
|
152
|
+
ActiveSupport::Notifications.instrument("nil_enumerator.iteration", instrumentation_tags)
|
120
153
|
return
|
121
154
|
end
|
122
155
|
|
@@ -125,7 +158,10 @@ module JobIteration
|
|
125
158
|
if executions == 1 && times_interrupted == 0
|
126
159
|
run_callbacks(:start)
|
127
160
|
else
|
128
|
-
ActiveSupport::Notifications.instrument(
|
161
|
+
ActiveSupport::Notifications.instrument(
|
162
|
+
"resumed.iteration",
|
163
|
+
instrumentation_tags.merge(times_interrupted: times_interrupted, total_time: total_time),
|
164
|
+
)
|
129
165
|
end
|
130
166
|
|
131
167
|
completed = catch(:abort) do
|
@@ -139,7 +175,10 @@ module JobIteration
|
|
139
175
|
reenqueue_iteration_job
|
140
176
|
elsif completed
|
141
177
|
run_callbacks(:complete)
|
142
|
-
|
178
|
+
ActiveSupport::Notifications.instrument(
|
179
|
+
"completed.iteration",
|
180
|
+
instrumentation_tags.merge(times_interrupted: times_interrupted, total_time: total_time),
|
181
|
+
)
|
143
182
|
end
|
144
183
|
end
|
145
184
|
|
@@ -148,14 +187,15 @@ module JobIteration
|
|
148
187
|
found_record = false
|
149
188
|
@needs_reenqueue = false
|
150
189
|
|
151
|
-
enumerator.each do |object_from_enumerator,
|
190
|
+
enumerator.each do |object_from_enumerator, cursor_from_enumerator|
|
152
191
|
# Deferred until 2.0.0
|
153
|
-
# assert_valid_cursor!(
|
192
|
+
# assert_valid_cursor!(cursor_from_enumerator)
|
154
193
|
|
155
|
-
|
194
|
+
tags = instrumentation_tags.merge(cursor_position: cursor_from_enumerator)
|
195
|
+
ActiveSupport::Notifications.instrument("each_iteration.iteration", tags) do
|
156
196
|
found_record = true
|
157
197
|
each_iteration(object_from_enumerator, *arguments)
|
158
|
-
self.cursor_position =
|
198
|
+
self.cursor_position = cursor_from_enumerator
|
159
199
|
end
|
160
200
|
|
161
201
|
next unless job_should_exit?
|
@@ -165,25 +205,19 @@ module JobIteration
|
|
165
205
|
return false
|
166
206
|
end
|
167
207
|
|
168
|
-
|
169
|
-
"
|
170
|
-
|
208
|
+
ActiveSupport::Notifications.instrument(
|
209
|
+
"not_found.iteration",
|
210
|
+
instrumentation_tags.merge(times_interrupted: times_interrupted),
|
171
211
|
) unless found_record
|
172
212
|
|
173
|
-
adjust_total_time
|
174
|
-
|
175
213
|
true
|
176
|
-
|
177
|
-
|
178
|
-
def record_unit_of_work(&block)
|
179
|
-
ActiveSupport::Notifications.instrument("each_iteration.iteration", iteration_instrumentation_tags, &block)
|
214
|
+
ensure
|
215
|
+
adjust_total_time
|
180
216
|
end
|
181
217
|
|
182
218
|
def reenqueue_iteration_job
|
183
|
-
ActiveSupport::Notifications.instrument("interrupted.iteration", iteration_instrumentation_tags)
|
184
|
-
logger.info("[JobIteration::Iteration] Interrupting and re-enqueueing the job cursor_position=#{cursor_position}")
|
219
|
+
ActiveSupport::Notifications.instrument("interrupted.iteration", instrumentation_tags)
|
185
220
|
|
186
|
-
adjust_total_time
|
187
221
|
self.times_interrupted += 1
|
188
222
|
|
189
223
|
self.already_in_queue = true if respond_to?(:already_in_queue=)
|
@@ -225,7 +259,7 @@ module JobIteration
|
|
225
259
|
unless respond_to?(:each_iteration, true)
|
226
260
|
raise(
|
227
261
|
ArgumentError,
|
228
|
-
"Iteration job (#{self.class}) must implement #each_iteration method"
|
262
|
+
"Iteration job (#{self.class}) must implement #each_iteration method",
|
229
263
|
)
|
230
264
|
end
|
231
265
|
|
@@ -252,17 +286,12 @@ module JobIteration
|
|
252
286
|
method.parameters
|
253
287
|
end
|
254
288
|
|
255
|
-
def iteration_instrumentation_tags
|
256
|
-
{ job_class: self.class.name }
|
257
|
-
end
|
258
|
-
|
259
|
-
def output_interrupt_summary
|
260
|
-
message = "[JobIteration::Iteration] Completed iterating. times_interrupted=%d total_time=%.3f"
|
261
|
-
logger.info(Kernel.format(message, times_interrupted, total_time))
|
289
|
+
def instrumentation_tags
|
290
|
+
{ job_class: self.class.name, cursor_position: cursor_position }
|
262
291
|
end
|
263
292
|
|
264
293
|
def job_should_exit?
|
265
|
-
if
|
294
|
+
if job_iteration_max_job_runtime && start_time && (Time.now.utc - start_time) > job_iteration_max_job_runtime
|
266
295
|
return true
|
267
296
|
end
|
268
297
|
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module JobIteration
|
4
|
+
class LogSubscriber < ActiveSupport::LogSubscriber
|
5
|
+
def logger
|
6
|
+
JobIteration.logger
|
7
|
+
end
|
8
|
+
|
9
|
+
def nil_enumerator(event)
|
10
|
+
info do
|
11
|
+
"[JobIteration::Iteration] `build_enumerator` returned nil. Skipping the job."
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
def not_found(event)
|
16
|
+
info do
|
17
|
+
"[JobIteration::Iteration] Enumerator found nothing to iterate! " \
|
18
|
+
"times_interrupted=#{event.payload[:times_interrupted]} cursor_position=#{event.payload[:cursor_position]}"
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
def interrupted(event)
|
23
|
+
info do
|
24
|
+
"[JobIteration::Iteration] Interrupting and re-enqueueing the job " \
|
25
|
+
"cursor_position=#{event.payload[:cursor_position]}"
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
def completed(event)
|
30
|
+
info do
|
31
|
+
message = "[JobIteration::Iteration] Completed iterating. times_interrupted=%d total_time=%.3f"
|
32
|
+
Kernel.format(message, event.payload[:times_interrupted], event.payload[:total_time])
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
JobIteration::LogSubscriber.attach_to(:iteration)
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module JobIteration
|
4
|
+
# @private
|
5
|
+
class NestedEnumerator
|
6
|
+
def initialize(enums, cursor: nil)
|
7
|
+
unless enums.all?(Proc)
|
8
|
+
raise ArgumentError, "enums must contain only procs/lambdas"
|
9
|
+
end
|
10
|
+
|
11
|
+
if cursor && enums.size != cursor.size
|
12
|
+
raise ArgumentError, "cursor should have one object per enum"
|
13
|
+
end
|
14
|
+
|
15
|
+
@enums = enums
|
16
|
+
@cursors = cursor || Array.new(enums.size)
|
17
|
+
end
|
18
|
+
|
19
|
+
def each(&block)
|
20
|
+
return to_enum unless block_given?
|
21
|
+
|
22
|
+
iterate([], 0, &block)
|
23
|
+
end
|
24
|
+
|
25
|
+
private
|
26
|
+
|
27
|
+
def iterate(current_objects, index, &block)
|
28
|
+
enumerator = @enums[index].call(*current_objects, @cursors[index])
|
29
|
+
|
30
|
+
enumerator.each do |object_from_enumerator, cursor_from_enumerator|
|
31
|
+
if index == @cursors.size - 1
|
32
|
+
# we've reached the innermost enumerator, yield for `iterate_with_enumerator`
|
33
|
+
updated_cursor = @cursors.dup
|
34
|
+
updated_cursor[index] = cursor_from_enumerator
|
35
|
+
yield object_from_enumerator, updated_cursor
|
36
|
+
else
|
37
|
+
# we need to go deeper
|
38
|
+
next_index = index + 1
|
39
|
+
iterate(current_objects + [object_from_enumerator], next_index, &block)
|
40
|
+
# reset cursor at the index of the nested enumerator that just finished, so we don't skip items when that
|
41
|
+
# index is reused in the next nested iteration
|
42
|
+
@cursors[next_index] = nil
|
43
|
+
@cursors[index] = cursor_from_enumerator
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
data/lib/job-iteration.rb
CHANGED
@@ -1,8 +1,10 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require "active_job"
|
3
4
|
require_relative "./job-iteration/version"
|
4
5
|
require_relative "./job-iteration/enumerator_builder"
|
5
6
|
require_relative "./job-iteration/iteration"
|
7
|
+
require_relative "./job-iteration/log_subscriber"
|
6
8
|
|
7
9
|
module JobIteration
|
8
10
|
IntegrationLoadError = Class.new(StandardError)
|
@@ -11,6 +13,14 @@ module JobIteration
|
|
11
13
|
|
12
14
|
extend self
|
13
15
|
|
16
|
+
attr_writer :logger
|
17
|
+
|
18
|
+
class << self
|
19
|
+
def logger
|
20
|
+
@logger || ActiveJob::Base.logger
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
14
24
|
# Use this to _always_ interrupt the job after it's been running for more than N seconds.
|
15
25
|
# @example
|
16
26
|
#
|
@@ -18,8 +28,27 @@ module JobIteration
|
|
18
28
|
#
|
19
29
|
# This setting will make it to always interrupt a job after it's been iterating for 5 minutes.
|
20
30
|
# Defaults to nil which means that jobs will not be interrupted except on termination signal.
|
31
|
+
#
|
32
|
+
# This setting can be further reduced (but not increased) by using the inheritable per-class
|
33
|
+
# job_iteration_max_job_runtime setting.
|
34
|
+
# @example
|
35
|
+
#
|
36
|
+
# class MyJob < ActiveJob::Base
|
37
|
+
# include JobIteration::Iteration
|
38
|
+
# self.job_iteration_max_job_runtime = 1.minute
|
39
|
+
# # ...
|
21
40
|
attr_accessor :max_job_runtime
|
22
41
|
|
42
|
+
# Configures a delay duration to wait before resuming an interrupted job.
|
43
|
+
# @example
|
44
|
+
#
|
45
|
+
# JobIteration.default_retry_backoff = 10.seconds
|
46
|
+
#
|
47
|
+
# Defaults to nil which means interrupted jobs will be retried immediately.
|
48
|
+
# This value will be ignored when an interruption is raised by a throttle enumerator,
|
49
|
+
# where the throttle backoff value will take precedence over this setting.
|
50
|
+
attr_accessor :default_retry_backoff
|
51
|
+
|
23
52
|
# Used internally for hooking into job processing frameworks like Sidekiq and Resque.
|
24
53
|
attr_accessor :interruption_adapter
|
25
54
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: job-iteration
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.6
|
4
|
+
version: 1.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shopify
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-09-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|
@@ -47,6 +47,7 @@ extra_rdoc_files: []
|
|
47
47
|
files:
|
48
48
|
- ".github/dependabot.yml"
|
49
49
|
- ".github/workflows/ci.yml"
|
50
|
+
- ".github/workflows/cla.yml"
|
50
51
|
- ".gitignore"
|
51
52
|
- ".rubocop.yml"
|
52
53
|
- ".yardopts"
|
@@ -65,10 +66,12 @@ files:
|
|
65
66
|
- gemfiles/rails_6_1.gemfile
|
66
67
|
- gemfiles/rails_7_0.gemfile
|
67
68
|
- gemfiles/rails_edge.gemfile
|
69
|
+
- guides/argument-semantics.md
|
68
70
|
- guides/best-practices.md
|
69
71
|
- guides/custom-enumerator.md
|
70
72
|
- guides/iteration-how-it-works.md
|
71
73
|
- guides/throttling.md
|
74
|
+
- isogun.yml
|
72
75
|
- job-iteration.gemspec
|
73
76
|
- lib/job-iteration.rb
|
74
77
|
- lib/job-iteration/active_record_batch_enumerator.rb
|
@@ -79,10 +82,11 @@ files:
|
|
79
82
|
- lib/job-iteration/integrations/resque.rb
|
80
83
|
- lib/job-iteration/integrations/sidekiq.rb
|
81
84
|
- lib/job-iteration/iteration.rb
|
85
|
+
- lib/job-iteration/log_subscriber.rb
|
86
|
+
- lib/job-iteration/nested_enumerator.rb
|
82
87
|
- lib/job-iteration/test_helper.rb
|
83
88
|
- lib/job-iteration/throttle_enumerator.rb
|
84
89
|
- lib/job-iteration/version.rb
|
85
|
-
- railgun.yml
|
86
90
|
homepage: https://github.com/shopify/job-iteration
|
87
91
|
licenses:
|
88
92
|
- MIT
|
@@ -104,7 +108,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
104
108
|
- !ruby/object:Gem::Version
|
105
109
|
version: '0'
|
106
110
|
requirements: []
|
107
|
-
rubygems_version: 3.
|
111
|
+
rubygems_version: 3.4.19
|
108
112
|
signing_key:
|
109
113
|
specification_version: 4
|
110
114
|
summary: Makes your background jobs interruptible and resumable.
|