job-iteration 1.3.6 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,11 +8,16 @@ module JobIteration
8
8
 
9
9
  attr_accessor(
10
10
  :cursor_position,
11
- :start_time,
12
11
  :times_interrupted,
13
- :total_time,
14
12
  )
15
13
 
14
+ # The time when the job starts running. If the job is interrupted and runs again, the value is updated.
15
+ attr_accessor :start_time
16
+
17
+ # The total time the job has been running, including multiple iterations.
18
+ # The time isn't reset if the job is interrupted.
19
+ attr_accessor :total_time
20
+
16
21
  class CursorError < ArgumentError
17
22
  attr_reader :cursor
18
23
 
@@ -39,6 +44,33 @@ module JobIteration
39
44
  define_callbacks :start
40
45
  define_callbacks :shutdown
41
46
  define_callbacks :complete
47
+
48
+ class_attribute(
49
+ :job_iteration_max_job_runtime,
50
+ instance_writer: false,
51
+ instance_predicate: false,
52
+ default: JobIteration.max_job_runtime,
53
+ )
54
+
55
+ singleton_class.prepend(PrependedClassMethods)
56
+ end
57
+
58
+ module PrependedClassMethods
59
+ def job_iteration_max_job_runtime=(new)
60
+ existing = job_iteration_max_job_runtime
61
+
62
+ if existing && (!new || new > existing)
63
+ existing_label = existing.inspect
64
+ new_label = new ? new.inspect : "#{new.inspect} (no limit)"
65
+ raise(
66
+ ArgumentError,
67
+ "job_iteration_max_job_runtime may only decrease; " \
68
+ "#{self} tried to increase it from #{existing_label} to #{new_label}",
69
+ )
70
+ end
71
+
72
+ super
73
+ end
42
74
  end
43
75
 
44
76
  module ClassMethods
@@ -68,7 +100,7 @@ module JobIteration
68
100
 
69
101
  def initialize(*arguments)
70
102
  super
71
- @job_iteration_retry_backoff = nil
103
+ @job_iteration_retry_backoff = JobIteration.default_retry_backoff
72
104
  @needs_reenqueue = false
73
105
  self.times_interrupted = 0
74
106
  self.total_time = 0.0
@@ -87,12 +119,14 @@ module JobIteration
87
119
  def deserialize(job_data) # @private
88
120
  super
89
121
  self.cursor_position = job_data["cursor_position"]
90
- self.times_interrupted = job_data["times_interrupted"] || 0
91
- self.total_time = job_data["total_time"] || 0
122
+ self.times_interrupted = Integer(job_data["times_interrupted"] || 0)
123
+ self.total_time = Float(job_data["total_time"] || 0.0)
92
124
  end
93
125
 
94
126
  def perform(*params) # @private
95
127
  interruptible_perform(*params)
128
+
129
+ nil
96
130
  end
97
131
 
98
132
  def retry_job(*, **)
@@ -110,13 +144,12 @@ module JobIteration
110
144
  self.start_time = Time.now.utc
111
145
 
112
146
  enumerator = nil
113
- ActiveSupport::Notifications.instrument("build_enumerator.iteration", iteration_instrumentation_tags) do
147
+ ActiveSupport::Notifications.instrument("build_enumerator.iteration", instrumentation_tags) do
114
148
  enumerator = build_enumerator(*arguments, cursor: cursor_position)
115
149
  end
116
150
 
117
151
  unless enumerator
118
- logger.info("[JobIteration::Iteration] `build_enumerator` returned nil. " \
119
- "Skipping the job.")
152
+ ActiveSupport::Notifications.instrument("nil_enumerator.iteration", instrumentation_tags)
120
153
  return
121
154
  end
122
155
 
@@ -125,7 +158,10 @@ module JobIteration
125
158
  if executions == 1 && times_interrupted == 0
126
159
  run_callbacks(:start)
127
160
  else
128
- ActiveSupport::Notifications.instrument("resumed.iteration", iteration_instrumentation_tags)
161
+ ActiveSupport::Notifications.instrument(
162
+ "resumed.iteration",
163
+ instrumentation_tags.merge(times_interrupted: times_interrupted, total_time: total_time),
164
+ )
129
165
  end
130
166
 
131
167
  completed = catch(:abort) do
@@ -139,7 +175,10 @@ module JobIteration
139
175
  reenqueue_iteration_job
140
176
  elsif completed
141
177
  run_callbacks(:complete)
142
- output_interrupt_summary
178
+ ActiveSupport::Notifications.instrument(
179
+ "completed.iteration",
180
+ instrumentation_tags.merge(times_interrupted: times_interrupted, total_time: total_time),
181
+ )
143
182
  end
144
183
  end
145
184
 
@@ -148,14 +187,15 @@ module JobIteration
148
187
  found_record = false
149
188
  @needs_reenqueue = false
150
189
 
151
- enumerator.each do |object_from_enumerator, index|
190
+ enumerator.each do |object_from_enumerator, cursor_from_enumerator|
152
191
  # Deferred until 2.0.0
153
- # assert_valid_cursor!(index)
192
+ # assert_valid_cursor!(cursor_from_enumerator)
154
193
 
155
- record_unit_of_work do
194
+ tags = instrumentation_tags.merge(cursor_position: cursor_from_enumerator)
195
+ ActiveSupport::Notifications.instrument("each_iteration.iteration", tags) do
156
196
  found_record = true
157
197
  each_iteration(object_from_enumerator, *arguments)
158
- self.cursor_position = index
198
+ self.cursor_position = cursor_from_enumerator
159
199
  end
160
200
 
161
201
  next unless job_should_exit?
@@ -165,25 +205,19 @@ module JobIteration
165
205
  return false
166
206
  end
167
207
 
168
- logger.info(
169
- "[JobIteration::Iteration] Enumerator found nothing to iterate! " \
170
- "times_interrupted=#{times_interrupted} cursor_position=#{cursor_position}"
208
+ ActiveSupport::Notifications.instrument(
209
+ "not_found.iteration",
210
+ instrumentation_tags.merge(times_interrupted: times_interrupted),
171
211
  ) unless found_record
172
212
 
173
- adjust_total_time
174
-
175
213
  true
176
- end
177
-
178
- def record_unit_of_work(&block)
179
- ActiveSupport::Notifications.instrument("each_iteration.iteration", iteration_instrumentation_tags, &block)
214
+ ensure
215
+ adjust_total_time
180
216
  end
181
217
 
182
218
  def reenqueue_iteration_job
183
- ActiveSupport::Notifications.instrument("interrupted.iteration", iteration_instrumentation_tags)
184
- logger.info("[JobIteration::Iteration] Interrupting and re-enqueueing the job cursor_position=#{cursor_position}")
219
+ ActiveSupport::Notifications.instrument("interrupted.iteration", instrumentation_tags)
185
220
 
186
- adjust_total_time
187
221
  self.times_interrupted += 1
188
222
 
189
223
  self.already_in_queue = true if respond_to?(:already_in_queue=)
@@ -225,7 +259,7 @@ module JobIteration
225
259
  unless respond_to?(:each_iteration, true)
226
260
  raise(
227
261
  ArgumentError,
228
- "Iteration job (#{self.class}) must implement #each_iteration method"
262
+ "Iteration job (#{self.class}) must implement #each_iteration method",
229
263
  )
230
264
  end
231
265
 
@@ -252,17 +286,12 @@ module JobIteration
252
286
  method.parameters
253
287
  end
254
288
 
255
- def iteration_instrumentation_tags
256
- { job_class: self.class.name }
257
- end
258
-
259
- def output_interrupt_summary
260
- message = "[JobIteration::Iteration] Completed iterating. times_interrupted=%d total_time=%.3f"
261
- logger.info(Kernel.format(message, times_interrupted, total_time))
289
+ def instrumentation_tags
290
+ { job_class: self.class.name, cursor_position: cursor_position }
262
291
  end
263
292
 
264
293
  def job_should_exit?
265
- if ::JobIteration.max_job_runtime && start_time && (Time.now.utc - start_time) > ::JobIteration.max_job_runtime
294
+ if job_iteration_max_job_runtime && start_time && (Time.now.utc - start_time) > job_iteration_max_job_runtime
266
295
  return true
267
296
  end
268
297
 
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ module JobIteration
4
+ class LogSubscriber < ActiveSupport::LogSubscriber
5
+ def logger
6
+ JobIteration.logger
7
+ end
8
+
9
+ def nil_enumerator(event)
10
+ info do
11
+ "[JobIteration::Iteration] `build_enumerator` returned nil. Skipping the job."
12
+ end
13
+ end
14
+
15
+ def not_found(event)
16
+ info do
17
+ "[JobIteration::Iteration] Enumerator found nothing to iterate! " \
18
+ "times_interrupted=#{event.payload[:times_interrupted]} cursor_position=#{event.payload[:cursor_position]}"
19
+ end
20
+ end
21
+
22
+ def interrupted(event)
23
+ info do
24
+ "[JobIteration::Iteration] Interrupting and re-enqueueing the job " \
25
+ "cursor_position=#{event.payload[:cursor_position]}"
26
+ end
27
+ end
28
+
29
+ def completed(event)
30
+ info do
31
+ message = "[JobIteration::Iteration] Completed iterating. times_interrupted=%d total_time=%.3f"
32
+ Kernel.format(message, event.payload[:times_interrupted], event.payload[:total_time])
33
+ end
34
+ end
35
+ end
36
+ end
37
+
38
+ JobIteration::LogSubscriber.attach_to(:iteration)
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ module JobIteration
4
+ # @private
5
+ class NestedEnumerator
6
+ def initialize(enums, cursor: nil)
7
+ unless enums.all?(Proc)
8
+ raise ArgumentError, "enums must contain only procs/lambdas"
9
+ end
10
+
11
+ if cursor && enums.size != cursor.size
12
+ raise ArgumentError, "cursor should have one object per enum"
13
+ end
14
+
15
+ @enums = enums
16
+ @cursors = cursor || Array.new(enums.size)
17
+ end
18
+
19
+ def each(&block)
20
+ return to_enum unless block_given?
21
+
22
+ iterate([], 0, &block)
23
+ end
24
+
25
+ private
26
+
27
+ def iterate(current_objects, index, &block)
28
+ enumerator = @enums[index].call(*current_objects, @cursors[index])
29
+
30
+ enumerator.each do |object_from_enumerator, cursor_from_enumerator|
31
+ if index == @cursors.size - 1
32
+ # we've reached the innermost enumerator, yield for `iterate_with_enumerator`
33
+ updated_cursor = @cursors.dup
34
+ updated_cursor[index] = cursor_from_enumerator
35
+ yield object_from_enumerator, updated_cursor
36
+ else
37
+ # we need to go deeper
38
+ next_index = index + 1
39
+ iterate(current_objects + [object_from_enumerator], next_index, &block)
40
+ # reset cursor at the index of the nested enumerator that just finished, so we don't skip items when that
41
+ # index is reused in the next nested iteration
42
+ @cursors[next_index] = nil
43
+ @cursors[index] = cursor_from_enumerator
44
+ end
45
+ end
46
+ end
47
+ end
48
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module JobIteration
4
- VERSION = "1.3.6"
4
+ VERSION = "1.4.0"
5
5
  end
data/lib/job-iteration.rb CHANGED
@@ -1,8 +1,10 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "active_job"
3
4
  require_relative "./job-iteration/version"
4
5
  require_relative "./job-iteration/enumerator_builder"
5
6
  require_relative "./job-iteration/iteration"
7
+ require_relative "./job-iteration/log_subscriber"
6
8
 
7
9
  module JobIteration
8
10
  IntegrationLoadError = Class.new(StandardError)
@@ -11,6 +13,10 @@ module JobIteration
11
13
 
12
14
  extend self
13
15
 
16
+ attr_accessor :logger
17
+
18
+ self.logger = ActiveJob::Base.logger
19
+
14
20
  # Use this to _always_ interrupt the job after it's been running for more than N seconds.
15
21
  # @example
16
22
  #
@@ -18,8 +24,27 @@ module JobIteration
18
24
  #
19
25
  # With this setting, a job is always interrupted after it has been iterating for 5 minutes.
20
26
  # Defaults to nil which means that jobs will not be interrupted except on termination signal.
27
+ #
28
+ # This setting can be further reduced (but not increased) by using the inheritable per-class
29
+ # job_iteration_max_job_runtime setting.
30
+ # @example
31
+ #
32
+ # class MyJob < ActiveJob::Base
33
+ # include JobIteration::Iteration
34
+ # self.job_iteration_max_job_runtime = 1.minute
35
+ # # ...
21
36
  attr_accessor :max_job_runtime
22
37
 
38
+ # Configures a delay duration to wait before resuming an interrupted job.
39
+ # @example
40
+ #
41
+ # JobIteration.default_retry_backoff = 10.seconds
42
+ #
43
+ # Defaults to nil which means interrupted jobs will be retried immediately.
44
+ # This value will be ignored when an interruption is raised by a throttle enumerator,
45
+ # where the throttle backoff value will take precedence over this setting.
46
+ attr_accessor :default_retry_backoff
47
+
23
48
  # Used internally for hooking into job processing frameworks like Sidekiq and Resque.
24
49
  attr_accessor :interruption_adapter
25
50
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: job-iteration
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.6
4
+ version: 1.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shopify
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-03-10 00:00:00.000000000 Z
11
+ date: 2023-08-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activerecord
@@ -47,6 +47,7 @@ extra_rdoc_files: []
47
47
  files:
48
48
  - ".github/dependabot.yml"
49
49
  - ".github/workflows/ci.yml"
50
+ - ".github/workflows/cla.yml"
50
51
  - ".gitignore"
51
52
  - ".rubocop.yml"
52
53
  - ".yardopts"
@@ -65,10 +66,12 @@ files:
65
66
  - gemfiles/rails_6_1.gemfile
66
67
  - gemfiles/rails_7_0.gemfile
67
68
  - gemfiles/rails_edge.gemfile
69
+ - guides/argument-semantics.md
68
70
  - guides/best-practices.md
69
71
  - guides/custom-enumerator.md
70
72
  - guides/iteration-how-it-works.md
71
73
  - guides/throttling.md
74
+ - isogun.yml
72
75
  - job-iteration.gemspec
73
76
  - lib/job-iteration.rb
74
77
  - lib/job-iteration/active_record_batch_enumerator.rb
@@ -79,10 +82,11 @@ files:
79
82
  - lib/job-iteration/integrations/resque.rb
80
83
  - lib/job-iteration/integrations/sidekiq.rb
81
84
  - lib/job-iteration/iteration.rb
85
+ - lib/job-iteration/log_subscriber.rb
86
+ - lib/job-iteration/nested_enumerator.rb
82
87
  - lib/job-iteration/test_helper.rb
83
88
  - lib/job-iteration/throttle_enumerator.rb
84
89
  - lib/job-iteration/version.rb
85
- - railgun.yml
86
90
  homepage: https://github.com/shopify/job-iteration
87
91
  licenses:
88
92
  - MIT
@@ -104,7 +108,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
104
108
  - !ruby/object:Gem::Version
105
109
  version: '0'
106
110
  requirements: []
107
- rubygems_version: 3.2.20
111
+ rubygems_version: 3.4.18
108
112
  signing_key:
109
113
  specification_version: 4
110
114
  summary: Makes your background jobs interruptible and resumable.