rocketjob 5.2.0.beta1 → 5.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8920ff4a319e838729863bbf2a7a66a4ee003ac9328cc1c3d3942fc59cecab2c
4
- data.tar.gz: 3c5f4927370915b3188295cd181f5b1dc4ae64a17b0cd070e946cfa82b7517c6
3
+ metadata.gz: 42be7df83c1d38b5ef3a41751e3cc6afd6fb885638a7d144bfd1f71ff5de441e
4
+ data.tar.gz: fd3942b7fe1aa2d76bda3395561bea56122b9eab58c0ae65d99fbea76e3d771c
5
5
  SHA512:
6
- metadata.gz: 1395412d271a4a1cde3d1f80fa6d45d5a0e948454b34e6a1d1221631b718d1f4fb16e3683a4a57af1e1f64d724219b70ab65cd84f7a2c9658d67e0608d263973
7
- data.tar.gz: f5b161cf5dcb32126dff42b645b3356707ea74623f011af9a6e7bf0b8fee6403254b302ec5644a85181c207dbe70cfa3a82349be4a2bf52127e68e9f4ebaf5ab
6
+ metadata.gz: 324641130fd1bb0724058d81cdf33dad02bbdeef2047413a479dcb0bb0782be37be3e0aad9b5c57d9ea200e6cc30889ce06ce8130716f91399472bf317ed4089
7
+ data.tar.gz: dfe9e7f121e7fee9c4713d5fc819e3889d1e0d1d208ac14cd4678f83ce0f802f51e2067cd10d687301a9f7470a609eab6259036e191ae8e6198f7a7830cbbd82
data/README.md CHANGED
@@ -3,9 +3,9 @@
3
3
 
4
4
  Ruby's missing batch system
5
5
 
6
- Checkout http://rocketjob.io/
6
+ Checkout https://rocketjob.io/
7
7
 
8
- ![Rocket Job](http://rocketjob.io/images/rocket/rocket-icon-512x512.png)
8
+ ![Rocket Job](https://rocketjob.io/images/rocket/rocket-icon-512x512.png)
9
9
 
10
10
  ## Documentation
11
11
 
@@ -24,6 +24,7 @@ module RocketJob
24
24
  autoload :LowerPriority, "rocket_job/batch/lower_priority"
25
25
  autoload :Performance, "rocket_job/batch/performance"
26
26
  autoload :Statistics, "rocket_job/batch/statistics"
27
+ autoload :ThrottleWindows, "rocket_job/batch/throttle_windows"
27
28
  autoload :Result, "rocket_job/batch/result"
28
29
  autoload :Results, "rocket_job/batch/results"
29
30
  autoload :Tabular, "rocket_job/batch/tabular"
@@ -18,7 +18,7 @@ module RocketJob
18
18
  raise "Category #{category.inspect}, must be registered in input_categories: #{input_categories.inspect}"
19
19
  end
20
20
 
21
- (@inputs ||= {})[category] ||= RocketJob::Sliced::Input.new(rocket_job_io_slice_arguments("inputs", category))
21
+ (@inputs ||= {})[category] ||= RocketJob::Sliced::Input.new(**rocket_job_io_slice_arguments("inputs", category))
22
22
  end
23
23
 
24
24
  # Returns [RocketJob::Sliced::Output] output collection for holding output slices
@@ -34,7 +34,7 @@ module RocketJob
34
34
  raise "Category #{category.inspect}, must be registered in output_categories: #{output_categories.inspect}"
35
35
  end
36
36
 
37
- (@outputs ||= {})[category] ||= RocketJob::Sliced::Output.new(rocket_job_io_slice_arguments("outputs", category))
37
+ (@outputs ||= {})[category] ||= RocketJob::Sliced::Output.new(**rocket_job_io_slice_arguments("outputs", category))
38
38
  end
39
39
 
40
40
  # Upload the supplied file, io, IOStreams::Path, or IOStreams::Stream.
@@ -6,7 +6,7 @@ module RocketJob
6
6
  #
7
7
  # Example:
8
8
  # # Do not run any slices for this job when the MySQL slave delay exceeds 5 minutes.
9
- # class MyJob < RocketJob
9
+ # class MyJob < RocketJob::Job
10
10
  # include RocketJob::Batch
11
11
  #
12
12
  # # Define a custom mysql throttle
@@ -5,7 +5,7 @@ module RocketJob
5
5
  # Throttle the number of slices of a specific batch job that are processed at the same time.
6
6
  #
7
7
  # Example:
8
- # class MyJob < RocketJob
8
+ # class MyJob < RocketJob::Job
9
9
  # include RocketJob::Batch
10
10
  #
11
11
  # # Maximum number of slices to process at the same time for each running instance.
@@ -53,6 +53,19 @@ module RocketJob
53
53
  conn.where(:id.ne => slice.id).count >= throttle_running_workers
54
54
  end
55
55
  end
56
+
57
+ # Returns [Boolean] whether the throttle for this job has been exceeded
58
+ #
59
+ # With a Batch job, allow a higher priority queued job to replace a running one with
60
+ # a lower priority.
61
+ def throttle_running_jobs_exceeded?
62
+ return unless throttle_running_jobs&.positive?
63
+
64
+ # Cannot use this class since it will include instances of parent job classes.
65
+ RocketJob::Job.with(read: {mode: :primary}) do |conn|
66
+ conn.running.where("_type" => self.class.name, :id.ne => id, :priority.lte => priority).count >= throttle_running_jobs
67
+ end
68
+ end
56
69
  end
57
70
  end
58
71
  end
@@ -0,0 +1,72 @@
1
+ require "active_support/concern"
2
+ require "fugit"
3
+
4
+ module RocketJob
5
+ module Batch
6
+ # For a batch job that can run over a long period of time it can be useful
7
+ # to prevent its slices from being processed outside a predefined processing window.
8
+ #
9
+ # This plugin supports up to 2 different processing windows.
10
+ #
11
+ # For example, do not run this job during business hours.
12
+ # Allow it to run from 5pm until 8am the following day Mon through Fri.
13
+ #
14
+ # class AfterHoursJob < RocketJob::Job
15
+ # include RocketJob::Batch
16
+ # include RocketJob::Batch::ThrottleWindows
17
+ #
18
+ # # Monday through Thursday the job can start processing at 5pm Eastern.
19
+ # self.primary_schedule = "0 17 * * 1-4 America/New_York"
20
+ # # Slices are allowed to run until 8am the following day, which is 15 hours long:
21
+ # self.primary_duration = 15.hours
22
+ #
23
+ # # The slices for this job can run all weekend long, starting Friday at 5pm Eastern.
24
+ # self.secondary_schedule = "0 17 * * 5 America/New_York"
25
+ # # Slices are allowed to run until 8am on Monday morning, which is 63 hours long:
26
+ # self.secondary_duration = 63.hours
27
+ # end
28
+ #
29
+ # Notes:
30
+ # * These schedules do not affect when the job is started, completed, or when `before_batch` or
31
+ # `after_batch` processing is performed. It only limits when individual slices are processed.
32
+ module ThrottleWindows
33
+ extend ActiveSupport::Concern
34
+
35
+ included do
36
+ # Beginning of the primary schedule. In cron format, see Scheduled Jobs `cron_schedule` for examples.
37
+ field :primary_schedule, type: String, class_attribute: true, user_editable: true, copy_on_restart: true
38
+ # Duration in seconds of the primary window.
39
+ field :primary_duration, type: Integer, class_attribute: true, user_editable: true, copy_on_restart: true
40
+
41
+ # Beginning of the secondary schedule. In cron format, see Scheduled Jobs `cron_schedule` for examples.
42
+ field :secondary_schedule, type: String, class_attribute: true, user_editable: true, copy_on_restart: true
43
+ # Duration in seconds of the secondary window.
44
+ field :secondary_duration, type: Integer, class_attribute: true, user_editable: true, copy_on_restart: true
45
+
46
+ define_batch_throttle :throttle_windows_exceeded?, filter: :throttle_filter_id
47
+
48
+ validates_each :primary_schedule, :secondary_schedule do |record, attr, value|
49
+ record.errors.add(attr, "Invalid #{attr}: #{value.inspect}") if value && !Fugit::Cron.new(value)
50
+ end
51
+ end
52
+
53
+ private
54
+
55
+ def throttle_windows_exceeded?
56
+ exceeded = primary_schedule && primary_duration && throttle_outside_window?(primary_schedule, primary_duration)
57
+ if exceeded && secondary_schedule && secondary_duration
58
+ exceeded = throttle_outside_window?(secondary_schedule, secondary_duration)
59
+ end
60
+ exceeded
61
+ end
62
+
63
+ def throttle_outside_window?(schedule, duration)
64
+ cron = Fugit::Cron.new(schedule)
65
+ time = Time.now.utc + 1
66
+ # Add 1 second since right now could be the very beginning of the processing window.
67
+ previous_time = cron.previous_time(time).to_utc_time
68
+ previous_time + duration < time
69
+ end
70
+ end
71
+ end
72
+ end
@@ -28,10 +28,6 @@ module RocketJob
28
28
  #
29
29
  # If an exception was thrown the entire slice of records is marked as failed.
30
30
  #
31
- # If the mongo_ha gem has been loaded, then the connection to mongo is
32
- # automatically re-established and the job will resume anytime a
33
- # Mongo connection failure occurs.
34
- #
35
31
  # Thread-safe, can be called by multiple threads at the same time
36
32
  def rocket_job_work(worker, re_raise_exceptions = false)
37
33
  raise "Job must be started before calling #rocket_job_work" unless running?
@@ -114,8 +110,6 @@ module RocketJob
114
110
  servers
115
111
  end
116
112
 
117
- private
118
-
119
113
  def rocket_job_batch_throttled?(slice, worker)
120
114
  filter = self.class.rocket_job_batch_throttles.matching_filter(self, slice)
121
115
  return false unless filter
@@ -129,46 +123,59 @@ module RocketJob
129
123
  # Process a single slice from Mongo
130
124
  # Once the slice has been successfully processed it will be removed from the input collection
131
125
  # Returns [Integer] the number of records successfully processed
132
- def rocket_job_process_slice(slice)
133
- # TODO: Skip records already processed
134
- @rocket_job_record_number = slice.first_record_number || 0
135
- @rocket_job_slice = slice
126
+ def rocket_job_process_slice(slice, &block)
127
+ @rocket_job_slice = slice
128
+ count = 0
136
129
 
137
- processed_records = 0
138
130
  run_callbacks(:slice) do
139
131
  # Allow before_slice callbacks to fail, complete or abort this slice.
140
132
  return 0 unless running?
141
133
 
142
- RocketJob::Sliced::Writer::Output.collect(self, slice) do |writer|
143
- slice.each do |record|
144
- SemanticLogger.named_tagged(record: @rocket_job_record_number) do
145
- writer << rocket_job_batch_perform(slice, record)
146
- processed_records += 1
147
- end
148
- # JRuby thinks self.rocket_job_record_number= is private and cannot be accessed
149
- @rocket_job_record_number += 1
150
- end
151
- end
152
- @rocket_job_slice = nil
153
- @rocket_job_record_number = nil
134
+ count = rocket_job_perform_slice(slice, &block)
154
135
  end
136
+ @rocket_job_slice = nil
155
137
 
156
138
  # On successful completion remove the slice from the input queue
157
139
  # TODO: Add option to complete slice instead of destroying it to retain input data.
158
140
  slice.destroy
159
- processed_records
141
+ count
142
+ end
143
+
144
+ # Perform individual slice without callbacks
145
+ def rocket_job_perform_slice(slice, &block)
146
+ count = 0
147
+ RocketJob::Sliced::Writer::Output.collect(self, slice) do |writer|
148
+ records = slice.records
149
+
150
+ # Skip records already processed, if any.
151
+ # slice.processing_record_number ||= 0
152
+ # TODO: Must append to existing output slices before this can be enabled.
153
+ # if !collect_output && (slice.processing_record_number > 1)
154
+ # records = records[slice.processing_record_number - 1..-1]
155
+ # end
156
+ # Until the changes above have been implemented, reprocess all records in the slice.
157
+ slice.processing_record_number = 0
158
+
159
+ records.each do |record|
160
+ slice.processing_record_number += 1
161
+ SemanticLogger.named_tagged(record: slice.current_record_number) do
162
+ writer << rocket_job_batch_perform(slice, record, &block)
163
+ count += 1
164
+ end
165
+ end
166
+ end
167
+ count
160
168
  end
161
169
 
162
170
  # Perform a single record within the current slice.
163
171
  def rocket_job_batch_perform(slice, record)
164
- slice.processing_record_number ||= 0
165
- slice.processing_record_number += 1
172
+ @rocket_job_record_number = slice.current_record_number
166
173
 
167
174
  return block_given? ? yield(record) : perform(record) if _perform_callbacks.empty?
168
175
 
169
176
  # @rocket_job_input and @rocket_job_output can be modified by before/around callbacks
170
- @rocket_job_input = record
171
- @rocket_job_output = nil
177
+ @rocket_job_input = record
178
+ @rocket_job_output = nil
172
179
 
173
180
  run_callbacks(:perform) do
174
181
  @rocket_job_output =
@@ -179,9 +186,9 @@ module RocketJob
179
186
  end
180
187
  end
181
188
 
182
- @rocket_job_input = nil
183
- result = @rocket_job_output
184
- @rocket_job_output = nil
189
+ @rocket_job_input = nil
190
+ result = @rocket_job_output
191
+ @rocket_job_output = nil
185
192
  result
186
193
  end
187
194
 
@@ -197,24 +204,7 @@ module RocketJob
197
204
  if failed_count.positive? && (input_count == failed_count)
198
205
  # Reload to pull in any counters or other data that was modified.
199
206
  reload unless new_record?
200
- if may_fail?
201
- fail_job = true
202
- unless new_record?
203
- # Fail job iff no other worker has already finished it
204
- # Must set write concern to at least 1 since we need the nModified back
205
- result = self.class.with(write: {w: 1}) do |query|
206
- query.
207
- where(id: id, state: :running, sub_state: :processing).
208
- update({"$set" => {state: :failed, worker_name: worker_name}})
209
- end
210
- fail_job = false unless result.modified_count.positive?
211
- end
212
- if fail_job
213
- message = "#{failed_count} slices failed to process"
214
- self.exception = JobException.new(message: message)
215
- fail!(worker_name, message)
216
- end
217
- end
207
+ rocket_job_batch_fail!(worker_name) if may_fail?
218
208
  return true
219
209
  end
220
210
 
@@ -237,15 +227,38 @@ module RocketJob
237
227
 
238
228
  # Reload to pull in any counters or other data that was modified.
239
229
  reload
230
+
240
231
  if result.modified_count.positive?
241
232
  rocket_job_batch_run_after_callbacks(false)
242
- else
233
+ elsif aborted?
243
234
  # Repeat cleanup in case this worker was still running when the job was aborted
244
- cleanup! if aborted?
235
+ cleanup!
245
236
  end
246
237
  true
247
238
  end
248
239
 
240
+ # Fail the job
241
+ def rocket_job_batch_fail!(worker_name)
242
+ fail_job = true
243
+
244
+ unless new_record?
245
+ # Fail job iff no other worker has already finished it
246
+ # Must set write concern to at least 1 since we need the nModified back
247
+ result = self.class.with(write: {w: 1}) do |query|
248
+ query.
249
+ where(id: id, state: :running, sub_state: :processing).
250
+ update({"$set" => {state: :failed, worker_name: worker_name}})
251
+ end
252
+ fail_job = false unless result.modified_count.positive?
253
+ end
254
+
255
+ return unless fail_job
256
+
257
+ message = "#{input.failed.count} slices failed to process"
258
+ self.exception = JobException.new(message: message)
259
+ new_record? ? fail(worker_name, message) : fail!(worker_name, message)
260
+ end
261
+
249
262
  # Run the before_batch callbacks
250
263
  # Saves the current state before and after running callbacks if callbacks present
251
264
  def rocket_job_batch_run_before_callbacks
@@ -104,8 +104,6 @@ module RocketJob
104
104
  end
105
105
  end
106
106
 
107
- private
108
-
109
107
  @load_time = Time.now.utc
110
108
  @subscribers = Concurrent::Map.new { Concurrent::Array.new }
111
109
 
@@ -22,8 +22,6 @@ module RocketJob
22
22
  @collection_name = collection_name&.to_sym
23
23
  end
24
24
 
25
- private
26
-
27
25
  module ClassMethods
28
26
  def with_collection(collection_name)
29
27
  all.with_collection(collection_name)
@@ -1,4 +1,5 @@
1
1
  require "active_support/concern"
2
+ require "fugit"
2
3
 
3
4
  module RocketJob
4
5
  module Plugins
@@ -17,7 +18,9 @@ module RocketJob
17
18
 
18
19
  field :cron_schedule, type: String, class_attribute: true, user_editable: true, copy_on_restart: true
19
20
 
20
- validate :rocket_job_cron_valid
21
+ validates_each :cron_schedule do |record, attr, value|
22
+ record.errors.add(attr, "Invalid cron_schedule: #{value.inspect}") if value && !Fugit::Cron.new(value)
23
+ end
21
24
  before_save :rocket_job_cron_set_run_at
22
25
 
23
26
  private
@@ -49,24 +52,14 @@ module RocketJob
49
52
  # The next time as of this time.
50
53
  # Default: Time.now
51
54
  def rocket_job_cron_next_time(time = Time.now)
52
- RocketJob::Plugins::Rufus::CronLine.new(cron_schedule).next_time(time)
55
+ Fugit::Cron.new(cron_schedule).next_time.to_utc_time
53
56
  end
54
57
 
55
- private
56
-
57
58
  def rocket_job_cron_set_run_at
58
59
  return unless cron_schedule
59
60
 
60
61
  self.run_at = rocket_job_cron_next_time if cron_schedule_changed? && !run_at_changed?
61
62
  end
62
-
63
- def rocket_job_cron_valid
64
- return unless cron_schedule
65
-
66
- RocketJob::Plugins::Rufus::CronLine.new(cron_schedule)
67
- rescue ArgumentError => e
68
- errors.add(:cron_schedule, e.message)
69
- end
70
63
  end
71
64
  end
72
65
  end
@@ -7,7 +7,7 @@ module RocketJob
7
7
  #
8
8
  # Example:
9
9
  # # Do not run this job when the MySQL slave delay exceeds 5 minutes.
10
- # class MyJob < RocketJob
10
+ # class MyJob < RocketJob::Job
11
11
  # # Define a custom mysql throttle
12
12
  # # Prevents all jobs of this class from running on the current server.
13
13
  # define_throttle :mysql_throttle_exceeded?
@@ -6,7 +6,7 @@ module RocketJob
6
6
  # Throttle the number of jobs of a specific class that are processed at the same time.
7
7
  #
8
8
  # Example:
9
- # class MyJob < RocketJob
9
+ # class MyJob < RocketJob::Job
10
10
  # # Maximum number of jobs of this class to process at the same time.
11
11
  # self.throttle_running_jobs = 25
12
12
  #
@@ -1,4 +1,5 @@
1
1
  require "active_support/concern"
2
+ require "fugit"
2
3
 
3
4
  module RocketJob
4
5
  module Plugins
@@ -47,18 +48,14 @@ module RocketJob
47
48
 
48
49
  validates_presence_of :processing_schedule, :processing_duration
49
50
  validates_each :processing_schedule do |record, attr, value|
50
- begin
51
- RocketJob::Plugins::Rufus::CronLine.new(value)
52
- rescue ArgumentError => e
53
- record.errors.add(attr, e.message)
54
- end
51
+ record.errors.add(attr, "Invalid schedule: #{value.inspect}") unless Fugit::Cron.new(value)
55
52
  end
56
53
  end
57
54
 
58
55
  # Returns [true|false] whether this job is currently inside its processing window
59
56
  def rocket_job_processing_window_active?
60
- time = Time.now
61
- previous_time = rocket_job_processing_schedule.previous_time(time)
57
+ time = Time.now.utc
58
+ previous_time = Fugit::Cron.new(processing_schedule).previous_time(time).to_utc_time
62
59
  # Inside previous processing window?
63
60
  previous_time + processing_duration > time
64
61
  end
@@ -69,17 +66,14 @@ module RocketJob
69
66
  def rocket_job_processing_window_check
70
67
  return if rocket_job_processing_window_active?
71
68
 
72
- logger.warn("Processing window closed before job was processed. Job is re-scheduled to run at: #{rocket_job_processing_schedule.next_time}")
69
+ next_time = Fugit::Cron.new(processing_schedule).next_time.to_utc_time
70
+ logger.warn("Processing window closed before job was processed. Job is re-scheduled to run at: #{next_time}")
73
71
  self.worker_name ||= "inline"
74
72
  requeue!(worker_name)
75
73
  end
76
74
 
77
75
  def rocket_job_processing_window_set_run_at
78
- self.run_at = rocket_job_processing_schedule.next_time unless rocket_job_processing_window_active?
79
- end
80
-
81
- def rocket_job_processing_schedule
82
- RocketJob::Plugins::Rufus::CronLine.new(processing_schedule)
76
+ self.run_at = Fugit::Cron.new(processing_schedule).next_time.to_utc_time unless rocket_job_processing_window_active?
83
77
  end
84
78
  end
85
79
  end