rocketjob 5.2.0.beta1 → 5.3.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8920ff4a319e838729863bbf2a7a66a4ee003ac9328cc1c3d3942fc59cecab2c
4
- data.tar.gz: 3c5f4927370915b3188295cd181f5b1dc4ae64a17b0cd070e946cfa82b7517c6
3
+ metadata.gz: 42be7df83c1d38b5ef3a41751e3cc6afd6fb885638a7d144bfd1f71ff5de441e
4
+ data.tar.gz: fd3942b7fe1aa2d76bda3395561bea56122b9eab58c0ae65d99fbea76e3d771c
5
5
  SHA512:
6
- metadata.gz: 1395412d271a4a1cde3d1f80fa6d45d5a0e948454b34e6a1d1221631b718d1f4fb16e3683a4a57af1e1f64d724219b70ab65cd84f7a2c9658d67e0608d263973
7
- data.tar.gz: f5b161cf5dcb32126dff42b645b3356707ea74623f011af9a6e7bf0b8fee6403254b302ec5644a85181c207dbe70cfa3a82349be4a2bf52127e68e9f4ebaf5ab
6
+ metadata.gz: 324641130fd1bb0724058d81cdf33dad02bbdeef2047413a479dcb0bb0782be37be3e0aad9b5c57d9ea200e6cc30889ce06ce8130716f91399472bf317ed4089
7
+ data.tar.gz: dfe9e7f121e7fee9c4713d5fc819e3889d1e0d1d208ac14cd4678f83ce0f802f51e2067cd10d687301a9f7470a609eab6259036e191ae8e6198f7a7830cbbd82
data/README.md CHANGED
@@ -3,9 +3,9 @@
3
3
 
4
4
  Ruby's missing batch system
5
5
 
6
- Checkout http://rocketjob.io/
6
+ Check out https://rocketjob.io/
7
7
 
8
- ![Rocket Job](http://rocketjob.io/images/rocket/rocket-icon-512x512.png)
8
+ ![Rocket Job](https://rocketjob.io/images/rocket/rocket-icon-512x512.png)
9
9
 
10
10
  ## Documentation
11
11
 
@@ -24,6 +24,7 @@ module RocketJob
24
24
  autoload :LowerPriority, "rocket_job/batch/lower_priority"
25
25
  autoload :Performance, "rocket_job/batch/performance"
26
26
  autoload :Statistics, "rocket_job/batch/statistics"
27
+ autoload :ThrottleWindows, "rocket_job/batch/throttle_windows"
27
28
  autoload :Result, "rocket_job/batch/result"
28
29
  autoload :Results, "rocket_job/batch/results"
29
30
  autoload :Tabular, "rocket_job/batch/tabular"
@@ -18,7 +18,7 @@ module RocketJob
18
18
  raise "Category #{category.inspect}, must be registered in input_categories: #{input_categories.inspect}"
19
19
  end
20
20
 
21
- (@inputs ||= {})[category] ||= RocketJob::Sliced::Input.new(rocket_job_io_slice_arguments("inputs", category))
21
+ (@inputs ||= {})[category] ||= RocketJob::Sliced::Input.new(**rocket_job_io_slice_arguments("inputs", category))
22
22
  end
23
23
 
24
24
  # Returns [RocketJob::Sliced::Output] output collection for holding output slices
@@ -34,7 +34,7 @@ module RocketJob
34
34
  raise "Category #{category.inspect}, must be registered in output_categories: #{output_categories.inspect}"
35
35
  end
36
36
 
37
- (@outputs ||= {})[category] ||= RocketJob::Sliced::Output.new(rocket_job_io_slice_arguments("outputs", category))
37
+ (@outputs ||= {})[category] ||= RocketJob::Sliced::Output.new(**rocket_job_io_slice_arguments("outputs", category))
38
38
  end
39
39
 
40
40
  # Upload the supplied file, io, IOStreams::Path, or IOStreams::Stream.
@@ -6,7 +6,7 @@ module RocketJob
6
6
  #
7
7
  # Example:
8
8
  # # Do not run any slices for this job when the MySQL slave delay exceeds 5 minutes.
9
- # class MyJob < RocketJob
9
+ # class MyJob < RocketJob::Job
10
10
  # include RocketJob::Batch
11
11
  #
12
12
  # # Define a custom mysql throttle
@@ -5,7 +5,7 @@ module RocketJob
5
5
  # Throttle the number of slices of a specific batch job that are processed at the same time.
6
6
  #
7
7
  # Example:
8
- # class MyJob < RocketJob
8
+ # class MyJob < RocketJob::Job
9
9
  # include RocketJob::Batch
10
10
  #
11
11
  # # Maximum number of slices to process at the same time for each running instance.
@@ -53,6 +53,19 @@ module RocketJob
53
53
  conn.where(:id.ne => slice.id).count >= throttle_running_workers
54
54
  end
55
55
  end
56
+
57
+ # Returns [Boolean] whether the throttle for this job has been exceeded
58
+ #
59
+ # With a Batch job, allow a higher priority queued job to replace a running one with
60
+ # a lower priority.
61
+ def throttle_running_jobs_exceeded?
62
+ return unless throttle_running_jobs&.positive?
63
+
64
+ # Cannot query through this job's own class since it will include instances of parent job classes.
65
+ RocketJob::Job.with(read: {mode: :primary}) do |conn|
66
+ conn.running.where("_type" => self.class.name, :id.ne => id, :priority.lte => priority).count >= throttle_running_jobs
67
+ end
68
+ end
56
69
  end
57
70
  end
58
71
  end
@@ -0,0 +1,72 @@
1
+ require "active_support/concern"
2
+ require "fugit"
3
+
4
+ module RocketJob
5
+ module Batch
6
+ # For a batch job that can run over a long period of time it can be useful
7
+ # to prevent its slices from being processed outside a predefined processing window.
8
+ #
9
+ # This plugin supports up to 2 different processing windows.
10
+ #
11
+ # For example, do not run this job during business hours.
12
+ # Allow it to run from 5pm until 8am the following day Mon through Fri.
13
+ #
14
+ # class AfterHoursJob < RocketJob::Job
15
+ # include RocketJob::Batch
16
+ # include RocketJob::Batch::ThrottleWindows
17
+ #
18
+ # # Monday through Thursday the job can start processing at 5pm Eastern.
19
+ # self.primary_schedule = "0 17 * * 1-4 America/New_York"
20
+ # # Slices are allowed to run until 8am the following day, which is 15 hours long:
21
+ # self.primary_duration = 15.hours
22
+ #
23
+ # # The slices for this job can run all weekend long, starting Friday at 5pm Eastern.
24
+ # self.secondary_schedule = "0 17 * * 5 America/New_York"
25
+ # # Slices are allowed to run until 8am on Monday morning, which is 63 hours long:
26
+ # self.secondary_duration = 63.hours
27
+ # end
28
+ #
29
+ # Notes:
30
+ # * These schedules do not affect when the job is started, completed, or when `before_batch` or
31
+ # `after_batch` processing is performed. It only limits when individual slices are processed.
32
+ module ThrottleWindows
33
+ extend ActiveSupport::Concern
34
+
35
+ included do
36
+ # Beginning of the primary schedule. In cron format, see Scheduled Jobs `cron_schedule` for examples.
37
+ field :primary_schedule, type: String, class_attribute: true, user_editable: true, copy_on_restart: true
38
+ # Duration in seconds of the primary window.
39
+ field :primary_duration, type: Integer, class_attribute: true, user_editable: true, copy_on_restart: true
40
+
41
+ # Beginning of the secondary schedule. In cron format, see Scheduled Jobs `cron_schedule` for examples.
42
+ field :secondary_schedule, type: String, class_attribute: true, user_editable: true, copy_on_restart: true
43
+ # Duration in seconds of the secondary window.
44
+ field :secondary_duration, type: Integer, class_attribute: true, user_editable: true, copy_on_restart: true
45
+
46
+ define_batch_throttle :throttle_windows_exceeded?, filter: :throttle_filter_id
47
+
48
+ validates_each :primary_schedule, :secondary_schedule do |record, attr, value|
49
+ record.errors.add(attr, "Invalid #{attr}: #{value.inspect}") if value && !Fugit::Cron.new(value)
50
+ end
51
+ end
52
+
53
+ private
54
+
55
+ def throttle_windows_exceeded?
56
+ exceeded = primary_schedule && primary_duration && throttle_outside_window?(primary_schedule, primary_duration)
57
+ if exceeded && secondary_schedule && secondary_duration
58
+ exceeded = throttle_outside_window?(secondary_schedule, secondary_duration)
59
+ end
60
+ exceeded
61
+ end
62
+
63
+ def throttle_outside_window?(schedule, duration)
64
+ cron = Fugit::Cron.new(schedule)
65
+ time = Time.now.utc + 1
66
+ # The 1 second added above covers the case where right now is the very beginning of the processing window.
67
+ previous_time = cron.previous_time(time).to_utc_time
68
+ previous_time + duration < time
69
+ end
70
+ end
71
+ end
72
+ end
@@ -28,10 +28,6 @@ module RocketJob
28
28
  #
29
29
  # If an exception was thrown the entire slice of records is marked as failed.
30
30
  #
31
- # If the mongo_ha gem has been loaded, then the connection to mongo is
32
- # automatically re-established and the job will resume anytime a
33
- # Mongo connection failure occurs.
34
- #
35
31
  # Thread-safe, can be called by multiple threads at the same time
36
32
  def rocket_job_work(worker, re_raise_exceptions = false)
37
33
  raise "Job must be started before calling #rocket_job_work" unless running?
@@ -114,8 +110,6 @@ module RocketJob
114
110
  servers
115
111
  end
116
112
 
117
- private
118
-
119
113
  def rocket_job_batch_throttled?(slice, worker)
120
114
  filter = self.class.rocket_job_batch_throttles.matching_filter(self, slice)
121
115
  return false unless filter
@@ -129,46 +123,59 @@ module RocketJob
129
123
  # Process a single slice from Mongo
130
124
  # Once the slice has been successfully processed it will be removed from the input collection
131
125
  # Returns [Integer] the number of records successfully processed
132
- def rocket_job_process_slice(slice)
133
- # TODO: Skip records already processed
134
- @rocket_job_record_number = slice.first_record_number || 0
135
- @rocket_job_slice = slice
126
+ def rocket_job_process_slice(slice, &block)
127
+ @rocket_job_slice = slice
128
+ count = 0
136
129
 
137
- processed_records = 0
138
130
  run_callbacks(:slice) do
139
131
  # Allow before_slice callbacks to fail, complete or abort this slice.
140
132
  return 0 unless running?
141
133
 
142
- RocketJob::Sliced::Writer::Output.collect(self, slice) do |writer|
143
- slice.each do |record|
144
- SemanticLogger.named_tagged(record: @rocket_job_record_number) do
145
- writer << rocket_job_batch_perform(slice, record)
146
- processed_records += 1
147
- end
148
- # JRuby thinks self.rocket_job_record_number= is private and cannot be accessed
149
- @rocket_job_record_number += 1
150
- end
151
- end
152
- @rocket_job_slice = nil
153
- @rocket_job_record_number = nil
134
+ count = rocket_job_perform_slice(slice, &block)
154
135
  end
136
+ @rocket_job_slice = nil
155
137
 
156
138
  # On successful completion remove the slice from the input queue
157
139
  # TODO: Add option to complete slice instead of destroying it to retain input data.
158
140
  slice.destroy
159
- processed_records
141
+ count
142
+ end
143
+
144
+ # Perform individual slice without callbacks
145
+ def rocket_job_perform_slice(slice, &block)
146
+ count = 0
147
+ RocketJob::Sliced::Writer::Output.collect(self, slice) do |writer|
148
+ records = slice.records
149
+
150
+ # Skip records already processed, if any.
151
+ # slice.processing_record_number ||= 0
152
+ # TODO: Must append to existing output slices before this can be enabled.
153
+ # if !collect_output && (slice.processing_record_number > 1)
154
+ # records = records[slice.processing_record_number - 1..-1]
155
+ # end
156
+ # Until the changes above have been implemented, reprocess all records in the slice.
157
+ slice.processing_record_number = 0
158
+
159
+ records.each do |record|
160
+ slice.processing_record_number += 1
161
+ SemanticLogger.named_tagged(record: slice.current_record_number) do
162
+ writer << rocket_job_batch_perform(slice, record, &block)
163
+ count += 1
164
+ end
165
+ end
166
+ end
167
+ count
160
168
  end
161
169
 
162
170
  # Perform a single record within the current slice.
163
171
  def rocket_job_batch_perform(slice, record)
164
- slice.processing_record_number ||= 0
165
- slice.processing_record_number += 1
172
+ @rocket_job_record_number = slice.current_record_number
166
173
 
167
174
  return block_given? ? yield(record) : perform(record) if _perform_callbacks.empty?
168
175
 
169
176
  # @rocket_job_input and @rocket_job_output can be modified by before/around callbacks
170
- @rocket_job_input = record
171
- @rocket_job_output = nil
177
+ @rocket_job_input = record
178
+ @rocket_job_output = nil
172
179
 
173
180
  run_callbacks(:perform) do
174
181
  @rocket_job_output =
@@ -179,9 +186,9 @@ module RocketJob
179
186
  end
180
187
  end
181
188
 
182
- @rocket_job_input = nil
183
- result = @rocket_job_output
184
- @rocket_job_output = nil
189
+ @rocket_job_input = nil
190
+ result = @rocket_job_output
191
+ @rocket_job_output = nil
185
192
  result
186
193
  end
187
194
 
@@ -197,24 +204,7 @@ module RocketJob
197
204
  if failed_count.positive? && (input_count == failed_count)
198
205
  # Reload to pull in any counters or other data that was modified.
199
206
  reload unless new_record?
200
- if may_fail?
201
- fail_job = true
202
- unless new_record?
203
- # Fail job iff no other worker has already finished it
204
- # Must set write concern to at least 1 since we need the nModified back
205
- result = self.class.with(write: {w: 1}) do |query|
206
- query.
207
- where(id: id, state: :running, sub_state: :processing).
208
- update({"$set" => {state: :failed, worker_name: worker_name}})
209
- end
210
- fail_job = false unless result.modified_count.positive?
211
- end
212
- if fail_job
213
- message = "#{failed_count} slices failed to process"
214
- self.exception = JobException.new(message: message)
215
- fail!(worker_name, message)
216
- end
217
- end
207
+ rocket_job_batch_fail!(worker_name) if may_fail?
218
208
  return true
219
209
  end
220
210
 
@@ -237,15 +227,38 @@ module RocketJob
237
227
 
238
228
  # Reload to pull in any counters or other data that was modified.
239
229
  reload
230
+
240
231
  if result.modified_count.positive?
241
232
  rocket_job_batch_run_after_callbacks(false)
242
- else
233
+ elsif aborted?
243
234
  # Repeat cleanup in case this worker was still running when the job was aborted
244
- cleanup! if aborted?
235
+ cleanup!
245
236
  end
246
237
  true
247
238
  end
248
239
 
240
+ # Fail the job
241
+ def rocket_job_batch_fail!(worker_name)
242
+ fail_job = true
243
+
244
+ unless new_record?
245
+ # Fail job iff no other worker has already finished it
246
+ # Must set write concern to at least 1 since we need the nModified back
247
+ result = self.class.with(write: {w: 1}) do |query|
248
+ query.
249
+ where(id: id, state: :running, sub_state: :processing).
250
+ update({"$set" => {state: :failed, worker_name: worker_name}})
251
+ end
252
+ fail_job = false unless result.modified_count.positive?
253
+ end
254
+
255
+ return unless fail_job
256
+
257
+ message = "#{input.failed.count} slices failed to process"
258
+ self.exception = JobException.new(message: message)
259
+ new_record? ? fail(worker_name, message) : fail!(worker_name, message)
260
+ end
261
+
249
262
  # Run the before_batch callbacks
250
263
  # Saves the current state before and after running callbacks if callbacks present
251
264
  def rocket_job_batch_run_before_callbacks
@@ -104,8 +104,6 @@ module RocketJob
104
104
  end
105
105
  end
106
106
 
107
- private
108
-
109
107
  @load_time = Time.now.utc
110
108
  @subscribers = Concurrent::Map.new { Concurrent::Array.new }
111
109
 
@@ -22,8 +22,6 @@ module RocketJob
22
22
  @collection_name = collection_name&.to_sym
23
23
  end
24
24
 
25
- private
26
-
27
25
  module ClassMethods
28
26
  def with_collection(collection_name)
29
27
  all.with_collection(collection_name)
@@ -1,4 +1,5 @@
1
1
  require "active_support/concern"
2
+ require "fugit"
2
3
 
3
4
  module RocketJob
4
5
  module Plugins
@@ -17,7 +18,9 @@ module RocketJob
17
18
 
18
19
  field :cron_schedule, type: String, class_attribute: true, user_editable: true, copy_on_restart: true
19
20
 
20
- validate :rocket_job_cron_valid
21
+ validates_each :cron_schedule do |record, attr, value|
22
+ record.errors.add(attr, "Invalid cron_schedule: #{value.inspect}") if value && !Fugit::Cron.new(value)
23
+ end
21
24
  before_save :rocket_job_cron_set_run_at
22
25
 
23
26
  private
@@ -49,24 +52,14 @@ module RocketJob
49
52
  # The next time as of this time.
50
53
  # Default: Time.now
51
54
  def rocket_job_cron_next_time(time = Time.now)
52
- RocketJob::Plugins::Rufus::CronLine.new(cron_schedule).next_time(time)
55
+ Fugit::Cron.new(cron_schedule).next_time.to_utc_time
53
56
  end
54
57
 
55
- private
56
-
57
58
  def rocket_job_cron_set_run_at
58
59
  return unless cron_schedule
59
60
 
60
61
  self.run_at = rocket_job_cron_next_time if cron_schedule_changed? && !run_at_changed?
61
62
  end
62
-
63
- def rocket_job_cron_valid
64
- return unless cron_schedule
65
-
66
- RocketJob::Plugins::Rufus::CronLine.new(cron_schedule)
67
- rescue ArgumentError => e
68
- errors.add(:cron_schedule, e.message)
69
- end
70
63
  end
71
64
  end
72
65
  end
@@ -7,7 +7,7 @@ module RocketJob
7
7
  #
8
8
  # Example:
9
9
  # # Do not run this job when the MySQL slave delay exceeds 5 minutes.
10
- # class MyJob < RocketJob
10
+ # class MyJob < RocketJob::Job
11
11
  # # Define a custom mysql throttle
12
12
  # # Prevents all jobs of this class from running on the current server.
13
13
  # define_throttle :mysql_throttle_exceeded?
@@ -6,7 +6,7 @@ module RocketJob
6
6
  # Throttle the number of jobs of a specific class that are processed at the same time.
7
7
  #
8
8
  # Example:
9
- # class MyJob < RocketJob
9
+ # class MyJob < RocketJob::Job
10
10
  # # Maximum number of jobs of this class to process at the same time.
11
11
  # self.throttle_running_jobs = 25
12
12
  #
@@ -1,4 +1,5 @@
1
1
  require "active_support/concern"
2
+ require "fugit"
2
3
 
3
4
  module RocketJob
4
5
  module Plugins
@@ -47,18 +48,14 @@ module RocketJob
47
48
 
48
49
  validates_presence_of :processing_schedule, :processing_duration
49
50
  validates_each :processing_schedule do |record, attr, value|
50
- begin
51
- RocketJob::Plugins::Rufus::CronLine.new(value)
52
- rescue ArgumentError => e
53
- record.errors.add(attr, e.message)
54
- end
51
+ record.errors.add(attr, "Invalid schedule: #{value.inspect}") unless Fugit::Cron.new(value)
55
52
  end
56
53
  end
57
54
 
58
55
  # Returns [true|false] whether this job is currently inside its processing window
59
56
  def rocket_job_processing_window_active?
60
- time = Time.now
61
- previous_time = rocket_job_processing_schedule.previous_time(time)
57
+ time = Time.now.utc
58
+ previous_time = Fugit::Cron.new(processing_schedule).previous_time(time).to_utc_time
62
59
  # Inside previous processing window?
63
60
  previous_time + processing_duration > time
64
61
  end
@@ -69,17 +66,14 @@ module RocketJob
69
66
  def rocket_job_processing_window_check
70
67
  return if rocket_job_processing_window_active?
71
68
 
72
- logger.warn("Processing window closed before job was processed. Job is re-scheduled to run at: #{rocket_job_processing_schedule.next_time}")
69
+ next_time = Fugit::Cron.new(processing_schedule).next_time.to_utc_time
70
+ logger.warn("Processing window closed before job was processed. Job is re-scheduled to run at: #{next_time}")
73
71
  self.worker_name ||= "inline"
74
72
  requeue!(worker_name)
75
73
  end
76
74
 
77
75
  def rocket_job_processing_window_set_run_at
78
- self.run_at = rocket_job_processing_schedule.next_time unless rocket_job_processing_window_active?
79
- end
80
-
81
- def rocket_job_processing_schedule
82
- RocketJob::Plugins::Rufus::CronLine.new(processing_schedule)
76
+ self.run_at = Fugit::Cron.new(processing_schedule).next_time.to_utc_time unless rocket_job_processing_window_active?
83
77
  end
84
78
  end
85
79
  end