rocketjob 5.2.0.beta1 → 5.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/lib/rocket_job/batch.rb +1 -0
- data/lib/rocket_job/batch/io.rb +2 -2
- data/lib/rocket_job/batch/throttle.rb +1 -1
- data/lib/rocket_job/batch/throttle_running_workers.rb +14 -1
- data/lib/rocket_job/batch/throttle_windows.rb +72 -0
- data/lib/rocket_job/batch/worker.rb +64 -51
- data/lib/rocket_job/event.rb +0 -2
- data/lib/rocket_job/extensions/mongoid/clients/options.rb +0 -2
- data/lib/rocket_job/plugins/cron.rb +5 -12
- data/lib/rocket_job/plugins/job/throttle.rb +1 -1
- data/lib/rocket_job/plugins/job/throttle_running_jobs.rb +1 -1
- data/lib/rocket_job/plugins/processing_window.rb +7 -13
- data/lib/rocket_job/sliced/slice.rb +5 -7
- data/lib/rocket_job/version.rb +1 -1
- metadata +21 -8
- data/lib/rocket_job/plugins/rufus/cron_line.rb +0 -520
- data/lib/rocket_job/plugins/rufus/zo_time.rb +0 -524
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 42be7df83c1d38b5ef3a41751e3cc6afd6fb885638a7d144bfd1f71ff5de441e
|
4
|
+
data.tar.gz: fd3942b7fe1aa2d76bda3395561bea56122b9eab58c0ae65d99fbea76e3d771c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 324641130fd1bb0724058d81cdf33dad02bbdeef2047413a479dcb0bb0782be37be3e0aad9b5c57d9ea200e6cc30889ce06ce8130716f91399472bf317ed4089
|
7
|
+
data.tar.gz: dfe9e7f121e7fee9c4713d5fc819e3889d1e0d1d208ac14cd4678f83ce0f802f51e2067cd10d687301a9f7470a609eab6259036e191ae8e6198f7a7830cbbd82
|
data/README.md
CHANGED
@@ -3,9 +3,9 @@
|
|
3
3
|
|
4
4
|
Ruby's missing batch system
|
5
5
|
|
6
|
-
Checkout
|
6
|
+
Checkout https://rocketjob.io/
|
7
7
|
|
8
|
-
![Rocket Job](
|
8
|
+
![Rocket Job](https://rocketjob.io/images/rocket/rocket-icon-512x512.png)
|
9
9
|
|
10
10
|
## Documentation
|
11
11
|
|
data/lib/rocket_job/batch.rb
CHANGED
@@ -24,6 +24,7 @@ module RocketJob
|
|
24
24
|
autoload :LowerPriority, "rocket_job/batch/lower_priority"
|
25
25
|
autoload :Performance, "rocket_job/batch/performance"
|
26
26
|
autoload :Statistics, "rocket_job/batch/statistics"
|
27
|
+
autoload :ThrottleWindows, "rocket_job/batch/throttle_windows"
|
27
28
|
autoload :Result, "rocket_job/batch/result"
|
28
29
|
autoload :Results, "rocket_job/batch/results"
|
29
30
|
autoload :Tabular, "rocket_job/batch/tabular"
|
data/lib/rocket_job/batch/io.rb
CHANGED
@@ -18,7 +18,7 @@ module RocketJob
|
|
18
18
|
raise "Category #{category.inspect}, must be registered in input_categories: #{input_categories.inspect}"
|
19
19
|
end
|
20
20
|
|
21
|
-
(@inputs ||= {})[category] ||= RocketJob::Sliced::Input.new(rocket_job_io_slice_arguments("inputs", category))
|
21
|
+
(@inputs ||= {})[category] ||= RocketJob::Sliced::Input.new(**rocket_job_io_slice_arguments("inputs", category))
|
22
22
|
end
|
23
23
|
|
24
24
|
# Returns [RocketJob::Sliced::Output] output collection for holding output slices
|
@@ -34,7 +34,7 @@ module RocketJob
|
|
34
34
|
raise "Category #{category.inspect}, must be registered in output_categories: #{output_categories.inspect}"
|
35
35
|
end
|
36
36
|
|
37
|
-
(@outputs ||= {})[category] ||= RocketJob::Sliced::Output.new(rocket_job_io_slice_arguments("outputs", category))
|
37
|
+
(@outputs ||= {})[category] ||= RocketJob::Sliced::Output.new(**rocket_job_io_slice_arguments("outputs", category))
|
38
38
|
end
|
39
39
|
|
40
40
|
# Upload the supplied file, io, IOStreams::Path, or IOStreams::Stream.
|
@@ -5,7 +5,7 @@ module RocketJob
|
|
5
5
|
# Throttle the number of slices of a specific batch job that are processed at the same time.
|
6
6
|
#
|
7
7
|
# Example:
|
8
|
-
# class MyJob < RocketJob
|
8
|
+
# class MyJob < RocketJob::Job
|
9
9
|
# include RocketJob::Batch
|
10
10
|
#
|
11
11
|
# # Maximum number of slices to process at the same time for each running instance.
|
@@ -53,6 +53,19 @@ module RocketJob
|
|
53
53
|
conn.where(:id.ne => slice.id).count >= throttle_running_workers
|
54
54
|
end
|
55
55
|
end
|
56
|
+
|
57
|
+
# Returns [Boolean] whether the throttle for this job has been exceeded
|
58
|
+
#
|
59
|
+
# With a Batch job, allow a higher priority queued job to replace a running one with
|
60
|
+
# a lower priority.
|
61
|
+
def throttle_running_jobs_exceeded?
|
62
|
+
return unless throttle_running_jobs&.positive?
|
63
|
+
|
64
|
+
# Cannot use this class since it will include instances of parent job classes.
|
65
|
+
RocketJob::Job.with(read: {mode: :primary}) do |conn|
|
66
|
+
conn.running.where("_type" => self.class.name, :id.ne => id, :priority.lte => priority).count >= throttle_running_jobs
|
67
|
+
end
|
68
|
+
end
|
56
69
|
end
|
57
70
|
end
|
58
71
|
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
require "active_support/concern"
|
2
|
+
require "fugit"
|
3
|
+
|
4
|
+
module RocketJob
|
5
|
+
module Batch
|
6
|
+
# For a batch job that can run over a long period of time it can be useful
|
7
|
+
# to prevent its slices from being processed outside a predefined processing window.
|
8
|
+
#
|
9
|
+
# This plugin supports up to 2 different processing windows.
|
10
|
+
#
|
11
|
+
# For example, do not run this job during business hours.
|
12
|
+
# Allow it to run from 5pm until 8am the following day Mon through Fri.
|
13
|
+
#
|
14
|
+
# class AfterHoursJob < RocketJob::Job
|
15
|
+
# include RocketJob::Batch
|
16
|
+
# include RocketJob::Batch::ThrottleWindows
|
17
|
+
#
|
18
|
+
# # Monday through Thursday the job can start processing at 5pm Eastern.
|
19
|
+
# self.primary_schedule = "0 17 * * 1-4 America/New_York"
|
20
|
+
# # Slices are allowed to run until 8am the following day, which is 15 hours long:
|
21
|
+
# self.primary_duration = 15.hours
|
22
|
+
#
|
23
|
+
# # The slices for this job can run all weekend long, starting Friday at 5pm Eastern.
|
24
|
+
# self.secondary_schedule = "0 17 * * 5 America/New_York"
|
25
|
+
# # Slices are allowed to run until 8am on Monday morning, which is 63 hours long:
|
26
|
+
# self.secondary_duration = 63.hours
|
27
|
+
# end
|
28
|
+
#
|
29
|
+
# Notes:
|
30
|
+
# * These schedules do not affect when the job is started, completed, or when `before_batch` or
|
31
|
+
# `after_batch` processing is performed. It only limits when individual slices are processed.
|
32
|
+
module ThrottleWindows
|
33
|
+
extend ActiveSupport::Concern
|
34
|
+
|
35
|
+
included do
|
36
|
+
# Beginning of the primary schedule. In cron format, see Scheduled Jobs `cron_schedule` for examples.
|
37
|
+
field :primary_schedule, type: String, class_attribute: true, user_editable: true, copy_on_restart: true
|
38
|
+
# Duration in seconds of the primary window.
|
39
|
+
field :primary_duration, type: Integer, class_attribute: true, user_editable: true, copy_on_restart: true
|
40
|
+
|
41
|
+
# Beginning of the secondary schedule. In cron format, see Scheduled Jobs `cron_schedule` for examples.
|
42
|
+
field :secondary_schedule, type: String, class_attribute: true, user_editable: true, copy_on_restart: true
|
43
|
+
# Duration in seconds of the secondary window.
|
44
|
+
field :secondary_duration, type: Integer, class_attribute: true, user_editable: true, copy_on_restart: true
|
45
|
+
|
46
|
+
define_batch_throttle :throttle_windows_exceeded?, filter: :throttle_filter_id
|
47
|
+
|
48
|
+
validates_each :primary_schedule, :secondary_schedule do |record, attr, value|
|
49
|
+
record.errors.add(attr, "Invalid #{attr}: #{value.inspect}") if value && !Fugit::Cron.new(value)
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
private
|
54
|
+
|
55
|
+
def throttle_windows_exceeded?
|
56
|
+
exceeded = primary_schedule && primary_duration && throttle_outside_window?(primary_schedule, primary_duration)
|
57
|
+
if exceeded && secondary_schedule && secondary_duration
|
58
|
+
exceeded = throttle_outside_window?(secondary_schedule, secondary_duration)
|
59
|
+
end
|
60
|
+
exceeded
|
61
|
+
end
|
62
|
+
|
63
|
+
def throttle_outside_window?(schedule, duration)
|
64
|
+
cron = Fugit::Cron.new(schedule)
|
65
|
+
time = Time.now.utc + 1
|
66
|
+
# Add 1 second since right now could be the very beginning of the processing window.
|
67
|
+
previous_time = cron.previous_time(time).to_utc_time
|
68
|
+
previous_time + duration < time
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
@@ -28,10 +28,6 @@ module RocketJob
|
|
28
28
|
#
|
29
29
|
# If an exception was thrown the entire slice of records is marked as failed.
|
30
30
|
#
|
31
|
-
# If the mongo_ha gem has been loaded, then the connection to mongo is
|
32
|
-
# automatically re-established and the job will resume anytime a
|
33
|
-
# Mongo connection failure occurs.
|
34
|
-
#
|
35
31
|
# Thread-safe, can be called by multiple threads at the same time
|
36
32
|
def rocket_job_work(worker, re_raise_exceptions = false)
|
37
33
|
raise "Job must be started before calling #rocket_job_work" unless running?
|
@@ -114,8 +110,6 @@ module RocketJob
|
|
114
110
|
servers
|
115
111
|
end
|
116
112
|
|
117
|
-
private
|
118
|
-
|
119
113
|
def rocket_job_batch_throttled?(slice, worker)
|
120
114
|
filter = self.class.rocket_job_batch_throttles.matching_filter(self, slice)
|
121
115
|
return false unless filter
|
@@ -129,46 +123,59 @@ module RocketJob
|
|
129
123
|
# Process a single slice from Mongo
|
130
124
|
# Once the slice has been successfully processed it will be removed from the input collection
|
131
125
|
# Returns [Integer] the number of records successfully processed
|
132
|
-
def rocket_job_process_slice(slice)
|
133
|
-
|
134
|
-
|
135
|
-
@rocket_job_slice = slice
|
126
|
+
def rocket_job_process_slice(slice, &block)
|
127
|
+
@rocket_job_slice = slice
|
128
|
+
count = 0
|
136
129
|
|
137
|
-
processed_records = 0
|
138
130
|
run_callbacks(:slice) do
|
139
131
|
# Allow before_slice callbacks to fail, complete or abort this slice.
|
140
132
|
return 0 unless running?
|
141
133
|
|
142
|
-
|
143
|
-
slice.each do |record|
|
144
|
-
SemanticLogger.named_tagged(record: @rocket_job_record_number) do
|
145
|
-
writer << rocket_job_batch_perform(slice, record)
|
146
|
-
processed_records += 1
|
147
|
-
end
|
148
|
-
# JRuby thinks self.rocket_job_record_number= is private and cannot be accessed
|
149
|
-
@rocket_job_record_number += 1
|
150
|
-
end
|
151
|
-
end
|
152
|
-
@rocket_job_slice = nil
|
153
|
-
@rocket_job_record_number = nil
|
134
|
+
count = rocket_job_perform_slice(slice, &block)
|
154
135
|
end
|
136
|
+
@rocket_job_slice = nil
|
155
137
|
|
156
138
|
# On successful completion remove the slice from the input queue
|
157
139
|
# TODO: Add option to complete slice instead of destroying it to retain input data.
|
158
140
|
slice.destroy
|
159
|
-
|
141
|
+
count
|
142
|
+
end
|
143
|
+
|
144
|
+
# Perform individual slice without callbacks
|
145
|
+
def rocket_job_perform_slice(slice, &block)
|
146
|
+
count = 0
|
147
|
+
RocketJob::Sliced::Writer::Output.collect(self, slice) do |writer|
|
148
|
+
records = slice.records
|
149
|
+
|
150
|
+
# Skip records already processed, if any.
|
151
|
+
# slice.processing_record_number ||= 0
|
152
|
+
# TODO: Must append to existing output slices before this can be enabled.
|
153
|
+
# if !collect_output && (slice.processing_record_number > 1)
|
154
|
+
# records = records[slice.processing_record_number - 1..-1]
|
155
|
+
# end
|
156
|
+
# Until the changes above have been implemented, reprocess all records in the slice.
|
157
|
+
slice.processing_record_number = 0
|
158
|
+
|
159
|
+
records.each do |record|
|
160
|
+
slice.processing_record_number += 1
|
161
|
+
SemanticLogger.named_tagged(record: slice.current_record_number) do
|
162
|
+
writer << rocket_job_batch_perform(slice, record, &block)
|
163
|
+
count += 1
|
164
|
+
end
|
165
|
+
end
|
166
|
+
end
|
167
|
+
count
|
160
168
|
end
|
161
169
|
|
162
170
|
# Perform a single record within the current slice.
|
163
171
|
def rocket_job_batch_perform(slice, record)
|
164
|
-
slice.
|
165
|
-
slice.processing_record_number += 1
|
172
|
+
@rocket_job_record_number = slice.current_record_number
|
166
173
|
|
167
174
|
return block_given? ? yield(record) : perform(record) if _perform_callbacks.empty?
|
168
175
|
|
169
176
|
# @rocket_job_input and @rocket_job_output can be modified by before/around callbacks
|
170
|
-
@rocket_job_input
|
171
|
-
@rocket_job_output
|
177
|
+
@rocket_job_input = record
|
178
|
+
@rocket_job_output = nil
|
172
179
|
|
173
180
|
run_callbacks(:perform) do
|
174
181
|
@rocket_job_output =
|
@@ -179,9 +186,9 @@ module RocketJob
|
|
179
186
|
end
|
180
187
|
end
|
181
188
|
|
182
|
-
@rocket_job_input
|
183
|
-
result
|
184
|
-
@rocket_job_output
|
189
|
+
@rocket_job_input = nil
|
190
|
+
result = @rocket_job_output
|
191
|
+
@rocket_job_output = nil
|
185
192
|
result
|
186
193
|
end
|
187
194
|
|
@@ -197,24 +204,7 @@ module RocketJob
|
|
197
204
|
if failed_count.positive? && (input_count == failed_count)
|
198
205
|
# Reload to pull in any counters or other data that was modified.
|
199
206
|
reload unless new_record?
|
200
|
-
if may_fail?
|
201
|
-
fail_job = true
|
202
|
-
unless new_record?
|
203
|
-
# Fail job iff no other worker has already finished it
|
204
|
-
# Must set write concern to at least 1 since we need the nModified back
|
205
|
-
result = self.class.with(write: {w: 1}) do |query|
|
206
|
-
query.
|
207
|
-
where(id: id, state: :running, sub_state: :processing).
|
208
|
-
update({"$set" => {state: :failed, worker_name: worker_name}})
|
209
|
-
end
|
210
|
-
fail_job = false unless result.modified_count.positive?
|
211
|
-
end
|
212
|
-
if fail_job
|
213
|
-
message = "#{failed_count} slices failed to process"
|
214
|
-
self.exception = JobException.new(message: message)
|
215
|
-
fail!(worker_name, message)
|
216
|
-
end
|
217
|
-
end
|
207
|
+
rocket_job_batch_fail!(worker_name) if may_fail?
|
218
208
|
return true
|
219
209
|
end
|
220
210
|
|
@@ -237,15 +227,38 @@ module RocketJob
|
|
237
227
|
|
238
228
|
# Reload to pull in any counters or other data that was modified.
|
239
229
|
reload
|
230
|
+
|
240
231
|
if result.modified_count.positive?
|
241
232
|
rocket_job_batch_run_after_callbacks(false)
|
242
|
-
|
233
|
+
elsif aborted?
|
243
234
|
# Repeat cleanup in case this worker was still running when the job was aborted
|
244
|
-
cleanup!
|
235
|
+
cleanup!
|
245
236
|
end
|
246
237
|
true
|
247
238
|
end
|
248
239
|
|
240
|
+
# Fail the job
|
241
|
+
def rocket_job_batch_fail!(worker_name)
|
242
|
+
fail_job = true
|
243
|
+
|
244
|
+
unless new_record?
|
245
|
+
# Fail job iff no other worker has already finished it
|
246
|
+
# Must set write concern to at least 1 since we need the nModified back
|
247
|
+
result = self.class.with(write: {w: 1}) do |query|
|
248
|
+
query.
|
249
|
+
where(id: id, state: :running, sub_state: :processing).
|
250
|
+
update({"$set" => {state: :failed, worker_name: worker_name}})
|
251
|
+
end
|
252
|
+
fail_job = false unless result.modified_count.positive?
|
253
|
+
end
|
254
|
+
|
255
|
+
return unless fail_job
|
256
|
+
|
257
|
+
message = "#{input.failed.count} slices failed to process"
|
258
|
+
self.exception = JobException.new(message: message)
|
259
|
+
new_record? ? fail(worker_name, message) : fail!(worker_name, message)
|
260
|
+
end
|
261
|
+
|
249
262
|
# Run the before_batch callbacks
|
250
263
|
# Saves the current state before and after running callbacks if callbacks present
|
251
264
|
def rocket_job_batch_run_before_callbacks
|
data/lib/rocket_job/event.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require "active_support/concern"
|
2
|
+
require "fugit"
|
2
3
|
|
3
4
|
module RocketJob
|
4
5
|
module Plugins
|
@@ -17,7 +18,9 @@ module RocketJob
|
|
17
18
|
|
18
19
|
field :cron_schedule, type: String, class_attribute: true, user_editable: true, copy_on_restart: true
|
19
20
|
|
20
|
-
|
21
|
+
validates_each :cron_schedule do |record, attr, value|
|
22
|
+
record.errors.add(attr, "Invalid cron_schedule: #{value.inspect}") if value && !Fugit::Cron.new(value)
|
23
|
+
end
|
21
24
|
before_save :rocket_job_cron_set_run_at
|
22
25
|
|
23
26
|
private
|
@@ -49,24 +52,14 @@ module RocketJob
|
|
49
52
|
# The next time as of this time.
|
50
53
|
# Default: Time.now
|
51
54
|
def rocket_job_cron_next_time(time = Time.now)
|
52
|
-
|
55
|
+
Fugit::Cron.new(cron_schedule).next_time.to_utc_time
|
53
56
|
end
|
54
57
|
|
55
|
-
private
|
56
|
-
|
57
58
|
def rocket_job_cron_set_run_at
|
58
59
|
return unless cron_schedule
|
59
60
|
|
60
61
|
self.run_at = rocket_job_cron_next_time if cron_schedule_changed? && !run_at_changed?
|
61
62
|
end
|
62
|
-
|
63
|
-
def rocket_job_cron_valid
|
64
|
-
return unless cron_schedule
|
65
|
-
|
66
|
-
RocketJob::Plugins::Rufus::CronLine.new(cron_schedule)
|
67
|
-
rescue ArgumentError => e
|
68
|
-
errors.add(:cron_schedule, e.message)
|
69
|
-
end
|
70
63
|
end
|
71
64
|
end
|
72
65
|
end
|
@@ -7,7 +7,7 @@ module RocketJob
|
|
7
7
|
#
|
8
8
|
# Example:
|
9
9
|
# # Do not run this job when the MySQL slave delay exceeds 5 minutes.
|
10
|
-
# class MyJob < RocketJob
|
10
|
+
# class MyJob < RocketJob::Job
|
11
11
|
# # Define a custom mysql throttle
|
12
12
|
# # Prevents all jobs of this class from running on the current server.
|
13
13
|
# define_throttle :mysql_throttle_exceeded?
|
@@ -6,7 +6,7 @@ module RocketJob
|
|
6
6
|
# Throttle the number of jobs of a specific class that are processed at the same time.
|
7
7
|
#
|
8
8
|
# Example:
|
9
|
-
# class MyJob < RocketJob
|
9
|
+
# class MyJob < RocketJob::Job
|
10
10
|
# # Maximum number of jobs of this class to process at the same time.
|
11
11
|
# self.throttle_running_jobs = 25
|
12
12
|
#
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require "active_support/concern"
|
2
|
+
require "fugit"
|
2
3
|
|
3
4
|
module RocketJob
|
4
5
|
module Plugins
|
@@ -47,18 +48,14 @@ module RocketJob
|
|
47
48
|
|
48
49
|
validates_presence_of :processing_schedule, :processing_duration
|
49
50
|
validates_each :processing_schedule do |record, attr, value|
|
50
|
-
|
51
|
-
RocketJob::Plugins::Rufus::CronLine.new(value)
|
52
|
-
rescue ArgumentError => e
|
53
|
-
record.errors.add(attr, e.message)
|
54
|
-
end
|
51
|
+
record.errors.add(attr, "Invalid schedule: #{value.inspect}") unless Fugit::Cron.new(value)
|
55
52
|
end
|
56
53
|
end
|
57
54
|
|
58
55
|
# Returns [true|false] whether this job is currently inside its processing window
|
59
56
|
def rocket_job_processing_window_active?
|
60
|
-
time = Time.now
|
61
|
-
previous_time =
|
57
|
+
time = Time.now.utc
|
58
|
+
previous_time = Fugit::Cron.new(processing_schedule).previous_time(time).to_utc_time
|
62
59
|
# Inside previous processing window?
|
63
60
|
previous_time + processing_duration > time
|
64
61
|
end
|
@@ -69,17 +66,14 @@ module RocketJob
|
|
69
66
|
def rocket_job_processing_window_check
|
70
67
|
return if rocket_job_processing_window_active?
|
71
68
|
|
72
|
-
|
69
|
+
next_time = Fugit::Cron.new(processing_schedule).next_time.to_utc_time
|
70
|
+
logger.warn("Processing window closed before job was processed. Job is re-scheduled to run at: #{next_time}")
|
73
71
|
self.worker_name ||= "inline"
|
74
72
|
requeue!(worker_name)
|
75
73
|
end
|
76
74
|
|
77
75
|
def rocket_job_processing_window_set_run_at
|
78
|
-
self.run_at =
|
79
|
-
end
|
80
|
-
|
81
|
-
def rocket_job_processing_schedule
|
82
|
-
RocketJob::Plugins::Rufus::CronLine.new(processing_schedule)
|
76
|
+
self.run_at = Fugit::Cron.new(processing_schedule).next_time.to_utc_time unless rocket_job_processing_window_active?
|
83
77
|
end
|
84
78
|
end
|
85
79
|
end
|