rocketjob 5.2.0.beta1 → 5.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/lib/rocket_job/batch.rb +1 -0
- data/lib/rocket_job/batch/io.rb +2 -2
- data/lib/rocket_job/batch/throttle.rb +1 -1
- data/lib/rocket_job/batch/throttle_running_workers.rb +14 -1
- data/lib/rocket_job/batch/throttle_windows.rb +72 -0
- data/lib/rocket_job/batch/worker.rb +64 -51
- data/lib/rocket_job/event.rb +0 -2
- data/lib/rocket_job/extensions/mongoid/clients/options.rb +0 -2
- data/lib/rocket_job/plugins/cron.rb +5 -12
- data/lib/rocket_job/plugins/job/throttle.rb +1 -1
- data/lib/rocket_job/plugins/job/throttle_running_jobs.rb +1 -1
- data/lib/rocket_job/plugins/processing_window.rb +7 -13
- data/lib/rocket_job/sliced/slice.rb +5 -7
- data/lib/rocket_job/version.rb +1 -1
- metadata +21 -8
- data/lib/rocket_job/plugins/rufus/cron_line.rb +0 -520
- data/lib/rocket_job/plugins/rufus/zo_time.rb +0 -524
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 42be7df83c1d38b5ef3a41751e3cc6afd6fb885638a7d144bfd1f71ff5de441e
|
4
|
+
data.tar.gz: fd3942b7fe1aa2d76bda3395561bea56122b9eab58c0ae65d99fbea76e3d771c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 324641130fd1bb0724058d81cdf33dad02bbdeef2047413a479dcb0bb0782be37be3e0aad9b5c57d9ea200e6cc30889ce06ce8130716f91399472bf317ed4089
|
7
|
+
data.tar.gz: dfe9e7f121e7fee9c4713d5fc819e3889d1e0d1d208ac14cd4678f83ce0f802f51e2067cd10d687301a9f7470a609eab6259036e191ae8e6198f7a7830cbbd82
|
data/README.md
CHANGED
@@ -3,9 +3,9 @@
|
|
3
3
|
|
4
4
|
Ruby's missing batch system
|
5
5
|
|
6
|
-
Checkout
|
6
|
+
Checkout https://rocketjob.io/
|
7
7
|
|
8
|
-

|
9
9
|
|
10
10
|
## Documentation
|
11
11
|
|
data/lib/rocket_job/batch.rb
CHANGED
@@ -24,6 +24,7 @@ module RocketJob
|
|
24
24
|
autoload :LowerPriority, "rocket_job/batch/lower_priority"
|
25
25
|
autoload :Performance, "rocket_job/batch/performance"
|
26
26
|
autoload :Statistics, "rocket_job/batch/statistics"
|
27
|
+
autoload :ThrottleWindows, "rocket_job/batch/throttle_windows"
|
27
28
|
autoload :Result, "rocket_job/batch/result"
|
28
29
|
autoload :Results, "rocket_job/batch/results"
|
29
30
|
autoload :Tabular, "rocket_job/batch/tabular"
|
data/lib/rocket_job/batch/io.rb
CHANGED
@@ -18,7 +18,7 @@ module RocketJob
|
|
18
18
|
raise "Category #{category.inspect}, must be registered in input_categories: #{input_categories.inspect}"
|
19
19
|
end
|
20
20
|
|
21
|
-
(@inputs ||= {})[category] ||= RocketJob::Sliced::Input.new(rocket_job_io_slice_arguments("inputs", category))
|
21
|
+
(@inputs ||= {})[category] ||= RocketJob::Sliced::Input.new(**rocket_job_io_slice_arguments("inputs", category))
|
22
22
|
end
|
23
23
|
|
24
24
|
# Returns [RocketJob::Sliced::Output] output collection for holding output slices
|
@@ -34,7 +34,7 @@ module RocketJob
|
|
34
34
|
raise "Category #{category.inspect}, must be registered in output_categories: #{output_categories.inspect}"
|
35
35
|
end
|
36
36
|
|
37
|
-
(@outputs ||= {})[category] ||= RocketJob::Sliced::Output.new(rocket_job_io_slice_arguments("outputs", category))
|
37
|
+
(@outputs ||= {})[category] ||= RocketJob::Sliced::Output.new(**rocket_job_io_slice_arguments("outputs", category))
|
38
38
|
end
|
39
39
|
|
40
40
|
# Upload the supplied file, io, IOStreams::Path, or IOStreams::Stream.
|
data/lib/rocket_job/batch/throttle_running_workers.rb
CHANGED
@@ -5,7 +5,7 @@ module RocketJob
|
|
5
5
|
# Throttle the number of slices of a specific batch job that are processed at the same time.
|
6
6
|
#
|
7
7
|
# Example:
|
8
|
-
# class MyJob < RocketJob
|
8
|
+
# class MyJob < RocketJob::Job
|
9
9
|
# include RocketJob::Batch
|
10
10
|
#
|
11
11
|
# # Maximum number of slices to process at the same time for each running instance.
|
@@ -53,6 +53,19 @@ module RocketJob
|
|
53
53
|
conn.where(:id.ne => slice.id).count >= throttle_running_workers
|
54
54
|
end
|
55
55
|
end
|
56
|
+
|
57
|
+
# Returns [Boolean] whether the throttle for this job has been exceeded
|
58
|
+
#
|
59
|
+
# With a Batch job, allow a higher priority queued job to replace a running one with
|
60
|
+
# a lower priority.
|
61
|
+
def throttle_running_jobs_exceeded?
|
62
|
+
return unless throttle_running_jobs&.positive?
|
63
|
+
|
64
|
+
# Cannot use this class since it will include instances of parent job classes.
|
65
|
+
RocketJob::Job.with(read: {mode: :primary}) do |conn|
|
66
|
+
conn.running.where("_type" => self.class.name, :id.ne => id, :priority.lte => priority).count >= throttle_running_jobs
|
67
|
+
end
|
68
|
+
end
|
56
69
|
end
|
57
70
|
end
|
58
71
|
end
|
data/lib/rocket_job/batch/throttle_windows.rb
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
require "active_support/concern"
|
2
|
+
require "fugit"
|
3
|
+
|
4
|
+
module RocketJob
|
5
|
+
module Batch
|
6
|
+
# For a batch job that can run over a long period of time it can be useful
|
7
|
+
# to prevent its slices from being processed outside a predefined processing window.
|
8
|
+
#
|
9
|
+
# This plugin supports up to 2 different processing windows.
|
10
|
+
#
|
11
|
+
# For example, do not run this job during business hours.
|
12
|
+
# Allow it to run from 5pm until 8am the following day Mon through Fri.
|
13
|
+
#
|
14
|
+
# class AfterHoursJob < RocketJob::Job
|
15
|
+
# include RocketJob::Batch
|
16
|
+
# include RocketJob::Batch::ThrottleWindows
|
17
|
+
#
|
18
|
+
# # Monday through Thursday the job can start processing at 5pm Eastern.
|
19
|
+
# self.primary_schedule = "0 17 * * 1-4 America/New_York"
|
20
|
+
# # Slices are allowed to run until 8am the following day, which is 15 hours long:
|
21
|
+
# self.primary_duration = 15.hours
|
22
|
+
#
|
23
|
+
# # The slices for this job can run all weekend long, starting Friday at 5pm Eastern.
|
24
|
+
# self.secondary_schedule = "0 17 * * 5 America/New_York"
|
25
|
+
# # Slices are allowed to run until 8am on Monday morning, which is 63 hours long:
|
26
|
+
# self.secondary_duration = 63.hours
|
27
|
+
# end
|
28
|
+
#
|
29
|
+
# Notes:
|
30
|
+
# * These schedules do not affect when the job is started, completed, or when `before_batch` or
|
31
|
+
# `after_batch` processing is performed. It only limits when individual slices are processed.
|
32
|
+
module ThrottleWindows
|
33
|
+
extend ActiveSupport::Concern
|
34
|
+
|
35
|
+
included do
|
36
|
+
# Beginning of the primary schedule. In cron format, see Scheduled Jobs `cron_schedule` for examples.
|
37
|
+
field :primary_schedule, type: String, class_attribute: true, user_editable: true, copy_on_restart: true
|
38
|
+
# Duration in seconds of the primary window.
|
39
|
+
field :primary_duration, type: Integer, class_attribute: true, user_editable: true, copy_on_restart: true
|
40
|
+
|
41
|
+
# Beginning of the secondary schedule. In cron format, see Scheduled Jobs `cron_schedule` for examples.
|
42
|
+
field :secondary_schedule, type: String, class_attribute: true, user_editable: true, copy_on_restart: true
|
43
|
+
# Duration in seconds of the secondary window.
|
44
|
+
field :secondary_duration, type: Integer, class_attribute: true, user_editable: true, copy_on_restart: true
|
45
|
+
|
46
|
+
define_batch_throttle :throttle_windows_exceeded?, filter: :throttle_filter_id
|
47
|
+
|
48
|
+
validates_each :primary_schedule, :secondary_schedule do |record, attr, value|
|
49
|
+
record.errors.add(attr, "Invalid #{attr}: #{value.inspect}") if value && !Fugit::Cron.new(value)
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
private
|
54
|
+
|
55
|
+
def throttle_windows_exceeded?
|
56
|
+
exceeded = primary_schedule && primary_duration && throttle_outside_window?(primary_schedule, primary_duration)
|
57
|
+
if exceeded && secondary_schedule && secondary_duration
|
58
|
+
exceeded = throttle_outside_window?(secondary_schedule, secondary_duration)
|
59
|
+
end
|
60
|
+
exceeded
|
61
|
+
end
|
62
|
+
|
63
|
+
def throttle_outside_window?(schedule, duration)
|
64
|
+
cron = Fugit::Cron.new(schedule)
|
65
|
+
time = Time.now.utc + 1
|
66
|
+
# Add 1 second since right now could be the very beginning of the processing window.
|
67
|
+
previous_time = cron.previous_time(time).to_utc_time
|
68
|
+
previous_time + duration < time
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
data/lib/rocket_job/batch/worker.rb
CHANGED
@@ -28,10 +28,6 @@ module RocketJob
|
|
28
28
|
#
|
29
29
|
# If an exception was thrown the entire slice of records is marked as failed.
|
30
30
|
#
|
31
|
-
# If the mongo_ha gem has been loaded, then the connection to mongo is
|
32
|
-
# automatically re-established and the job will resume anytime a
|
33
|
-
# Mongo connection failure occurs.
|
34
|
-
#
|
35
31
|
# Thread-safe, can be called by multiple threads at the same time
|
36
32
|
def rocket_job_work(worker, re_raise_exceptions = false)
|
37
33
|
raise "Job must be started before calling #rocket_job_work" unless running?
|
@@ -114,8 +110,6 @@ module RocketJob
|
|
114
110
|
servers
|
115
111
|
end
|
116
112
|
|
117
|
-
private
|
118
|
-
|
119
113
|
def rocket_job_batch_throttled?(slice, worker)
|
120
114
|
filter = self.class.rocket_job_batch_throttles.matching_filter(self, slice)
|
121
115
|
return false unless filter
|
@@ -129,46 +123,59 @@ module RocketJob
|
|
129
123
|
# Process a single slice from Mongo
|
130
124
|
# Once the slice has been successfully processed it will be removed from the input collection
|
131
125
|
# Returns [Integer] the number of records successfully processed
|
132
|
-
def rocket_job_process_slice(slice)
|
133
|
-
|
134
|
-
|
135
|
-
@rocket_job_slice = slice
|
126
|
+
def rocket_job_process_slice(slice, &block)
|
127
|
+
@rocket_job_slice = slice
|
128
|
+
count = 0
|
136
129
|
|
137
|
-
processed_records = 0
|
138
130
|
run_callbacks(:slice) do
|
139
131
|
# Allow before_slice callbacks to fail, complete or abort this slice.
|
140
132
|
return 0 unless running?
|
141
133
|
|
142
|
-
|
143
|
-
slice.each do |record|
|
144
|
-
SemanticLogger.named_tagged(record: @rocket_job_record_number) do
|
145
|
-
writer << rocket_job_batch_perform(slice, record)
|
146
|
-
processed_records += 1
|
147
|
-
end
|
148
|
-
# JRuby thinks self.rocket_job_record_number= is private and cannot be accessed
|
149
|
-
@rocket_job_record_number += 1
|
150
|
-
end
|
151
|
-
end
|
152
|
-
@rocket_job_slice = nil
|
153
|
-
@rocket_job_record_number = nil
|
134
|
+
count = rocket_job_perform_slice(slice, &block)
|
154
135
|
end
|
136
|
+
@rocket_job_slice = nil
|
155
137
|
|
156
138
|
# On successful completion remove the slice from the input queue
|
157
139
|
# TODO: Add option to complete slice instead of destroying it to retain input data.
|
158
140
|
slice.destroy
|
159
|
-
|
141
|
+
count
|
142
|
+
end
|
143
|
+
|
144
|
+
# Perform individual slice without callbacks
|
145
|
+
def rocket_job_perform_slice(slice, &block)
|
146
|
+
count = 0
|
147
|
+
RocketJob::Sliced::Writer::Output.collect(self, slice) do |writer|
|
148
|
+
records = slice.records
|
149
|
+
|
150
|
+
# Skip records already processed, if any.
|
151
|
+
# slice.processing_record_number ||= 0
|
152
|
+
# TODO: Must append to existing output slices before this can be enabled.
|
153
|
+
# if !collect_output && (slice.processing_record_number > 1)
|
154
|
+
# records = records[slice.processing_record_number - 1..-1]
|
155
|
+
# end
|
156
|
+
# Until the changes above have been implemented, reprocess all records in the slice.
|
157
|
+
slice.processing_record_number = 0
|
158
|
+
|
159
|
+
records.each do |record|
|
160
|
+
slice.processing_record_number += 1
|
161
|
+
SemanticLogger.named_tagged(record: slice.current_record_number) do
|
162
|
+
writer << rocket_job_batch_perform(slice, record, &block)
|
163
|
+
count += 1
|
164
|
+
end
|
165
|
+
end
|
166
|
+
end
|
167
|
+
count
|
160
168
|
end
|
161
169
|
|
162
170
|
# Perform a single record within the current slice.
|
163
171
|
def rocket_job_batch_perform(slice, record)
|
164
|
-
slice.
|
165
|
-
slice.processing_record_number += 1
|
172
|
+
@rocket_job_record_number = slice.current_record_number
|
166
173
|
|
167
174
|
return block_given? ? yield(record) : perform(record) if _perform_callbacks.empty?
|
168
175
|
|
169
176
|
# @rocket_job_input and @rocket_job_output can be modified by before/around callbacks
|
170
|
-
@rocket_job_input
|
171
|
-
@rocket_job_output
|
177
|
+
@rocket_job_input = record
|
178
|
+
@rocket_job_output = nil
|
172
179
|
|
173
180
|
run_callbacks(:perform) do
|
174
181
|
@rocket_job_output =
|
@@ -179,9 +186,9 @@ module RocketJob
|
|
179
186
|
end
|
180
187
|
end
|
181
188
|
|
182
|
-
@rocket_job_input
|
183
|
-
result
|
184
|
-
@rocket_job_output
|
189
|
+
@rocket_job_input = nil
|
190
|
+
result = @rocket_job_output
|
191
|
+
@rocket_job_output = nil
|
185
192
|
result
|
186
193
|
end
|
187
194
|
|
@@ -197,24 +204,7 @@ module RocketJob
|
|
197
204
|
if failed_count.positive? && (input_count == failed_count)
|
198
205
|
# Reload to pull in any counters or other data that was modified.
|
199
206
|
reload unless new_record?
|
200
|
-
if may_fail?
|
201
|
-
fail_job = true
|
202
|
-
unless new_record?
|
203
|
-
# Fail job iff no other worker has already finished it
|
204
|
-
# Must set write concern to at least 1 since we need the nModified back
|
205
|
-
result = self.class.with(write: {w: 1}) do |query|
|
206
|
-
query.
|
207
|
-
where(id: id, state: :running, sub_state: :processing).
|
208
|
-
update({"$set" => {state: :failed, worker_name: worker_name}})
|
209
|
-
end
|
210
|
-
fail_job = false unless result.modified_count.positive?
|
211
|
-
end
|
212
|
-
if fail_job
|
213
|
-
message = "#{failed_count} slices failed to process"
|
214
|
-
self.exception = JobException.new(message: message)
|
215
|
-
fail!(worker_name, message)
|
216
|
-
end
|
217
|
-
end
|
207
|
+
rocket_job_batch_fail!(worker_name) if may_fail?
|
218
208
|
return true
|
219
209
|
end
|
220
210
|
|
@@ -237,15 +227,38 @@ module RocketJob
|
|
237
227
|
|
238
228
|
# Reload to pull in any counters or other data that was modified.
|
239
229
|
reload
|
230
|
+
|
240
231
|
if result.modified_count.positive?
|
241
232
|
rocket_job_batch_run_after_callbacks(false)
|
242
|
-
|
233
|
+
elsif aborted?
|
243
234
|
# Repeat cleanup in case this worker was still running when the job was aborted
|
244
|
-
cleanup!
|
235
|
+
cleanup!
|
245
236
|
end
|
246
237
|
true
|
247
238
|
end
|
248
239
|
|
240
|
+
# Fail the job
|
241
|
+
def rocket_job_batch_fail!(worker_name)
|
242
|
+
fail_job = true
|
243
|
+
|
244
|
+
unless new_record?
|
245
|
+
# Fail job iff no other worker has already finished it
|
246
|
+
# Must set write concern to at least 1 since we need the nModified back
|
247
|
+
result = self.class.with(write: {w: 1}) do |query|
|
248
|
+
query.
|
249
|
+
where(id: id, state: :running, sub_state: :processing).
|
250
|
+
update({"$set" => {state: :failed, worker_name: worker_name}})
|
251
|
+
end
|
252
|
+
fail_job = false unless result.modified_count.positive?
|
253
|
+
end
|
254
|
+
|
255
|
+
return unless fail_job
|
256
|
+
|
257
|
+
message = "#{input.failed.count} slices failed to process"
|
258
|
+
self.exception = JobException.new(message: message)
|
259
|
+
new_record? ? fail(worker_name, message) : fail!(worker_name, message)
|
260
|
+
end
|
261
|
+
|
249
262
|
# Run the before_batch callbacks
|
250
263
|
# Saves the current state before and after running callbacks if callbacks present
|
251
264
|
def rocket_job_batch_run_before_callbacks
|
data/lib/rocket_job/event.rb
CHANGED
data/lib/rocket_job/plugins/cron.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require "active_support/concern"
|
2
|
+
require "fugit"
|
2
3
|
|
3
4
|
module RocketJob
|
4
5
|
module Plugins
|
@@ -17,7 +18,9 @@ module RocketJob
|
|
17
18
|
|
18
19
|
field :cron_schedule, type: String, class_attribute: true, user_editable: true, copy_on_restart: true
|
19
20
|
|
20
|
-
|
21
|
+
validates_each :cron_schedule do |record, attr, value|
|
22
|
+
record.errors.add(attr, "Invalid cron_schedule: #{value.inspect}") if value && !Fugit::Cron.new(value)
|
23
|
+
end
|
21
24
|
before_save :rocket_job_cron_set_run_at
|
22
25
|
|
23
26
|
private
|
@@ -49,24 +52,14 @@ module RocketJob
|
|
49
52
|
# The next time as of this time.
|
50
53
|
# Default: Time.now
|
51
54
|
def rocket_job_cron_next_time(time = Time.now)
|
52
|
-
|
55
|
+
Fugit::Cron.new(cron_schedule).next_time.to_utc_time
|
53
56
|
end
|
54
57
|
|
55
|
-
private
|
56
|
-
|
57
58
|
def rocket_job_cron_set_run_at
|
58
59
|
return unless cron_schedule
|
59
60
|
|
60
61
|
self.run_at = rocket_job_cron_next_time if cron_schedule_changed? && !run_at_changed?
|
61
62
|
end
|
62
|
-
|
63
|
-
def rocket_job_cron_valid
|
64
|
-
return unless cron_schedule
|
65
|
-
|
66
|
-
RocketJob::Plugins::Rufus::CronLine.new(cron_schedule)
|
67
|
-
rescue ArgumentError => e
|
68
|
-
errors.add(:cron_schedule, e.message)
|
69
|
-
end
|
70
63
|
end
|
71
64
|
end
|
72
65
|
end
|
data/lib/rocket_job/plugins/job/throttle.rb
CHANGED
@@ -7,7 +7,7 @@ module RocketJob
|
|
7
7
|
#
|
8
8
|
# Example:
|
9
9
|
# # Do not run this job when the MySQL slave delay exceeds 5 minutes.
|
10
|
-
# class MyJob < RocketJob
|
10
|
+
# class MyJob < RocketJob::Job
|
11
11
|
# # Define a custom mysql throttle
|
12
12
|
# # Prevents all jobs of this class from running on the current server.
|
13
13
|
# define_throttle :mysql_throttle_exceeded?
|
data/lib/rocket_job/plugins/job/throttle_running_jobs.rb
CHANGED
@@ -6,7 +6,7 @@ module RocketJob
|
|
6
6
|
# Throttle the number of jobs of a specific class that are processed at the same time.
|
7
7
|
#
|
8
8
|
# Example:
|
9
|
-
# class MyJob < RocketJob
|
9
|
+
# class MyJob < RocketJob::Job
|
10
10
|
# # Maximum number of jobs of this class to process at the same time.
|
11
11
|
# self.throttle_running_jobs = 25
|
12
12
|
#
|
data/lib/rocket_job/plugins/processing_window.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require "active_support/concern"
|
2
|
+
require "fugit"
|
2
3
|
|
3
4
|
module RocketJob
|
4
5
|
module Plugins
|
@@ -47,18 +48,14 @@ module RocketJob
|
|
47
48
|
|
48
49
|
validates_presence_of :processing_schedule, :processing_duration
|
49
50
|
validates_each :processing_schedule do |record, attr, value|
|
50
|
-
|
51
|
-
RocketJob::Plugins::Rufus::CronLine.new(value)
|
52
|
-
rescue ArgumentError => e
|
53
|
-
record.errors.add(attr, e.message)
|
54
|
-
end
|
51
|
+
record.errors.add(attr, "Invalid schedule: #{value.inspect}") unless Fugit::Cron.new(value)
|
55
52
|
end
|
56
53
|
end
|
57
54
|
|
58
55
|
# Returns [true|false] whether this job is currently inside its processing window
|
59
56
|
def rocket_job_processing_window_active?
|
60
|
-
time = Time.now
|
61
|
-
previous_time =
|
57
|
+
time = Time.now.utc
|
58
|
+
previous_time = Fugit::Cron.new(processing_schedule).previous_time(time).to_utc_time
|
62
59
|
# Inside previous processing window?
|
63
60
|
previous_time + processing_duration > time
|
64
61
|
end
|
@@ -69,17 +66,14 @@ module RocketJob
|
|
69
66
|
def rocket_job_processing_window_check
|
70
67
|
return if rocket_job_processing_window_active?
|
71
68
|
|
72
|
-
|
69
|
+
next_time = Fugit::Cron.new(processing_schedule).next_time.to_utc_time
|
70
|
+
logger.warn("Processing window closed before job was processed. Job is re-scheduled to run at: #{next_time}")
|
73
71
|
self.worker_name ||= "inline"
|
74
72
|
requeue!(worker_name)
|
75
73
|
end
|
76
74
|
|
77
75
|
def rocket_job_processing_window_set_run_at
|
78
|
-
self.run_at =
|
79
|
-
end
|
80
|
-
|
81
|
-
def rocket_job_processing_schedule
|
82
|
-
RocketJob::Plugins::Rufus::CronLine.new(processing_schedule)
|
76
|
+
self.run_at = Fugit::Cron.new(processing_schedule).next_time.to_utc_time unless rocket_job_processing_window_active?
|
83
77
|
end
|
84
78
|
end
|
85
79
|
end
|