rocketjob 5.2.0 → 5.4.0.beta1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/lib/rocket_job/batch.rb +1 -0
- data/lib/rocket_job/batch/io.rb +14 -19
- data/lib/rocket_job/batch/model.rb +2 -2
- data/lib/rocket_job/batch/tabular/input.rb +9 -5
- data/lib/rocket_job/batch/tabular/output.rb +9 -3
- data/lib/rocket_job/batch/throttle.rb +1 -1
- data/lib/rocket_job/batch/throttle_running_workers.rb +1 -1
- data/lib/rocket_job/batch/throttle_windows.rb +72 -0
- data/lib/rocket_job/batch/worker.rb +2 -8
- data/lib/rocket_job/event.rb +0 -2
- data/lib/rocket_job/extensions/mongoid/clients/options.rb +0 -2
- data/lib/rocket_job/extensions/mongoid/remove_warnings.rb +12 -0
- data/lib/rocket_job/jobs/copy_file_job.rb +1 -1
- data/lib/rocket_job/jobs/re_encrypt/relational_job.rb +2 -5
- data/lib/rocket_job/jobs/upload_file_job.rb +1 -1
- data/lib/rocket_job/plugins/cron.rb +6 -23
- data/lib/rocket_job/plugins/job/throttle.rb +1 -1
- data/lib/rocket_job/plugins/job/throttle_running_jobs.rb +1 -1
- data/lib/rocket_job/plugins/job/worker.rb +5 -4
- data/lib/rocket_job/plugins/processing_window.rb +7 -13
- data/lib/rocket_job/sliced.rb +91 -0
- data/lib/rocket_job/sliced/bzip2_output_slice.rb +43 -0
- data/lib/rocket_job/sliced/input.rb +3 -3
- data/lib/rocket_job/sliced/slice.rb +6 -0
- data/lib/rocket_job/sliced/slices.rb +6 -0
- data/lib/rocket_job/subscribers/server.rb +9 -3
- data/lib/rocket_job/supervisor.rb +3 -1
- data/lib/rocket_job/version.rb +1 -1
- data/lib/rocket_job/worker_pool.rb +1 -0
- data/lib/rocketjob.rb +7 -20
- metadata +27 -11
- data/lib/rocket_job/plugins/rufus/cron_line.rb +0 -520
- data/lib/rocket_job/plugins/rufus/zo_time.rb +0 -524
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 55ab6ca2b3f76cdb4ddf679a2d3e88e1d6a6f3106a69349129a267b629ef4a53
|
4
|
+
data.tar.gz: 4e0d07878fb4265179b4a270650cc9b89ca4bca55f1d8f9a3451cb3064062c35
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fdc7ae3827d987404f431410cb81161fbfa269dfe7575a4a86a6abf362c59c6acd0d2c7e9856273ced396eeaf20e50561f1be10a7de0d5c4ae45e5648d15d083
|
7
|
+
data.tar.gz: 7bb9e9ac90569e78e135293efcd357a0d75037b5417f5f530f18ae3f17f44acf28d7de5b4aeef880a0315117f2992623958a4fb4c9df93ca7273369ac052e759
|
data/README.md
CHANGED
@@ -3,9 +3,9 @@
|
|
3
3
|
|
4
4
|
Ruby's missing batch system
|
5
5
|
|
6
|
-
Checkout
|
6
|
+
Checkout https://rocketjob.io/
|
7
7
|
|
8
|
-
![Rocket Job](
|
8
|
+
![Rocket Job](https://rocketjob.io/images/rocket/rocket-icon-512x512.png)
|
9
9
|
|
10
10
|
## Documentation
|
11
11
|
|
data/lib/rocket_job/batch.rb
CHANGED
@@ -24,6 +24,7 @@ module RocketJob
|
|
24
24
|
autoload :LowerPriority, "rocket_job/batch/lower_priority"
|
25
25
|
autoload :Performance, "rocket_job/batch/performance"
|
26
26
|
autoload :Statistics, "rocket_job/batch/statistics"
|
27
|
+
autoload :ThrottleWindows, "rocket_job/batch/throttle_windows"
|
27
28
|
autoload :Result, "rocket_job/batch/result"
|
28
29
|
autoload :Results, "rocket_job/batch/results"
|
29
30
|
autoload :Tabular, "rocket_job/batch/tabular"
|
data/lib/rocket_job/batch/io.rb
CHANGED
@@ -18,7 +18,7 @@ module RocketJob
|
|
18
18
|
raise "Category #{category.inspect}, must be registered in input_categories: #{input_categories.inspect}"
|
19
19
|
end
|
20
20
|
|
21
|
-
(@inputs ||= {})[category] ||= RocketJob::Sliced
|
21
|
+
(@inputs ||= {})[category] ||= RocketJob::Sliced.factory(:input, category, self)
|
22
22
|
end
|
23
23
|
|
24
24
|
# Returns [RocketJob::Sliced::Output] output collection for holding output slices
|
@@ -34,7 +34,7 @@ module RocketJob
|
|
34
34
|
raise "Category #{category.inspect}, must be registered in output_categories: #{output_categories.inspect}"
|
35
35
|
end
|
36
36
|
|
37
|
-
(@outputs ||= {})[category] ||= RocketJob::Sliced
|
37
|
+
(@outputs ||= {})[category] ||= RocketJob::Sliced.factory(:output, category, self)
|
38
38
|
end
|
39
39
|
|
40
40
|
# Upload the supplied file, io, IOStreams::Path, or IOStreams::Stream.
|
@@ -355,8 +355,18 @@ module RocketJob
|
|
355
355
|
|
356
356
|
return output(category).download(header_line: header_line, &block) if block
|
357
357
|
|
358
|
-
|
359
|
-
|
358
|
+
output_collection = output(category)
|
359
|
+
|
360
|
+
if output_collection.binary?
|
361
|
+
IOStreams.new(stream).stream(:none).writer(**args) do |io|
|
362
|
+
raise(ArgumentError, "A `header_line` is not supported with binary output collections") if header_line
|
363
|
+
|
364
|
+
output_collection.download { |record| io << record[:binary] }
|
365
|
+
end
|
366
|
+
else
|
367
|
+
IOStreams.new(stream).writer(:line, **args) do |io|
|
368
|
+
output_collection.download(header_line: header_line) { |record| io << record }
|
369
|
+
end
|
360
370
|
end
|
361
371
|
end
|
362
372
|
|
@@ -393,21 +403,6 @@ module RocketJob
|
|
393
403
|
RocketJob::Sliced::Writer::Output.collect(self, input_slice) { |writer| writer << result }
|
394
404
|
end
|
395
405
|
end
|
396
|
-
|
397
|
-
private
|
398
|
-
|
399
|
-
def rocket_job_io_slice_arguments(collection_type, category)
|
400
|
-
collection_name = "rocket_job.#{collection_type}.#{id}"
|
401
|
-
collection_name << ".#{category}" unless category == :main
|
402
|
-
|
403
|
-
args = {collection_name: collection_name, slice_size: slice_size}
|
404
|
-
if encrypt
|
405
|
-
args[:slice_class] = Sliced::EncryptedSlice
|
406
|
-
elsif compress
|
407
|
-
args[:slice_class] = Sliced::CompressedSlice
|
408
|
-
end
|
409
|
-
args
|
410
|
-
end
|
411
406
|
end
|
412
407
|
end
|
413
408
|
end
|
@@ -44,12 +44,12 @@ module RocketJob
|
|
44
44
|
# Compress uploaded records.
|
45
45
|
# The fields are not affected in any way, only the data stored in the
|
46
46
|
# records and results collections will be compressed
|
47
|
-
field :compress, type:
|
47
|
+
field :compress, type: Object, default: false, class_attribute: true
|
48
48
|
|
49
49
|
# Encrypt uploaded records.
|
50
50
|
# The fields are not affected in any way, only the data stored in the
|
51
51
|
# records and results collections will be encrypted
|
52
|
-
field :encrypt, type:
|
52
|
+
field :encrypt, type: Object, default: false, class_attribute: true
|
53
53
|
|
54
54
|
#
|
55
55
|
# Values that jobs can also update during processing
|
@@ -12,6 +12,7 @@ module RocketJob
|
|
12
12
|
included do
|
13
13
|
field :tabular_input_header, type: Array, class_attribute: true, user_editable: true
|
14
14
|
field :tabular_input_format, type: Symbol, default: :csv, class_attribute: true, user_editable: true
|
15
|
+
field :tabular_input_options, type: Hash, class_attribute: true
|
15
16
|
|
16
17
|
# tabular_input_mode: [:line | :array | :hash]
|
17
18
|
# :line
|
@@ -53,7 +54,9 @@ module RocketJob
|
|
53
54
|
input_stream = stream.nil? ? nil : IOStreams.new(stream)
|
54
55
|
|
55
56
|
if stream && (tabular_input_type == :text)
|
56
|
-
|
57
|
+
# Cannot change the length of fixed width lines
|
58
|
+
replace = tabular_input_format == :fixed ? " " : ""
|
59
|
+
input_stream.option_or_stream(:encode, encoding: "UTF-8", cleaner: :printable, replace: replace)
|
57
60
|
end
|
58
61
|
|
59
62
|
# If an input header is not required, then we don't extract it
|
@@ -96,14 +99,15 @@ module RocketJob
|
|
96
99
|
allowed_columns: tabular_input_white_list,
|
97
100
|
required_columns: tabular_input_required,
|
98
101
|
skip_unknown: tabular_input_skip_unknown,
|
99
|
-
format: tabular_input_format
|
102
|
+
format: tabular_input_format,
|
103
|
+
format_options: tabular_input_options&.deep_symbolize_keys
|
100
104
|
)
|
101
105
|
end
|
102
106
|
|
103
107
|
def tabular_input_render
|
104
|
-
|
105
|
-
|
106
|
-
|
108
|
+
return if tabular_input_header.blank? && tabular_input.header?
|
109
|
+
|
110
|
+
@rocket_job_input = tabular_input.record_parse(@rocket_job_input)
|
107
111
|
end
|
108
112
|
|
109
113
|
# Cleanse custom input header if supplied.
|
@@ -12,6 +12,7 @@ module RocketJob
|
|
12
12
|
included do
|
13
13
|
field :tabular_output_header, type: Array, class_attribute: true, user_editable: true, copy_on_restart: true
|
14
14
|
field :tabular_output_format, type: Symbol, default: :csv, class_attribute: true, user_editable: true, copy_on_restart: true
|
15
|
+
field :tabular_output_options, type: Hash, class_attribute: true
|
15
16
|
|
16
17
|
validates_inclusion_of :tabular_output_format, in: IOStreams::Tabular.registered_formats
|
17
18
|
|
@@ -31,8 +32,9 @@ module RocketJob
|
|
31
32
|
|
32
33
|
# Overrides: `RocketJob::Batch::IO#download` to add the `tabular_output_header`.
|
33
34
|
def download(file_name_or_io = nil, category: :main, **args, &block)
|
34
|
-
|
35
|
-
|
35
|
+
unless tabular_output.requires_header?(category)
|
36
|
+
return super(file_name_or_io, category: category, **args, &block)
|
37
|
+
end
|
36
38
|
|
37
39
|
header = tabular_output.render_header(category)
|
38
40
|
super(file_name_or_io, header_line: header, category: category, **args, &block)
|
@@ -43,7 +45,11 @@ module RocketJob
|
|
43
45
|
# Delimited instance used for this slice, by a single worker (thread)
|
44
46
|
def tabular_output
|
45
47
|
@tabular_output ||= Tabular.new(
|
46
|
-
main: IOStreams::Tabular.new(
|
48
|
+
main: IOStreams::Tabular.new(
|
49
|
+
columns: tabular_output_header,
|
50
|
+
format: tabular_output_format,
|
51
|
+
format_options: tabular_output_options&.deep_symbolize_keys
|
52
|
+
)
|
47
53
|
)
|
48
54
|
end
|
49
55
|
|
@@ -5,7 +5,7 @@ module RocketJob
|
|
5
5
|
# Throttle the number of slices of a specific batch job that are processed at the same time.
|
6
6
|
#
|
7
7
|
# Example:
|
8
|
-
# class MyJob < RocketJob
|
8
|
+
# class MyJob < RocketJob::Job
|
9
9
|
# include RocketJob::Batch
|
10
10
|
#
|
11
11
|
# # Maximum number of slices to process at the same time for each running instance.
|
@@ -0,0 +1,72 @@
|
|
1
|
+
require "active_support/concern"
|
2
|
+
require "fugit"
|
3
|
+
|
4
|
+
module RocketJob
|
5
|
+
module Batch
|
6
|
+
# For a batch job that can run over a long period of time it can be useful
|
7
|
+
# to prevent its slices from being processed outside a predefined processing window.
|
8
|
+
#
|
9
|
+
# This plugin supports up to 2 different processing windows.
|
10
|
+
#
|
11
|
+
# For example, do not run this job during business hours.
|
12
|
+
# Allow it to run from 5pm until 8am the following day Mon through Fri.
|
13
|
+
#
|
14
|
+
# class AfterHoursJob < RocketJob::Job
|
15
|
+
# include RocketJob::Batch
|
16
|
+
# include RocketJob::Batch::ThrottleWindows
|
17
|
+
#
|
18
|
+
# # Monday through Thursday the job can start processing at 5pm Eastern.
|
19
|
+
# self.primary_schedule = "0 17 * * 1-4 America/New_York"
|
20
|
+
# # Slices are allowed to run until 8am the following day, which is 15 hours long:
|
21
|
+
# self.primary_duration = 15.hours
|
22
|
+
#
|
23
|
+
# # The slices for this job can run all weekend long, starting Friday at 5pm Eastern.
|
24
|
+
# self.secondary_schedule = "0 17 * * 5 America/New_York"
|
25
|
+
# # Slices are allowed to run until 8am on Monday morning, which is 63 hours long:
|
26
|
+
# self.secondary_duration = 63.hours
|
27
|
+
# end
|
28
|
+
#
|
29
|
+
# Notes:
|
30
|
+
# * These schedules do not affect when the job is started, completed, or when `before_batch` or
|
31
|
+
# `after_batch` processing is performed. It only limits when individual slices are processed.
|
32
|
+
module ThrottleWindows
|
33
|
+
extend ActiveSupport::Concern
|
34
|
+
|
35
|
+
included do
|
36
|
+
# Beginning of the primary schedule. In cron format, see Scheduled Jobs `cron_schedule` for examples.
|
37
|
+
field :primary_schedule, type: String, class_attribute: true, user_editable: true, copy_on_restart: true
|
38
|
+
# Duration in seconds of the primary window.
|
39
|
+
field :primary_duration, type: Integer, class_attribute: true, user_editable: true, copy_on_restart: true
|
40
|
+
|
41
|
+
# Beginning of the secondary schedule. In cron format, see Scheduled Jobs `cron_schedule` for examples.
|
42
|
+
field :secondary_schedule, type: String, class_attribute: true, user_editable: true, copy_on_restart: true
|
43
|
+
# Duration in seconds of the secondary window.
|
44
|
+
field :secondary_duration, type: Integer, class_attribute: true, user_editable: true, copy_on_restart: true
|
45
|
+
|
46
|
+
define_batch_throttle :throttle_windows_exceeded?, filter: :throttle_filter_id
|
47
|
+
|
48
|
+
validates_each :primary_schedule, :secondary_schedule do |record, attr, value|
|
49
|
+
record.errors.add(attr, "Invalid #{attr}: #{value.inspect}") if value && !Fugit::Cron.new(value)
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
private
|
54
|
+
|
55
|
+
def throttle_windows_exceeded?
|
56
|
+
exceeded = primary_schedule && primary_duration && throttle_outside_window?(primary_schedule, primary_duration)
|
57
|
+
if exceeded && secondary_schedule && secondary_duration
|
58
|
+
exceeded = throttle_outside_window?(secondary_schedule, secondary_duration)
|
59
|
+
end
|
60
|
+
exceeded
|
61
|
+
end
|
62
|
+
|
63
|
+
def throttle_outside_window?(schedule, duration)
|
64
|
+
cron = Fugit::Cron.new(schedule)
|
65
|
+
time = Time.now.utc + 1
|
66
|
+
# Add 1 second since right now could be the very beginning of the processing window.
|
67
|
+
previous_time = cron.previous_time(time).to_utc_time
|
68
|
+
previous_time + duration < time
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
@@ -28,10 +28,6 @@ module RocketJob
|
|
28
28
|
#
|
29
29
|
# If an exception was thrown the entire slice of records is marked as failed.
|
30
30
|
#
|
31
|
-
# If the mongo_ha gem has been loaded, then the connection to mongo is
|
32
|
-
# automatically re-established and the job will resume anytime a
|
33
|
-
# Mongo connection failure occurs.
|
34
|
-
#
|
35
31
|
# Thread-safe, can be called by multiple threads at the same time
|
36
32
|
def rocket_job_work(worker, re_raise_exceptions = false)
|
37
33
|
raise "Job must be started before calling #rocket_job_work" unless running?
|
@@ -50,7 +46,7 @@ module RocketJob
|
|
50
46
|
next if slice.failed?
|
51
47
|
|
52
48
|
slice.fail_on_exception!(re_raise_exceptions) { rocket_job_process_slice(slice) }
|
53
|
-
elsif record_count && rocket_job_batch_complete?(worker.name)
|
49
|
+
elsif record_count && fail_on_exception!(re_raise_exceptions) { rocket_job_batch_complete?(worker.name) }
|
54
50
|
return false
|
55
51
|
else
|
56
52
|
logger.debug "No more work available for this job"
|
@@ -114,8 +110,6 @@ module RocketJob
|
|
114
110
|
servers
|
115
111
|
end
|
116
112
|
|
117
|
-
private
|
118
|
-
|
119
113
|
def rocket_job_batch_throttled?(slice, worker)
|
120
114
|
filter = self.class.rocket_job_batch_throttles.matching_filter(self, slice)
|
121
115
|
return false unless filter
|
@@ -154,7 +148,7 @@ module RocketJob
|
|
154
148
|
records = slice.records
|
155
149
|
|
156
150
|
# Skip records already processed, if any.
|
157
|
-
#slice.processing_record_number ||= 0
|
151
|
+
# slice.processing_record_number ||= 0
|
158
152
|
# TODO: Must append to existing output slices before this can be enabled.
|
159
153
|
# if !collect_output && (slice.processing_record_number > 1)
|
160
154
|
# records = records[slice.processing_record_number - 1..-1]
|
data/lib/rocket_job/event.rb
CHANGED
@@ -0,0 +1,12 @@
|
|
1
|
+
require "mongoid/fields/validators/macro"
|
2
|
+
require "semantic_logger"
|
3
|
+
module RocketJob
|
4
|
+
module RemoveMongoidWarnings
|
5
|
+
# Remove annoying warnings about Symbols type being deprecated.
|
6
|
+
def validate_options(*params)
|
7
|
+
SemanticLogger.silence(:error) { super(*params) }
|
8
|
+
end
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
12
|
+
::Mongoid::Fields::Validators::Macro.extend(RocketJob::RemoveMongoidWarnings)
|
@@ -25,7 +25,7 @@ module RocketJob
|
|
25
25
|
|
26
26
|
self.destroy_on_complete = false
|
27
27
|
# Number of times to automatically retry the copy. Set to `0` for no retry attempts.
|
28
|
-
self.retry_limit =
|
28
|
+
self.retry_limit = 10
|
29
29
|
|
30
30
|
# File names in IOStreams URL format.
|
31
31
|
field :source_url, type: String, user_editable: true
|
@@ -1,8 +1,5 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
rescue LoadError
|
4
|
-
raise 'RocketJob::Jobs::ReEncrypt::RelationalJob uses ActiveRecord to obtain the database connection, please install the gem "activerecord".'
|
5
|
-
end
|
1
|
+
require "active_record"
|
2
|
+
require "sync_attr"
|
6
3
|
|
7
4
|
# Batch Worker to Re-encrypt all encrypted fields in MySQL that start with `encrypted_`.
|
8
5
|
#
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require "active_support/concern"
|
2
|
+
require "fugit"
|
2
3
|
|
3
4
|
module RocketJob
|
4
5
|
module Plugins
|
@@ -17,7 +18,9 @@ module RocketJob
|
|
17
18
|
|
18
19
|
field :cron_schedule, type: String, class_attribute: true, user_editable: true, copy_on_restart: true
|
19
20
|
|
20
|
-
|
21
|
+
validates_each :cron_schedule do |record, attr, value|
|
22
|
+
record.errors.add(attr, "Invalid cron_schedule: #{value.inspect}") if value && !Fugit::Cron.new(value)
|
23
|
+
end
|
21
24
|
before_save :rocket_job_cron_set_run_at
|
22
25
|
|
23
26
|
private
|
@@ -42,30 +45,10 @@ module RocketJob
|
|
42
45
|
end
|
43
46
|
end
|
44
47
|
|
45
|
-
# Returns [Time] the next time this job will be scheduled to run at.
|
46
|
-
#
|
47
|
-
# Parameters
|
48
|
-
# time: [Time]
|
49
|
-
# The next time as of this time.
|
50
|
-
# Default: Time.now
|
51
|
-
def rocket_job_cron_next_time(time = Time.now)
|
52
|
-
RocketJob::Plugins::Rufus::CronLine.new(cron_schedule).next_time(time)
|
53
|
-
end
|
54
|
-
|
55
|
-
private
|
56
|
-
|
57
48
|
def rocket_job_cron_set_run_at
|
58
|
-
return
|
59
|
-
|
60
|
-
self.run_at = rocket_job_cron_next_time if cron_schedule_changed? && !run_at_changed?
|
61
|
-
end
|
62
|
-
|
63
|
-
def rocket_job_cron_valid
|
64
|
-
return unless cron_schedule
|
49
|
+
return if cron_schedule.nil? || !(cron_schedule_changed? && !run_at_changed?)
|
65
50
|
|
66
|
-
|
67
|
-
rescue ArgumentError => e
|
68
|
-
errors.add(:cron_schedule, e.message)
|
51
|
+
self.run_at = Fugit::Cron.new(cron_schedule).next_time.to_utc_time
|
69
52
|
end
|
70
53
|
end
|
71
54
|
end
|
@@ -7,7 +7,7 @@ module RocketJob
|
|
7
7
|
#
|
8
8
|
# Example:
|
9
9
|
# # Do not run this job when the MySQL slave delay exceeds 5 minutes.
|
10
|
-
# class MyJob < RocketJob
|
10
|
+
# class MyJob < RocketJob::Job
|
11
11
|
# # Define a custom mysql throttle
|
12
12
|
# # Prevents all jobs of this class from running on the current server.
|
13
13
|
# define_throttle :mysql_throttle_exceeded?
|
@@ -6,7 +6,7 @@ module RocketJob
|
|
6
6
|
# Throttle the number of jobs of a specific class that are processed at the same time.
|
7
7
|
#
|
8
8
|
# Example:
|
9
|
-
# class MyJob < RocketJob
|
9
|
+
# class MyJob < RocketJob::Job
|
10
10
|
# # Maximum number of jobs of this class to process at the same time.
|
11
11
|
# self.throttle_running_jobs = 25
|
12
12
|
#
|
@@ -73,12 +73,13 @@ module RocketJob
|
|
73
73
|
if failed? || !may_fail?
|
74
74
|
self.exception = JobException.from_exception(e)
|
75
75
|
exception.worker_name = worker_name
|
76
|
-
save! unless new_record? || destroyed?
|
77
|
-
elsif new_record? || destroyed?
|
78
|
-
fail(worker_name, e)
|
79
76
|
else
|
80
|
-
fail
|
77
|
+
fail(worker_name, e)
|
81
78
|
end
|
79
|
+
|
80
|
+
# Prevent validation failures from failing the job
|
81
|
+
save(validate: false) unless new_record? || destroyed?
|
82
|
+
|
82
83
|
raise e if re_raise_exceptions
|
83
84
|
end
|
84
85
|
end
|