rocketjob 5.2.0 → 5.4.0.beta1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/lib/rocket_job/batch.rb +1 -0
- data/lib/rocket_job/batch/io.rb +14 -19
- data/lib/rocket_job/batch/model.rb +2 -2
- data/lib/rocket_job/batch/tabular/input.rb +9 -5
- data/lib/rocket_job/batch/tabular/output.rb +9 -3
- data/lib/rocket_job/batch/throttle.rb +1 -1
- data/lib/rocket_job/batch/throttle_running_workers.rb +1 -1
- data/lib/rocket_job/batch/throttle_windows.rb +72 -0
- data/lib/rocket_job/batch/worker.rb +2 -8
- data/lib/rocket_job/event.rb +0 -2
- data/lib/rocket_job/extensions/mongoid/clients/options.rb +0 -2
- data/lib/rocket_job/extensions/mongoid/remove_warnings.rb +12 -0
- data/lib/rocket_job/jobs/copy_file_job.rb +1 -1
- data/lib/rocket_job/jobs/re_encrypt/relational_job.rb +2 -5
- data/lib/rocket_job/jobs/upload_file_job.rb +1 -1
- data/lib/rocket_job/plugins/cron.rb +6 -23
- data/lib/rocket_job/plugins/job/throttle.rb +1 -1
- data/lib/rocket_job/plugins/job/throttle_running_jobs.rb +1 -1
- data/lib/rocket_job/plugins/job/worker.rb +5 -4
- data/lib/rocket_job/plugins/processing_window.rb +7 -13
- data/lib/rocket_job/sliced.rb +91 -0
- data/lib/rocket_job/sliced/bzip2_output_slice.rb +43 -0
- data/lib/rocket_job/sliced/input.rb +3 -3
- data/lib/rocket_job/sliced/slice.rb +6 -0
- data/lib/rocket_job/sliced/slices.rb +6 -0
- data/lib/rocket_job/subscribers/server.rb +9 -3
- data/lib/rocket_job/supervisor.rb +3 -1
- data/lib/rocket_job/version.rb +1 -1
- data/lib/rocket_job/worker_pool.rb +1 -0
- data/lib/rocketjob.rb +7 -20
- metadata +27 -11
- data/lib/rocket_job/plugins/rufus/cron_line.rb +0 -520
- data/lib/rocket_job/plugins/rufus/zo_time.rb +0 -524
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 55ab6ca2b3f76cdb4ddf679a2d3e88e1d6a6f3106a69349129a267b629ef4a53
|
4
|
+
data.tar.gz: 4e0d07878fb4265179b4a270650cc9b89ca4bca55f1d8f9a3451cb3064062c35
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fdc7ae3827d987404f431410cb81161fbfa269dfe7575a4a86a6abf362c59c6acd0d2c7e9856273ced396eeaf20e50561f1be10a7de0d5c4ae45e5648d15d083
|
7
|
+
data.tar.gz: 7bb9e9ac90569e78e135293efcd357a0d75037b5417f5f530f18ae3f17f44acf28d7de5b4aeef880a0315117f2992623958a4fb4c9df93ca7273369ac052e759
|
data/README.md
CHANGED
@@ -3,9 +3,9 @@
|
|
3
3
|
|
4
4
|
Ruby's missing batch system
|
5
5
|
|
6
|
-
Checkout
|
6
|
+
Checkout https://rocketjob.io/
|
7
7
|
|
8
|
-

|
9
9
|
|
10
10
|
## Documentation
|
11
11
|
|
data/lib/rocket_job/batch.rb
CHANGED
@@ -24,6 +24,7 @@ module RocketJob
|
|
24
24
|
autoload :LowerPriority, "rocket_job/batch/lower_priority"
|
25
25
|
autoload :Performance, "rocket_job/batch/performance"
|
26
26
|
autoload :Statistics, "rocket_job/batch/statistics"
|
27
|
+
autoload :ThrottleWindows, "rocket_job/batch/throttle_windows"
|
27
28
|
autoload :Result, "rocket_job/batch/result"
|
28
29
|
autoload :Results, "rocket_job/batch/results"
|
29
30
|
autoload :Tabular, "rocket_job/batch/tabular"
|
data/lib/rocket_job/batch/io.rb
CHANGED
@@ -18,7 +18,7 @@ module RocketJob
|
|
18
18
|
raise "Category #{category.inspect}, must be registered in input_categories: #{input_categories.inspect}"
|
19
19
|
end
|
20
20
|
|
21
|
-
(@inputs ||= {})[category] ||= RocketJob::Sliced
|
21
|
+
(@inputs ||= {})[category] ||= RocketJob::Sliced.factory(:input, category, self)
|
22
22
|
end
|
23
23
|
|
24
24
|
# Returns [RocketJob::Sliced::Output] output collection for holding output slices
|
@@ -34,7 +34,7 @@ module RocketJob
|
|
34
34
|
raise "Category #{category.inspect}, must be registered in output_categories: #{output_categories.inspect}"
|
35
35
|
end
|
36
36
|
|
37
|
-
(@outputs ||= {})[category] ||= RocketJob::Sliced
|
37
|
+
(@outputs ||= {})[category] ||= RocketJob::Sliced.factory(:output, category, self)
|
38
38
|
end
|
39
39
|
|
40
40
|
# Upload the supplied file, io, IOStreams::Path, or IOStreams::Stream.
|
@@ -355,8 +355,18 @@ module RocketJob
|
|
355
355
|
|
356
356
|
return output(category).download(header_line: header_line, &block) if block
|
357
357
|
|
358
|
-
|
359
|
-
|
358
|
+
output_collection = output(category)
|
359
|
+
|
360
|
+
if output_collection.binary?
|
361
|
+
IOStreams.new(stream).stream(:none).writer(**args) do |io|
|
362
|
+
raise(ArgumenError, "A `header_line` is not supported with binary output collections") if header_line
|
363
|
+
|
364
|
+
output_collection.download { |record| io << record[:binary] }
|
365
|
+
end
|
366
|
+
else
|
367
|
+
IOStreams.new(stream).writer(:line, **args) do |io|
|
368
|
+
output_collection.download(header_line: header_line) { |record| io << record }
|
369
|
+
end
|
360
370
|
end
|
361
371
|
end
|
362
372
|
|
@@ -393,21 +403,6 @@ module RocketJob
|
|
393
403
|
RocketJob::Sliced::Writer::Output.collect(self, input_slice) { |writer| writer << result }
|
394
404
|
end
|
395
405
|
end
|
396
|
-
|
397
|
-
private
|
398
|
-
|
399
|
-
def rocket_job_io_slice_arguments(collection_type, category)
|
400
|
-
collection_name = "rocket_job.#{collection_type}.#{id}"
|
401
|
-
collection_name << ".#{category}" unless category == :main
|
402
|
-
|
403
|
-
args = {collection_name: collection_name, slice_size: slice_size}
|
404
|
-
if encrypt
|
405
|
-
args[:slice_class] = Sliced::EncryptedSlice
|
406
|
-
elsif compress
|
407
|
-
args[:slice_class] = Sliced::CompressedSlice
|
408
|
-
end
|
409
|
-
args
|
410
|
-
end
|
411
406
|
end
|
412
407
|
end
|
413
408
|
end
|
@@ -44,12 +44,12 @@ module RocketJob
|
|
44
44
|
# Compress uploaded records.
|
45
45
|
# The fields are not affected in any way, only the data stored in the
|
46
46
|
# records and results collections will be compressed
|
47
|
-
field :compress, type:
|
47
|
+
field :compress, type: Object, default: false, class_attribute: true
|
48
48
|
|
49
49
|
# Encrypt uploaded records.
|
50
50
|
# The fields are not affected in any way, only the data stored in the
|
51
51
|
# records and results collections will be encrypted
|
52
|
-
field :encrypt, type:
|
52
|
+
field :encrypt, type: Object, default: false, class_attribute: true
|
53
53
|
|
54
54
|
#
|
55
55
|
# Values that jobs can also update during processing
|
@@ -12,6 +12,7 @@ module RocketJob
|
|
12
12
|
included do
|
13
13
|
field :tabular_input_header, type: Array, class_attribute: true, user_editable: true
|
14
14
|
field :tabular_input_format, type: Symbol, default: :csv, class_attribute: true, user_editable: true
|
15
|
+
field :tabular_input_options, type: Hash, class_attribute: true
|
15
16
|
|
16
17
|
# tabular_input_mode: [:line | :array | :hash]
|
17
18
|
# :line
|
@@ -53,7 +54,9 @@ module RocketJob
|
|
53
54
|
input_stream = stream.nil? ? nil : IOStreams.new(stream)
|
54
55
|
|
55
56
|
if stream && (tabular_input_type == :text)
|
56
|
-
|
57
|
+
# Cannot change the length of fixed width lines
|
58
|
+
replace = tabular_input_format == :fixed ? " " : ""
|
59
|
+
input_stream.option_or_stream(:encode, encoding: "UTF-8", cleaner: :printable, replace: replace)
|
57
60
|
end
|
58
61
|
|
59
62
|
# If an input header is not required, then we don't extract it
|
@@ -96,14 +99,15 @@ module RocketJob
|
|
96
99
|
allowed_columns: tabular_input_white_list,
|
97
100
|
required_columns: tabular_input_required,
|
98
101
|
skip_unknown: tabular_input_skip_unknown,
|
99
|
-
format: tabular_input_format
|
102
|
+
format: tabular_input_format,
|
103
|
+
format_options: tabular_input_options&.deep_symbolize_keys
|
100
104
|
)
|
101
105
|
end
|
102
106
|
|
103
107
|
def tabular_input_render
|
104
|
-
|
105
|
-
|
106
|
-
|
108
|
+
return if tabular_input_header.blank? && tabular_input.header?
|
109
|
+
|
110
|
+
@rocket_job_input = tabular_input.record_parse(@rocket_job_input)
|
107
111
|
end
|
108
112
|
|
109
113
|
# Cleanse custom input header if supplied.
|
@@ -12,6 +12,7 @@ module RocketJob
|
|
12
12
|
included do
|
13
13
|
field :tabular_output_header, type: Array, class_attribute: true, user_editable: true, copy_on_restart: true
|
14
14
|
field :tabular_output_format, type: Symbol, default: :csv, class_attribute: true, user_editable: true, copy_on_restart: true
|
15
|
+
field :tabular_output_options, type: Hash, class_attribute: true
|
15
16
|
|
16
17
|
validates_inclusion_of :tabular_output_format, in: IOStreams::Tabular.registered_formats
|
17
18
|
|
@@ -31,8 +32,9 @@ module RocketJob
|
|
31
32
|
|
32
33
|
# Overrides: `RocketJob::Batch::IO#download` to add the `tabular_output_header`.
|
33
34
|
def download(file_name_or_io = nil, category: :main, **args, &block)
|
34
|
-
|
35
|
-
|
35
|
+
unless tabular_output.requires_header?(category)
|
36
|
+
return super(file_name_or_io, category: category, **args, &block)
|
37
|
+
end
|
36
38
|
|
37
39
|
header = tabular_output.render_header(category)
|
38
40
|
super(file_name_or_io, header_line: header, category: category, **args, &block)
|
@@ -43,7 +45,11 @@ module RocketJob
|
|
43
45
|
# Delimited instance used for this slice, by a single worker (thread)
|
44
46
|
def tabular_output
|
45
47
|
@tabular_output ||= Tabular.new(
|
46
|
-
main: IOStreams::Tabular.new(
|
48
|
+
main: IOStreams::Tabular.new(
|
49
|
+
columns: tabular_output_header,
|
50
|
+
format: tabular_output_format,
|
51
|
+
format_options: tabular_output_options&.deep_symbolize_keys
|
52
|
+
)
|
47
53
|
)
|
48
54
|
end
|
49
55
|
|
@@ -5,7 +5,7 @@ module RocketJob
|
|
5
5
|
# Throttle the number of slices of a specific batch job that are processed at the same time.
|
6
6
|
#
|
7
7
|
# Example:
|
8
|
-
# class MyJob < RocketJob
|
8
|
+
# class MyJob < RocketJob::Job
|
9
9
|
# include RocketJob::Batch
|
10
10
|
#
|
11
11
|
# # Maximum number of slices to process at the same time for each running instance.
|
@@ -0,0 +1,72 @@
|
|
1
|
+
require "active_support/concern"
|
2
|
+
require "fugit"
|
3
|
+
|
4
|
+
module RocketJob
|
5
|
+
module Batch
|
6
|
+
# For a batch job that can run over a long period of time it can be useful
|
7
|
+
# to prevent its slices from being processed outside a predefined processing window.
|
8
|
+
#
|
9
|
+
# This plugin supports up to 2 different processing windows.
|
10
|
+
#
|
11
|
+
# For example, do not run this job during business hours.
|
12
|
+
# Allow it to run from 5pm until 8am the following day Mon through Fri.
|
13
|
+
#
|
14
|
+
# class AfterHoursJob < RocketJob::Job
|
15
|
+
# include RocketJob::Batch
|
16
|
+
# include RocketJob::Batch::ThrottleWindows
|
17
|
+
#
|
18
|
+
# # Monday through Thursday the job can start processing at 5pm Eastern.
|
19
|
+
# self.primary_schedule = "0 17 * * 1-4 America/New_York"
|
20
|
+
# # Slices are allowed to run until 8am the following day, which is 15 hours long:
|
21
|
+
# self.primary_duration = 15.hours
|
22
|
+
#
|
23
|
+
# # The slices for this job can run all weekend long, starting Friday at 5pm Eastern.
|
24
|
+
# self.secondary_schedule = "0 17 * * 5 America/New_York"
|
25
|
+
# # Slices are allowed to run until 8am on Monday morning, which is 63 hours long:
|
26
|
+
# self.secondary_duration = 63.hours
|
27
|
+
# end
|
28
|
+
#
|
29
|
+
# Notes:
|
30
|
+
# * These schedules do not affect when the job is started, completed, or when `before_batch` or
|
31
|
+
# `after_batch` processing is performed. It only limits when individual slices are processed.
|
32
|
+
module ThrottleWindows
|
33
|
+
extend ActiveSupport::Concern
|
34
|
+
|
35
|
+
included do
|
36
|
+
# Beginning of the primary schedule. In cron format, see Scheduled Jobs `cron_schedule` for examples.
|
37
|
+
field :primary_schedule, type: String, class_attribute: true, user_editable: true, copy_on_restart: true
|
38
|
+
# Duration in seconds of the primary window.
|
39
|
+
field :primary_duration, type: Integer, class_attribute: true, user_editable: true, copy_on_restart: true
|
40
|
+
|
41
|
+
# Beginning of the secondary schedule. In cron format, see Scheduled Jobs `cron_schedule` for examples.
|
42
|
+
field :secondary_schedule, type: String, class_attribute: true, user_editable: true, copy_on_restart: true
|
43
|
+
# Duration in seconds of the secondary window.
|
44
|
+
field :secondary_duration, type: Integer, class_attribute: true, user_editable: true, copy_on_restart: true
|
45
|
+
|
46
|
+
define_batch_throttle :throttle_windows_exceeded?, filter: :throttle_filter_id
|
47
|
+
|
48
|
+
validates_each :primary_schedule, :secondary_schedule do |record, attr, value|
|
49
|
+
record.errors.add(attr, "Invalid #{attr}: #{value.inspect}") if value && !Fugit::Cron.new(value)
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
private
|
54
|
+
|
55
|
+
def throttle_windows_exceeded?
|
56
|
+
exceeded = primary_schedule && primary_duration && throttle_outside_window?(primary_schedule, primary_duration)
|
57
|
+
if exceeded && secondary_schedule && secondary_duration
|
58
|
+
exceeded = throttle_outside_window?(secondary_schedule, secondary_duration)
|
59
|
+
end
|
60
|
+
exceeded
|
61
|
+
end
|
62
|
+
|
63
|
+
def throttle_outside_window?(schedule, duration)
|
64
|
+
cron = Fugit::Cron.new(schedule)
|
65
|
+
time = Time.now.utc + 1
|
66
|
+
# The 1 second added to `time` above covers the case where right now is the very beginning of the processing window.
|
67
|
+
previous_time = cron.previous_time(time).to_utc_time
|
68
|
+
previous_time + duration < time
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
@@ -28,10 +28,6 @@ module RocketJob
|
|
28
28
|
#
|
29
29
|
# If an exception was thrown the entire slice of records is marked as failed.
|
30
30
|
#
|
31
|
-
# If the mongo_ha gem has been loaded, then the connection to mongo is
|
32
|
-
# automatically re-established and the job will resume anytime a
|
33
|
-
# Mongo connection failure occurs.
|
34
|
-
#
|
35
31
|
# Thread-safe, can be called by multiple threads at the same time
|
36
32
|
def rocket_job_work(worker, re_raise_exceptions = false)
|
37
33
|
raise "Job must be started before calling #rocket_job_work" unless running?
|
@@ -50,7 +46,7 @@ module RocketJob
|
|
50
46
|
next if slice.failed?
|
51
47
|
|
52
48
|
slice.fail_on_exception!(re_raise_exceptions) { rocket_job_process_slice(slice) }
|
53
|
-
elsif record_count && rocket_job_batch_complete?(worker.name)
|
49
|
+
elsif record_count && fail_on_exception!(re_raise_exceptions) { rocket_job_batch_complete?(worker.name) }
|
54
50
|
return false
|
55
51
|
else
|
56
52
|
logger.debug "No more work available for this job"
|
@@ -114,8 +110,6 @@ module RocketJob
|
|
114
110
|
servers
|
115
111
|
end
|
116
112
|
|
117
|
-
private
|
118
|
-
|
119
113
|
def rocket_job_batch_throttled?(slice, worker)
|
120
114
|
filter = self.class.rocket_job_batch_throttles.matching_filter(self, slice)
|
121
115
|
return false unless filter
|
@@ -154,7 +148,7 @@ module RocketJob
|
|
154
148
|
records = slice.records
|
155
149
|
|
156
150
|
# Skip records already processed, if any.
|
157
|
-
#slice.processing_record_number ||= 0
|
151
|
+
# slice.processing_record_number ||= 0
|
158
152
|
# TODO: Must append to existing output slices before this can be enabled.
|
159
153
|
# if !collect_output && (slice.processing_record_number > 1)
|
160
154
|
# records = records[slice.processing_record_number - 1..-1]
|
data/lib/rocket_job/event.rb
CHANGED
@@ -0,0 +1,12 @@
|
|
1
|
+
require "mongoid/fields/validators/macro"
|
2
|
+
require "semantic_logger"
|
3
|
+
module RocketJob
|
4
|
+
module RemoveMongoidWarnings
|
5
|
+
# Remove annoying warnings about Symbols type being deprecated.
|
6
|
+
def validate_options(*params)
|
7
|
+
SemanticLogger.silence(:error) { super(*params) }
|
8
|
+
end
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
12
|
+
::Mongoid::Fields::Validators::Macro.extend(RocketJob::RemoveMongoidWarnings)
|
@@ -25,7 +25,7 @@ module RocketJob
|
|
25
25
|
|
26
26
|
self.destroy_on_complete = false
|
27
27
|
# Number of times to automatically retry the copy. Set to `0` for no retry attempts.
|
28
|
-
self.retry_limit =
|
28
|
+
self.retry_limit = 10
|
29
29
|
|
30
30
|
# File names in IOStreams URL format.
|
31
31
|
field :source_url, type: String, user_editable: true
|
@@ -1,8 +1,5 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
rescue LoadError
|
4
|
-
raise 'RocketJob::Jobs::ReEncrypt::RelationalJob uses ActiveRecord to obtain the database connection, please install the gem "activerecord".'
|
5
|
-
end
|
1
|
+
require "active_record"
|
2
|
+
require "sync_attr"
|
6
3
|
|
7
4
|
# Batch Worker to Re-encrypt all encrypted fields in MySQL that start with `encrypted_`.
|
8
5
|
#
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require "active_support/concern"
|
2
|
+
require "fugit"
|
2
3
|
|
3
4
|
module RocketJob
|
4
5
|
module Plugins
|
@@ -17,7 +18,9 @@ module RocketJob
|
|
17
18
|
|
18
19
|
field :cron_schedule, type: String, class_attribute: true, user_editable: true, copy_on_restart: true
|
19
20
|
|
20
|
-
|
21
|
+
validates_each :cron_schedule do |record, attr, value|
|
22
|
+
record.errors.add(attr, "Invalid cron_schedule: #{value.inspect}") if value && !Fugit::Cron.new(value)
|
23
|
+
end
|
21
24
|
before_save :rocket_job_cron_set_run_at
|
22
25
|
|
23
26
|
private
|
@@ -42,30 +45,10 @@ module RocketJob
|
|
42
45
|
end
|
43
46
|
end
|
44
47
|
|
45
|
-
# Returns [Time] the next time this job will be scheduled to run at.
|
46
|
-
#
|
47
|
-
# Parameters
|
48
|
-
# time: [Time]
|
49
|
-
# The next time as of this time.
|
50
|
-
# Default: Time.now
|
51
|
-
def rocket_job_cron_next_time(time = Time.now)
|
52
|
-
RocketJob::Plugins::Rufus::CronLine.new(cron_schedule).next_time(time)
|
53
|
-
end
|
54
|
-
|
55
|
-
private
|
56
|
-
|
57
48
|
def rocket_job_cron_set_run_at
|
58
|
-
return
|
59
|
-
|
60
|
-
self.run_at = rocket_job_cron_next_time if cron_schedule_changed? && !run_at_changed?
|
61
|
-
end
|
62
|
-
|
63
|
-
def rocket_job_cron_valid
|
64
|
-
return unless cron_schedule
|
49
|
+
return if cron_schedule.nil? || !(cron_schedule_changed? && !run_at_changed?)
|
65
50
|
|
66
|
-
|
67
|
-
rescue ArgumentError => e
|
68
|
-
errors.add(:cron_schedule, e.message)
|
51
|
+
self.run_at = Fugit::Cron.new(cron_schedule).next_time.to_utc_time
|
69
52
|
end
|
70
53
|
end
|
71
54
|
end
|
@@ -7,7 +7,7 @@ module RocketJob
|
|
7
7
|
#
|
8
8
|
# Example:
|
9
9
|
# # Do not run this job when the MySQL slave delay exceeds 5 minutes.
|
10
|
-
# class MyJob < RocketJob
|
10
|
+
# class MyJob < RocketJob::Job
|
11
11
|
# # Define a custom mysql throttle
|
12
12
|
# # Prevents all jobs of this class from running on the current server.
|
13
13
|
# define_throttle :mysql_throttle_exceeded?
|
@@ -6,7 +6,7 @@ module RocketJob
|
|
6
6
|
# Throttle the number of jobs of a specific class that are processed at the same time.
|
7
7
|
#
|
8
8
|
# Example:
|
9
|
-
# class MyJob < RocketJob
|
9
|
+
# class MyJob < RocketJob::Job
|
10
10
|
# # Maximum number of jobs of this class to process at the same time.
|
11
11
|
# self.throttle_running_jobs = 25
|
12
12
|
#
|
@@ -73,12 +73,13 @@ module RocketJob
|
|
73
73
|
if failed? || !may_fail?
|
74
74
|
self.exception = JobException.from_exception(e)
|
75
75
|
exception.worker_name = worker_name
|
76
|
-
save! unless new_record? || destroyed?
|
77
|
-
elsif new_record? || destroyed?
|
78
|
-
fail(worker_name, e)
|
79
76
|
else
|
80
|
-
fail
|
77
|
+
fail(worker_name, e)
|
81
78
|
end
|
79
|
+
|
80
|
+
# Prevent validation failures from failing the job
|
81
|
+
save(validate: false) unless new_record? || destroyed?
|
82
|
+
|
82
83
|
raise e if re_raise_exceptions
|
83
84
|
end
|
84
85
|
end
|