rocketjob 5.3.3 → 6.0.0.rc1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +19 -5
- data/bin/rocketjob_batch_perf +1 -1
- data/bin/rocketjob_perf +1 -1
- data/lib/rocket_job/batch.rb +3 -0
- data/lib/rocket_job/batch/categories.rb +338 -0
- data/lib/rocket_job/batch/io.rb +132 -69
- data/lib/rocket_job/batch/model.rb +20 -68
- data/lib/rocket_job/batch/performance.rb +20 -8
- data/lib/rocket_job/batch/statistics.rb +35 -13
- data/lib/rocket_job/batch/tabular.rb +2 -0
- data/lib/rocket_job/batch/tabular/input.rb +8 -6
- data/lib/rocket_job/batch/tabular/output.rb +4 -2
- data/lib/rocket_job/batch/throttle_running_workers.rb +8 -17
- data/lib/rocket_job/batch/worker.rb +27 -24
- data/lib/rocket_job/category/base.rb +78 -0
- data/lib/rocket_job/category/input.rb +110 -0
- data/lib/rocket_job/category/output.rb +25 -0
- data/lib/rocket_job/cli.rb +24 -16
- data/lib/rocket_job/dirmon_entry.rb +22 -12
- data/lib/rocket_job/event.rb +1 -1
- data/lib/rocket_job/extensions/iostreams/path.rb +32 -0
- data/lib/rocket_job/extensions/mongoid/factory.rb +4 -12
- data/lib/rocket_job/extensions/mongoid/stringified_symbol.rb +50 -0
- data/lib/rocket_job/extensions/psych/yaml_tree.rb +8 -0
- data/lib/rocket_job/jobs/dirmon_job.rb +1 -1
- data/lib/rocket_job/jobs/housekeeping_job.rb +7 -7
- data/lib/rocket_job/jobs/on_demand_batch_job.rb +15 -6
- data/lib/rocket_job/jobs/on_demand_job.rb +1 -2
- data/lib/rocket_job/jobs/performance_job.rb +3 -1
- data/lib/rocket_job/jobs/re_encrypt/relational_job.rb +5 -4
- data/lib/rocket_job/jobs/upload_file_job.rb +47 -10
- data/lib/rocket_job/lookup_collection.rb +68 -0
- data/lib/rocket_job/plugins/job/model.rb +25 -50
- data/lib/rocket_job/plugins/job/throttle_running_jobs.rb +12 -4
- data/lib/rocket_job/plugins/job/worker.rb +2 -7
- data/lib/rocket_job/plugins/restart.rb +12 -5
- data/lib/rocket_job/plugins/state_machine.rb +2 -1
- data/lib/rocket_job/ractor_worker.rb +42 -0
- data/lib/rocket_job/server/model.rb +1 -1
- data/lib/rocket_job/sliced.rb +36 -0
- data/lib/rocket_job/sliced/bzip2_output_slice.rb +43 -0
- data/lib/rocket_job/sliced/input.rb +4 -4
- data/lib/rocket_job/sliced/slice.rb +11 -13
- data/lib/rocket_job/sliced/slices.rb +20 -2
- data/lib/rocket_job/sliced/writer/output.rb +33 -44
- data/lib/rocket_job/subscribers/server.rb +1 -1
- data/lib/rocket_job/thread_worker.rb +46 -0
- data/lib/rocket_job/version.rb +1 -1
- data/lib/rocket_job/worker.rb +21 -55
- data/lib/rocket_job/worker_pool.rb +5 -7
- data/lib/rocketjob.rb +52 -59
- metadata +43 -33
- data/lib/rocket_job/extensions/mongoid/remove_warnings.rb +0 -12
- data/lib/rocket_job/jobs/on_demand_batch_tabular_job.rb +0 -28
@@ -29,20 +29,28 @@ module RocketJob
|
|
29
29
|
class_attribute :throttle_running_jobs
|
30
30
|
self.throttle_running_jobs = nil
|
31
31
|
|
32
|
+
# Allow jobs to be throttled by group name instead of the job class name.
|
33
|
+
field :throttle_group, type: String, class_attribute: true, user_editable: true, copy_on_restart: true
|
34
|
+
|
32
35
|
define_throttle :throttle_running_jobs_exceeded?
|
33
36
|
end
|
34
37
|
|
35
38
|
private
|
36
39
|
|
37
|
-
# Returns [
|
40
|
+
# Returns [true|false] whether the throttle for this job has been exceeded
|
38
41
|
def throttle_running_jobs_exceeded?
|
39
|
-
return unless throttle_running_jobs&.positive?
|
42
|
+
return false unless throttle_running_jobs&.positive?
|
40
43
|
|
41
|
-
# Cannot use this class since it will include instances of parent job classes.
|
42
44
|
RocketJob::Job.with(read: {mode: :primary}) do |conn|
|
43
|
-
|
45
|
+
query = throttle_running_jobs_base_query
|
46
|
+
throttle_group ? query["throttle_group"] = throttle_group : query["_type"] = self.class.name
|
47
|
+
conn.running.where(query).count >= throttle_running_jobs
|
44
48
|
end
|
45
49
|
end
|
50
|
+
|
51
|
+
def throttle_running_jobs_base_query
|
52
|
+
{:id.ne => id}
|
53
|
+
end
|
46
54
|
end
|
47
55
|
end
|
48
56
|
end
|
@@ -48,11 +48,11 @@ module RocketJob
|
|
48
48
|
def perform_now
|
49
49
|
raise(::Mongoid::Errors::Validations, self) unless valid?
|
50
50
|
|
51
|
-
worker = RocketJob::Worker.new
|
51
|
+
worker = RocketJob::Worker.new
|
52
52
|
start if may_start?
|
53
53
|
# Re-Raise exceptions
|
54
54
|
rocket_job_work(worker, true) if running?
|
55
|
-
|
55
|
+
@rocket_job_output
|
56
56
|
end
|
57
57
|
|
58
58
|
def perform(*)
|
@@ -106,11 +106,6 @@ module RocketJob
|
|
106
106
|
end
|
107
107
|
end
|
108
108
|
|
109
|
-
if collect_output?
|
110
|
-
# Result must be a Hash, if not put it in a Hash
|
111
|
-
self.result = @rocket_job_output.is_a?(Hash) ? @rocket_job_output : {"result" => @rocket_job_output}
|
112
|
-
end
|
113
|
-
|
114
109
|
if new_record? || destroyed?
|
115
110
|
complete if may_complete?
|
116
111
|
else
|
@@ -91,8 +91,16 @@ module RocketJob
|
|
91
91
|
logger.info("Job has expired. Not creating a new instance.")
|
92
92
|
return
|
93
93
|
end
|
94
|
-
|
95
|
-
|
94
|
+
job_attrs =
|
95
|
+
rocket_job_restart_attributes.each_with_object({}) { |attr, attrs| attrs[attr] = send(attr) }
|
96
|
+
job = self.class.new(job_attrs)
|
97
|
+
|
98
|
+
# Copy across input and output categories to new scheduled job so that all of the
|
99
|
+
# settings are remembered between instances. Example: slice_size
|
100
|
+
job.input_categories = input_categories if respond_to?(:input_categories)
|
101
|
+
job.output_categories = output_categories if respond_to?(:output_categories)
|
102
|
+
|
103
|
+
rocket_job_restart_save(job)
|
96
104
|
end
|
97
105
|
|
98
106
|
def rocket_job_restart_abort
|
@@ -101,11 +109,10 @@ module RocketJob
|
|
101
109
|
|
102
110
|
# Allow Singleton to prevent the creation of a new job if one is already running
|
103
111
|
# Retry since the delete may not have persisted to disk yet.
|
104
|
-
def
|
112
|
+
def rocket_job_restart_save(job, retry_limit = 10, sleep_interval = 0.5)
|
105
113
|
count = 0
|
106
114
|
while count < retry_limit
|
107
|
-
job
|
108
|
-
if job.persisted?
|
115
|
+
if job.save
|
109
116
|
logger.info("Created a new job instance: #{job.id}")
|
110
117
|
return true
|
111
118
|
else
|
@@ -51,7 +51,8 @@ module RocketJob
|
|
51
51
|
# Validate methods are any of Symbol String Proc
|
52
52
|
methods.each do |method|
|
53
53
|
unless method.is_a?(Symbol) || method.is_a?(String)
|
54
|
-
raise(ArgumentError,
|
54
|
+
raise(ArgumentError,
|
55
|
+
"#{action}_#{event_name} currently does not support any options. Only Symbol and String method names can be supplied.")
|
55
56
|
end
|
56
57
|
end
|
57
58
|
methods
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module RocketJob
|
2
|
+
# Run each worker in its own "Ractor".
|
3
|
+
class RactorWorker < Worker
|
4
|
+
attr_reader :thread
|
5
|
+
|
6
|
+
def initialize(id:, server_name:)
|
7
|
+
super(id: id, server_name: server_name)
|
8
|
+
@shutdown = Concurrent::Event.new
|
9
|
+
@thread = Ractor.new(name: "rocketjob-#{id}") { run }
|
10
|
+
end
|
11
|
+
|
12
|
+
def alive?
|
13
|
+
@thread.alive?
|
14
|
+
end
|
15
|
+
|
16
|
+
def backtrace
|
17
|
+
@thread.backtrace
|
18
|
+
end
|
19
|
+
|
20
|
+
def join(*args)
|
21
|
+
@thread.join(*args)
|
22
|
+
end
|
23
|
+
|
24
|
+
# Send each active worker the RocketJob::ShutdownException so that it stops processing immediately.
|
25
|
+
def kill
|
26
|
+
@thread.raise(Shutdown, "Shutdown due to kill request for worker: #{name}") if @thread.alive?
|
27
|
+
end
|
28
|
+
|
29
|
+
def shutdown?
|
30
|
+
@shutdown.set?
|
31
|
+
end
|
32
|
+
|
33
|
+
def shutdown!
|
34
|
+
@shutdown.set
|
35
|
+
end
|
36
|
+
|
37
|
+
# Returns [true|false] whether the shutdown indicator was set
|
38
|
+
def wait_for_shutdown?(timeout = nil)
|
39
|
+
@shutdown.wait(timeout)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
@@ -28,7 +28,7 @@ module RocketJob
|
|
28
28
|
|
29
29
|
# Current state
|
30
30
|
# Internal use only. Do not set this field directly
|
31
|
-
field :state, type:
|
31
|
+
field :state, type: Mongoid::StringifiedSymbol, default: :starting
|
32
32
|
|
33
33
|
index({name: 1}, background: true, unique: true)
|
34
34
|
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module RocketJob
|
2
|
+
module Sliced
|
3
|
+
autoload :BZip2OutputSlice, "rocket_job/sliced/bzip2_output_slice"
|
4
|
+
autoload :CompressedSlice, "rocket_job/sliced/compressed_slice"
|
5
|
+
autoload :EncryptedSlice, "rocket_job/sliced/encrypted_slice"
|
6
|
+
autoload :Input, "rocket_job/sliced/input"
|
7
|
+
autoload :Output, "rocket_job/sliced/output"
|
8
|
+
autoload :Slice, "rocket_job/sliced/slice"
|
9
|
+
autoload :Slices, "rocket_job/sliced/slices"
|
10
|
+
autoload :Store, "rocket_job/sliced/store"
|
11
|
+
|
12
|
+
module Writer
|
13
|
+
autoload :Input, "rocket_job/sliced/writer/input"
|
14
|
+
autoload :Output, "rocket_job/sliced/writer/output"
|
15
|
+
end
|
16
|
+
|
17
|
+
# Returns [RocketJob::Sliced::Slices] for the relevant direction and category.
|
18
|
+
def self.factory(direction, category, job)
|
19
|
+
collection_name = "rocket_job.#{direction}s.#{job.id}"
|
20
|
+
collection_name << ".#{category.name}" unless category.name == :main
|
21
|
+
|
22
|
+
case direction
|
23
|
+
when :input
|
24
|
+
RocketJob::Sliced::Input.new(
|
25
|
+
collection_name: collection_name,
|
26
|
+
slice_class: category.serializer_class,
|
27
|
+
slice_size: category.slice_size
|
28
|
+
)
|
29
|
+
when :output
|
30
|
+
RocketJob::Sliced::Output.new(collection_name: collection_name, slice_class: category.serializer_class)
|
31
|
+
else
|
32
|
+
raise(ArgumentError, "Unknown direction: #{direction.inspect}")
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
module RocketJob
|
2
|
+
module Sliced
|
3
|
+
# This is a specialized output serializer that renders each output slice as a single BZip2 compressed stream.
|
4
|
+
# BZip2 allows multiple output streams to be written into a single BZip2 file.
|
5
|
+
#
|
6
|
+
# Notes:
|
7
|
+
# * The `bzip2` linux command line utility supports multiple embedded BZip2 streams,
|
8
|
+
# but some other custom implementations may not. They may only read the first slice and stop.
|
9
|
+
# * It is only designed for use on output collections.
|
10
|
+
#
|
11
|
+
# To download the output when using this slice:
|
12
|
+
#
|
13
|
+
# # Download the binary BZip2 streams into a single file
|
14
|
+
# IOStreams.path(output_file_name).stream(:none).writer do |io|
|
15
|
+
# job.download { |slice| io << slice[:binary] }
|
16
|
+
# end
|
17
|
+
class BZip2OutputSlice < ::RocketJob::Sliced::Slice
|
18
|
+
# This is a specialized binary slice for creating binary data from each slice
|
19
|
+
# that must be downloaded as-is into output files.
|
20
|
+
def self.binary?
|
21
|
+
true
|
22
|
+
end
|
23
|
+
|
24
|
+
private
|
25
|
+
|
26
|
+
def parse_records
|
27
|
+
records = attributes.delete("records")
|
28
|
+
|
29
|
+
# Convert BSON::Binary to a string
|
30
|
+
@records = [{binary: records.data}]
|
31
|
+
end
|
32
|
+
|
33
|
+
def serialize_records
|
34
|
+
return [] if @records.nil? || @records.empty?
|
35
|
+
|
36
|
+
lines = records.to_a.join("\n") + "\n"
|
37
|
+
s = StringIO.new
|
38
|
+
IOStreams::Bzip2::Writer.stream(s) { |io| io.write(lines) }
|
39
|
+
BSON::Binary.new(s.string)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
@@ -5,7 +5,7 @@ module RocketJob
|
|
5
5
|
# Create indexes before uploading
|
6
6
|
create_indexes
|
7
7
|
Writer::Input.collect(self, on_first: on_first, &block)
|
8
|
-
rescue
|
8
|
+
rescue Exception => e
|
9
9
|
drop
|
10
10
|
raise(e)
|
11
11
|
end
|
@@ -73,7 +73,7 @@ module RocketJob
|
|
73
73
|
count += 1
|
74
74
|
end
|
75
75
|
count
|
76
|
-
rescue
|
76
|
+
rescue Exception => e
|
77
77
|
drop
|
78
78
|
raise(e)
|
79
79
|
end
|
@@ -91,7 +91,7 @@ module RocketJob
|
|
91
91
|
count += 1
|
92
92
|
end
|
93
93
|
count
|
94
|
-
rescue
|
94
|
+
rescue Exception => e
|
95
95
|
drop
|
96
96
|
raise(e)
|
97
97
|
end
|
@@ -139,7 +139,7 @@ module RocketJob
|
|
139
139
|
document = all.queued.
|
140
140
|
sort("_id" => 1).
|
141
141
|
find_one_and_update(
|
142
|
-
{"$set" => {worker_name: worker_name, state:
|
142
|
+
{"$set" => {worker_name: worker_name, state: "running", started_at: Time.now}},
|
143
143
|
return_document: :after
|
144
144
|
)
|
145
145
|
document.collection_name = collection_name if document
|
@@ -33,7 +33,7 @@ module RocketJob
|
|
33
33
|
#
|
34
34
|
|
35
35
|
# Current state, as set by AASM
|
36
|
-
field :state, type:
|
36
|
+
field :state, type: Mongoid::StringifiedSymbol, default: :queued
|
37
37
|
|
38
38
|
# When processing started on this slice
|
39
39
|
field :started_at, type: Time
|
@@ -94,6 +94,12 @@ module RocketJob
|
|
94
94
|
end
|
95
95
|
end
|
96
96
|
|
97
|
+
# Returns whether this is a specialized binary slice for creating binary data from each slice
|
98
|
+
# that is then just downloaded as-is into output files.
|
99
|
+
def self.binary?
|
100
|
+
false
|
101
|
+
end
|
102
|
+
|
97
103
|
# `records` array has special handling so that it can be modified in place instead of having
|
98
104
|
# to replace the entire array every time. For example, when appending lines with `<<`.
|
99
105
|
def records
|
@@ -133,18 +139,10 @@ module RocketJob
|
|
133
139
|
|
134
140
|
# Returns [Hash] the slice as a Hash for storage purposes
|
135
141
|
# Compresses / Encrypts the slice according to the job setting
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
attrs
|
141
|
-
end
|
142
|
-
else
|
143
|
-
def as_document
|
144
|
-
attrs = super
|
145
|
-
attrs["records"] = serialize_records if @records
|
146
|
-
attrs
|
147
|
-
end
|
142
|
+
def as_attributes
|
143
|
+
attrs = super
|
144
|
+
attrs["records"] = serialize_records if @records
|
145
|
+
attrs
|
148
146
|
end
|
149
147
|
|
150
148
|
def inspect
|
@@ -42,10 +42,16 @@ module RocketJob
|
|
42
42
|
slice
|
43
43
|
end
|
44
44
|
|
45
|
+
# Returns whether this collection contains specialized binary slices for creating binary data from each slice
|
46
|
+
# that is then just downloaded as-is into output files.
|
47
|
+
def binary?
|
48
|
+
slice_class.binary?
|
49
|
+
end
|
50
|
+
|
45
51
|
# Returns output slices in the order of their id
|
46
52
|
# which is usually the order in which they were written.
|
47
|
-
def each
|
48
|
-
all.sort(id: 1).each
|
53
|
+
def each(&block)
|
54
|
+
all.sort(id: 1).each(&block)
|
49
55
|
end
|
50
56
|
|
51
57
|
# Insert a new slice into the collection
|
@@ -90,6 +96,17 @@ module RocketJob
|
|
90
96
|
slice
|
91
97
|
end
|
92
98
|
|
99
|
+
# Append to an existing slice if already present
|
100
|
+
def append(slice, input_slice)
|
101
|
+
existing_slice = all.where(id: input_slice.id).first
|
102
|
+
return insert(slice, input_slice) unless existing_slice
|
103
|
+
|
104
|
+
extra_records = slice.is_a?(Slice) ? slice.records : slice
|
105
|
+
existing_slice.records = existing_slice.records + extra_records
|
106
|
+
existing_slice.save!
|
107
|
+
existing_slice
|
108
|
+
end
|
109
|
+
|
93
110
|
alias << insert
|
94
111
|
|
95
112
|
# Index for find_and_modify only if it is not already present
|
@@ -133,6 +150,7 @@ module RocketJob
|
|
133
150
|
def last
|
134
151
|
all.sort("_id" => -1).first
|
135
152
|
end
|
153
|
+
|
136
154
|
# rubocop:enable Style/RedundantSort
|
137
155
|
|
138
156
|
# Returns [Array<Struct>] grouped exceptions by class name,
|
@@ -1,30 +1,37 @@
|
|
1
1
|
module RocketJob
|
2
2
|
module Sliced
|
3
3
|
module Writer
|
4
|
-
|
5
|
-
class Output
|
4
|
+
class Null
|
6
5
|
attr_reader :job, :categorized_records
|
7
|
-
attr_accessor :input_slice
|
8
|
-
|
9
|
-
# Collect output results and write to output collections
|
10
|
-
# iff job is collecting output
|
11
|
-
# Notes:
|
12
|
-
# Nothing is saved if an exception is raised inside the block
|
13
|
-
def self.collect(job, input_slice = nil)
|
14
|
-
if job.collect_output?
|
15
|
-
writer = new(job, input_slice)
|
16
|
-
yield(writer)
|
17
|
-
writer.close
|
18
|
-
else
|
19
|
-
writer = NullWriter.new(job, input_slice)
|
20
|
-
yield(writer)
|
21
|
-
end
|
22
|
-
end
|
6
|
+
attr_accessor :input_slice, :append
|
23
7
|
|
24
|
-
def initialize(job, input_slice
|
8
|
+
def initialize(job, input_slice: nil, append: false)
|
25
9
|
@job = job
|
26
10
|
@input_slice = input_slice
|
27
11
|
@categorized_records = {}
|
12
|
+
@append = append
|
13
|
+
end
|
14
|
+
|
15
|
+
def <<(_)
|
16
|
+
# noop
|
17
|
+
end
|
18
|
+
|
19
|
+
def close
|
20
|
+
# noop
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
# Internal class for writing categorized results into output slices
|
25
|
+
class Output < Null
|
26
|
+
# Collect output results and write to output collections
|
27
|
+
# iff job is collecting output
|
28
|
+
# Notes:
|
29
|
+
# Partial slices are saved when an exception is raised inside the block
|
30
|
+
def self.collect(job, **args)
|
31
|
+
writer = job.output_categories.present? ? new(job, **args) : Null.new(job, **args)
|
32
|
+
yield(writer)
|
33
|
+
ensure
|
34
|
+
writer&.close
|
28
35
|
end
|
29
36
|
|
30
37
|
# Writes the supplied result, RocketJob::Batch::Result or RocketJob::Batch::Results
|
@@ -40,7 +47,8 @@ module RocketJob
|
|
40
47
|
# Write categorized results to their relevant collections
|
41
48
|
def close
|
42
49
|
categorized_records.each_pair do |category, results|
|
43
|
-
job.output(category)
|
50
|
+
collection = job.output(category)
|
51
|
+
append ? collection.append(results, input_slice) : collection.insert(results, input_slice)
|
44
52
|
end
|
45
53
|
end
|
46
54
|
|
@@ -48,35 +56,16 @@ module RocketJob
|
|
48
56
|
|
49
57
|
# Stores the categorized result from one result
|
50
58
|
def extract_categorized_result(result)
|
51
|
-
|
52
|
-
value
|
59
|
+
named_category = :main
|
60
|
+
value = result
|
53
61
|
if result.is_a?(RocketJob::Batch::Result)
|
54
|
-
|
55
|
-
value
|
56
|
-
raise(ArgumentError, "Invalid RocketJob Output Category: #{category}") if job.output_categories.exclude?(category)
|
62
|
+
named_category = result.category
|
63
|
+
value = result.value
|
57
64
|
end
|
58
|
-
(categorized_records[
|
65
|
+
(categorized_records[named_category] ||= []) << value unless value.nil? && !job.output_category(named_category).nils
|
59
66
|
end
|
60
67
|
end
|
61
68
|
|
62
|
-
class NullWriter
|
63
|
-
attr_reader :job, :categorized_records
|
64
|
-
attr_accessor :input_slice
|
65
|
-
|
66
|
-
def initialize(job, input_slice = nil)
|
67
|
-
@job = job
|
68
|
-
@input_slice = input_slice
|
69
|
-
@categorized_records = {}
|
70
|
-
end
|
71
|
-
|
72
|
-
def <<(_)
|
73
|
-
# noop
|
74
|
-
end
|
75
|
-
|
76
|
-
def close
|
77
|
-
# noop
|
78
|
-
end
|
79
|
-
end
|
80
69
|
end
|
81
70
|
end
|
82
71
|
end
|