rocketjob 3.5.2 → 4.0.0
- checksums.yaml +4 -4
- data/README.md +63 -1
- data/bin/rocketjob +1 -0
- data/bin/rocketjob_batch_perf +11 -0
- data/lib/rocket_job/batch.rb +32 -0
- data/lib/rocket_job/batch/callbacks.rb +40 -0
- data/lib/rocket_job/batch/io.rb +154 -0
- data/lib/rocket_job/batch/logger.rb +57 -0
- data/lib/rocket_job/batch/lower_priority.rb +54 -0
- data/lib/rocket_job/batch/model.rb +157 -0
- data/lib/rocket_job/batch/performance.rb +99 -0
- data/lib/rocket_job/batch/result.rb +8 -0
- data/lib/rocket_job/batch/results.rb +9 -0
- data/lib/rocket_job/batch/state_machine.rb +102 -0
- data/lib/rocket_job/batch/statistics.rb +88 -0
- data/lib/rocket_job/batch/tabular.rb +56 -0
- data/lib/rocket_job/batch/tabular/input.rb +123 -0
- data/lib/rocket_job/batch/tabular/output.rb +59 -0
- data/lib/rocket_job/batch/throttle.rb +91 -0
- data/lib/rocket_job/batch/throttle_running_slices.rb +53 -0
- data/lib/rocket_job/batch/worker.rb +288 -0
- data/lib/rocket_job/cli.rb +29 -7
- data/lib/rocket_job/config.rb +1 -1
- data/lib/rocket_job/extensions/mongoid/clients/options.rb +37 -0
- data/lib/rocket_job/extensions/mongoid/contextual/mongo.rb +17 -0
- data/lib/rocket_job/extensions/mongoid/factory.rb +4 -4
- data/lib/rocket_job/extensions/mongoid_5/clients/options.rb +38 -0
- data/lib/rocket_job/extensions/mongoid_5/contextual/mongo.rb +64 -0
- data/lib/rocket_job/extensions/mongoid_5/factory.rb +13 -0
- data/lib/rocket_job/jobs/on_demand_batch_job.rb +127 -0
- data/lib/rocket_job/jobs/performance_job.rb +18 -0
- data/lib/rocket_job/jobs/upload_file_job.rb +2 -5
- data/lib/rocket_job/plugins/document.rb +2 -8
- data/lib/rocket_job/plugins/job/persistence.rb +6 -4
- data/lib/rocket_job/plugins/job/throttle.rb +3 -6
- data/lib/rocket_job/plugins/job/worker.rb +2 -2
- data/lib/rocket_job/server.rb +14 -3
- data/lib/rocket_job/sliced/input.rb +336 -0
- data/lib/rocket_job/sliced/output.rb +99 -0
- data/lib/rocket_job/sliced/slice.rb +166 -0
- data/lib/rocket_job/sliced/slices.rb +166 -0
- data/lib/rocket_job/sliced/writer/input.rb +60 -0
- data/lib/rocket_job/sliced/writer/output.rb +82 -0
- data/lib/rocket_job/version.rb +1 -1
- data/lib/rocket_job/worker.rb +2 -2
- data/lib/rocketjob.rb +28 -0
- metadata +51 -62
- data/test/config/database.yml +0 -5
- data/test/config/mongoid.yml +0 -88
- data/test/config_test.rb +0 -10
- data/test/dirmon_entry_test.rb +0 -313
- data/test/dirmon_job_test.rb +0 -216
- data/test/files/text.txt +0 -3
- data/test/job_test.rb +0 -71
- data/test/jobs/housekeeping_job_test.rb +0 -102
- data/test/jobs/on_demand_job_test.rb +0 -59
- data/test/jobs/upload_file_job_test.rb +0 -107
- data/test/plugins/cron_test.rb +0 -166
- data/test/plugins/job/callbacks_test.rb +0 -166
- data/test/plugins/job/defaults_test.rb +0 -53
- data/test/plugins/job/logger_test.rb +0 -56
- data/test/plugins/job/model_test.rb +0 -94
- data/test/plugins/job/persistence_test.rb +0 -94
- data/test/plugins/job/state_machine_test.rb +0 -116
- data/test/plugins/job/throttle_test.rb +0 -111
- data/test/plugins/job/worker_test.rb +0 -199
- data/test/plugins/processing_window_test.rb +0 -109
- data/test/plugins/restart_test.rb +0 -193
- data/test/plugins/retry_test.rb +0 -88
- data/test/plugins/singleton_test.rb +0 -92
- data/test/plugins/state_machine_event_callbacks_test.rb +0 -102
- data/test/plugins/state_machine_test.rb +0 -67
- data/test/plugins/transaction_test.rb +0 -84
- data/test/test_db.sqlite3 +0 -0
- data/test/test_helper.rb +0 -17

data/lib/rocket_job/batch/tabular.rb
@@ -0,0 +1,56 @@
+module RocketJob
+  module Batch
+    # Format output results.
+    #
+    # Takes Batch::Results, Batch::Result, Hash, Array, or String and renders it for output.
+    #
+    # Example:
+    #
+    #   tabular = Tabular.new(
+    #     main:       IOStreams::Tabular.new(columns: main_file_headers, format: tabular_output_format),
+    #     exceptions: IOStreams::Tabular.new(columns: exception_file_headers, format: tabular_output_format)
+    #   )
+    #
+    #   tabular.render(row)
+    class Tabular
+      autoload :Input, 'rocket_job/batch/tabular/input'
+      autoload :Output, 'rocket_job/batch/tabular/output'
+
+      def initialize(map)
+        @map = map
+      end
+
+      def [](category = :main)
+        @map[category] || raise("No tabular map defined for category: #{category.inspect}")
+      end
+
+      # Iterate over responses and format using Tabular
+      def render(row, category = :main)
+        if row.is_a?(Batch::Results)
+          results = Batch::Results.new
+          row.each { |result| results << render(result) }
+          results
+        elsif row.is_a?(Batch::Result)
+          row.value = self[row.category].render(row.value)
+          row
+        elsif row.blank?
+          nil
+        else
+          self[category].render(row)
+        end
+      end
+
+      def render_header(category = :main)
+        self[category].render_header
+      end
+
+      def requires_header?(category = :main)
+        self[category].requires_header?
+      end
+
+      def header?(category = :main)
+        self[category].header?
+      end
+    end
+  end
+end
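
Usage sketch for the new RocketJob::Batch::Tabular class above. This is a minimal illustration, not taken from the gem's documentation; `main_headers` and the sample row are made up, and the rendered strings assume the default CSV behaviour of IOStreams::Tabular:

    require 'rocketjob'

    # Illustrative column names only.
    main_headers = %w[first_name last_name]

    tabular = RocketJob::Batch::Tabular.new(
      main: IOStreams::Tabular.new(columns: main_headers, format: :csv)
    )

    tabular.render_header          # => "first_name,last_name" (assumed CSV rendering)
    tabular.render(%w[Jack Jones]) # => "Jack,Jones"
    tabular.render(nil)            # => nil, blank rows are skipped per Tabular#render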

data/lib/rocket_job/batch/tabular/input.rb
@@ -0,0 +1,123 @@
+require 'active_support/concern'
+
+module RocketJob
+  module Batch
+    class Tabular
+      # For the simple case where all `input_categories` have the same format,
+      # If multiple input categories are used with different formats, then use IOStreams::Tabular directly
+      # instead of this plugin.
+      module Input
+        extend ActiveSupport::Concern
+
+        included do
+          field :tabular_input_header, type: Array, class_attribute: true, user_editable: true
+          field :tabular_input_format, type: Symbol, default: :csv, class_attribute: true, user_editable: true
+
+          # tabular_input_mode: [:line | :row | :record]
+          #   :line
+          #     Uploads the file a line (String) at a time for processing by workers.
+          #   :row
+          #     Parses each line from the file as an Array and uploads each array for processing by workers.
+          #   :record
+          #     Parses each line from the file into a Hash and uploads each hash for processing by workers.
+          # See IOStream#each_line, IOStream#each_row, and IOStream#each_record.
+          field :tabular_input_mode, type: Symbol, default: :line, class_attribute: true, user_editable: true, copy_on_restart: true
+
+          validates_inclusion_of :tabular_input_format, in: IOStreams::Tabular.registered_formats
+          validates_inclusion_of :tabular_input_mode, in: %i[line row record]
+          validate :tabular_input_header_present
+
+          class_attribute :tabular_input_white_list
+          class_attribute :tabular_input_required
+          class_attribute :tabular_input_skip_unknown
+
+          # Cleanse all uploaded data by removing non-printable characters
+          # and any characters that cannot be converted to UTF-8
+          class_attribute :tabular_input_type
+
+          self.tabular_input_white_list   = nil
+          self.tabular_input_required     = nil
+          self.tabular_input_skip_unknown = true
+          self.tabular_input_type         = :text
+
+          before_perform :tabular_input_render
+        end
+
+        # Extract the header line during the upload.
+        #
+        # Overrides: RocketJob::Batch::IO#upload
+        #
+        # Notes:
+        # - When supplying a block the header must be set manually
+        def upload(file_name_or_io = nil, **args, &block)
+          if tabular_input_type == :text
+            args[:encoding]       = 'UTF-8'
+            args[:encode_cleaner] = :printable
+            args[:encode_replace] = ''
+          end
+
+          # If an input header is not required, then we don't extract it'
+          return super(file_name_or_io, stream_mode: tabular_input_mode, **args, &block) unless tabular_input.header?
+
+          # If the header is already set then it is not expected in the file
+          if tabular_input_header.present?
+            tabular_input_cleanse_header
+            return super(file_name_or_io, stream_mode: tabular_input_mode, **args, &block)
+          end
+
+          case tabular_input_mode
+          when :line
+            parse_header = -> (line) do
+              tabular_input.parse_header(line)
+              tabular_input_cleanse_header
+              self.tabular_input_header = tabular_input.header.columns
+            end
+            super(file_name_or_io, on_first: parse_header, stream_mode: tabular_input_mode, **args, &block)
+          when :row
+            set_header = -> (row) do
+              tabular_input.header.columns = row
+              tabular_input_cleanse_header
+              self.tabular_input_header = tabular_input.header.columns
+            end
+            super(file_name_or_io, on_first: set_header, stream_mode: tabular_input_mode, **args, &block)
+          when :record
+            super(file_name_or_io, stream_mode: tabular_input_mode, **args, &block)
+          else
+            raise(ArgumentError, "Invalid tabular_input_mode: #{stream_mode.inspect}")
+          end
+        end
+
+        private
+
+        # Shared instance used for this slice, by a single worker (thread)
+        def tabular_input
+          @tabular_input ||= IOStreams::Tabular.new(
+            columns:          tabular_input_header,
+            allowed_columns:  tabular_input_white_list,
+            required_columns: tabular_input_required,
+            skip_unknown:     tabular_input_skip_unknown,
+            format:           tabular_input_format
+          )
+        end
+
+        def tabular_input_render
+          @rocket_job_input = tabular_input.record_parse(@rocket_job_input) unless tabular_input_header.blank? && tabular_input.header?
+        end
+
+        # Cleanse custom input header if supplied.
+        def tabular_input_cleanse_header
+          ignored_columns = tabular_input.header.cleanse!
+          logger.warn('Stripped out invalid columns from custom header', ignored_columns) unless ignored_columns.empty?
+
+          self.tabular_input_header = tabular_input.header.columns
+        end
+
+        def tabular_input_header_present
+          return if tabular_input_header.present? || !tabular_input.header? || (tabular_input_mode == :record)
+
+          errors.add(:tabular_input_header, "is required when tabular_input_format is #{tabular_input_format.inspect}")
+        end
+      end
+    end
+  end
+end
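
A hedged sketch of a job opting in to RocketJob::Batch::Tabular::Input. The job class, file name, and record handling are illustrative; with the default `tabular_input_mode` of `:line` and `tabular_input_format` of `:csv`, the plugin extracts the header row during `upload` and the `before_perform :tabular_input_render` callback parses each line into a Hash before `perform` receives it:

    require 'rocketjob'

    class ImportJob < RocketJob::Job
      include RocketJob::Batch
      include RocketJob::Batch::Tabular::Input

      def perform(record)
        # record arrives as a Hash, e.g. {"first_name" => "Jack", "last_name" => "Jones"}
        record["first_name"]
      end
    end

    job = ImportJob.new
    job.upload('customers.csv') # Hypothetical file; the header line is consumed during upload
    job.save!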

data/lib/rocket_job/batch/tabular/output.rb
@@ -0,0 +1,59 @@
+require 'active_support/concern'
+
+module RocketJob
+  module Batch
+    class Tabular
+      # For the simple case where all `output_categories` have the same format,
+      # If multiple output categories are used with different formats, then use IOStreams::Tabular directly
+      # instead of this plugin.
+      module Output
+        extend ActiveSupport::Concern
+
+        included do
+          field :tabular_output_header, type: Array, class_attribute: true, user_editable: true, copy_on_restart: true
+          field :tabular_output_format, type: Symbol, default: :csv, class_attribute: true, user_editable: true, copy_on_restart: true
+
+          validates_inclusion_of :tabular_output_format, in: IOStreams::Tabular.registered_formats
+
+          after_perform :tabular_output_render
+        end
+
+        # Clear out cached tabular_output any time header or format is changed.
+        def tabular_output_header=(tabular_output_header)
+          super(tabular_output_header)
+          @tabular_output = nil
+        end
+
+        def tabular_output_format=(tabular_output_format)
+          super(tabular_output_format)
+          @tabular_output = nil
+        end
+
+        # Overrides: `RocketJob::Batch::IO#download` to add the `tabular_output_header`.
+        def download(file_name_or_io = nil, category: :main, **args, &block)
+          # No header required
+          return super(file_name_or_io, category: category, **args, &block) unless tabular_output.requires_header?(category)
+
+          header = tabular_output.render_header(category)
+          super(file_name_or_io, header_line: header, category: category, **args, &block)
+        end
+
+        private
+
+        # Delimited instance used for this slice, by a single worker (thread)
+        def tabular_output
+          @tabular_output ||= Tabular.new(
+            main: IOStreams::Tabular.new(columns: tabular_output_header, format: tabular_output_format)
+          )
+        end
+
+        # Render the output from the perform.
+        def tabular_output_render
+          return unless collect_output?
+
+          @rocket_job_output = tabular_output.render(@rocket_job_output)
+        end
+      end
+    end
+  end
+end
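
A matching sketch for RocketJob::Batch::Tabular::Output, again illustrative rather than lifted from the gem's docs; it assumes the batch `collect_output` setting so that the `after_perform :tabular_output_render` callback has output to render:

    require 'rocketjob'

    class ExportJob < RocketJob::Job
      include RocketJob::Batch
      include RocketJob::Batch::Tabular::Output

      self.collect_output        = true
      self.tabular_output_header = %w[login count] # Illustrative columns

      def perform(record)
        # The returned Hash is rendered to CSV by tabular_output_render
        {'login' => record['login'], 'count' => 1}
      end
    end

    # After the job completes, download writes the rendered header line
    # followed by each slice's rendered output:
    #   job.download('export.csv')   # 'export.csv' is a hypothetical target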

data/lib/rocket_job/batch/throttle.rb
@@ -0,0 +1,91 @@
+require 'active_support/concern'
+
+module RocketJob
+  module Batch
+    # Rocket Job Batch Throttling Framework.
+    #
+    # Example:
+    #   # Do not run any slices for this job when the MySQL slave delay exceeds 5 minutes.
+    #   class MyJob < RocketJob
+    #     include RocketJob::Batch
+    #
+    #     # Define a custom mysql throttle
+    #     # Prevents all slices from this job from running on the current server.
+    #     define_batch_throttle :mysql_throttle_exceeded?
+    #
+    #     def perform(record)
+    #       # ....
+    #     end
+    #
+    #     private
+    #
+    #     # Returns true if the MySQL slave delay exceeds 5 minutes
+    #     def mysql_throttle_exceeded?
+    #       status        = ActiveRecord::Base.connection.connection.select_one('show slave status')
+    #       seconds_delay = Hash(status)['Seconds_Behind_Master'].to_i
+    #       seconds_delay >= 300
+    #     end
+    #   end
+    module Throttle
+      extend ActiveSupport::Concern
+
+      included do
+        class_attribute :rocket_job_batch_throttles
+        self.rocket_job_batch_throttles = []
+      end
+
+      module ClassMethods
+        # Add a new throttle.
+        #
+        # Parameters:
+        #   method_name: [Symbol]
+        #     Name of method to call to evaluate whether a throttle has been exceeded.
+        #     Note: Must return true or false.
+        #   filter: [Symbol|Proc]
+        #     Name of method to call to return the filter when the throttle has been exceeded.
+        #     Or, a block that will return the filter.
+        #     Default: :throttle_filter_class (Throttle all jobs of this class)
+        #
+        # Note: Throttles are executed in the order they are defined.
+        def define_batch_throttle(method_name, filter: :throttle_filter_class)
+          unless filter.is_a?(Symbol) || filter.is_a?(Proc)
+            raise(ArgumentError, "Filter for #{method_name} must be a Symbol or Proc")
+          end
+          if batch_throttle?(method_name)
+            raise(ArgumentError, "Cannot define #{method_name} twice, undefine previous throttle first")
+          end
+
+          self.rocket_job_batch_throttles += [ThrottleDefinition.new(method_name, filter)]
+        end
+
+        # Undefine a previously defined throttle
+        def undefine_batch_throttle(method_name)
+          rocket_job_batch_throttles.delete_if { |throttle| throttle.method_name == method_name }
+        end
+
+        # Has a throttle been defined?
+        def batch_throttle?(method_name)
+          rocket_job_batch_throttles.any? { |throttle| throttle.method_name == method_name }
+        end
+      end
+
+      private
+
+      ThrottleDefinition = Struct.new(:method_name, :filter)
+
+      # Returns the matching filter, or nil if no throttles were triggered.
+      def rocket_job_batch_evaluate_throttles(slice)
+        rocket_job_batch_throttles.each do |throttle|
+          throttle_exceeded = method(throttle.method_name).arity == 0 ? send(throttle.method_name) : send(throttle.method_name, slice)
+          next unless throttle_exceeded
+
+          logger.debug { "Batch Throttle: #{throttle.method_name} has been exceeded. #{self.class.name}:#{id}" }
+          filter = throttle.filter
+          return filter.is_a?(Proc) ? filter.call(self) : send(filter)
+        end
+        nil
+      end
+
+    end
+  end
+end
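
Beyond the Symbol filter shown in the module comment, `define_batch_throttle` also accepts a Proc filter; per `rocket_job_batch_evaluate_throttles` it is called with the job instance and must return a query filter that is merged into the worker's next lookup. A hedged sketch; the throttle method and filter below are made up for illustration:

    require 'rocketjob'

    class NightlyExtractJob < RocketJob::Job
      include RocketJob::Batch

      # Skip only this job instance on the current worker until its next lookup.
      define_batch_throttle :api_saturated?, filter: ->(job) { {:id.nin => [job.id]} }

      def perform(record)
        # ....
      end

      private

      def api_saturated?
        false # Placeholder; replace with a real capacity check
      end
    end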

data/lib/rocket_job/batch/throttle_running_slices.rb
@@ -0,0 +1,53 @@
+require 'active_support/concern'
+
+module RocketJob
+  module Batch
+    # Throttle the number of slices of a specific batch job that are processed at the same time.
+    #
+    # Example:
+    #   class MyJob < RocketJob
+    #     include RocketJob::Batch
+    #
+    #     # Maximum number of slices to process at the same time for each running instance.
+    #     self.throttle_running_slices = 25
+    #
+    #     def perform(record)
+    #       # ....
+    #     end
+    #   end
+    #
+    # It attempts to ensure that the number of workers do not exceed this number.
+    # This is not a hard limit and it is possible for the number of workers to
+    # slightly exceed this value at times. It can also occur that the number of
+    # slices running can drop below this number for a short period.
+    #
+    # This value can be modified while a job is running. The change will be picked
+    # up at the start of processing slices, or after processing a slice and
+    # `re_check_seconds` has been exceeded.
+    #
+    # 0 or nil : No limits in place
+    #
+    # Default: nil
+    module ThrottleRunningSlices
+      extend ActiveSupport::Concern
+
+      included do
+        field :throttle_running_slices, type: Integer, class_attribute: true, user_editable: true, copy_on_restart: true
+
+        validates :throttle_running_slices, numericality: {greater_than_or_equal_to: 0}, allow_nil: true
+
+        define_batch_throttle :throttle_running_slices_exceeded?, filter: :throttle_filter_id
+      end
+
+      private
+
+      # Returns [Boolean] whether the throttle for this job has been exceeded
+      def throttle_running_slices_exceeded?(slice)
+        throttle_running_slices &&
+          (throttle_running_slices != 0) &&
+          (input.running.where(:id.ne => slice.id).count >= throttle_running_slices)
+      end
+
+    end
+  end
+end
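
Since `throttle_running_slices` is `user_editable` and, per the module comment, can be changed while the job is running, an operator could adjust it on a live job; this console snippet is illustrative only:

    # Lower the concurrent slice limit on a running job (illustrative).
    job = RocketJob::Job.where(state: :running).first
    job.throttle_running_slices = 10
    job.save!
    # Workers pick up the new value when they start their next slice, or once
    # `re_check_seconds` has elapsed.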

data/lib/rocket_job/batch/worker.rb
@@ -0,0 +1,288 @@
+require 'active_support/concern'
+
+module RocketJob
+  module Batch
+    module Worker
+      extend ActiveSupport::Concern
+
+      included do
+        # While working on a slice, the current slice is available via this reader
+        attr_reader :rocket_job_slice, :rocket_job_record_number
+
+        private
+
+        attr_writer :rocket_job_slice, :rocket_job_record_number
+      end
+
+      # Processes records in each available slice for this job. Slices are processed
+      # one at a time to allow for concurrent calls to this method to increase
+      # throughput. Processing will continue until there are no more jobs available
+      # for this job.
+      #
+      # Returns [true|false] whether this job should be excluded from the next lookup
+      #
+      # Slices are destroyed after their records are successfully processed
+      #
+      # Results are stored in the output collection if `collect_output?`
+      # `nil` results from workers are kept if `collect_nil_output`
+      #
+      # If an exception was thrown the entire slice of records is marked as failed.
+      #
+      # If the mongo_ha gem has been loaded, then the connection to mongo is
+      # automatically re-established and the job will resume anytime a
+      # Mongo connection failure occurs.
+      #
+      # Thread-safe, can be called by multiple threads at the same time
+      def rocket_job_work(worker, re_raise_exceptions = false, filter = {})
+        raise 'Job must be started before calling #rocket_job_work' unless running?
+        start_time = Time.now
+        if sub_state != :processing
+          rocket_job_handle_callbacks(worker, re_raise_exceptions)
+          return false unless running?
+        end
+
+        while !worker.shutdown?
+          if slice = input.next_slice(worker.name)
+            # Grab a slice before checking the throttle to reduce concurrency race condition.
+            if new_filter = rocket_job_batch_evaluate_throttles(slice)
+              # Restore retrieved slice so that other workers can process it later.
+              slice.set(worker_name: nil, state: :queued, started_at: nil)
+              self.class.send(:rocket_job_merge_filter, filter, new_filter)
+              return true
+            end
+
+            SemanticLogger.named_tagged(slice: slice.id.to_s) do
+              rocket_job_process_slice(slice, re_raise_exceptions)
+            end
+          else
+            break if record_count && rocket_job_batch_complete?(worker.name)
+            logger.debug 'No more work available for this job'
+            self.class.send(:rocket_job_merge_filter, filter, throttle_filter_id)
+            return true
+          end
+
+          # Allow new jobs with a higher priority to interrupt this job
+          break if (Time.now - start_time) >= Config.instance.re_check_seconds
+        end
+        false
+      end
+
+      # Prior to a job being made available for processing it can be processed one
+      # slice at a time.
+      #
+      # For example, to extract the header row which would be in the first slice.
+      #
+      # Returns [Integer] the number of records processed in the slice
+      #
+      # Note: The slice will be removed from processing when this method completes
+      def work_first_slice(&block)
+        raise '#work_first_slice can only be called from within before_batch callbacks' unless sub_state == :before
+        # TODO Make these settings configurable
+        count        = 0
+        wait_seconds = 5
+        while (slice = input.first).nil?
+          break if count > 10
+          logger.info "First slice has not arrived yet, sleeping for #{wait_seconds} seconds"
+          sleep wait_seconds
+          count += 1
+        end
+
+        if slice = input.first
+          SemanticLogger.named_tagged(slice: slice.id.to_s) do
+            # TODO Persist that the first slice is being processed by this worker
+            slice.start
+            rocket_job_process_slice(slice, true, &block)
+          end
+        else
+          # No records processed
+          0
+        end
+      end
+
+      # Returns [Array<ActiveWorker>] All workers actively working on this job
+      def rocket_job_active_workers(server_name = nil)
+        servers = []
+        case sub_state
+        when :before, :after
+          unless server_name && !worker_on_server?(server_name)
+            servers << ActiveWorker.new(worker_name, started_at, self) if running?
+          end
+        when :processing
+          query = input.running
+          query = query.where(worker_name: /\A#{server_name}/) if server_name
+          query.each do |slice|
+            servers << ActiveWorker.new(slice.worker_name, slice.started_at, self)
+          end
+        end
+        servers
+      end
+
+      private
+
+      # Process a single slice from Mongo
+      # Once the slice has been successfully processed it will be removed from the input collection
+      # Returns [Integer] the number of records successfully processed
+      def rocket_job_process_slice(slice, re_raise_exceptions)
+        slice_record_number       = 0
+        @rocket_job_record_number = slice.first_record_number || 0
+        @rocket_job_slice         = slice
+        run_callbacks :slice do
+          RocketJob::Sliced::Writer::Output.collect(self, slice) do |writer|
+            slice.each do |record|
+              slice_record_number += 1
+              SemanticLogger.named_tagged(record: @rocket_job_record_number) do
+                if _perform_callbacks.empty?
+                  @rocket_job_output = block_given? ? yield(record) : perform(record)
+                else
+                  # Allows @rocket_job_input to be modified by before/around callbacks
+                  @rocket_job_input = record
+                  # Allow callbacks to fail, complete or abort the job
+                  if running?
+                    if block_given?
+                      run_callbacks(:perform) { @rocket_job_output = yield(@rocket_job_input) }
+                    else
+                      # Allows @rocket_job_output to be modified by after/around callbacks
+                      run_callbacks(:perform) { @rocket_job_output = perform(@rocket_job_input) }
+                    end
+                  end
+                end
+                writer << @rocket_job_output
+              end
+              # JRuby says self.rocket_job_record_number= is private and cannot be accessed
+              @rocket_job_record_number += 1
+            end
+          end
+          @rocket_job_input = @rocket_job_slice = @rocket_job_output = nil
+        end
+
+        # On successful completion remove the slice from the input queue
+        # TODO Option to complete slice instead of destroying it to retain input data
+        slice.destroy
+        slice_record_number
+      rescue Exception => exc
+        slice.fail!(exc, slice_record_number)
+        raise exc if re_raise_exceptions
+        slice_record_number > 0 ? slice_record_number - 1 : 0
+      end
+
+      # Checks for completion and runs after_batch if defined
+      # Returns true if the job is now complete/aborted/failed
+      def rocket_job_batch_complete?(worker_name)
+        return true unless running?
+        return false unless record_count
+
+        # Only failed slices left?
+        input_count  = input.count
+        failed_count = input.failed.count
+        if (failed_count > 0) && (input_count == failed_count)
+          # Reload to pull in any counters or other data that was modified.
+          reload unless new_record?
+          if may_fail?
+            fail_job = true
+            unless new_record?
+              # Fail job iff no other worker has already finished it
+              # Must set write concern to at least 1 since we need the nModified back
+              result = self.class.with(write: {w: 1}) do |query|
+                query.
+                  where(id: id, state: :running, sub_state: :processing).
+                  update({'$set' => {state: :failed, worker_name: worker_name}})
+              end
+              fail_job = false unless result.modified_count > 0
+            end
+            if fail_job
+              message        = "#{failed_count} slices failed to process"
+              self.exception = JobException.new(message: message)
+              fail!(worker_name, message)
+            end
+          end
+          return true
+        end
+
+        # Any work left?
+        return false if input_count > 0
+
+        # If the job was not saved to the queue, do not save any changes
+        if new_record?
+          rocket_job_batch_run_after_callbacks(false)
+          return true
+        end
+
+        # Complete job iff no other worker has already completed it
+        # Must set write concern to at least 1 since we need the nModified back
+        result = self.class.with(write: {w: 1}) do |query|
+          query.
+            where(id: id, state: :running, sub_state: :processing).
+            update('$set' => {sub_state: :after, worker_name: worker_name})
+        end
+
+        # Reload to pull in any counters or other data that was modified.
+        reload
+        if result.modified_count > 0
+          rocket_job_batch_run_after_callbacks(false)
+        else
+          # Repeat cleanup in case this worker was still running when the job was aborted
+          cleanup! if aborted?
+        end
+        true
+      end
+
+      # Run the before_batch callbacks
+      # Saves the current state before and after running callbacks if callbacks present
+      def rocket_job_batch_run_before_callbacks
+        unless _before_batch_callbacks.empty?
+          self.sub_state = :before
+          save! unless new_record? || destroyed?
+          logger.measure_info(
+            'before_batch',
+            metric:             "#{self.class.name}/before_batch",
+            log_exception:      :full,
+            on_exception_level: :error,
+            silence:            log_level
+          ) do
+            run_callbacks(:before_batch)
+          end
+        end
+        self.sub_state = :processing
+        save! unless new_record? || destroyed?
+      end
+
+      # Run the after_batch callbacks
+      # Saves the current state before and after running callbacks if callbacks present
+      def rocket_job_batch_run_after_callbacks(save_before = true)
+        unless _after_batch_callbacks.empty?
+          self.sub_state = :after
+          save! if save_before && !new_record? && !destroyed?
+          logger.measure_info(
+            'after_batch',
+            metric:             "#{self.class.name}/after_batch",
+            log_exception:      :full,
+            on_exception_level: :error,
+            silence:            log_level
+          ) do
+            run_callbacks(:after_batch)
+          end
+        end
+        if new_record? || destroyed?
+          complete if may_complete?
+        else
+          may_complete? ? complete! : save!
+        end
+      end
+
+      # Handle before and after callbacks
+      def rocket_job_handle_callbacks(worker, re_raise_exceptions)
+        rocket_job_fail_on_exception!(worker.name, re_raise_exceptions) do
+          # If this is the first worker to pickup this job
+          if sub_state == :before
+            rocket_job_batch_run_before_callbacks
+            # Check for 0 record jobs
+            rocket_job_batch_complete?(worker.name) if running?
+          elsif sub_state == :after
+            rocket_job_batch_run_after_callbacks
+          end
+        end
+      end
+
+    end
+  end
+end
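
The worker above drives the `before_batch` and `after_batch` callback chains (`rocket_job_batch_run_before_callbacks` / `rocket_job_batch_run_after_callbacks`) around slice processing. A hedged sketch of a job hooking into them, assuming the `before_batch`/`after_batch` macros registered by data/lib/rocket_job/batch/callbacks.rb in this release; the callback bodies are illustrative:

    require 'rocketjob'

    class AggregateJob < RocketJob::Job
      include RocketJob::Batch

      before_batch :prepare_totals   # Runs once, in sub_state :before, before any slice
      after_batch  :publish_totals   # Runs once, in sub_state :after, when all slices are done

      def perform(record)
        # ....
      end

      private

      def prepare_totals
        # Illustrative setup, e.g. work_first_slice { |line| ... } to consume a header slice
      end

      def publish_totals
        # Illustrative teardown once every slice has completed
      end
    end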