rocketjob 3.5.2 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +63 -1
- data/bin/rocketjob +1 -0
- data/bin/rocketjob_batch_perf +11 -0
- data/lib/rocket_job/batch.rb +32 -0
- data/lib/rocket_job/batch/callbacks.rb +40 -0
- data/lib/rocket_job/batch/io.rb +154 -0
- data/lib/rocket_job/batch/logger.rb +57 -0
- data/lib/rocket_job/batch/lower_priority.rb +54 -0
- data/lib/rocket_job/batch/model.rb +157 -0
- data/lib/rocket_job/batch/performance.rb +99 -0
- data/lib/rocket_job/batch/result.rb +8 -0
- data/lib/rocket_job/batch/results.rb +9 -0
- data/lib/rocket_job/batch/state_machine.rb +102 -0
- data/lib/rocket_job/batch/statistics.rb +88 -0
- data/lib/rocket_job/batch/tabular.rb +56 -0
- data/lib/rocket_job/batch/tabular/input.rb +123 -0
- data/lib/rocket_job/batch/tabular/output.rb +59 -0
- data/lib/rocket_job/batch/throttle.rb +91 -0
- data/lib/rocket_job/batch/throttle_running_slices.rb +53 -0
- data/lib/rocket_job/batch/worker.rb +288 -0
- data/lib/rocket_job/cli.rb +29 -7
- data/lib/rocket_job/config.rb +1 -1
- data/lib/rocket_job/extensions/mongoid/clients/options.rb +37 -0
- data/lib/rocket_job/extensions/mongoid/contextual/mongo.rb +17 -0
- data/lib/rocket_job/extensions/mongoid/factory.rb +4 -4
- data/lib/rocket_job/extensions/mongoid_5/clients/options.rb +38 -0
- data/lib/rocket_job/extensions/mongoid_5/contextual/mongo.rb +64 -0
- data/lib/rocket_job/extensions/mongoid_5/factory.rb +13 -0
- data/lib/rocket_job/jobs/on_demand_batch_job.rb +127 -0
- data/lib/rocket_job/jobs/performance_job.rb +18 -0
- data/lib/rocket_job/jobs/upload_file_job.rb +2 -5
- data/lib/rocket_job/plugins/document.rb +2 -8
- data/lib/rocket_job/plugins/job/persistence.rb +6 -4
- data/lib/rocket_job/plugins/job/throttle.rb +3 -6
- data/lib/rocket_job/plugins/job/worker.rb +2 -2
- data/lib/rocket_job/server.rb +14 -3
- data/lib/rocket_job/sliced/input.rb +336 -0
- data/lib/rocket_job/sliced/output.rb +99 -0
- data/lib/rocket_job/sliced/slice.rb +166 -0
- data/lib/rocket_job/sliced/slices.rb +166 -0
- data/lib/rocket_job/sliced/writer/input.rb +60 -0
- data/lib/rocket_job/sliced/writer/output.rb +82 -0
- data/lib/rocket_job/version.rb +1 -1
- data/lib/rocket_job/worker.rb +2 -2
- data/lib/rocketjob.rb +28 -0
- metadata +51 -62
- data/test/config/database.yml +0 -5
- data/test/config/mongoid.yml +0 -88
- data/test/config_test.rb +0 -10
- data/test/dirmon_entry_test.rb +0 -313
- data/test/dirmon_job_test.rb +0 -216
- data/test/files/text.txt +0 -3
- data/test/job_test.rb +0 -71
- data/test/jobs/housekeeping_job_test.rb +0 -102
- data/test/jobs/on_demand_job_test.rb +0 -59
- data/test/jobs/upload_file_job_test.rb +0 -107
- data/test/plugins/cron_test.rb +0 -166
- data/test/plugins/job/callbacks_test.rb +0 -166
- data/test/plugins/job/defaults_test.rb +0 -53
- data/test/plugins/job/logger_test.rb +0 -56
- data/test/plugins/job/model_test.rb +0 -94
- data/test/plugins/job/persistence_test.rb +0 -94
- data/test/plugins/job/state_machine_test.rb +0 -116
- data/test/plugins/job/throttle_test.rb +0 -111
- data/test/plugins/job/worker_test.rb +0 -199
- data/test/plugins/processing_window_test.rb +0 -109
- data/test/plugins/restart_test.rb +0 -193
- data/test/plugins/retry_test.rb +0 -88
- data/test/plugins/singleton_test.rb +0 -92
- data/test/plugins/state_machine_event_callbacks_test.rb +0 -102
- data/test/plugins/state_machine_test.rb +0 -67
- data/test/plugins/transaction_test.rb +0 -84
- data/test/test_db.sqlite3 +0 -0
- data/test/test_helper.rb +0 -17
data/lib/rocket_job/batch/tabular.rb
@@ -0,0 +1,56 @@
+module RocketJob
+  module Batch
+    # Format output results.
+    #
+    # Takes Batch::Results, Batch::Result, Hash, Array, or String and renders it for output.
+    #
+    # Example:
+    #
+    #   tabular = Tabular.new(
+    #     main:       IOStreams::Tabular.new(columns: main_file_headers, format: tabular_output_format),
+    #     exceptions: IOStreams::Tabular.new(columns: exception_file_headers, format: tabular_output_format)
+    #   )
+    #
+    #   tabular.render(row)
+    class Tabular
+      autoload :Input, 'rocket_job/batch/tabular/input'
+      autoload :Output, 'rocket_job/batch/tabular/output'
+
+      def initialize(map)
+        @map = map
+      end
+
+      def [](category = :main)
+        @map[category] || raise("No tabular map defined for category: #{category.inspect}")
+      end
+
+      # Iterate over responses and format using Tabular
+      def render(row, category = :main)
+        if row.is_a?(Batch::Results)
+          results = Batch::Results.new
+          row.each { |result| results << render(result) }
+          results
+        elsif row.is_a?(Batch::Result)
+          row.value = self[row.category].render(row.value)
+          row
+        elsif row.blank?
+          nil
+        else
+          self[category].render(row)
+        end
+      end
+
+      def render_header(category = :main)
+        self[category].render_header
+      end
+
+      def requires_header?(category = :main)
+        self[category].requires_header?
+      end
+
+      def header?(category = :main)
+        self[category].header?
+      end
+    end
+  end
+end
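
The class above is a thin wrapper that keeps one `IOStreams::Tabular` instance per output category. As a minimal, hypothetical usage sketch (column names and values are invented; only methods shown in the diff above are used):

    # Hypothetical sketch, not taken from the gem's documentation.
    require 'iostreams'
    require 'rocketjob'

    tabular = RocketJob::Batch::Tabular.new(
      main:       IOStreams::Tabular.new(columns: %w[id name], format: :csv),
      exceptions: IOStreams::Tabular.new(columns: %w[id error], format: :csv)
    )

    tabular.render_header(:main)                          # e.g. "id,name"
    tabular.render([1, 'Jack'])                           # e.g. "1,Jack" (defaults to the :main category)
    tabular.render({'id' => 2, 'error' => 'bad row'}, :exceptions)
    tabular.render('')                                    # => nil, blank rows are skipped
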
data/lib/rocket_job/batch/tabular/input.rb
@@ -0,0 +1,123 @@
+require 'active_support/concern'
+
+module RocketJob
+  module Batch
+    class Tabular
+      # For the simple case where all `input_categories` have the same format,
+      # If multiple input categories are used with different formats, then use IOStreams::Tabular directly
+      # instead of this plugin.
+      module Input
+        extend ActiveSupport::Concern
+
+        included do
+          field :tabular_input_header, type: Array, class_attribute: true, user_editable: true
+          field :tabular_input_format, type: Symbol, default: :csv, class_attribute: true, user_editable: true
+
+          # tabular_input_mode: [:line | :row | :record]
+          #   :line
+          #     Uploads the file a line (String) at a time for processing by workers.
+          #   :row
+          #     Parses each line from the file as an Array and uploads each array for processing by workers.
+          #   :record
+          #     Parses each line from the file into a Hash and uploads each hash for processing by workers.
+          #   See IOStream#each_line, IOStream#each_row, and IOStream#each_record.
+          field :tabular_input_mode, type: Symbol, default: :line, class_attribute: true, user_editable: true, copy_on_restart: true
+
+          validates_inclusion_of :tabular_input_format, in: IOStreams::Tabular.registered_formats
+          validates_inclusion_of :tabular_input_mode, in: %i[line row record]
+          validate :tabular_input_header_present
+
+          class_attribute :tabular_input_white_list
+          class_attribute :tabular_input_required
+          class_attribute :tabular_input_skip_unknown
+
+          # Cleanse all uploaded data by removing non-printable characters
+          # and any characters that cannot be converted to UTF-8
+          class_attribute :tabular_input_type
+
+          self.tabular_input_white_list   = nil
+          self.tabular_input_required     = nil
+          self.tabular_input_skip_unknown = true
+          self.tabular_input_type         = :text
+
+          before_perform :tabular_input_render
+        end
+
+        # Extract the header line during the upload.
+        #
+        # Overrides: RocketJob::Batch::IO#upload
+        #
+        # Notes:
+        # - When supplying a block the header must be set manually
+        def upload(file_name_or_io = nil, **args, &block)
+          if tabular_input_type == :text
+            args[:encoding]       = 'UTF-8'
+            args[:encode_cleaner] = :printable
+            args[:encode_replace] = ''
+          end
+
+          # If an input header is not required, then we don't extract it'
+          return super(file_name_or_io, stream_mode: tabular_input_mode, **args, &block) unless tabular_input.header?
+
+          # If the header is already set then it is not expected in the file
+          if tabular_input_header.present?
+            tabular_input_cleanse_header
+            return super(file_name_or_io, stream_mode: tabular_input_mode, **args, &block)
+          end
+
+          case tabular_input_mode
+          when :line
+            parse_header = -> (line) do
+              tabular_input.parse_header(line)
+              tabular_input_cleanse_header
+              self.tabular_input_header = tabular_input.header.columns
+            end
+            super(file_name_or_io, on_first: parse_header, stream_mode: tabular_input_mode, **args, &block)
+          when :row
+            set_header = -> (row) do
+              tabular_input.header.columns = row
+              tabular_input_cleanse_header
+              self.tabular_input_header = tabular_input.header.columns
+            end
+            super(file_name_or_io, on_first: set_header, stream_mode: tabular_input_mode, **args, &block)
+          when :record
+            super(file_name_or_io, stream_mode: tabular_input_mode, **args, &block)
+          else
+            raise(ArgumentError, "Invalid tabular_input_mode: #{stream_mode.inspect}")
+          end
+        end
+
+        private
+
+        # Shared instance used for this slice, by a single worker (thread)
+        def tabular_input
+          @tabular_input ||= IOStreams::Tabular.new(
+            columns:          tabular_input_header,
+            allowed_columns:  tabular_input_white_list,
+            required_columns: tabular_input_required,
+            skip_unknown:     tabular_input_skip_unknown,
+            format:           tabular_input_format
+          )
+        end
+
+        def tabular_input_render
+          @rocket_job_input = tabular_input.record_parse(@rocket_job_input) unless tabular_input_header.blank? && tabular_input.header?
+        end
+
+        # Cleanse custom input header if supplied.
+        def tabular_input_cleanse_header
+          ignored_columns = tabular_input.header.cleanse!
+          logger.warn('Stripped out invalid columns from custom header', ignored_columns) unless ignored_columns.empty?
+
+          self.tabular_input_header = tabular_input.header.columns
+        end
+
+        def tabular_input_header_present
+          return if tabular_input_header.present? || !tabular_input.header? || (tabular_input_mode == :record)
+
+          errors.add(:tabular_input_header, "is required when tabular_input_format is #{tabular_input_format.inspect}")
+        end
+      end
+    end
+  end
+end
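
Together with `RocketJob::Batch::IO#upload` (also added in this release), the plugin above lets a job upload a delimited file and hand each line to `perform` as a parsed Hash. A hedged sketch of such a job; the class name, file name, and `perform` body are illustrative only:

    # Illustrative sketch, not from the gem's documentation.
    class ImportUsersJob < RocketJob::Job
      include RocketJob::Batch
      include RocketJob::Batch::Tabular::Input

      # tabular_input_format defaults to :csv and tabular_input_mode to :line, so upload
      # extracts the header row and tabular_input_render parses each line into a Hash.
      def perform(record)
        logger.info('Importing user', record)
      end
    end

    job = ImportUsersJob.new
    job.upload('users.csv')   # reads the header line and slices the remaining lines for workers
    job.save!
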
data/lib/rocket_job/batch/tabular/output.rb
@@ -0,0 +1,59 @@
+require 'active_support/concern'
+
+module RocketJob
+  module Batch
+    class Tabular
+      # For the simple case where all `output_categories` have the same format,
+      # If multiple output categories are used with different formats, then use IOStreams::Tabular directly
+      # instead of this plugin.
+      module Output
+        extend ActiveSupport::Concern
+
+        included do
+          field :tabular_output_header, type: Array, class_attribute: true, user_editable: true, copy_on_restart: true
+          field :tabular_output_format, type: Symbol, default: :csv, class_attribute: true, user_editable: true, copy_on_restart: true
+
+          validates_inclusion_of :tabular_output_format, in: IOStreams::Tabular.registered_formats
+
+          after_perform :tabular_output_render
+        end
+
+        # Clear out cached tabular_output any time header or format is changed.
+        def tabular_output_header=(tabular_output_header)
+          super(tabular_output_header)
+          @tabular_output = nil
+        end
+
+        def tabular_output_format=(tabular_output_format)
+          super(tabular_output_format)
+          @tabular_output = nil
+        end
+
+        # Overrides: `RocketJob::Batch::IO#download` to add the `tabular_output_header`.
+        def download(file_name_or_io = nil, category: :main, **args, &block)
+          # No header required
+          return super(file_name_or_io, category: category, **args, &block) unless tabular_output.requires_header?(category)
+
+          header = tabular_output.render_header(category)
+          super(file_name_or_io, header_line: header, category: category, **args, &block)
+        end
+
+        private
+
+        # Delimited instance used for this slice, by a single worker (thread)
+        def tabular_output
+          @tabular_output ||= Tabular.new(
+            main: IOStreams::Tabular.new(columns: tabular_output_header, format: tabular_output_format)
+          )
+        end
+
+        # Render the output from the perform.
+        def tabular_output_render
+          return unless collect_output?
+
+          @rocket_job_output = tabular_output.render(@rocket_job_output)
+        end
+      end
+    end
+  end
+end
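
A corresponding hedged sketch for the output side: collect each `perform` result, render it as CSV via `after_perform :tabular_output_render`, and download the results with the header line prepended. Class name, columns, and file name are illustrative only:

    # Illustrative sketch, not from the gem's documentation.
    class ExportUsersJob < RocketJob::Job
      include RocketJob::Batch
      include RocketJob::Batch::Tabular::Output

      self.collect_output        = true
      self.tabular_output_header = %w[login name]

      def perform(record)
        # Return a Hash; tabular_output_render turns it into a CSV line in the output collection.
        {'login' => record['login'], 'name' => record['name']}
      end
    end

    # Once the job has completed:
    #   job.download('users.csv')   # writes "login,name" followed by the rendered rows
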
data/lib/rocket_job/batch/throttle.rb
@@ -0,0 +1,91 @@
+require 'active_support/concern'
+
+module RocketJob
+  module Batch
+    # Rocket Job Batch Throttling Framework.
+    #
+    # Example:
+    #   # Do not run any slices for this job when the MySQL slave delay exceeds 5 minutes.
+    #   class MyJob < RocketJob
+    #     include RocketJob::Batch
+    #
+    #     # Define a custom mysql throttle
+    #     # Prevents all slices from this job from running on the current server.
+    #     define_batch_throttle :mysql_throttle_exceeded?
+    #
+    #     def perform(record)
+    #       # ....
+    #     end
+    #
+    #     private
+    #
+    #     # Returns true if the MySQL slave delay exceeds 5 minutes
+    #     def mysql_throttle_exceeded?
+    #       status        = ActiveRecord::Base.connection.connection.select_one('show slave status')
+    #       seconds_delay = Hash(status)['Seconds_Behind_Master'].to_i
+    #       seconds_delay >= 300
+    #     end
+    #   end
+    module Throttle
+      extend ActiveSupport::Concern
+
+      included do
+        class_attribute :rocket_job_batch_throttles
+        self.rocket_job_batch_throttles = []
+      end
+
+      module ClassMethods
+        # Add a new throttle.
+        #
+        # Parameters:
+        #   method_name: [Symbol]
+        #     Name of method to call to evaluate whether a throttle has been exceeded.
+        #     Note: Must return true or false.
+        #   filter: [Symbol|Proc]
+        #     Name of method to call to return the filter when the throttle has been exceeded.
+        #     Or, a block that will return the filter.
+        #     Default: :throttle_filter_class (Throttle all jobs of this class)
+        #
+        # Note: Throttles are executed in the order they are defined.
+        def define_batch_throttle(method_name, filter: :throttle_filter_class)
+          unless filter.is_a?(Symbol) || filter.is_a?(Proc)
+            raise(ArgumentError, "Filter for #{method_name} must be a Symbol or Proc")
+          end
+          if batch_throttle?(method_name)
+            raise(ArgumentError, "Cannot define #{method_name} twice, undefine previous throttle first")
+          end
+
+          self.rocket_job_batch_throttles += [ThrottleDefinition.new(method_name, filter)]
+        end
+
+        # Undefine a previously defined throttle
+        def undefine_batch_throttle(method_name)
+          rocket_job_batch_throttles.delete_if { |throttle| throttle.method_name == method_name }
+        end
+
+        # Has a throttle been defined?
+        def batch_throttle?(method_name)
+          rocket_job_batch_throttles.any? { |throttle| throttle.method_name == method_name }
+        end
+      end
+
+      private
+
+      ThrottleDefinition = Struct.new(:method_name, :filter)
+
+      # Returns the matching filter, or nil if no throttles were triggered.
+      def rocket_job_batch_evaluate_throttles(slice)
+        rocket_job_batch_throttles.each do |throttle|
+          throttle_exceeded = method(throttle.method_name).arity == 0 ? send(throttle.method_name) : send(throttle.method_name, slice)
+          next unless throttle_exceeded
+
+          logger.debug { "Batch Throttle: #{throttle.method_name} has been exceeded. #{self.class.name}:#{id}" }
+          filter = throttle.filter
+          return filter.is_a?(Proc) ? filter.call(self) : send(filter)
+        end
+        nil
+      end
+
+    end
+  end
+end
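
The class comment above shows a Symbol filter; per `rocket_job_batch_evaluate_throttles`, the `filter:` argument also accepts a Proc that is called with the job. A hedged sketch (the job class, `ExternalApi`, and the quota check are hypothetical; the returned hash is assumed to be a Mongoid-style filter that the worker merges into its next job lookup so that only this job's id is excluded):

    # Hypothetical sketch: skip only this job instance while an external API is saturated.
    class NightlyLoadJob < RocketJob::Job
      include RocketJob::Batch

      define_batch_throttle :external_api_saturated?, filter: ->(job) { {:id.nin => [job.id]} }

      def perform(record)
        # ....
      end

      private

      # Hypothetical check against an external rate limiter.
      def external_api_saturated?
        ExternalApi.remaining_quota < 100
      end
    end
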
data/lib/rocket_job/batch/throttle_running_slices.rb
@@ -0,0 +1,53 @@
+require 'active_support/concern'
+
+module RocketJob
+  module Batch
+    # Throttle the number of slices of a specific batch job that are processed at the same time.
+    #
+    # Example:
+    #   class MyJob < RocketJob
+    #     include RocketJob::Batch
+    #
+    #     # Maximum number of slices to process at the same time for each running instance.
+    #     self.throttle_running_slices = 25
+    #
+    #     def perform(record)
+    #       # ....
+    #     end
+    #   end
+    #
+    # It attempts to ensure that the number of workers do not exceed this number.
+    # This is not a hard limit and it is possible for the number of workers to
+    # slightly exceed this value at times. It can also occur that the number of
+    # slices running can drop below this number for a short period.
+    #
+    # This value can be modified while a job is running. The change will be picked
+    # up at the start of processing slices, or after processing a slice and
+    # `re_check_seconds` has been exceeded.
+    #
+    # 0 or nil : No limits in place
+    #
+    # Default: nil
+    module ThrottleRunningSlices
+      extend ActiveSupport::Concern
+
+      included do
+        field :throttle_running_slices, type: Integer, class_attribute: true, user_editable: true, copy_on_restart: true
+
+        validates :throttle_running_slices, numericality: {greater_than_or_equal_to: 0}, allow_nil: true
+
+        define_batch_throttle :throttle_running_slices_exceeded?, filter: :throttle_filter_id
+      end
+
+      private
+
+      # Returns [Boolean] whether the throttle for this job has been exceeded
+      def throttle_running_slices_exceeded?(slice)
+        throttle_running_slices &&
+          (throttle_running_slices != 0) &&
+          (input.running.where(:id.ne => slice.id).count >= throttle_running_slices)
+      end

+    end
+  end
+end
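
Because `throttle_running_slices` is a regular, user-editable field, it can be set per job instance and, per the notes above, adjusted while the job is running. A small hypothetical sketch (job class and file name are illustrative):

    # Hypothetical sketch: cap this instance at 10 concurrent slices, then raise the cap later.
    job = MyJob.new(throttle_running_slices: 10)
    job.upload('big_file.csv')
    job.save!

    # While the job is running:
    job.throttle_running_slices = 25
    job.save!
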
data/lib/rocket_job/batch/worker.rb
@@ -0,0 +1,288 @@
+require 'active_support/concern'
+
+module RocketJob
+  module Batch
+    module Worker
+      extend ActiveSupport::Concern
+
+      included do
+        # While working on a slice, the current slice is available via this reader
+        attr_reader :rocket_job_slice, :rocket_job_record_number
+
+        private
+
+        attr_writer :rocket_job_slice, :rocket_job_record_number
+      end
+
+      # Processes records in each available slice for this job. Slices are processed
+      # one at a time to allow for concurrent calls to this method to increase
+      # throughput. Processing will continue until there are no more jobs available
+      # for this job.
+      #
+      # Returns [true|false] whether this job should be excluded from the next lookup
+      #
+      # Slices are destroyed after their records are successfully processed
+      #
+      # Results are stored in the output collection if `collect_output?`
+      # `nil` results from workers are kept if `collect_nil_output`
+      #
+      # If an exception was thrown the entire slice of records is marked as failed.
+      #
+      # If the mongo_ha gem has been loaded, then the connection to mongo is
+      # automatically re-established and the job will resume anytime a
+      # Mongo connection failure occurs.
+      #
+      # Thread-safe, can be called by multiple threads at the same time
+      def rocket_job_work(worker, re_raise_exceptions = false, filter = {})
+        raise 'Job must be started before calling #rocket_job_work' unless running?
+        start_time = Time.now
+        if sub_state != :processing
+          rocket_job_handle_callbacks(worker, re_raise_exceptions)
+          return false unless running?
+        end
+
+        while !worker.shutdown?
+          if slice = input.next_slice(worker.name)
+            # Grab a slice before checking the throttle to reduce concurrency race condition.
+            if new_filter = rocket_job_batch_evaluate_throttles(slice)
+              # Restore retrieved slice so that other workers can process it later.
+              slice.set(worker_name: nil, state: :queued, started_at: nil)
+              self.class.send(:rocket_job_merge_filter, filter, new_filter)
+              return true
+            end
+
+            SemanticLogger.named_tagged(slice: slice.id.to_s) do
+              rocket_job_process_slice(slice, re_raise_exceptions)
+            end
+          else
+            break if record_count && rocket_job_batch_complete?(worker.name)
+            logger.debug 'No more work available for this job'
+            self.class.send(:rocket_job_merge_filter, filter, throttle_filter_id)
+            return true
+          end
+
+          # Allow new jobs with a higher priority to interrupt this job
+          break if (Time.now - start_time) >= Config.instance.re_check_seconds
+        end
+        false
+      end
+
+      # Prior to a job being made available for processing it can be processed one
+      # slice at a time.
+      #
+      # For example, to extract the header row which would be in the first slice.
+      #
+      # Returns [Integer] the number of records processed in the slice
+      #
+      # Note: The slice will be removed from processing when this method completes
+      def work_first_slice(&block)
+        raise '#work_first_slice can only be called from within before_batch callbacks' unless sub_state == :before
+        # TODO Make these settings configurable
+        count        = 0
+        wait_seconds = 5
+        while (slice = input.first).nil?
+          break if count > 10
+          logger.info "First slice has not arrived yet, sleeping for #{wait_seconds} seconds"
+          sleep wait_seconds
+          count += 1
+        end
+
+        if slice = input.first
+          SemanticLogger.named_tagged(slice: slice.id.to_s) do
+            # TODO Persist that the first slice is being processed by this worker
+            slice.start
+            rocket_job_process_slice(slice, true, &block)
+          end
+        else
+          # No records processed
+          0
+        end
+      end
+
+      # Returns [Array<ActiveWorker>] All workers actively working on this job
+      def rocket_job_active_workers(server_name = nil)
+        servers = []
+        case sub_state
+        when :before, :after
+          unless server_name && !worker_on_server?(server_name)
+            servers << ActiveWorker.new(worker_name, started_at, self) if running?
+          end
+        when :processing
+          query = input.running
+          query = query.where(worker_name: /\A#{server_name}/) if server_name
+          query.each do |slice|
+            servers << ActiveWorker.new(slice.worker_name, slice.started_at, self)
+          end
+        end
+        servers
+      end
+
+      private
+
+      # Process a single slice from Mongo
+      # Once the slice has been successfully processed it will be removed from the input collection
+      # Returns [Integer] the number of records successfully processed
+      def rocket_job_process_slice(slice, re_raise_exceptions)
+        slice_record_number       = 0
+        @rocket_job_record_number = slice.first_record_number || 0
+        @rocket_job_slice         = slice
+        run_callbacks :slice do
+          RocketJob::Sliced::Writer::Output.collect(self, slice) do |writer|
+            slice.each do |record|
+              slice_record_number += 1
+              SemanticLogger.named_tagged(record: @rocket_job_record_number) do
+                if _perform_callbacks.empty?
+                  @rocket_job_output = block_given? ? yield(record) : perform(record)
+                else
+                  # Allows @rocket_job_input to be modified by before/around callbacks
+                  @rocket_job_input = record
+                  # Allow callbacks to fail, complete or abort the job
+                  if running?
+                    if block_given?
+                      run_callbacks(:perform) { @rocket_job_output = yield(@rocket_job_input) }
+                    else
+                      # Allows @rocket_job_output to be modified by after/around callbacks
+                      run_callbacks(:perform) { @rocket_job_output = perform(@rocket_job_input) }
+                    end
+                  end
+                end
+                writer << @rocket_job_output
+              end
+              # JRuby says self.rocket_job_record_number= is private and cannot be accessed
+              @rocket_job_record_number += 1
+            end
+          end
+          @rocket_job_input = @rocket_job_slice = @rocket_job_output = nil
+        end
+
+        # On successful completion remove the slice from the input queue
+        # TODO Option to complete slice instead of destroying it to retain input data
+        slice.destroy
+        slice_record_number
+      rescue Exception => exc
+        slice.fail!(exc, slice_record_number)
+        raise exc if re_raise_exceptions
+        slice_record_number > 0 ? slice_record_number - 1 : 0
+      end
+
+      # Checks for completion and runs after_batch if defined
+      # Returns true if the job is now complete/aborted/failed
+      def rocket_job_batch_complete?(worker_name)
+        return true unless running?
+        return false unless record_count
+
+        # Only failed slices left?
+        input_count  = input.count
+        failed_count = input.failed.count
+        if (failed_count > 0) && (input_count == failed_count)
+          # Reload to pull in any counters or other data that was modified.
+          reload unless new_record?
+          if may_fail?
+            fail_job = true
+            unless new_record?
+              # Fail job iff no other worker has already finished it
+              # Must set write concern to at least 1 since we need the nModified back
+              result = self.class.with(write: {w: 1}) do |query|
+                query.
+                  where(id: id, state: :running, sub_state: :processing).
+                  update({'$set' => {state: :failed, worker_name: worker_name}})
+              end
+              fail_job = false unless result.modified_count > 0
+            end
+            if fail_job
+              message        = "#{failed_count} slices failed to process"
+              self.exception = JobException.new(message: message)
+              fail!(worker_name, message)
+            end
+          end
+          return true
+        end
+
+        # Any work left?
+        return false if input_count > 0
+
+        # If the job was not saved to the queue, do not save any changes
+        if new_record?
+          rocket_job_batch_run_after_callbacks(false)
+          return true
+        end
+
+        # Complete job iff no other worker has already completed it
+        # Must set write concern to at least 1 since we need the nModified back
+        result = self.class.with(write: {w: 1}) do |query|
+          query.
+            where(id: id, state: :running, sub_state: :processing).
+            update('$set' => {sub_state: :after, worker_name: worker_name})
+        end
+
+        # Reload to pull in any counters or other data that was modified.
+        reload
+        if result.modified_count > 0
+          rocket_job_batch_run_after_callbacks(false)
+        else
+          # Repeat cleanup in case this worker was still running when the job was aborted
+          cleanup! if aborted?
+        end
+        true
+      end
+
+      # Run the before_batch callbacks
+      # Saves the current state before and after running callbacks if callbacks present
+      def rocket_job_batch_run_before_callbacks
+        unless _before_batch_callbacks.empty?
+          self.sub_state = :before
+          save! unless new_record? || destroyed?
+          logger.measure_info(
+            'before_batch',
+            metric:             "#{self.class.name}/before_batch",
+            log_exception:      :full,
+            on_exception_level: :error,
+            silence:            log_level
+          ) do
+            run_callbacks(:before_batch)
+          end
+        end
+        self.sub_state = :processing
+        save! unless new_record? || destroyed?
+      end
+
+      # Run the after_batch callbacks
+      # Saves the current state before and after running callbacks if callbacks present
+      def rocket_job_batch_run_after_callbacks(save_before = true)
+        unless _after_batch_callbacks.empty?
+          self.sub_state = :after
+          save! if save_before && !new_record? && !destroyed?
+          logger.measure_info(
+            'after_batch',
+            metric:             "#{self.class.name}/after_batch",
+            log_exception:      :full,
+            on_exception_level: :error,
+            silence:            log_level
+          ) do
+            run_callbacks(:after_batch)
+          end
+        end
+        if new_record? || destroyed?
+          complete if may_complete?
+        else
+          may_complete? ? complete! : save!
+        end
+      end
+
+      # Handle before and after callbacks
+      def rocket_job_handle_callbacks(worker, re_raise_exceptions)
+        rocket_job_fail_on_exception!(worker.name, re_raise_exceptions) do
+          # If this is the first worker to pickup this job
+          if sub_state == :before
+            rocket_job_batch_run_before_callbacks
+            # Check for 0 record jobs
+            rocket_job_batch_complete?(worker.name) if running?
+          elsif sub_state == :after
+            rocket_job_batch_run_after_callbacks
+          end
+        end
+      end
+
+    end
+  end
+end
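
One practical consequence of the guard in `work_first_slice` is that it can only be called from a `before_batch` callback, while `sub_state` is still `:before`. A hedged sketch of that pattern (class name and callback body are illustrative; `before_batch` is assumed to be provided by `RocketJob::Batch::Callbacks`, also added in this release):

    # Illustrative sketch: consume the first slice early, e.g. to inspect a header row.
    class FirstSliceJob < RocketJob::Job
      include RocketJob::Batch

      before_batch :inspect_first_slice

      def perform(record)
        # ....
      end

      private

      # Runs while sub_state == :before, so work_first_slice is permitted here.
      def inspect_first_slice
        work_first_slice do |record|
          logger.info('First slice record', record: record)
        end
      end
    end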