rocketjob 3.5.2 → 4.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (75) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +63 -1
  3. data/bin/rocketjob +1 -0
  4. data/bin/rocketjob_batch_perf +11 -0
  5. data/lib/rocket_job/batch.rb +32 -0
  6. data/lib/rocket_job/batch/callbacks.rb +40 -0
  7. data/lib/rocket_job/batch/io.rb +154 -0
  8. data/lib/rocket_job/batch/logger.rb +57 -0
  9. data/lib/rocket_job/batch/lower_priority.rb +54 -0
  10. data/lib/rocket_job/batch/model.rb +157 -0
  11. data/lib/rocket_job/batch/performance.rb +99 -0
  12. data/lib/rocket_job/batch/result.rb +8 -0
  13. data/lib/rocket_job/batch/results.rb +9 -0
  14. data/lib/rocket_job/batch/state_machine.rb +102 -0
  15. data/lib/rocket_job/batch/statistics.rb +88 -0
  16. data/lib/rocket_job/batch/tabular.rb +56 -0
  17. data/lib/rocket_job/batch/tabular/input.rb +123 -0
  18. data/lib/rocket_job/batch/tabular/output.rb +59 -0
  19. data/lib/rocket_job/batch/throttle.rb +91 -0
  20. data/lib/rocket_job/batch/throttle_running_slices.rb +53 -0
  21. data/lib/rocket_job/batch/worker.rb +288 -0
  22. data/lib/rocket_job/cli.rb +29 -7
  23. data/lib/rocket_job/config.rb +1 -1
  24. data/lib/rocket_job/extensions/mongoid/clients/options.rb +37 -0
  25. data/lib/rocket_job/extensions/mongoid/contextual/mongo.rb +17 -0
  26. data/lib/rocket_job/extensions/mongoid/factory.rb +4 -4
  27. data/lib/rocket_job/extensions/mongoid_5/clients/options.rb +38 -0
  28. data/lib/rocket_job/extensions/mongoid_5/contextual/mongo.rb +64 -0
  29. data/lib/rocket_job/extensions/mongoid_5/factory.rb +13 -0
  30. data/lib/rocket_job/jobs/on_demand_batch_job.rb +127 -0
  31. data/lib/rocket_job/jobs/performance_job.rb +18 -0
  32. data/lib/rocket_job/jobs/upload_file_job.rb +2 -5
  33. data/lib/rocket_job/plugins/document.rb +2 -8
  34. data/lib/rocket_job/plugins/job/persistence.rb +6 -4
  35. data/lib/rocket_job/plugins/job/throttle.rb +3 -6
  36. data/lib/rocket_job/plugins/job/worker.rb +2 -2
  37. data/lib/rocket_job/server.rb +14 -3
  38. data/lib/rocket_job/sliced/input.rb +336 -0
  39. data/lib/rocket_job/sliced/output.rb +99 -0
  40. data/lib/rocket_job/sliced/slice.rb +166 -0
  41. data/lib/rocket_job/sliced/slices.rb +166 -0
  42. data/lib/rocket_job/sliced/writer/input.rb +60 -0
  43. data/lib/rocket_job/sliced/writer/output.rb +82 -0
  44. data/lib/rocket_job/version.rb +1 -1
  45. data/lib/rocket_job/worker.rb +2 -2
  46. data/lib/rocketjob.rb +28 -0
  47. metadata +51 -62
  48. data/test/config/database.yml +0 -5
  49. data/test/config/mongoid.yml +0 -88
  50. data/test/config_test.rb +0 -10
  51. data/test/dirmon_entry_test.rb +0 -313
  52. data/test/dirmon_job_test.rb +0 -216
  53. data/test/files/text.txt +0 -3
  54. data/test/job_test.rb +0 -71
  55. data/test/jobs/housekeeping_job_test.rb +0 -102
  56. data/test/jobs/on_demand_job_test.rb +0 -59
  57. data/test/jobs/upload_file_job_test.rb +0 -107
  58. data/test/plugins/cron_test.rb +0 -166
  59. data/test/plugins/job/callbacks_test.rb +0 -166
  60. data/test/plugins/job/defaults_test.rb +0 -53
  61. data/test/plugins/job/logger_test.rb +0 -56
  62. data/test/plugins/job/model_test.rb +0 -94
  63. data/test/plugins/job/persistence_test.rb +0 -94
  64. data/test/plugins/job/state_machine_test.rb +0 -116
  65. data/test/plugins/job/throttle_test.rb +0 -111
  66. data/test/plugins/job/worker_test.rb +0 -199
  67. data/test/plugins/processing_window_test.rb +0 -109
  68. data/test/plugins/restart_test.rb +0 -193
  69. data/test/plugins/retry_test.rb +0 -88
  70. data/test/plugins/singleton_test.rb +0 -92
  71. data/test/plugins/state_machine_event_callbacks_test.rb +0 -102
  72. data/test/plugins/state_machine_test.rb +0 -67
  73. data/test/plugins/transaction_test.rb +0 -84
  74. data/test/test_db.sqlite3 +0 -0
  75. data/test/test_helper.rb +0 -17
@@ -0,0 +1,56 @@
1
+ module RocketJob
2
+ module Batch
3
+ # Format output results.
4
+ #
5
+ # Takes Batch::Results, Batch::Result, Hash, Array, or String and renders it for output.
6
+ #
7
+ # Example:
8
+ #
9
+ # tabular = Tabular.new(
10
+ # main: IOStreams::Tabular.new(columns: main_file_headers, format: tabular_output_format),
11
+ # exceptions: IOStreams::Tabular.new(columns: exception_file_headers, format: tabular_output_format)
12
+ # )
13
+ #
14
+ # tabular.render(row)
15
+ class Tabular
16
+ autoload :Input, 'rocket_job/batch/tabular/input'
17
+ autoload :Output, 'rocket_job/batch/tabular/output'
18
+
19
+ def initialize(map)
20
+ @map = map
21
+ end
22
+
23
+ def [](category = :main)
24
+ @map[category] || raise("No tabular map defined for category: #{category.inspect}")
25
+ end
26
+
27
+ # Iterate over responses and format using Tabular
28
+ def render(row, category = :main)
29
+ if row.is_a?(Batch::Results)
30
+ results = Batch::Results.new
31
+ row.each { |result| results << render(result) }
32
+ results
33
+ elsif row.is_a?(Batch::Result)
34
+ row.value = self[row.category].render(row.value)
35
+ row
36
+ elsif row.blank?
37
+ nil
38
+ else
39
+ self[category].render(row)
40
+ end
41
+ end
42
+
43
+ def render_header(category = :main)
44
+ self[category].render_header
45
+ end
46
+
47
+ def requires_header?(category = :main)
48
+ self[category].requires_header?
49
+ end
50
+
51
+ def header?(category = :main)
52
+ self[category].header?
53
+ end
54
+ end
55
+ end
56
+ end
@@ -0,0 +1,123 @@
1
+ require 'active_support/concern'
2
+
3
+ module RocketJob
4
+ module Batch
5
+ class Tabular
6
+ # For the simple case where all `input_categories` have the same format.
7
+ # If multiple input categories are used with different formats, then use IOStreams::Tabular directly
8
+ # instead of this plugin.
9
+ module Input
10
+ extend ActiveSupport::Concern
11
+
12
+ included do
13
+ field :tabular_input_header, type: Array, class_attribute: true, user_editable: true
14
+ field :tabular_input_format, type: Symbol, default: :csv, class_attribute: true, user_editable: true
15
+
16
+ # tabular_input_mode: [:line | :row | :record]
17
+ # :line
18
+ # Uploads the file a line (String) at a time for processing by workers.
19
+ # :row
20
+ # Parses each line from the file as an Array and uploads each array for processing by workers.
21
+ # :record
22
+ # Parses each line from the file into a Hash and uploads each hash for processing by workers.
23
+ # See IOStreams#each_line, IOStreams#each_row, and IOStreams#each_record.
24
+ field :tabular_input_mode, type: Symbol, default: :line, class_attribute: true, user_editable: true, copy_on_restart: true
25
+
26
+ validates_inclusion_of :tabular_input_format, in: IOStreams::Tabular.registered_formats
27
+ validates_inclusion_of :tabular_input_mode, in: %i[line row record]
28
+ validate :tabular_input_header_present
29
+
30
+ class_attribute :tabular_input_white_list
31
+ class_attribute :tabular_input_required
32
+ class_attribute :tabular_input_skip_unknown
33
+
34
+ # Cleanse all uploaded data by removing non-printable characters
35
+ # and any characters that cannot be converted to UTF-8
36
+ class_attribute :tabular_input_type
37
+
38
+ self.tabular_input_white_list = nil
39
+ self.tabular_input_required = nil
40
+ self.tabular_input_skip_unknown = true
41
+ self.tabular_input_type = :text
42
+
43
+ before_perform :tabular_input_render
44
+ end
45
+
46
+ # Extract the header line during the upload.
47
+ #
48
+ # Overrides: RocketJob::Batch::IO#upload
49
+ #
50
+ # Notes:
51
+ # - When supplying a block the header must be set manually
52
+ def upload(file_name_or_io = nil, **args, &block)
53
+ if tabular_input_type == :text
54
+ args[:encoding] = 'UTF-8'
55
+ args[:encode_cleaner] = :printable
56
+ args[:encode_replace] = ''
57
+ end
58
+
59
+ # If an input header is not required, then we don't extract it'
60
+ return super(file_name_or_io, stream_mode: tabular_input_mode, **args, &block) unless tabular_input.header?
61
+
62
+ # If the header is already set then it is not expected in the file
63
+ if tabular_input_header.present?
64
+ tabular_input_cleanse_header
65
+ return super(file_name_or_io, stream_mode: tabular_input_mode, **args, &block)
66
+ end
67
+
68
+ case tabular_input_mode
69
+ when :line
70
+ parse_header = -> (line) do
71
+ tabular_input.parse_header(line)
72
+ tabular_input_cleanse_header
73
+ self.tabular_input_header = tabular_input.header.columns
74
+ end
75
+ super(file_name_or_io, on_first: parse_header, stream_mode: tabular_input_mode, **args, &block)
76
+ when :row
77
+ set_header = -> (row) do
78
+ tabular_input.header.columns = row
79
+ tabular_input_cleanse_header
80
+ self.tabular_input_header = tabular_input.header.columns
81
+ end
82
+ super(file_name_or_io, on_first: set_header, stream_mode: tabular_input_mode, **args, &block)
83
+ when :record
84
+ super(file_name_or_io, stream_mode: tabular_input_mode, **args, &block)
85
+ else
86
+ raise(ArgumentError, "Invalid tabular_input_mode: #{stream_mode.inspect}")
87
+ end
88
+ end
89
+
90
+ private
91
+
92
+ # Shared instance used for this slice, by a single worker (thread)
93
+ def tabular_input
94
+ @tabular_input ||= IOStreams::Tabular.new(
95
+ columns: tabular_input_header,
96
+ allowed_columns: tabular_input_white_list,
97
+ required_columns: tabular_input_required,
98
+ skip_unknown: tabular_input_skip_unknown,
99
+ format: tabular_input_format
100
+ )
101
+ end
102
+
103
+ def tabular_input_render
104
+ @rocket_job_input = tabular_input.record_parse(@rocket_job_input) unless tabular_input_header.blank? && tabular_input.header?
105
+ end
106
+
107
+ # Cleanse custom input header if supplied.
108
+ def tabular_input_cleanse_header
109
+ ignored_columns = tabular_input.header.cleanse!
110
+ logger.warn('Stripped out invalid columns from custom header', ignored_columns) unless ignored_columns.empty?
111
+
112
+ self.tabular_input_header = tabular_input.header.columns
113
+ end
114
+
115
+ def tabular_input_header_present
116
+ return if tabular_input_header.present? || !tabular_input.header? || (tabular_input_mode == :record)
117
+
118
+ errors.add(:tabular_input_header, "is required when tabular_input_format is #{tabular_input_format.inspect}")
119
+ end
120
+ end
121
+ end
122
+ end
123
+ end
@@ -0,0 +1,59 @@
1
+ require 'active_support/concern'
2
+
3
+ module RocketJob
4
+ module Batch
5
+ class Tabular
6
+ # For the simple case where all `output_categories` have the same format.
7
+ # If multiple output categories are used with different formats, then use IOStreams::Tabular directly
8
+ # instead of this plugin.
9
+ module Output
10
+ extend ActiveSupport::Concern
11
+
12
+ included do
13
+ field :tabular_output_header, type: Array, class_attribute: true, user_editable: true, copy_on_restart: true
14
+ field :tabular_output_format, type: Symbol, default: :csv, class_attribute: true, user_editable: true, copy_on_restart: true
15
+
16
+ validates_inclusion_of :tabular_output_format, in: IOStreams::Tabular.registered_formats
17
+
18
+ after_perform :tabular_output_render
19
+ end
20
+
21
+ # Clear out cached tabular_output any time header or format is changed.
22
+ def tabular_output_header=(tabular_output_header)
23
+ super(tabular_output_header)
24
+ @tabular_output = nil
25
+ end
26
+
27
+ def tabular_output_format=(tabular_output_format)
28
+ super(tabular_output_format)
29
+ @tabular_output = nil
30
+ end
31
+
32
+ # Overrides: `RocketJob::Batch::IO#download` to add the `tabular_output_header`.
33
+ def download(file_name_or_io = nil, category: :main, **args, &block)
34
+ # No header required
35
+ return super(file_name_or_io, category: category, **args, &block) unless tabular_output.requires_header?(category)
36
+
37
+ header = tabular_output.render_header(category)
38
+ super(file_name_or_io, header_line: header, category: category, **args, &block)
39
+ end
40
+
41
+ private
42
+
43
+ # Delimited instance used for this slice, by a single worker (thread)
44
+ def tabular_output
45
+ @tabular_output ||= Tabular.new(
46
+ main: IOStreams::Tabular.new(columns: tabular_output_header, format: tabular_output_format)
47
+ )
48
+ end
49
+
50
+ # Render the output from the perform.
51
+ def tabular_output_render
52
+ return unless collect_output?
53
+
54
+ @rocket_job_output = tabular_output.render(@rocket_job_output)
55
+ end
56
+ end
57
+ end
58
+ end
59
+ end
@@ -0,0 +1,91 @@
1
+ require 'active_support/concern'
2
+
3
+ module RocketJob
4
+ module Batch
5
+ # Rocket Job Batch Throttling Framework.
6
+ #
7
+ # Example:
8
+ # # Do not run any slices for this job when the MySQL slave delay exceeds 5 minutes.
9
+ # class MyJob < RocketJob::Job
10
+ # include RocketJob::Batch
11
+ #
12
+ # # Define a custom mysql throttle
13
+ # # Prevents all slices from this job from running on the current server.
14
+ # define_batch_throttle :mysql_throttle_exceeded?
15
+ #
16
+ # def perform(record)
17
+ # # ....
18
+ # end
19
+ #
20
+ # private
21
+ #
22
+ # # Returns true if the MySQL slave delay exceeds 5 minutes
23
+ # def mysql_throttle_exceeded?
24
+ # status = ActiveRecord::Base.connection.select_one('show slave status')
25
+ # seconds_delay = Hash(status)['Seconds_Behind_Master'].to_i
26
+ # seconds_delay >= 300
27
+ # end
28
+ # end
29
+ module Throttle
30
+ extend ActiveSupport::Concern
31
+
32
+ included do
33
+ class_attribute :rocket_job_batch_throttles
34
+ self.rocket_job_batch_throttles = []
35
+ end
36
+
37
+ module ClassMethods
38
+ # Add a new throttle.
39
+ #
40
+ # Parameters:
41
+ # method_name: [Symbol]
42
+ # Name of method to call to evaluate whether a throttle has been exceeded.
43
+ # Note: Must return true or false.
44
+ # filter: [Symbol|Proc]
45
+ # Name of method to call to return the filter when the throttle has been exceeded.
46
+ # Or, a block that will return the filter.
47
+ # Default: :throttle_filter_class (Throttle all jobs of this class)
48
+ #
49
+ # Note: Throttles are executed in the order they are defined.
50
+ def define_batch_throttle(method_name, filter: :throttle_filter_class)
51
+ unless filter.is_a?(Symbol) || filter.is_a?(Proc)
52
+ raise(ArgumentError, "Filter for #{method_name} must be a Symbol or Proc")
53
+ end
54
+ if batch_throttle?(method_name)
55
+ raise(ArgumentError, "Cannot define #{method_name} twice, undefine previous throttle first")
56
+ end
57
+
58
+ self.rocket_job_batch_throttles += [ThrottleDefinition.new(method_name, filter)]
59
+ end
60
+
61
+ # Undefine a previously defined throttle
62
+ def undefine_batch_throttle(method_name)
63
+ rocket_job_batch_throttles.delete_if { |throttle| throttle.method_name == method_name }
64
+ end
65
+
66
+ # Has a throttle been defined?
67
+ def batch_throttle?(method_name)
68
+ rocket_job_batch_throttles.any? { |throttle| throttle.method_name == method_name }
69
+ end
70
+ end
71
+
72
+ private
73
+
74
+ ThrottleDefinition = Struct.new(:method_name, :filter)
75
+
76
+ # Returns the matching filter, or nil if no throttles were triggered.
77
+ def rocket_job_batch_evaluate_throttles(slice)
78
+ rocket_job_batch_throttles.each do |throttle|
79
+ throttle_exceeded = method(throttle.method_name).arity == 0 ? send(throttle.method_name) : send(throttle.method_name, slice)
80
+ next unless throttle_exceeded
81
+
82
+ logger.debug { "Batch Throttle: #{throttle.method_name} has been exceeded. #{self.class.name}:#{id}" }
83
+ filter = throttle.filter
84
+ return filter.is_a?(Proc) ? filter.call(self) : send(filter)
85
+ end
86
+ nil
87
+ end
88
+
89
+ end
90
+ end
91
+ end
@@ -0,0 +1,53 @@
1
+ require 'active_support/concern'
2
+
3
+ module RocketJob
4
+ module Batch
5
+ # Throttle the number of slices of a specific batch job that are processed at the same time.
6
+ #
7
+ # Example:
8
+ # class MyJob < RocketJob::Job
9
+ # include RocketJob::Batch
10
+ #
11
+ # # Maximum number of slices to process at the same time for each running instance.
12
+ # self.throttle_running_slices = 25
13
+ #
14
+ # def perform(record)
15
+ # # ....
16
+ # end
17
+ # end
18
+ #
19
+ # It attempts to ensure that the number of workers does not exceed this number.
20
+ # This is not a hard limit and it is possible for the number of workers to
21
+ # slightly exceed this value at times. It can also occur that the number of
22
+ # slices running can drop below this number for a short period.
23
+ #
24
+ # This value can be modified while a job is running. The change will be picked
25
+ # up at the start of processing slices, or after processing a slice and
26
+ # `re_check_seconds` has been exceeded.
27
+ #
28
+ # 0 or nil : No limits in place
29
+ #
30
+ # Default: nil
31
+ module ThrottleRunningSlices
32
+ extend ActiveSupport::Concern
33
+
34
+ included do
35
+ field :throttle_running_slices, type: Integer, class_attribute: true, user_editable: true, copy_on_restart: true
36
+
37
+ validates :throttle_running_slices, numericality: {greater_than_or_equal_to: 0}, allow_nil: true
38
+
39
+ define_batch_throttle :throttle_running_slices_exceeded?, filter: :throttle_filter_id
40
+ end
41
+
42
+ private
43
+
44
+ # Returns [Boolean] whether the throttle for this job has been exceeded
45
+ def throttle_running_slices_exceeded?(slice)
46
+ throttle_running_slices &&
47
+ (throttle_running_slices != 0) &&
48
+ (input.running.where(:id.ne => slice.id).count >= throttle_running_slices)
49
+ end
50
+
51
+ end
52
+ end
53
+ end
@@ -0,0 +1,288 @@
1
+ require 'active_support/concern'
2
+
3
+ module RocketJob
4
+ module Batch
5
+ module Worker
6
+ extend ActiveSupport::Concern
7
+
8
+ included do
9
+ # While working on a slice, the current slice is available via this reader
10
+ attr_reader :rocket_job_slice, :rocket_job_record_number
11
+
12
+ private
13
+
14
+ attr_writer :rocket_job_slice, :rocket_job_record_number
15
+ end
16
+
17
+ # Processes records in each available slice for this job. Slices are processed
18
+ # one at a time to allow for concurrent calls to this method to increase
19
+ # throughput. Processing will continue until there are no more slices available
20
+ # for this job.
21
+ #
22
+ # Returns [true|false] whether this job should be excluded from the next lookup
23
+ #
24
+ # Slices are destroyed after their records are successfully processed
25
+ #
26
+ # Results are stored in the output collection if `collect_output?`
27
+ # `nil` results from workers are kept if `collect_nil_output`
28
+ #
29
+ # If an exception was thrown the entire slice of records is marked as failed.
30
+ #
31
+ # If the mongo_ha gem has been loaded, then the connection to mongo is
32
+ # automatically re-established and the job will resume anytime a
33
+ # Mongo connection failure occurs.
34
+ #
35
+ # Thread-safe, can be called by multiple threads at the same time
36
+ def rocket_job_work(worker, re_raise_exceptions = false, filter = {})
37
+ raise 'Job must be started before calling #rocket_job_work' unless running?
38
+ start_time = Time.now
39
+ if sub_state != :processing
40
+ rocket_job_handle_callbacks(worker, re_raise_exceptions)
41
+ return false unless running?
42
+ end
43
+
44
+ while !worker.shutdown?
45
+ if slice = input.next_slice(worker.name)
46
+ # Grab a slice before checking the throttle to reduce concurrency race condition.
47
+ if new_filter = rocket_job_batch_evaluate_throttles(slice)
48
+ # Restore retrieved slice so that other workers can process it later.
49
+ slice.set(worker_name: nil, state: :queued, started_at: nil)
50
+ self.class.send(:rocket_job_merge_filter, filter, new_filter)
51
+ return true
52
+ end
53
+
54
+ SemanticLogger.named_tagged(slice: slice.id.to_s) do
55
+ rocket_job_process_slice(slice, re_raise_exceptions)
56
+ end
57
+ else
58
+ break if record_count && rocket_job_batch_complete?(worker.name)
59
+ logger.debug 'No more work available for this job'
60
+ self.class.send(:rocket_job_merge_filter, filter, throttle_filter_id)
61
+ return true
62
+ end
63
+
64
+ # Allow new jobs with a higher priority to interrupt this job
65
+ break if (Time.now - start_time) >= Config.instance.re_check_seconds
66
+ end
67
+ false
68
+ end
69
+
70
+ # Prior to a job being made available for processing it can be processed one
71
+ # slice at a time.
72
+ #
73
+ # For example, to extract the header row which would be in the first slice.
74
+ #
75
+ # Returns [Integer] the number of records processed in the slice
76
+ #
77
+ # Note: The slice will be removed from processing when this method completes
78
+ def work_first_slice(&block)
79
+ raise '#work_first_slice can only be called from within before_batch callbacks' unless sub_state == :before
80
+ # TODO Make these settings configurable
81
+ count = 0
82
+ wait_seconds = 5
83
+ while (slice = input.first).nil?
84
+ break if count > 10
85
+ logger.info "First slice has not arrived yet, sleeping for #{wait_seconds} seconds"
86
+ sleep wait_seconds
87
+ count += 1
88
+ end
89
+
90
+ if slice = input.first
91
+ SemanticLogger.named_tagged(slice: slice.id.to_s) do
92
+ # TODO Persist that the first slice is being processed by this worker
93
+ slice.start
94
+ rocket_job_process_slice(slice, true, &block)
95
+ end
96
+ else
97
+ # No records processed
98
+ 0
99
+ end
100
+ end
101
+
102
+ # Returns [Array<ActiveWorker>] All workers actively working on this job
103
+ def rocket_job_active_workers(server_name = nil)
104
+ servers = []
105
+ case sub_state
106
+ when :before, :after
107
+ unless server_name && !worker_on_server?(server_name)
108
+ servers << ActiveWorker.new(worker_name, started_at, self) if running?
109
+ end
110
+ when :processing
111
+ query = input.running
112
+ query = query.where(worker_name: /\A#{server_name}/) if server_name
113
+ query.each do |slice|
114
+ servers << ActiveWorker.new(slice.worker_name, slice.started_at, self)
115
+ end
116
+ end
117
+ servers
118
+ end
119
+
120
+ private
121
+
122
+ # Process a single slice from Mongo
123
+ # Once the slice has been successfully processed it will be removed from the input collection
124
+ # Returns [Integer] the number of records successfully processed
125
+ def rocket_job_process_slice(slice, re_raise_exceptions)
126
+ slice_record_number = 0
127
+ @rocket_job_record_number = slice.first_record_number || 0
128
+ @rocket_job_slice = slice
129
+ run_callbacks :slice do
130
+ RocketJob::Sliced::Writer::Output.collect(self, slice) do |writer|
131
+ slice.each do |record|
132
+ slice_record_number += 1
133
+ SemanticLogger.named_tagged(record: @rocket_job_record_number) do
134
+ if _perform_callbacks.empty?
135
+ @rocket_job_output = block_given? ? yield(record) : perform(record)
136
+ else
137
+ # Allows @rocket_job_input to be modified by before/around callbacks
138
+ @rocket_job_input = record
139
+ # Allow callbacks to fail, complete or abort the job
140
+ if running?
141
+ if block_given?
142
+ run_callbacks(:perform) { @rocket_job_output = yield(@rocket_job_input) }
143
+ else
144
+ # Allows @rocket_job_output to be modified by after/around callbacks
145
+ run_callbacks(:perform) { @rocket_job_output = perform(@rocket_job_input) }
146
+ end
147
+ end
148
+ end
149
+ writer << @rocket_job_output
150
+ end
151
+ # JRuby says self.rocket_job_record_number= is private and cannot be accessed
152
+ @rocket_job_record_number += 1
153
+ end
154
+ end
155
+ @rocket_job_input = @rocket_job_slice = @rocket_job_output = nil
156
+ end
157
+
158
+ # On successful completion remove the slice from the input queue
159
+ # TODO Option to complete slice instead of destroying it to retain input data
160
+ slice.destroy
161
+ slice_record_number
162
+ rescue Exception => exc
163
+ slice.fail!(exc, slice_record_number)
164
+ raise exc if re_raise_exceptions
165
+ slice_record_number > 0 ? slice_record_number - 1 : 0
166
+ end
167
+
168
+ # Checks for completion and runs after_batch if defined
169
+ # Returns true if the job is now complete/aborted/failed
170
+ def rocket_job_batch_complete?(worker_name)
171
+ return true unless running?
172
+ return false unless record_count
173
+
174
+ # Only failed slices left?
175
+ input_count = input.count
176
+ failed_count = input.failed.count
177
+ if (failed_count > 0) && (input_count == failed_count)
178
+ # Reload to pull in any counters or other data that was modified.
179
+ reload unless new_record?
180
+ if may_fail?
181
+ fail_job = true
182
+ unless new_record?
183
+ # Fail job iff no other worker has already finished it
184
+ # Must set write concern to at least 1 since we need the nModified back
185
+ result = self.class.with(write: {w: 1}) do |query|
186
+ query.
187
+ where(id: id, state: :running, sub_state: :processing).
188
+ update({'$set' => {state: :failed, worker_name: worker_name}})
189
+ end
190
+ fail_job = false unless result.modified_count > 0
191
+ end
192
+ if fail_job
193
+ message = "#{failed_count} slices failed to process"
194
+ self.exception = JobException.new(message: message)
195
+ fail!(worker_name, message)
196
+ end
197
+ end
198
+ return true
199
+ end
200
+
201
+ # Any work left?
202
+ return false if input_count > 0
203
+
204
+ # If the job was not saved to the queue, do not save any changes
205
+ if new_record?
206
+ rocket_job_batch_run_after_callbacks(false)
207
+ return true
208
+ end
209
+
210
+ # Complete job iff no other worker has already completed it
211
+ # Must set write concern to at least 1 since we need the nModified back
212
+ result = self.class.with(write: {w: 1}) do |query|
213
+ query.
214
+ where(id: id, state: :running, sub_state: :processing).
215
+ update('$set' => {sub_state: :after, worker_name: worker_name})
216
+ end
217
+
218
+ # Reload to pull in any counters or other data that was modified.
219
+ reload
220
+ if result.modified_count > 0
221
+ rocket_job_batch_run_after_callbacks(false)
222
+ else
223
+ # Repeat cleanup in case this worker was still running when the job was aborted
224
+ cleanup! if aborted?
225
+ end
226
+ true
227
+ end
228
+
229
+ # Run the before_batch callbacks
230
+ # Saves the current state before and after running callbacks if callbacks present
231
+ def rocket_job_batch_run_before_callbacks
232
+ unless _before_batch_callbacks.empty?
233
+ self.sub_state = :before
234
+ save! unless new_record? || destroyed?
235
+ logger.measure_info(
236
+ 'before_batch',
237
+ metric: "#{self.class.name}/before_batch",
238
+ log_exception: :full,
239
+ on_exception_level: :error,
240
+ silence: log_level
241
+ ) do
242
+ run_callbacks(:before_batch)
243
+ end
244
+ end
245
+ self.sub_state = :processing
246
+ save! unless new_record? || destroyed?
247
+ end
248
+
249
+ # Run the after_batch callbacks
250
+ # Saves the current state before and after running callbacks if callbacks present
251
+ def rocket_job_batch_run_after_callbacks(save_before = true)
252
+ unless _after_batch_callbacks.empty?
253
+ self.sub_state = :after
254
+ save! if save_before && !new_record? && !destroyed?
255
+ logger.measure_info(
256
+ 'after_batch',
257
+ metric: "#{self.class.name}/after_batch",
258
+ log_exception: :full,
259
+ on_exception_level: :error,
260
+ silence: log_level
261
+ ) do
262
+ run_callbacks(:after_batch)
263
+ end
264
+ end
265
+ if new_record? || destroyed?
266
+ complete if may_complete?
267
+ else
268
+ may_complete? ? complete! : save!
269
+ end
270
+ end
271
+
272
+ # Handle before and after callbacks
273
+ def rocket_job_handle_callbacks(worker, re_raise_exceptions)
274
+ rocket_job_fail_on_exception!(worker.name, re_raise_exceptions) do
275
+ # If this is the first worker to pickup this job
276
+ if sub_state == :before
277
+ rocket_job_batch_run_before_callbacks
278
+ # Check for 0 record jobs
279
+ rocket_job_batch_complete?(worker.name) if running?
280
+ elsif sub_state == :after
281
+ rocket_job_batch_run_after_callbacks
282
+ end
283
+ end
284
+ end
285
+
286
+ end
287
+ end
288
+ end