rocketjob 3.5.2 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. checksums.yaml +4 -4
  2. data/README.md +63 -1
  3. data/bin/rocketjob +1 -0
  4. data/bin/rocketjob_batch_perf +11 -0
  5. data/lib/rocket_job/batch.rb +32 -0
  6. data/lib/rocket_job/batch/callbacks.rb +40 -0
  7. data/lib/rocket_job/batch/io.rb +154 -0
  8. data/lib/rocket_job/batch/logger.rb +57 -0
  9. data/lib/rocket_job/batch/lower_priority.rb +54 -0
  10. data/lib/rocket_job/batch/model.rb +157 -0
  11. data/lib/rocket_job/batch/performance.rb +99 -0
  12. data/lib/rocket_job/batch/result.rb +8 -0
  13. data/lib/rocket_job/batch/results.rb +9 -0
  14. data/lib/rocket_job/batch/state_machine.rb +102 -0
  15. data/lib/rocket_job/batch/statistics.rb +88 -0
  16. data/lib/rocket_job/batch/tabular.rb +56 -0
  17. data/lib/rocket_job/batch/tabular/input.rb +123 -0
  18. data/lib/rocket_job/batch/tabular/output.rb +59 -0
  19. data/lib/rocket_job/batch/throttle.rb +91 -0
  20. data/lib/rocket_job/batch/throttle_running_slices.rb +53 -0
  21. data/lib/rocket_job/batch/worker.rb +288 -0
  22. data/lib/rocket_job/cli.rb +29 -7
  23. data/lib/rocket_job/config.rb +1 -1
  24. data/lib/rocket_job/extensions/mongoid/clients/options.rb +37 -0
  25. data/lib/rocket_job/extensions/mongoid/contextual/mongo.rb +17 -0
  26. data/lib/rocket_job/extensions/mongoid/factory.rb +4 -4
  27. data/lib/rocket_job/extensions/mongoid_5/clients/options.rb +38 -0
  28. data/lib/rocket_job/extensions/mongoid_5/contextual/mongo.rb +64 -0
  29. data/lib/rocket_job/extensions/mongoid_5/factory.rb +13 -0
  30. data/lib/rocket_job/jobs/on_demand_batch_job.rb +127 -0
  31. data/lib/rocket_job/jobs/performance_job.rb +18 -0
  32. data/lib/rocket_job/jobs/upload_file_job.rb +2 -5
  33. data/lib/rocket_job/plugins/document.rb +2 -8
  34. data/lib/rocket_job/plugins/job/persistence.rb +6 -4
  35. data/lib/rocket_job/plugins/job/throttle.rb +3 -6
  36. data/lib/rocket_job/plugins/job/worker.rb +2 -2
  37. data/lib/rocket_job/server.rb +14 -3
  38. data/lib/rocket_job/sliced/input.rb +336 -0
  39. data/lib/rocket_job/sliced/output.rb +99 -0
  40. data/lib/rocket_job/sliced/slice.rb +166 -0
  41. data/lib/rocket_job/sliced/slices.rb +166 -0
  42. data/lib/rocket_job/sliced/writer/input.rb +60 -0
  43. data/lib/rocket_job/sliced/writer/output.rb +82 -0
  44. data/lib/rocket_job/version.rb +1 -1
  45. data/lib/rocket_job/worker.rb +2 -2
  46. data/lib/rocketjob.rb +28 -0
  47. metadata +51 -62
  48. data/test/config/database.yml +0 -5
  49. data/test/config/mongoid.yml +0 -88
  50. data/test/config_test.rb +0 -10
  51. data/test/dirmon_entry_test.rb +0 -313
  52. data/test/dirmon_job_test.rb +0 -216
  53. data/test/files/text.txt +0 -3
  54. data/test/job_test.rb +0 -71
  55. data/test/jobs/housekeeping_job_test.rb +0 -102
  56. data/test/jobs/on_demand_job_test.rb +0 -59
  57. data/test/jobs/upload_file_job_test.rb +0 -107
  58. data/test/plugins/cron_test.rb +0 -166
  59. data/test/plugins/job/callbacks_test.rb +0 -166
  60. data/test/plugins/job/defaults_test.rb +0 -53
  61. data/test/plugins/job/logger_test.rb +0 -56
  62. data/test/plugins/job/model_test.rb +0 -94
  63. data/test/plugins/job/persistence_test.rb +0 -94
  64. data/test/plugins/job/state_machine_test.rb +0 -116
  65. data/test/plugins/job/throttle_test.rb +0 -111
  66. data/test/plugins/job/worker_test.rb +0 -199
  67. data/test/plugins/processing_window_test.rb +0 -109
  68. data/test/plugins/restart_test.rb +0 -193
  69. data/test/plugins/retry_test.rb +0 -88
  70. data/test/plugins/singleton_test.rb +0 -92
  71. data/test/plugins/state_machine_event_callbacks_test.rb +0 -102
  72. data/test/plugins/state_machine_test.rb +0 -67
  73. data/test/plugins/transaction_test.rb +0 -84
  74. data/test/test_db.sqlite3 +0 -0
  75. data/test/test_helper.rb +0 -17
data/lib/rocket_job/batch/tabular.rb
@@ -0,0 +1,56 @@
+ module RocketJob
+   module Batch
+     # Format output results.
+     #
+     # Takes Batch::Results, Batch::Result, Hash, Array, or String and renders it for output.
+     #
+     # Example:
+     #
+     #   tabular = Tabular.new(
+     #     main: IOStreams::Tabular.new(columns: main_file_headers, format: tabular_output_format),
+     #     exceptions: IOStreams::Tabular.new(columns: exception_file_headers, format: tabular_output_format)
+     #   )
+     #
+     #   tabular.render(row)
+     class Tabular
+       autoload :Input, 'rocket_job/batch/tabular/input'
+       autoload :Output, 'rocket_job/batch/tabular/output'
+
+       def initialize(map)
+         @map = map
+       end
+
+       def [](category = :main)
+         @map[category] || raise("No tabular map defined for category: #{category.inspect}")
+       end
+
+       # Iterate over responses and format using Tabular
+       def render(row, category = :main)
+         if row.is_a?(Batch::Results)
+           results = Batch::Results.new
+           row.each { |result| results << render(result) }
+           results
+         elsif row.is_a?(Batch::Result)
+           row.value = self[row.category].render(row.value)
+           row
+         elsif row.blank?
+           nil
+         else
+           self[category].render(row)
+         end
+       end
+
+       def render_header(category = :main)
+         self[category].render_header
+       end
+
+       def requires_header?(category = :main)
+         self[category].requires_header?
+       end
+
+       def header?(category = :main)
+         self[category].header?
+       end
+     end
+   end
+ end
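The new `RocketJob::Batch::Tabular` helper is used by the Input/Output plugins below. For context, a minimal sketch of using it directly; the column names and the rendered output shown in comments are illustrative only:

```ruby
require 'rocketjob'

# Hypothetical headers, for illustration only.
main_headers      = %w[id name email]
exception_headers = %w[id error]

tabular = RocketJob::Batch::Tabular.new(
  main:       IOStreams::Tabular.new(columns: main_headers, format: :csv),
  exceptions: IOStreams::Tabular.new(columns: exception_headers, format: :csv)
)

tabular.render_header(:main)
# => "id,name,email" (approximate)

tabular.render('id' => '1', 'name' => 'Jack', 'email' => 'jack@example.com')
# => "1,Jack,jack@example.com" (approximate)
```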
data/lib/rocket_job/batch/tabular/input.rb
@@ -0,0 +1,123 @@
+ require 'active_support/concern'
+
+ module RocketJob
+   module Batch
+     class Tabular
+       # For the simple case where all `input_categories` have the same format.
+       # If multiple input categories are used with different formats, then use IOStreams::Tabular directly
+       # instead of this plugin.
+       module Input
+         extend ActiveSupport::Concern
+
+         included do
+           field :tabular_input_header, type: Array, class_attribute: true, user_editable: true
+           field :tabular_input_format, type: Symbol, default: :csv, class_attribute: true, user_editable: true
+
+           # tabular_input_mode: [:line | :row | :record]
+           #   :line
+           #     Uploads the file a line (String) at a time for processing by workers.
+           #   :row
+           #     Parses each line from the file as an Array and uploads each array for processing by workers.
+           #   :record
+           #     Parses each line from the file into a Hash and uploads each hash for processing by workers.
+           # See IOStream#each_line, IOStream#each_row, and IOStream#each_record.
+           field :tabular_input_mode, type: Symbol, default: :line, class_attribute: true, user_editable: true, copy_on_restart: true
+
+           validates_inclusion_of :tabular_input_format, in: IOStreams::Tabular.registered_formats
+           validates_inclusion_of :tabular_input_mode, in: %i[line row record]
+           validate :tabular_input_header_present
+
+           class_attribute :tabular_input_white_list
+           class_attribute :tabular_input_required
+           class_attribute :tabular_input_skip_unknown
+
+           # Cleanse all uploaded data by removing non-printable characters
+           # and any characters that cannot be converted to UTF-8
+           class_attribute :tabular_input_type
+
+           self.tabular_input_white_list = nil
+           self.tabular_input_required = nil
+           self.tabular_input_skip_unknown = true
+           self.tabular_input_type = :text
+
+           before_perform :tabular_input_render
+         end
+
+         # Extract the header line during the upload.
+         #
+         # Overrides: RocketJob::Batch::IO#upload
+         #
+         # Notes:
+         # - When supplying a block the header must be set manually
+         def upload(file_name_or_io = nil, **args, &block)
+           if tabular_input_type == :text
+             args[:encoding] = 'UTF-8'
+             args[:encode_cleaner] = :printable
+             args[:encode_replace] = ''
+           end
+
+           # If an input header is not required, then we don't extract it.
+           return super(file_name_or_io, stream_mode: tabular_input_mode, **args, &block) unless tabular_input.header?
+
+           # If the header is already set then it is not expected in the file
+           if tabular_input_header.present?
+             tabular_input_cleanse_header
+             return super(file_name_or_io, stream_mode: tabular_input_mode, **args, &block)
+           end
+
+           case tabular_input_mode
+           when :line
+             parse_header = -> (line) do
+               tabular_input.parse_header(line)
+               tabular_input_cleanse_header
+               self.tabular_input_header = tabular_input.header.columns
+             end
+             super(file_name_or_io, on_first: parse_header, stream_mode: tabular_input_mode, **args, &block)
+           when :row
+             set_header = -> (row) do
+               tabular_input.header.columns = row
+               tabular_input_cleanse_header
+               self.tabular_input_header = tabular_input.header.columns
+             end
+             super(file_name_or_io, on_first: set_header, stream_mode: tabular_input_mode, **args, &block)
+           when :record
+             super(file_name_or_io, stream_mode: tabular_input_mode, **args, &block)
+           else
+             raise(ArgumentError, "Invalid tabular_input_mode: #{tabular_input_mode.inspect}")
+           end
+         end
+
+         private
+
+         # Shared instance used for this slice, by a single worker (thread)
+         def tabular_input
+           @tabular_input ||= IOStreams::Tabular.new(
+             columns: tabular_input_header,
+             allowed_columns: tabular_input_white_list,
+             required_columns: tabular_input_required,
+             skip_unknown: tabular_input_skip_unknown,
+             format: tabular_input_format
+           )
+         end
+
+         def tabular_input_render
+           @rocket_job_input = tabular_input.record_parse(@rocket_job_input) unless tabular_input_header.blank? && tabular_input.header?
+         end
+
+         # Cleanse custom input header if supplied.
+         def tabular_input_cleanse_header
+           ignored_columns = tabular_input.header.cleanse!
+           logger.warn('Stripped out invalid columns from custom header', ignored_columns) unless ignored_columns.empty?
+
+           self.tabular_input_header = tabular_input.header.columns
+         end
+
+         def tabular_input_header_present
+           return if tabular_input_header.present? || !tabular_input.header? || (tabular_input_mode == :record)
+
+           errors.add(:tabular_input_header, "is required when tabular_input_format is #{tabular_input_format.inspect}")
+         end
+       end
+     end
+   end
+ end
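For context, a minimal sketch (job and file names are hypothetical) of a batch job that mixes in this new plugin; with the default `:line` mode the header row is extracted during upload and each line is parsed into a Hash by `tabular_input_render` just before `perform`:

```ruby
require 'rocketjob'

class ImportUsersJob < RocketJob::Job
  include RocketJob::Batch
  include RocketJob::Batch::Tabular::Input

  # :csv is the default; any format in IOStreams::Tabular.registered_formats is allowed.
  self.tabular_input_format = :csv

  def perform(record)
    # record arrives as a Hash keyed by the cleansed header, e.g. {"id" => "1", "name" => "Jack"}
    logger.info('Importing user', record)
  end
end

# Typical usage (requires a running MongoDB and Rocket Job servers):
# job = ImportUsersJob.new
# job.upload('users.csv')   # extracts the header line, slices the rest
# job.save!
```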
data/lib/rocket_job/batch/tabular/output.rb
@@ -0,0 +1,59 @@
+ require 'active_support/concern'
+
+ module RocketJob
+   module Batch
+     class Tabular
+       # For the simple case where all `output_categories` have the same format.
+       # If multiple output categories are used with different formats, then use IOStreams::Tabular directly
+       # instead of this plugin.
+       module Output
+         extend ActiveSupport::Concern
+
+         included do
+           field :tabular_output_header, type: Array, class_attribute: true, user_editable: true, copy_on_restart: true
+           field :tabular_output_format, type: Symbol, default: :csv, class_attribute: true, user_editable: true, copy_on_restart: true
+
+           validates_inclusion_of :tabular_output_format, in: IOStreams::Tabular.registered_formats
+
+           after_perform :tabular_output_render
+         end
+
+         # Clear out cached tabular_output any time header or format is changed.
+         def tabular_output_header=(tabular_output_header)
+           super(tabular_output_header)
+           @tabular_output = nil
+         end
+
+         def tabular_output_format=(tabular_output_format)
+           super(tabular_output_format)
+           @tabular_output = nil
+         end
+
+         # Overrides: `RocketJob::Batch::IO#download` to add the `tabular_output_header`.
+         def download(file_name_or_io = nil, category: :main, **args, &block)
+           # No header required
+           return super(file_name_or_io, category: category, **args, &block) unless tabular_output.requires_header?(category)
+
+           header = tabular_output.render_header(category)
+           super(file_name_or_io, header_line: header, category: category, **args, &block)
+         end
+
+         private
+
+         # Delimited instance used for this slice, by a single worker (thread)
+         def tabular_output
+           @tabular_output ||= Tabular.new(
+             main: IOStreams::Tabular.new(columns: tabular_output_header, format: tabular_output_format)
+           )
+         end
+
+         # Render the output from the perform.
+         def tabular_output_render
+           return unless collect_output?
+
+           @rocket_job_output = tabular_output.render(@rocket_job_output)
+         end
+       end
+     end
+   end
+ end
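Correspondingly, a minimal sketch (names are hypothetical, and it assumes the `collect_output` setting behind the `collect_output?` check above) of a job whose results are rendered to CSV and downloaded with the rendered header line:

```ruby
require 'rocketjob'

class ExportUsersJob < RocketJob::Job
  include RocketJob::Batch
  include RocketJob::Batch::Tabular::Output

  self.collect_output        = true
  self.tabular_output_format = :csv
  self.tabular_output_header = %w[id name email]

  def perform(record)
    # Each returned Hash is rendered to a CSV line by tabular_output_render.
    {'id' => record['id'], 'name' => record['name'], 'email' => record['email']}
  end
end

# After the job completes:
# job.download('users_out.csv')   # writes the header line, then one line per result
```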
data/lib/rocket_job/batch/throttle.rb
@@ -0,0 +1,91 @@
+ require 'active_support/concern'
+
+ module RocketJob
+   module Batch
+     # Rocket Job Batch Throttling Framework.
+     #
+     # Example:
+     #   # Do not run any slices for this job when the MySQL slave delay exceeds 5 minutes.
+     #   class MyJob < RocketJob::Job
+     #     include RocketJob::Batch
+     #
+     #     # Define a custom mysql throttle
+     #     # Prevents all slices from this job from running on the current server.
+     #     define_batch_throttle :mysql_throttle_exceeded?
+     #
+     #     def perform(record)
+     #       # ....
+     #     end
+     #
+     #     private
+     #
+     #     # Returns true if the MySQL slave delay exceeds 5 minutes
+     #     def mysql_throttle_exceeded?
+     #       status = ActiveRecord::Base.connection.select_one('show slave status')
+     #       seconds_delay = Hash(status)['Seconds_Behind_Master'].to_i
+     #       seconds_delay >= 300
+     #     end
+     #   end
+     module Throttle
+       extend ActiveSupport::Concern
+
+       included do
+         class_attribute :rocket_job_batch_throttles
+         self.rocket_job_batch_throttles = []
+       end
+
+       module ClassMethods
+         # Add a new throttle.
+         #
+         # Parameters:
+         #   method_name: [Symbol]
+         #     Name of method to call to evaluate whether a throttle has been exceeded.
+         #     Note: Must return true or false.
+         #   filter: [Symbol|Proc]
+         #     Name of method to call to return the filter when the throttle has been exceeded.
+         #     Or, a block that will return the filter.
+         #     Default: :throttle_filter_class (Throttle all jobs of this class)
+         #
+         # Note: Throttles are executed in the order they are defined.
+         def define_batch_throttle(method_name, filter: :throttle_filter_class)
+           unless filter.is_a?(Symbol) || filter.is_a?(Proc)
+             raise(ArgumentError, "Filter for #{method_name} must be a Symbol or Proc")
+           end
+           if batch_throttle?(method_name)
+             raise(ArgumentError, "Cannot define #{method_name} twice, undefine previous throttle first")
+           end
+
+           self.rocket_job_batch_throttles += [ThrottleDefinition.new(method_name, filter)]
+         end
+
+         # Undefine a previously defined throttle
+         def undefine_batch_throttle(method_name)
+           rocket_job_batch_throttles.delete_if { |throttle| throttle.method_name == method_name }
+         end
+
+         # Has a throttle been defined?
+         def batch_throttle?(method_name)
+           rocket_job_batch_throttles.any? { |throttle| throttle.method_name == method_name }
+         end
+       end
+
+       private
+
+       ThrottleDefinition = Struct.new(:method_name, :filter)
+
+       # Returns the matching filter, or nil if no throttles were triggered.
+       def rocket_job_batch_evaluate_throttles(slice)
+         rocket_job_batch_throttles.each do |throttle|
+           throttle_exceeded = method(throttle.method_name).arity == 0 ? send(throttle.method_name) : send(throttle.method_name, slice)
+           next unless throttle_exceeded
+
+           logger.debug { "Batch Throttle: #{throttle.method_name} has been exceeded. #{self.class.name}:#{id}" }
+           filter = throttle.filter
+           return filter.is_a?(Proc) ? filter.call(self) : send(filter)
+         end
+         nil
+       end
+
+     end
+   end
+ end
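For context, a minimal sketch (job, method, and `ExternalApi` are hypothetical) of a custom batch throttle that supplies a Proc filter instead of the default `:throttle_filter_class`; the Proc receives the job and returns the Mongoid filter to merge, roughly what the built-in `:throttle_filter_id` does:

```ruby
require 'rocketjob'

class SyncAccountsJob < RocketJob::Job
  include RocketJob::Batch

  # Skip only this job instance while the external service is saturated.
  define_batch_throttle :external_api_saturated?, filter: ->(job) { {:id.nin => [job.id]} }

  def perform(record)
    # ... push the record to the external service ...
  end

  private

  # Returns true when no further slices should be started right now.
  def external_api_saturated?
    ExternalApi.queue_depth > 1_000   # hypothetical saturation signal
  end
end
```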
data/lib/rocket_job/batch/throttle_running_slices.rb
@@ -0,0 +1,53 @@
+ require 'active_support/concern'
+
+ module RocketJob
+   module Batch
+     # Throttle the number of slices of a specific batch job that are processed at the same time.
+     #
+     # Example:
+     #   class MyJob < RocketJob::Job
+     #     include RocketJob::Batch
+     #
+     #     # Maximum number of slices to process at the same time for each running instance.
+     #     self.throttle_running_slices = 25
+     #
+     #     def perform(record)
+     #       # ....
+     #     end
+     #   end
+     #
+     # It attempts to ensure that the number of workers does not exceed this number.
+     # This is not a hard limit and it is possible for the number of workers to
+     # slightly exceed this value at times. It can also occur that the number of
+     # slices running can drop below this number for a short period.
+     #
+     # This value can be modified while a job is running. The change will be picked
+     # up at the start of processing slices, or after processing a slice and
+     # `re_check_seconds` has been exceeded.
+     #
+     # 0 or nil : No limits in place
+     #
+     # Default: nil
+     module ThrottleRunningSlices
+       extend ActiveSupport::Concern
+
+       included do
+         field :throttle_running_slices, type: Integer, class_attribute: true, user_editable: true, copy_on_restart: true
+
+         validates :throttle_running_slices, numericality: {greater_than_or_equal_to: 0}, allow_nil: true
+
+         define_batch_throttle :throttle_running_slices_exceeded?, filter: :throttle_filter_id
+       end
+
+       private
+
+       # Returns [Boolean] whether the throttle for this job has been exceeded
+       def throttle_running_slices_exceeded?(slice)
+         throttle_running_slices &&
+           (throttle_running_slices != 0) &&
+           (input.running.where(:id.ne => slice.id).count >= throttle_running_slices)
+       end
+
+     end
+   end
+ end
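A minimal sketch (job name is hypothetical) of capping concurrent slices and raising the cap on a live job; per the comment above, workers pick up the change at the start of the next slice or once `re_check_seconds` has elapsed:

```ruby
require 'rocketjob'

class NightlyRatingJob < RocketJob::Job
  include RocketJob::Batch

  # Process at most 10 slices of this job concurrently.
  self.throttle_running_slices = 10

  def perform(record)
    # ...
  end
end

# Later, while the job is running (standard Mongoid persistence call):
# job = NightlyRatingJob.where(state: :running).first
# job.update!(throttle_running_slices: 25)
```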
data/lib/rocket_job/batch/worker.rb
@@ -0,0 +1,288 @@
+ require 'active_support/concern'
+
+ module RocketJob
+   module Batch
+     module Worker
+       extend ActiveSupport::Concern
+
+       included do
+         # While working on a slice, the current slice is available via this reader
+         attr_reader :rocket_job_slice, :rocket_job_record_number
+
+         private
+
+         attr_writer :rocket_job_slice, :rocket_job_record_number
+       end
+
+       # Processes records in each available slice for this job. Slices are processed
+       # one at a time to allow for concurrent calls to this method to increase
+       # throughput. Processing will continue until there are no more slices available
+       # for this job.
+       #
+       # Returns [true|false] whether this job should be excluded from the next lookup
+       #
+       # Slices are destroyed after their records are successfully processed
+       #
+       # Results are stored in the output collection if `collect_output?`
+       # `nil` results from workers are kept if `collect_nil_output`
+       #
+       # If an exception was thrown, the entire slice of records is marked as failed.
+       #
+       # If the mongo_ha gem has been loaded, then the connection to mongo is
+       # automatically re-established and the job will resume anytime a
+       # Mongo connection failure occurs.
+       #
+       # Thread-safe, can be called by multiple threads at the same time
+       def rocket_job_work(worker, re_raise_exceptions = false, filter = {})
+         raise 'Job must be started before calling #rocket_job_work' unless running?
+         start_time = Time.now
+         if sub_state != :processing
+           rocket_job_handle_callbacks(worker, re_raise_exceptions)
+           return false unless running?
+         end
+
+         while !worker.shutdown?
+           if slice = input.next_slice(worker.name)
+             # Grab a slice before checking the throttle to reduce concurrency race condition.
+             if new_filter = rocket_job_batch_evaluate_throttles(slice)
+               # Restore retrieved slice so that other workers can process it later.
+               slice.set(worker_name: nil, state: :queued, started_at: nil)
+               self.class.send(:rocket_job_merge_filter, filter, new_filter)
+               return true
+             end
+
+             SemanticLogger.named_tagged(slice: slice.id.to_s) do
+               rocket_job_process_slice(slice, re_raise_exceptions)
+             end
+           else
+             break if record_count && rocket_job_batch_complete?(worker.name)
+             logger.debug 'No more work available for this job'
+             self.class.send(:rocket_job_merge_filter, filter, throttle_filter_id)
+             return true
+           end
+
+           # Allow new jobs with a higher priority to interrupt this job
+           break if (Time.now - start_time) >= Config.instance.re_check_seconds
+         end
+         false
+       end
+
+       # Prior to a job being made available for processing it can be processed one
+       # slice at a time.
+       #
+       # For example, to extract the header row which would be in the first slice.
+       #
+       # Returns [Integer] the number of records processed in the slice
+       #
+       # Note: The slice will be removed from processing when this method completes
+       def work_first_slice(&block)
+         raise '#work_first_slice can only be called from within before_batch callbacks' unless sub_state == :before
+         # TODO Make these settings configurable
+         count = 0
+         wait_seconds = 5
+         while (slice = input.first).nil?
+           break if count > 10
+           logger.info "First slice has not arrived yet, sleeping for #{wait_seconds} seconds"
+           sleep wait_seconds
+           count += 1
+         end
+
+         if slice = input.first
+           SemanticLogger.named_tagged(slice: slice.id.to_s) do
+             # TODO Persist that the first slice is being processed by this worker
+             slice.start
+             rocket_job_process_slice(slice, true, &block)
+           end
+         else
+           # No records processed
+           0
+         end
+       end
+
+       # Returns [Array<ActiveWorker>] All workers actively working on this job
+       def rocket_job_active_workers(server_name = nil)
+         servers = []
+         case sub_state
+         when :before, :after
+           unless server_name && !worker_on_server?(server_name)
+             servers << ActiveWorker.new(worker_name, started_at, self) if running?
+           end
+         when :processing
+           query = input.running
+           query = query.where(worker_name: /\A#{server_name}/) if server_name
+           query.each do |slice|
+             servers << ActiveWorker.new(slice.worker_name, slice.started_at, self)
+           end
+         end
+         servers
+       end
+
+       private
+
+       # Process a single slice from Mongo
+       # Once the slice has been successfully processed it will be removed from the input collection
+       # Returns [Integer] the number of records successfully processed
+       def rocket_job_process_slice(slice, re_raise_exceptions)
+         slice_record_number = 0
+         @rocket_job_record_number = slice.first_record_number || 0
+         @rocket_job_slice = slice
+         run_callbacks :slice do
+           RocketJob::Sliced::Writer::Output.collect(self, slice) do |writer|
+             slice.each do |record|
+               slice_record_number += 1
+               SemanticLogger.named_tagged(record: @rocket_job_record_number) do
+                 if _perform_callbacks.empty?
+                   @rocket_job_output = block_given? ? yield(record) : perform(record)
+                 else
+                   # Allows @rocket_job_input to be modified by before/around callbacks
+                   @rocket_job_input = record
+                   # Allow callbacks to fail, complete or abort the job
+                   if running?
+                     if block_given?
+                       run_callbacks(:perform) { @rocket_job_output = yield(@rocket_job_input) }
+                     else
+                       # Allows @rocket_job_output to be modified by after/around callbacks
+                       run_callbacks(:perform) { @rocket_job_output = perform(@rocket_job_input) }
+                     end
+                   end
+                 end
+                 writer << @rocket_job_output
+               end
+               # JRuby says self.rocket_job_record_number= is private and cannot be accessed
+               @rocket_job_record_number += 1
+             end
+           end
+           @rocket_job_input = @rocket_job_slice = @rocket_job_output = nil
+         end
+
+         # On successful completion remove the slice from the input queue
+         # TODO Option to complete slice instead of destroying it to retain input data
+         slice.destroy
+         slice_record_number
+       rescue Exception => exc
+         slice.fail!(exc, slice_record_number)
+         raise exc if re_raise_exceptions
+         slice_record_number > 0 ? slice_record_number - 1 : 0
+       end
+
+       # Checks for completion and runs after_batch if defined
+       # Returns true if the job is now complete/aborted/failed
+       def rocket_job_batch_complete?(worker_name)
+         return true unless running?
+         return false unless record_count
+
+         # Only failed slices left?
+         input_count = input.count
+         failed_count = input.failed.count
+         if (failed_count > 0) && (input_count == failed_count)
+           # Reload to pull in any counters or other data that was modified.
+           reload unless new_record?
+           if may_fail?
+             fail_job = true
+             unless new_record?
+               # Fail job iff no other worker has already finished it
+               # Must set write concern to at least 1 since we need the nModified back
+               result = self.class.with(write: {w: 1}) do |query|
+                 query.
+                   where(id: id, state: :running, sub_state: :processing).
+                   update({'$set' => {state: :failed, worker_name: worker_name}})
+               end
+               fail_job = false unless result.modified_count > 0
+             end
+             if fail_job
+               message = "#{failed_count} slices failed to process"
+               self.exception = JobException.new(message: message)
+               fail!(worker_name, message)
+             end
+           end
+           return true
+         end
+
+         # Any work left?
+         return false if input_count > 0
+
+         # If the job was not saved to the queue, do not save any changes
+         if new_record?
+           rocket_job_batch_run_after_callbacks(false)
+           return true
+         end
+
+         # Complete job iff no other worker has already completed it
+         # Must set write concern to at least 1 since we need the nModified back
+         result = self.class.with(write: {w: 1}) do |query|
+           query.
+             where(id: id, state: :running, sub_state: :processing).
+             update('$set' => {sub_state: :after, worker_name: worker_name})
+         end
+
+         # Reload to pull in any counters or other data that was modified.
+         reload
+         if result.modified_count > 0
+           rocket_job_batch_run_after_callbacks(false)
+         else
+           # Repeat cleanup in case this worker was still running when the job was aborted
+           cleanup! if aborted?
+         end
+         true
+       end
+
+       # Run the before_batch callbacks
+       # Saves the current state before and after running callbacks if callbacks present
+       def rocket_job_batch_run_before_callbacks
+         unless _before_batch_callbacks.empty?
+           self.sub_state = :before
+           save! unless new_record? || destroyed?
+           logger.measure_info(
+             'before_batch',
+             metric: "#{self.class.name}/before_batch",
+             log_exception: :full,
+             on_exception_level: :error,
+             silence: log_level
+           ) do
+             run_callbacks(:before_batch)
+           end
+         end
+         self.sub_state = :processing
+         save! unless new_record? || destroyed?
+       end
+
+       # Run the after_batch callbacks
+       # Saves the current state before and after running callbacks if callbacks present
+       def rocket_job_batch_run_after_callbacks(save_before = true)
+         unless _after_batch_callbacks.empty?
+           self.sub_state = :after
+           save! if save_before && !new_record? && !destroyed?
+           logger.measure_info(
+             'after_batch',
+             metric: "#{self.class.name}/after_batch",
+             log_exception: :full,
+             on_exception_level: :error,
+             silence: log_level
+           ) do
+             run_callbacks(:after_batch)
+           end
+         end
+         if new_record? || destroyed?
+           complete if may_complete?
+         else
+           may_complete? ? complete! : save!
+         end
+       end
+
+       # Handle before and after callbacks
+       def rocket_job_handle_callbacks(worker, re_raise_exceptions)
+         rocket_job_fail_on_exception!(worker.name, re_raise_exceptions) do
+           # If this is the first worker to pick up this job
+           if sub_state == :before
+             rocket_job_batch_run_before_callbacks
+             # Check for 0 record jobs
+             rocket_job_batch_complete?(worker.name) if running?
+           elsif sub_state == :after
+             rocket_job_batch_run_after_callbacks
+           end
+         end
+       end
+
+     end
+   end
+ end
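Finally, a minimal sketch (names are hypothetical) of `work_first_slice`: a `before_batch` callback consumes the first uploaded slice ahead of normal processing, for example to pull a control or header record out of the data:

```ruby
require 'rocketjob'

class LoadTransactionsJob < RocketJob::Job
  include RocketJob::Batch

  before_batch :extract_control_record

  def perform(record)
    # ... normal record processing ...
  end

  private

  # Runs while sub_state == :before, which is when work_first_slice is allowed.
  def extract_control_record
    work_first_slice do |record|
      # Hypothetical: keep something from the file's first records on the job itself.
      self.description = "Batch file header: #{record}"
    end
  end
end
```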