rocketjob 5.4.1 → 6.0.0.rc1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (54) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +19 -5
  3. data/bin/rocketjob_batch_perf +1 -1
  4. data/bin/rocketjob_perf +1 -1
  5. data/lib/rocket_job/batch.rb +3 -0
  6. data/lib/rocket_job/batch/categories.rb +338 -0
  7. data/lib/rocket_job/batch/io.rb +128 -60
  8. data/lib/rocket_job/batch/model.rb +20 -68
  9. data/lib/rocket_job/batch/performance.rb +20 -8
  10. data/lib/rocket_job/batch/statistics.rb +35 -13
  11. data/lib/rocket_job/batch/tabular.rb +2 -0
  12. data/lib/rocket_job/batch/tabular/input.rb +8 -6
  13. data/lib/rocket_job/batch/tabular/output.rb +4 -2
  14. data/lib/rocket_job/batch/throttle_running_workers.rb +1 -5
  15. data/lib/rocket_job/batch/worker.rb +27 -24
  16. data/lib/rocket_job/category/base.rb +78 -0
  17. data/lib/rocket_job/category/input.rb +110 -0
  18. data/lib/rocket_job/category/output.rb +25 -0
  19. data/lib/rocket_job/cli.rb +24 -16
  20. data/lib/rocket_job/dirmon_entry.rb +22 -12
  21. data/lib/rocket_job/event.rb +1 -1
  22. data/lib/rocket_job/extensions/iostreams/path.rb +32 -0
  23. data/lib/rocket_job/extensions/mongoid/factory.rb +4 -12
  24. data/lib/rocket_job/extensions/mongoid/stringified_symbol.rb +50 -0
  25. data/lib/rocket_job/extensions/psych/yaml_tree.rb +8 -0
  26. data/lib/rocket_job/jobs/dirmon_job.rb +1 -1
  27. data/lib/rocket_job/jobs/housekeeping_job.rb +7 -7
  28. data/lib/rocket_job/jobs/on_demand_batch_job.rb +15 -6
  29. data/lib/rocket_job/jobs/on_demand_job.rb +1 -2
  30. data/lib/rocket_job/jobs/performance_job.rb +3 -1
  31. data/lib/rocket_job/jobs/re_encrypt/relational_job.rb +5 -4
  32. data/lib/rocket_job/jobs/upload_file_job.rb +46 -9
  33. data/lib/rocket_job/lookup_collection.rb +68 -0
  34. data/lib/rocket_job/plugins/job/model.rb +25 -50
  35. data/lib/rocket_job/plugins/job/throttle_running_jobs.rb +1 -1
  36. data/lib/rocket_job/plugins/job/worker.rb +2 -7
  37. data/lib/rocket_job/plugins/restart.rb +12 -5
  38. data/lib/rocket_job/plugins/state_machine.rb +2 -1
  39. data/lib/rocket_job/ractor_worker.rb +42 -0
  40. data/lib/rocket_job/server/model.rb +1 -1
  41. data/lib/rocket_job/sliced.rb +15 -70
  42. data/lib/rocket_job/sliced/input.rb +1 -1
  43. data/lib/rocket_job/sliced/slice.rb +5 -13
  44. data/lib/rocket_job/sliced/slices.rb +14 -2
  45. data/lib/rocket_job/sliced/writer/output.rb +33 -44
  46. data/lib/rocket_job/subscribers/server.rb +1 -1
  47. data/lib/rocket_job/thread_worker.rb +46 -0
  48. data/lib/rocket_job/version.rb +1 -1
  49. data/lib/rocket_job/worker.rb +21 -55
  50. data/lib/rocket_job/worker_pool.rb +5 -7
  51. data/lib/rocketjob.rb +52 -41
  52. metadata +35 -27
  53. data/lib/rocket_job/extensions/mongoid/remove_warnings.rb +0 -12
  54. data/lib/rocket_job/jobs/on_demand_batch_tabular_job.rb +0 -28
@@ -37,7 +37,7 @@ module RocketJob
37
37
 
38
38
  private
39
39
 
40
- # Returns [Boolean] whether the throttle for this job has been exceeded
40
+ # Returns [true|false] whether the throttle for this job has been exceeded
41
41
  def throttle_running_jobs_exceeded?
42
42
  return false unless throttle_running_jobs&.positive?
43
43
 
@@ -48,11 +48,11 @@ module RocketJob
48
48
  def perform_now
49
49
  raise(::Mongoid::Errors::Validations, self) unless valid?
50
50
 
51
- worker = RocketJob::Worker.new(inline: true)
51
+ worker = RocketJob::Worker.new
52
52
  start if may_start?
53
53
  # Re-Raise exceptions
54
54
  rocket_job_work(worker, true) if running?
55
- result
55
+ @rocket_job_output
56
56
  end
57
57
 
58
58
  def perform(*)
@@ -106,11 +106,6 @@ module RocketJob
106
106
  end
107
107
  end
108
108
 
109
- if collect_output?
110
- # Result must be a Hash, if not put it in a Hash
111
- self.result = @rocket_job_output.is_a?(Hash) ? @rocket_job_output : {"result" => @rocket_job_output}
112
- end
113
-
114
109
  if new_record? || destroyed?
115
110
  complete if may_complete?
116
111
  else
@@ -91,8 +91,16 @@ module RocketJob
91
91
  logger.info("Job has expired. Not creating a new instance.")
92
92
  return
93
93
  end
94
- attributes = rocket_job_restart_attributes.each_with_object({}) { |attr, attrs| attrs[attr] = send(attr) }
95
- rocket_job_restart_create(attributes)
94
+ job_attrs =
95
+ rocket_job_restart_attributes.each_with_object({}) { |attr, attrs| attrs[attr] = send(attr) }
96
+ job = self.class.new(job_attrs)
97
+
98
+ # Copy across input and output categories to new scheduled job so that all of the
99
+ # settings are remembered between instances. Example: slice_size
100
+ job.input_categories = input_categories if respond_to?(:input_categories)
101
+ job.output_categories = output_categories if respond_to?(:output_categories)
102
+
103
+ rocket_job_restart_save(job)
96
104
  end
97
105
 
98
106
  def rocket_job_restart_abort
@@ -101,11 +109,10 @@ module RocketJob
101
109
 
102
110
  # Allow Singleton to prevent the creation of a new job if one is already running
103
111
  # Retry since the delete may not have persisted to disk yet.
104
- def rocket_job_restart_create(attrs, retry_limit = 3, sleep_interval = 0.1)
112
+ def rocket_job_restart_save(job, retry_limit = 10, sleep_interval = 0.5)
105
113
  count = 0
106
114
  while count < retry_limit
107
- job = self.class.create(attrs)
108
- if job.persisted?
115
+ if job.save
109
116
  logger.info("Created a new job instance: #{job.id}")
110
117
  return true
111
118
  else
@@ -51,7 +51,8 @@ module RocketJob
51
51
  # Validate methods are any of Symbol String Proc
52
52
  methods.each do |method|
53
53
  unless method.is_a?(Symbol) || method.is_a?(String)
54
- raise(ArgumentError, "#{action}_#{event_name} currently does not support any options. Only Symbol and String method names can be supplied.")
54
+ raise(ArgumentError,
55
+ "#{action}_#{event_name} currently does not support any options. Only Symbol and String method names can be supplied.")
55
56
  end
56
57
  end
57
58
  methods
@@ -0,0 +1,42 @@
module RocketJob
  # Run each worker in its own "Ractor".
  #
  # Mirrors the public interface of ThreadWorker: alive?, backtrace, join,
  # kill, shutdown?, shutdown! and wait_for_shutdown?.
  class RactorWorker < Worker
    # The underlying Ractor executing this worker's run loop.
    attr_reader :thread

    def initialize(id:, server_name:)
      super(id: id, server_name: server_name)
      # Event used to signal this worker to shut down.
      @shutdown = Concurrent::Event.new
      # NOTE(review): a Ractor block does not capture the surrounding `self`,
      # so `run` may not resolve to this worker's method at runtime — verify
      # against Ruby's Ractor isolation rules before relying on this class.
      @thread = Ractor.new(name: "rocketjob-#{id}") { run }
    end

    # Returns [true|false] whether the underlying Ractor is still running.
    # NOTE(review): Ractor may not expose Thread-style `alive?`, `backtrace`,
    # `raise` or `join` instance methods — confirm on the targeted Ruby version.
    def alive?
      @thread.alive?
    end

    # Returns the current backtrace of the worker's Ractor.
    def backtrace
      @thread.backtrace
    end

    # Waits for the worker to finish, forwarding any timeout arguments.
    def join(*args)
      @thread.join(*args)
    end

    # Send each active worker the RocketJob::ShutdownException so that it stops processing immediately.
    def kill
      @thread.raise(Shutdown, "Shutdown due to kill request for worker: #{name}") if @thread.alive?
    end

    # Returns [true|false] whether shutdown has been requested for this worker.
    def shutdown?
      @shutdown.set?
    end

    # Request that this worker shut down.
    def shutdown!
      @shutdown.set
    end

    # Returns [true|false] whether the shutdown indicator was set
    # within the given timeout (waits indefinitely when timeout is nil).
    def wait_for_shutdown?(timeout = nil)
      @shutdown.wait(timeout)
    end
  end
end
@@ -28,7 +28,7 @@ module RocketJob
28
28
 
29
29
  # Current state
30
30
  # Internal use only. Do not set this field directly
31
- field :state, type: Symbol, default: :starting
31
+ field :state, type: Mongoid::StringifiedSymbol, default: :starting
32
32
 
33
33
  index({name: 1}, background: true, unique: true)
34
34
 
@@ -14,78 +14,23 @@ module RocketJob
14
14
  autoload :Output, "rocket_job/sliced/writer/output"
15
15
  end
16
16
 
17
- # Returns [RocketJob::Sliced::Slices] for the relevant type and category.
18
- #
19
- # Supports compress and encrypt with [true|false|Hash] values.
20
- # When [Hash] they must specify whether the apply to the input or output collection types.
21
- #
22
- # Example, compress both input and output collections:
23
- # class MyJob < RocketJob::Job
24
- # include RocketJob::Batch
25
- # self.compress = true
26
- # end
27
- #
28
- # Example, compress just the output collections:
29
- # class MyJob < RocketJob::Job
30
- # include RocketJob::Batch
31
- # self.compress = {output: true}
32
- # end
33
- #
34
- # To use the specialized BZip output compressor, and the regular compressor for the input collections:
35
- # class MyJob < RocketJob::Job
36
- # include RocketJob::Batch
37
- # self.compress = {output: :bzip2, input: true}
38
- # end
39
- def self.factory(type, category, job)
40
- raise(ArgumentError, "Unknown type: #{type.inspect}") unless %i[input output].include?(type)
41
-
42
- collection_name = "rocket_job.#{type}s.#{job.id}"
43
- collection_name << ".#{category}" unless category == :main
44
-
45
- args = {collection_name: collection_name, slice_size: job.slice_size}
46
- klass = slice_class(type, job)
47
- args[:slice_class] = klass if klass
48
-
49
- if type == :input
50
- RocketJob::Sliced::Input.new(args)
17
+ # Returns [RocketJob::Sliced::Slices] for the relevant direction and category.
18
+ def self.factory(direction, category, job)
19
+ collection_name = "rocket_job.#{direction}s.#{job.id}"
20
+ collection_name << ".#{category.name}" unless category.name == :main
21
+
22
+ case direction
23
+ when :input
24
+ RocketJob::Sliced::Input.new(
25
+ collection_name: collection_name,
26
+ slice_class: category.serializer_class,
27
+ slice_size: category.slice_size
28
+ )
29
+ when :output
30
+ RocketJob::Sliced::Output.new(collection_name: collection_name, slice_class: category.serializer_class)
51
31
  else
52
- RocketJob::Sliced::Output.new(args)
53
- end
54
- end
55
-
56
- private
57
-
58
- # Parses the encrypt and compress options to determine which slice serializer to use.
59
- # `encrypt` takes priority over any `compress` option.
60
- def self.slice_class(type, job)
61
- encrypt = extract_value(type, job.encrypt)
62
- compress = extract_value(type, job.compress)
63
-
64
- if encrypt
65
- case encrypt
66
- when true
67
- EncryptedSlice
68
- else
69
- raise(ArgumentError, "Unknown job `encrypt` value: #{compress}") unless compress.is_a?(Slices)
70
- # Returns the supplied class to use for encryption.
71
- encrypt
72
- end
73
- elsif compress
74
- case compress
75
- when true
76
- CompressedSlice
77
- when :bzip2
78
- BZip2OutputSlice
79
- else
80
- raise(ArgumentError, "Unknown job `compress` value: #{compress}") unless compress.is_a?(Slices)
81
- # Returns the supplied class to use for compression.
82
- compress
83
- end
32
+ raise(ArgumentError, "Unknown direction: #{direction.inspect}")
84
33
  end
85
34
  end
86
-
87
- def self.extract_value(type, value)
88
- value.is_a?(Hash) ? value[type] : value
89
- end
90
35
  end
91
36
  end
@@ -139,7 +139,7 @@ module RocketJob
139
139
  document = all.queued.
140
140
  sort("_id" => 1).
141
141
  find_one_and_update(
142
- {"$set" => {worker_name: worker_name, state: :running, started_at: Time.now}},
142
+ {"$set" => {worker_name: worker_name, state: "running", started_at: Time.now}},
143
143
  return_document: :after
144
144
  )
145
145
  document.collection_name = collection_name if document
@@ -33,7 +33,7 @@ module RocketJob
33
33
  #
34
34
 
35
35
  # Current state, as set by AASM
36
- field :state, type: Symbol, default: :queued
36
+ field :state, type: Mongoid::StringifiedSymbol, default: :queued
37
37
 
38
38
  # When processing started on this slice
39
39
  field :started_at, type: Time
@@ -139,18 +139,10 @@ module RocketJob
139
139
 
140
140
  # Returns [Hash] the slice as a Hash for storage purposes
141
141
  # Compresses / Encrypts the slice according to the job setting
142
- if ::Mongoid::VERSION.to_i >= 6
143
- def as_attributes
144
- attrs = super
145
- attrs["records"] = serialize_records if @records
146
- attrs
147
- end
148
- else
149
- def as_document
150
- attrs = super
151
- attrs["records"] = serialize_records if @records
152
- attrs
153
- end
142
+ def as_attributes
143
+ attrs = super
144
+ attrs["records"] = serialize_records if @records
145
+ attrs
154
146
  end
155
147
 
156
148
  def inspect
@@ -50,8 +50,8 @@ module RocketJob
50
50
 
51
51
  # Returns output slices in the order of their id
52
52
  # which is usually the order in which they were written.
53
- def each
54
- all.sort(id: 1).each { |document| yield(document) }
53
+ def each(&block)
54
+ all.sort(id: 1).each(&block)
55
55
  end
56
56
 
57
57
  # Insert a new slice into the collection
@@ -96,6 +96,17 @@ module RocketJob
96
96
  slice
97
97
  end
98
98
 
99
+ # Append to an existing slice if already present
100
+ def append(slice, input_slice)
101
+ existing_slice = all.where(id: input_slice.id).first
102
+ return insert(slice, input_slice) unless existing_slice
103
+
104
+ extra_records = slice.is_a?(Slice) ? slice.records : slice
105
+ existing_slice.records = existing_slice.records + extra_records
106
+ existing_slice.save!
107
+ existing_slice
108
+ end
109
+
99
110
  alias << insert
100
111
 
101
112
  # Index for find_and_modify only if it is not already present
@@ -139,6 +150,7 @@ module RocketJob
139
150
  def last
140
151
  all.sort("_id" => -1).first
141
152
  end
153
+
142
154
  # rubocop:enable Style/RedundantSort
143
155
 
144
156
  # Returns [Array<Struct>] grouped exceptions by class name,
@@ -1,30 +1,37 @@
1
1
  module RocketJob
2
2
  module Sliced
3
3
  module Writer
4
- # Internal class for writing categorized results into output slices
5
- class Output
4
+ class Null
6
5
  attr_reader :job, :categorized_records
7
- attr_accessor :input_slice
8
-
9
- # Collect output results and write to output collections
10
- # iff job is collecting output
11
- # Notes:
12
- # Nothing is saved if an exception is raised inside the block
13
- def self.collect(job, input_slice = nil)
14
- if job.collect_output?
15
- writer = new(job, input_slice)
16
- yield(writer)
17
- writer.close
18
- else
19
- writer = NullWriter.new(job, input_slice)
20
- yield(writer)
21
- end
22
- end
6
+ attr_accessor :input_slice, :append
23
7
 
24
- def initialize(job, input_slice = nil)
8
+ def initialize(job, input_slice: nil, append: false)
25
9
  @job = job
26
10
  @input_slice = input_slice
27
11
  @categorized_records = {}
12
+ @append = append
13
+ end
14
+
15
+ def <<(_)
16
+ # noop
17
+ end
18
+
19
+ def close
20
+ # noop
21
+ end
22
+ end
23
+
24
+ # Internal class for writing categorized results into output slices
25
+ class Output < Null
26
+ # Collect output results and write to output collections
27
+ # iff job is collecting output
28
+ # Notes:
29
+ # Partial slices are saved when an exception is raised inside the block
30
+ def self.collect(job, **args)
31
+ writer = job.output_categories.present? ? new(job, **args) : Null.new(job, **args)
32
+ yield(writer)
33
+ ensure
34
+ writer&.close
28
35
  end
29
36
 
30
37
  # Writes the supplied result, RocketJob::Batch::Result or RocketJob::Batch::Results
@@ -40,7 +47,8 @@ module RocketJob
40
47
  # Write categorized results to their relevant collections
41
48
  def close
42
49
  categorized_records.each_pair do |category, results|
43
- job.output(category).insert(results, input_slice)
50
+ collection = job.output(category)
51
+ append ? collection.append(results, input_slice) : collection.insert(results, input_slice)
44
52
  end
45
53
  end
46
54
 
@@ -48,35 +56,16 @@ module RocketJob
48
56
 
49
57
  # Stores the categorized result from one result
50
58
  def extract_categorized_result(result)
51
- category = :main
52
- value = result
59
+ named_category = :main
60
+ value = result
53
61
  if result.is_a?(RocketJob::Batch::Result)
54
- category = result.category
55
- value = result.value
56
- raise(ArgumentError, "Invalid RocketJob Output Category: #{category}") if job.output_categories.exclude?(category)
62
+ named_category = result.category
63
+ value = result.value
57
64
  end
58
- (categorized_records[category] ||= []) << value unless value.nil? && !job.collect_nil_output?
65
+ (categorized_records[named_category] ||= []) << value unless value.nil? && !job.output_category(named_category).nils
59
66
  end
60
67
  end
61
68
 
62
- class NullWriter
63
- attr_reader :job, :categorized_records
64
- attr_accessor :input_slice
65
-
66
- def initialize(job, input_slice = nil)
67
- @job = job
68
- @input_slice = input_slice
69
- @categorized_records = {}
70
- end
71
-
72
- def <<(_)
73
- # noop
74
- end
75
-
76
- def close
77
- # noop
78
- end
79
- end
80
69
  end
81
70
  end
82
71
  end
@@ -17,7 +17,7 @@ module RocketJob
17
17
 
18
18
  supervisor.logger.info("Stopping Pool")
19
19
  supervisor.worker_pool.stop
20
- unless supervisor.worker_pool.living_count == 0
20
+ unless supervisor.worker_pool.living_count.zero?
21
21
  supervisor.logger.info("Giving pool #{wait_timeout} seconds to terminate")
22
22
  sleep(wait_timeout)
23
23
  end
@@ -0,0 +1,46 @@
require "concurrent"
module RocketJob
  # ThreadWorker
  #
  # A worker runs on a single operating system thread.
  # Is usually started under a Rocket Job server process.
  class ThreadWorker < Worker
    attr_reader :thread

    def initialize(id:, server_name:)
      super(id: id, server_name: server_name)
      @shutdown = Concurrent::Event.new
      @thread   = Thread.new { run }
    end

    # Delegates liveness to the underlying thread.
    def alive?
      thread.alive?
    end

    # Current backtrace of the worker thread.
    def backtrace
      thread.backtrace
    end

    # Blocks until the worker thread completes, forwarding any timeout arguments.
    def join(*args)
      thread.join(*args)
    end

    # Interrupts the worker immediately by raising Shutdown inside its thread.
    def kill
      return unless thread.alive?

      thread.raise(Shutdown, "Shutdown due to kill request for worker: #{name}")
    end

    # Returns [true|false] whether a shutdown has been requested.
    def shutdown?
      @shutdown.set?
    end

    # Request that this worker shut down.
    def shutdown!
      @shutdown.set
    end

    # Returns [true|false] whether the shutdown indicator was set
    # within the given timeout (waits indefinitely when timeout is nil).
    def wait_for_shutdown?(timeout = nil)
      @shutdown.wait(timeout)
    end
  end
end