rocketjob 5.3.3 → 6.0.0.rc1

Files changed (55)
  1. checksums.yaml +4 -4
  2. data/README.md +19 -5
  3. data/bin/rocketjob_batch_perf +1 -1
  4. data/bin/rocketjob_perf +1 -1
  5. data/lib/rocket_job/batch.rb +3 -0
  6. data/lib/rocket_job/batch/categories.rb +338 -0
  7. data/lib/rocket_job/batch/io.rb +132 -69
  8. data/lib/rocket_job/batch/model.rb +20 -68
  9. data/lib/rocket_job/batch/performance.rb +20 -8
  10. data/lib/rocket_job/batch/statistics.rb +35 -13
  11. data/lib/rocket_job/batch/tabular.rb +2 -0
  12. data/lib/rocket_job/batch/tabular/input.rb +8 -6
  13. data/lib/rocket_job/batch/tabular/output.rb +4 -2
  14. data/lib/rocket_job/batch/throttle_running_workers.rb +8 -17
  15. data/lib/rocket_job/batch/worker.rb +27 -24
  16. data/lib/rocket_job/category/base.rb +78 -0
  17. data/lib/rocket_job/category/input.rb +110 -0
  18. data/lib/rocket_job/category/output.rb +25 -0
  19. data/lib/rocket_job/cli.rb +24 -16
  20. data/lib/rocket_job/dirmon_entry.rb +22 -12
  21. data/lib/rocket_job/event.rb +1 -1
  22. data/lib/rocket_job/extensions/iostreams/path.rb +32 -0
  23. data/lib/rocket_job/extensions/mongoid/factory.rb +4 -12
  24. data/lib/rocket_job/extensions/mongoid/stringified_symbol.rb +50 -0
  25. data/lib/rocket_job/extensions/psych/yaml_tree.rb +8 -0
  26. data/lib/rocket_job/jobs/dirmon_job.rb +1 -1
  27. data/lib/rocket_job/jobs/housekeeping_job.rb +7 -7
  28. data/lib/rocket_job/jobs/on_demand_batch_job.rb +15 -6
  29. data/lib/rocket_job/jobs/on_demand_job.rb +1 -2
  30. data/lib/rocket_job/jobs/performance_job.rb +3 -1
  31. data/lib/rocket_job/jobs/re_encrypt/relational_job.rb +5 -4
  32. data/lib/rocket_job/jobs/upload_file_job.rb +47 -10
  33. data/lib/rocket_job/lookup_collection.rb +68 -0
  34. data/lib/rocket_job/plugins/job/model.rb +25 -50
  35. data/lib/rocket_job/plugins/job/throttle_running_jobs.rb +12 -4
  36. data/lib/rocket_job/plugins/job/worker.rb +2 -7
  37. data/lib/rocket_job/plugins/restart.rb +12 -5
  38. data/lib/rocket_job/plugins/state_machine.rb +2 -1
  39. data/lib/rocket_job/ractor_worker.rb +42 -0
  40. data/lib/rocket_job/server/model.rb +1 -1
  41. data/lib/rocket_job/sliced.rb +36 -0
  42. data/lib/rocket_job/sliced/bzip2_output_slice.rb +43 -0
  43. data/lib/rocket_job/sliced/input.rb +4 -4
  44. data/lib/rocket_job/sliced/slice.rb +11 -13
  45. data/lib/rocket_job/sliced/slices.rb +20 -2
  46. data/lib/rocket_job/sliced/writer/output.rb +33 -44
  47. data/lib/rocket_job/subscribers/server.rb +1 -1
  48. data/lib/rocket_job/thread_worker.rb +46 -0
  49. data/lib/rocket_job/version.rb +1 -1
  50. data/lib/rocket_job/worker.rb +21 -55
  51. data/lib/rocket_job/worker_pool.rb +5 -7
  52. data/lib/rocketjob.rb +52 -59
  53. metadata +43 -33
  54. data/lib/rocket_job/extensions/mongoid/remove_warnings.rb +0 -12
  55. data/lib/rocket_job/jobs/on_demand_batch_tabular_job.rb +0 -28
@@ -29,20 +29,28 @@ module RocketJob
           class_attribute :throttle_running_jobs
           self.throttle_running_jobs = nil
 
+          # Allow jobs to be throttled by group name instead of the job class name.
+          field :throttle_group, type: String, class_attribute: true, user_editable: true, copy_on_restart: true
+
           define_throttle :throttle_running_jobs_exceeded?
         end
 
        private
 
-        # Returns [Boolean] whether the throttle for this job has been exceeded
+        # Returns [true|false] whether the throttle for this job has been exceeded
        def throttle_running_jobs_exceeded?
-          return unless throttle_running_jobs&.positive?
+          return false unless throttle_running_jobs&.positive?
 
-          # Cannot use this class since it will include instances of parent job classes.
           RocketJob::Job.with(read: {mode: :primary}) do |conn|
-            conn.running.where("_type" => self.class.name, :id.ne => id).count >= throttle_running_jobs
+            query = throttle_running_jobs_base_query
+            throttle_group ? query["throttle_group"] = throttle_group : query["_type"] = self.class.name
+            conn.running.where(query).count >= throttle_running_jobs
          end
        end
+
+        def throttle_running_jobs_base_query
+          {:id.ne => id}
+        end
      end
    end
  end
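
For illustration, not part of the upstream diff: the new throttle_group field lets unrelated job classes share one running-jobs throttle. A minimal hedged sketch (class names are hypothetical; assumes class_attribute: true allows a class-level default, as with other RocketJob fields):

    # Both job classes count against the same "nightly" group,
    # so at most 5 of them combined run at any time.
    class NightlyExtractJob < RocketJob::Job
      self.throttle_running_jobs = 5
      self.throttle_group        = "nightly"

      def perform
        # ...
      end
    end

    class NightlyLoadJob < RocketJob::Job
      self.throttle_running_jobs = 5
      self.throttle_group        = "nightly"

      def perform
        # ...
      end
    end
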
@@ -48,11 +48,11 @@ module RocketJob
        def perform_now
          raise(::Mongoid::Errors::Validations, self) unless valid?
 
-          worker = RocketJob::Worker.new(inline: true)
+          worker = RocketJob::Worker.new
          start if may_start?
          # Re-Raise exceptions
          rocket_job_work(worker, true) if running?
-          result
+          @rocket_job_output
        end
 
        def perform(*)
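
For illustration, not part of the upstream diff: perform_now still runs the job inline on the calling thread, but it now returns the raw value captured in @rocket_job_output rather than the removed result field. A hedged sketch with a hypothetical job class:

    job    = ImportJob.new(file_name: "import.csv")   # hypothetical job class
    output = job.perform_now                          # whatever perform returned
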
@@ -106,11 +106,6 @@ module RocketJob
            end
          end
 
-          if collect_output?
-            # Result must be a Hash, if not put it in a Hash
-            self.result = @rocket_job_output.is_a?(Hash) ? @rocket_job_output : {"result" => @rocket_job_output}
-          end
-
          if new_record? || destroyed?
            complete if may_complete?
          else
@@ -91,8 +91,16 @@ module RocketJob
          logger.info("Job has expired. Not creating a new instance.")
          return
        end
-        attributes = rocket_job_restart_attributes.each_with_object({}) { |attr, attrs| attrs[attr] = send(attr) }
-        rocket_job_restart_create(attributes)
+        job_attrs =
+          rocket_job_restart_attributes.each_with_object({}) { |attr, attrs| attrs[attr] = send(attr) }
+        job = self.class.new(job_attrs)
+
+        # Copy across the input and output categories to the new scheduled job so that all of the
+        # settings are remembered between instances. Example: slice_size
+        job.input_categories  = input_categories if respond_to?(:input_categories)
+        job.output_categories = output_categories if respond_to?(:output_categories)
+
+        rocket_job_restart_save(job)
      end
 
      def rocket_job_restart_abort
@@ -101,11 +109,10 @@ module RocketJob
 
      # Allow Singleton to prevent the creation of a new job if one is already running
      # Retry since the delete may not have persisted to disk yet.
-      def rocket_job_restart_create(attrs, retry_limit = 3, sleep_interval = 0.1)
+      def rocket_job_restart_save(job, retry_limit = 10, sleep_interval = 0.5)
        count = 0
        while count < retry_limit
-          job = self.class.create(attrs)
-          if job.persisted?
+          if job.save
            logger.info("Created a new job instance: #{job.id}")
            return true
          else
@@ -51,7 +51,8 @@ module RocketJob
        # Validate methods are any of Symbol String Proc
        methods.each do |method|
          unless method.is_a?(Symbol) || method.is_a?(String)
-            raise(ArgumentError, "#{action}_#{event_name} currently does not support any options. Only Symbol and String method names can be supplied.")
+            raise(ArgumentError,
+                  "#{action}_#{event_name} currently does not support any options. Only Symbol and String method names can be supplied.")
          end
        end
        methods
@@ -0,0 +1,42 @@
+module RocketJob
+  # Run each worker in its own "Ractor".
+  class RactorWorker < Worker
+    attr_reader :thread
+
+    def initialize(id:, server_name:)
+      super(id: id, server_name: server_name)
+      @shutdown = Concurrent::Event.new
+      @thread = Ractor.new(name: "rocketjob-#{id}") { run }
+    end
+
+    def alive?
+      @thread.alive?
+    end
+
+    def backtrace
+      @thread.backtrace
+    end
+
+    def join(*args)
+      @thread.join(*args)
+    end
+
+    # Send each active worker the RocketJob::ShutdownException so that it stops processing immediately.
+    def kill
+      @thread.raise(Shutdown, "Shutdown due to kill request for worker: #{name}") if @thread.alive?
+    end
+
+    def shutdown?
+      @shutdown.set?
+    end
+
+    def shutdown!
+      @shutdown.set
+    end
+
+    # Returns [true|false] whether the shutdown indicator was set
+    def wait_for_shutdown?(timeout = nil)
+      @shutdown.wait(timeout)
+    end
+  end
+end
@@ -28,7 +28,7 @@ module RocketJob
 
      # Current state
      # Internal use only. Do not set this field directly
-      field :state, type: Symbol, default: :starting
+      field :state, type: Mongoid::StringifiedSymbol, default: :starting
 
      index({name: 1}, background: true, unique: true)
 
@@ -0,0 +1,36 @@
+module RocketJob
+  module Sliced
+    autoload :BZip2OutputSlice, "rocket_job/sliced/bzip2_output_slice"
+    autoload :CompressedSlice, "rocket_job/sliced/compressed_slice"
+    autoload :EncryptedSlice, "rocket_job/sliced/encrypted_slice"
+    autoload :Input, "rocket_job/sliced/input"
+    autoload :Output, "rocket_job/sliced/output"
+    autoload :Slice, "rocket_job/sliced/slice"
+    autoload :Slices, "rocket_job/sliced/slices"
+    autoload :Store, "rocket_job/sliced/store"
+
+    module Writer
+      autoload :Input, "rocket_job/sliced/writer/input"
+      autoload :Output, "rocket_job/sliced/writer/output"
+    end
+
+    # Returns [RocketJob::Sliced::Slices] for the relevant direction and category.
+    def self.factory(direction, category, job)
+      collection_name = "rocket_job.#{direction}s.#{job.id}"
+      collection_name << ".#{category.name}" unless category.name == :main
+
+      case direction
+      when :input
+        RocketJob::Sliced::Input.new(
+          collection_name: collection_name,
+          slice_class: category.serializer_class,
+          slice_size: category.slice_size
+        )
+      when :output
+        RocketJob::Sliced::Output.new(collection_name: collection_name, slice_class: category.serializer_class)
+      else
+        raise(ArgumentError, "Unknown direction: #{direction.inspect}")
+      end
+    end
+  end
+end
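
For illustration, not part of the upstream diff: a hedged sketch of calling this factory. It assumes the input_category accessor added by the new rocket_job/batch/categories.rb; the real call site lives inside the batch plugins and is not shown here.

    category = job.input_category(:main)                         # assumed accessor
    input    = RocketJob::Sliced.factory(:input, category, job)
    # => RocketJob::Sliced::Input backed by the "rocket_job.inputs.<job id>" collection
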
@@ -0,0 +1,43 @@
+module RocketJob
+  module Sliced
+    # This is a specialized output serializer that renders each output slice as a single BZip2 compressed stream.
+    # BZip2 allows multiple output streams to be written into a single BZip2 file.
+    #
+    # Notes:
+    # * The `bzip2` linux command line utility supports multiple embedded BZip2 streams,
+    #   but some other custom implementations may not. They may only read the first slice and stop.
+    # * It is only designed for use on output collections.
+    #
+    # To download the output when using this slice:
+    #
+    #   # Download the binary BZip2 streams into a single file
+    #   IOStreams.path(output_file_name).stream(:none).writer do |io|
+    #     job.download { |slice| io << slice[:binary] }
+    #   end
+    class BZip2OutputSlice < ::RocketJob::Sliced::Slice
+      # This is a specialized binary slice for creating binary data from each slice
+      # that must be downloaded as-is into output files.
+      def self.binary?
+        true
+      end
+
+      private
+
+      def parse_records
+        records = attributes.delete("records")
+
+        # Convert BSON::Binary to a string
+        @records = [{binary: records.data}]
+      end
+
+      def serialize_records
+        return [] if @records.nil? || @records.empty?
+
+        lines = records.to_a.join("\n") + "\n"
+        s = StringIO.new
+        IOStreams::Bzip2::Writer.stream(s) { |io| io.write(lines) }
+        BSON::Binary.new(s.string)
+      end
+    end
+  end
+end
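
For illustration, not part of the upstream diff: a hedged sketch of selecting this serializer for a batch job's output. The output_category serializer: :bzip2 syntax is assumed from the new category classes added in this release and may differ.

    class ExportJob < RocketJob::Job
      include RocketJob::Batch

      # Assumed DSL: write each output slice as a raw BZip2 stream.
      output_category serializer: :bzip2

      def perform(record)
        record.to_json
      end
    end

    # Download, as described in the class comment above: concatenate the binary slices as-is.
    IOStreams.path("export.json.bz2").stream(:none).writer do |io|
      job.download { |slice| io << slice[:binary] }
    end
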
@@ -5,7 +5,7 @@ module RocketJob
        # Create indexes before uploading
        create_indexes
        Writer::Input.collect(self, on_first: on_first, &block)
-      rescue StandardError => e
+      rescue Exception => e
        drop
        raise(e)
      end
@@ -73,7 +73,7 @@ module RocketJob
          count += 1
        end
        count
-      rescue StandardError => e
+      rescue Exception => e
        drop
        raise(e)
      end
@@ -91,7 +91,7 @@ module RocketJob
          count += 1
        end
        count
-      rescue StandardError => e
+      rescue Exception => e
        drop
        raise(e)
      end
@@ -139,7 +139,7 @@ module RocketJob
        document = all.queued.
                   sort("_id" => 1).
                   find_one_and_update(
-                     {"$set" => {worker_name: worker_name, state: :running, started_at: Time.now}},
+                     {"$set" => {worker_name: worker_name, state: "running", started_at: Time.now}},
                     return_document: :after
                   )
        document.collection_name = collection_name if document
@@ -33,7 +33,7 @@ module RocketJob
      #
 
      # Current state, as set by AASM
-      field :state, type: Symbol, default: :queued
+      field :state, type: Mongoid::StringifiedSymbol, default: :queued
 
      # When processing started on this slice
      field :started_at, type: Time
@@ -94,6 +94,12 @@ module RocketJob
        end
      end
 
+      # Returns whether this is a specialized binary slice for creating binary data from each slice
+      # that is then just downloaded as-is into output files.
+      def self.binary?
+        false
+      end
+
      # `records` array has special handling so that it can be modified in place instead of having
      # to replace the entire array every time. For example, when appending lines with `<<`.
      def records
@@ -133,18 +139,10 @@ module RocketJob
 
      # Returns [Hash] the slice as a Hash for storage purposes
      # Compresses / Encrypts the slice according to the job setting
-      if ::Mongoid::VERSION.to_i >= 6
-        def as_attributes
-          attrs = super
-          attrs["records"] = serialize_records if @records
-          attrs
-        end
-      else
-        def as_document
-          attrs = super
-          attrs["records"] = serialize_records if @records
-          attrs
-        end
+      def as_attributes
+        attrs = super
+        attrs["records"] = serialize_records if @records
+        attrs
      end
 
      def inspect
@@ -42,10 +42,16 @@ module RocketJob
        slice
      end
 
+      # Returns whether this collection contains specialized binary slices for creating binary data from each slice
+      # that is then just downloaded as-is into output files.
+      def binary?
+        slice_class.binary?
+      end
+
      # Returns output slices in the order of their id
      # which is usually the order in which they were written.
-      def each
-        all.sort(id: 1).each { |document| yield(document) }
+      def each(&block)
+        all.sort(id: 1).each(&block)
      end
 
      # Insert a new slice into the collection
@@ -90,6 +96,17 @@ module RocketJob
        slice
      end
 
+      # Append to an existing slice if already present
+      def append(slice, input_slice)
+        existing_slice = all.where(id: input_slice.id).first
+        return insert(slice, input_slice) unless existing_slice
+
+        extra_records = slice.is_a?(Slice) ? slice.records : slice
+        existing_slice.records = existing_slice.records + extra_records
+        existing_slice.save!
+        existing_slice
+      end
+
      alias << insert
 
      # Index for find_and_modify only if it is not already present
@@ -133,6 +150,7 @@ module RocketJob
      def last
        all.sort("_id" => -1).first
      end
+
      # rubocop:enable Style/RedundantSort
 
      # Returns [Array<Struct>] grouped exceptions by class name,
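
For illustration, not part of the upstream diff: the new append method keys the output slice off the input slice's id, inserting on the first write and extending the same slice afterwards. A hedged sketch:

    output = job.output(:main)                        # RocketJob::Sliced::Output collection
    output.append(["header", "row 1"], input_slice)   # first call inserts a new slice
    output.append(["row 2"], input_slice)             # later calls extend that slice
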
@@ -1,30 +1,37 @@
 module RocketJob
   module Sliced
     module Writer
-      # Internal class for writing categorized results into output slices
-      class Output
+      class Null
        attr_reader :job, :categorized_records
-        attr_accessor :input_slice
-
-        # Collect output results and write to output collections
-        # iff job is collecting output
-        # Notes:
-        #   Nothing is saved if an exception is raised inside the block
-        def self.collect(job, input_slice = nil)
-          if job.collect_output?
-            writer = new(job, input_slice)
-            yield(writer)
-            writer.close
-          else
-            writer = NullWriter.new(job, input_slice)
-            yield(writer)
-          end
-        end
+        attr_accessor :input_slice, :append
 
-        def initialize(job, input_slice = nil)
+        def initialize(job, input_slice: nil, append: false)
          @job = job
          @input_slice = input_slice
          @categorized_records = {}
+          @append = append
+        end
+
+        def <<(_)
+          # noop
+        end
+
+        def close
+          # noop
+        end
+      end
+
+      # Internal class for writing categorized results into output slices
+      class Output < Null
+        # Collect output results and write to output collections
+        # iff job is collecting output
+        # Notes:
+        #   Partial slices are saved when an exception is raised inside the block
+        def self.collect(job, **args)
+          writer = job.output_categories.present? ? new(job, **args) : Null.new(job, **args)
+          yield(writer)
+        ensure
+          writer&.close
        end
 
        # Writes the supplied result, RocketJob::Batch::Result or RocketJob::Batch::Results
@@ -40,7 +47,8 @@ module RocketJob
        # Write categorized results to their relevant collections
        def close
          categorized_records.each_pair do |category, results|
-            job.output(category).insert(results, input_slice)
+            collection = job.output(category)
+            append ? collection.append(results, input_slice) : collection.insert(results, input_slice)
          end
        end
 
@@ -48,35 +56,16 @@ module RocketJob
 
        # Stores the categorized result from one result
        def extract_categorized_result(result)
-          category = :main
-          value = result
+          named_category = :main
+          value = result
          if result.is_a?(RocketJob::Batch::Result)
-            category = result.category
-            value = result.value
-            raise(ArgumentError, "Invalid RocketJob Output Category: #{category}") if job.output_categories.exclude?(category)
+            named_category = result.category
+            value = result.value
          end
-          (categorized_records[category] ||= []) << value unless value.nil? && !job.collect_nil_output?
+          (categorized_records[named_category] ||= []) << value unless value.nil? && !job.output_category(named_category).nils
        end
      end
 
-      class NullWriter
-        attr_reader :job, :categorized_records
-        attr_accessor :input_slice
-
-        def initialize(job, input_slice = nil)
-          @job = job
-          @input_slice = input_slice
-          @categorized_records = {}
-        end
-
-        def <<(_)
-          # noop
-        end
-
-        def close
-          # noop
-        end
-      end
    end
  end
 end
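
For illustration, not part of the upstream diff: a hedged sketch of how the collect API is intended to be used (a simplification of the internal batch worker call, which is not part of this diff):

    RocketJob::Sliced::Writer::Output.collect(job, input_slice: slice) do |writer|
      slice.each { |record| writer << job.perform(record) }   # simplified call per record
    end
    # When the job has no output categories, a Null writer is yielded and nothing is persisted.
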