rocketjob 5.3.3 → 6.0.0.rc1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +19 -5
  3. data/bin/rocketjob_batch_perf +1 -1
  4. data/bin/rocketjob_perf +1 -1
  5. data/lib/rocket_job/batch.rb +3 -0
  6. data/lib/rocket_job/batch/categories.rb +338 -0
  7. data/lib/rocket_job/batch/io.rb +132 -69
  8. data/lib/rocket_job/batch/model.rb +20 -68
  9. data/lib/rocket_job/batch/performance.rb +20 -8
  10. data/lib/rocket_job/batch/statistics.rb +35 -13
  11. data/lib/rocket_job/batch/tabular.rb +2 -0
  12. data/lib/rocket_job/batch/tabular/input.rb +8 -6
  13. data/lib/rocket_job/batch/tabular/output.rb +4 -2
  14. data/lib/rocket_job/batch/throttle_running_workers.rb +8 -17
  15. data/lib/rocket_job/batch/worker.rb +27 -24
  16. data/lib/rocket_job/category/base.rb +78 -0
  17. data/lib/rocket_job/category/input.rb +110 -0
  18. data/lib/rocket_job/category/output.rb +25 -0
  19. data/lib/rocket_job/cli.rb +24 -16
  20. data/lib/rocket_job/dirmon_entry.rb +22 -12
  21. data/lib/rocket_job/event.rb +1 -1
  22. data/lib/rocket_job/extensions/iostreams/path.rb +32 -0
  23. data/lib/rocket_job/extensions/mongoid/factory.rb +4 -12
  24. data/lib/rocket_job/extensions/mongoid/stringified_symbol.rb +50 -0
  25. data/lib/rocket_job/extensions/psych/yaml_tree.rb +8 -0
  26. data/lib/rocket_job/jobs/dirmon_job.rb +1 -1
  27. data/lib/rocket_job/jobs/housekeeping_job.rb +7 -7
  28. data/lib/rocket_job/jobs/on_demand_batch_job.rb +15 -6
  29. data/lib/rocket_job/jobs/on_demand_job.rb +1 -2
  30. data/lib/rocket_job/jobs/performance_job.rb +3 -1
  31. data/lib/rocket_job/jobs/re_encrypt/relational_job.rb +5 -4
  32. data/lib/rocket_job/jobs/upload_file_job.rb +47 -10
  33. data/lib/rocket_job/lookup_collection.rb +68 -0
  34. data/lib/rocket_job/plugins/job/model.rb +25 -50
  35. data/lib/rocket_job/plugins/job/throttle_running_jobs.rb +12 -4
  36. data/lib/rocket_job/plugins/job/worker.rb +2 -7
  37. data/lib/rocket_job/plugins/restart.rb +12 -5
  38. data/lib/rocket_job/plugins/state_machine.rb +2 -1
  39. data/lib/rocket_job/ractor_worker.rb +42 -0
  40. data/lib/rocket_job/server/model.rb +1 -1
  41. data/lib/rocket_job/sliced.rb +36 -0
  42. data/lib/rocket_job/sliced/bzip2_output_slice.rb +43 -0
  43. data/lib/rocket_job/sliced/input.rb +4 -4
  44. data/lib/rocket_job/sliced/slice.rb +11 -13
  45. data/lib/rocket_job/sliced/slices.rb +20 -2
  46. data/lib/rocket_job/sliced/writer/output.rb +33 -44
  47. data/lib/rocket_job/subscribers/server.rb +1 -1
  48. data/lib/rocket_job/thread_worker.rb +46 -0
  49. data/lib/rocket_job/version.rb +1 -1
  50. data/lib/rocket_job/worker.rb +21 -55
  51. data/lib/rocket_job/worker_pool.rb +5 -7
  52. data/lib/rocketjob.rb +52 -59
  53. metadata +43 -33
  54. data/lib/rocket_job/extensions/mongoid/remove_warnings.rb +0 -12
  55. data/lib/rocket_job/jobs/on_demand_batch_tabular_job.rb +0 -28
@@ -0,0 +1,8 @@
1
+ require "psych/visitors/yaml_tree"
2
+
3
class Psych::Visitors::YAMLTree
  # Serialize an IOStreams path as a string: the path is converted with
  # `#to_s` and the result is handed to the regular String visitor.
  def visit_IOStreams_Path(path)
    file_name = path.to_s
    visit_String(file_name)
  end
end
@@ -18,7 +18,7 @@ module RocketJob
18
18
  # file name of the archived file is passed into the job as either
19
19
  # `upload_file_name` or `full_file_name`.
20
20
 
21
- # Note:
21
+ # Notes:
22
22
  # - Jobs that do not implement #upload _must_ have either `upload_file_name` or `full_file_name` as an attribute.
23
23
  #
24
24
  # With RocketJob Pro, the file is automatically uploaded into the job itself
@@ -35,7 +35,7 @@ module RocketJob
35
35
  self.cron_schedule = "*/15 * * * * UTC"
36
36
 
37
37
  # Whether to destroy zombie servers automatically
38
- field :destroy_zombies, type: Boolean, default: true, user_editable: true, copy_on_restart: true
38
+ field :destroy_zombies, type: Mongoid::Boolean, default: true, user_editable: true, copy_on_restart: true
39
39
 
40
40
  # Retention intervals in seconds.
41
41
  # Set to nil to retain everything.
@@ -54,12 +54,12 @@ module RocketJob
54
54
  RocketJob::Job.paused.where(completed_at: {"$lte" => paused_retention.seconds.ago}).destroy_all if paused_retention
55
55
  RocketJob::Job.queued.where(created_at: {"$lte" => queued_retention.seconds.ago}).destroy_all if queued_retention
56
56
 
57
- if destroy_zombies
58
- # Cleanup zombie servers
59
- RocketJob::Server.destroy_zombies
60
- # Requeue jobs where the worker is in the zombie state and its server has gone away
61
- RocketJob::ActiveWorker.requeue_zombies
62
- end
57
+ return unless destroy_zombies
58
+
59
+ # Cleanup zombie servers
60
+ RocketJob::Server.destroy_zombies
61
+ # Requeue jobs where the worker is in the zombie state and its server has gone away
62
+ RocketJob::ActiveWorker.requeue_zombies
63
63
  end
64
64
  end
65
65
  end
@@ -31,16 +31,17 @@
31
31
  # job.perform_now
32
32
  # job.cleanup!
33
33
  #
34
- # By default output is not collected, add the option `collect_output: true` to collect output.
34
+ # By default output is not collected, call the method `#collect_output` to collect output.
35
35
  #
36
36
  # Example:
37
37
  # job = RocketJob::Jobs::OnDemandBatchJob.new(
38
38
  # description: 'Fix data',
39
39
  # code: code,
40
40
  # throttle_running_workers: 5,
41
- # priority: 30,
42
- # collect_output: true
41
+ # priority: 30
43
42
  # )
43
+ # job.collect_output
44
+ # job.save!
44
45
  #
45
46
  # Example: Move the upload operation into a before_batch.
46
47
  # upload_code = <<-CODE
@@ -95,10 +96,18 @@ module RocketJob
95
96
  before_batch :run_before_code
96
97
  after_batch :run_after_code
97
98
 
99
# Make this job collect its output by replacing its output categories with a
# single, newly built output category.
#
# Parameters
#   nils: [true|false]
#     Passed straight through as the `nils` option of the output category.
#     Upstream description: "Whether to skip the output from `code` when it is nil".
#     NOTE(review): confirm whether `true` collects or skips nil results.
#     Default: false
def collect_output(nils: false)
  category = RocketJob::Category::Output.new(nils: nils)
  self.output_categories = [category]
end
106
+
98
107
  private
99
108
 
100
109
  def load_perform_code
101
- instance_eval("def perform(row)\n#{code}\nend")
110
+ instance_eval("def perform(row)\n#{code}\nend", __FILE__, __LINE__)
102
111
  end
103
112
 
104
113
  def run_before_code
@@ -118,13 +127,13 @@ module RocketJob
118
127
  def validate_before_code
119
128
  return if before_code.nil?
120
129
 
121
- validate_field(:before_code) { instance_eval("def __before_code\n#{before_code}\nend") }
130
+ validate_field(:before_code) { instance_eval("def __before_code\n#{before_code}\nend", __FILE__, __LINE__) }
122
131
  end
123
132
 
124
133
  def validate_after_code
125
134
  return if after_code.nil?
126
135
 
127
- validate_field(:after_code) { instance_eval("def __after_code\n#{after_code}\nend") }
136
+ validate_field(:after_code) { instance_eval("def __after_code\n#{after_code}\nend", __FILE__, __LINE__) }
128
137
  end
129
138
 
130
139
  def validate_field(field)
@@ -38,12 +38,11 @@
38
38
  #
39
39
  # Example: Retain output:
40
40
  # code = <<~CODE
41
- # {'value' => data['a'] * data['b']}
41
+ # data['result'] = data['a'] * data['b']
42
42
  # CODE
43
43
  #
44
44
  # RocketJob::Jobs::OnDemandJob.create!(
45
45
  # code: code,
46
- # collect_output: true,
47
46
  # data: {'a' => 10, 'b' => 2}
48
47
  # )
49
48
  #
@@ -6,9 +6,11 @@ module RocketJob
6
6
  # Define the job's default attributes
7
7
  self.description = "Performance Test"
8
8
  self.priority = 5
9
- self.slice_size = 100
10
9
  self.destroy_on_complete = false
11
10
 
11
+ input_category slice_size: 100
12
+ output_category
13
+
12
14
  # No operation, just return the supplied line (record)
13
15
  def perform(line)
14
16
  line
@@ -17,13 +17,13 @@ module RocketJob
17
17
  class RelationalJob < RocketJob::Job
18
18
  include RocketJob::Batch
19
19
 
20
- self.slice_size = 1000
21
20
  self.priority = 30
22
21
  self.destroy_on_complete = false
23
- self.compress = true
24
22
  self.throttle_running_jobs = 1
25
23
  self.throttle_running_workers = 10
26
24
 
25
+ input_category slice_size: 1_000
26
+
27
27
  # Name of the table being re-encrypted
28
28
  field :table_name, type: String
29
29
 
@@ -73,7 +73,7 @@ module RocketJob
73
73
 
74
74
  # Use AR to fetch all the records
75
75
  self.class.connection.select_rows(sql).each do |row|
76
- row = row.unshift(nil)
76
+ row.unshift(nil)
77
77
  index = 1
78
78
  sql = "update #{quoted_table_name} set "
79
79
  updates = []
@@ -120,7 +120,8 @@ module RocketJob
120
120
  def upload_records
121
121
  start_id = self.class.connection.select_value("select min(id) from #{quoted_table_name}").to_i
122
122
  last_id = self.class.connection.select_value("select max(id) from #{quoted_table_name}").to_i
123
- self.record_count = last_id.positive? ? (input.upload_integer_range_in_reverse_order(start_id, last_id) * slice_size) : 0
123
+ self.record_count =
124
+ last_id.positive? ? (input.upload_integer_range_in_reverse_order(start_id, last_id) * input_category.slice_size) : 0
124
125
  end
125
126
  end
126
127
  end
@@ -19,7 +19,7 @@ module RocketJob
19
19
  field :properties, type: Hash, default: {}, user_editable: true
20
20
 
21
21
  # File to upload
22
- field :upload_file_name, type: String, user_editable: true
22
+ field :upload_file_name, type: IOStreams::Path, user_editable: true
23
23
 
24
24
  # The original Input file name.
25
25
  # Used by #upload to extract the IOStreams when present.
@@ -33,14 +33,15 @@ module RocketJob
33
33
  validate :job_is_a_rocket_job
34
34
  validate :job_implements_upload
35
35
  validate :file_exists
36
+ validate :job_has_properties
36
37
 
37
38
  # Create the job and upload the file into it.
38
39
  def perform
39
- job = job_class.new(properties)
40
+ job = job_class.from_properties(properties)
40
41
  job.id = job_id if job_id
41
42
  upload_file(job)
42
43
  job.save!
43
- rescue StandardError => e
44
+ rescue Exception => e
44
45
  # Prevent partial uploads
45
46
  job&.cleanup! if job.respond_to?(:cleanup!)
46
47
  raise(e)
@@ -66,7 +67,10 @@ module RocketJob
66
67
  elsif job.respond_to?(:full_file_name=)
67
68
  job.full_file_name = upload_file_name
68
69
  else
69
- raise(ArgumentError, "Model #{job_class_name} must implement '#upload', or have attribute 'upload_file_name' or 'full_file_name'")
70
+ raise(
71
+ ArgumentError,
72
+ "Model #{job_class_name} must implement '#upload', or have attribute 'upload_file_name' or 'full_file_name'"
73
+ )
70
74
  end
71
75
  end
72
76
 
@@ -85,17 +89,50 @@ module RocketJob
85
89
  klass = job_class
86
90
  return if klass.nil? || klass.instance_methods.any? { |m| VALID_INSTANCE_METHODS.include?(m) }
87
91
 
88
- errors.add(:job_class_name, "#{job_class} must implement any one of: :#{VALID_INSTANCE_METHODS.join(' :')} instance methods")
92
+ errors.add(:job_class_name,
93
+ "#{job_class} must implement any one of: :#{VALID_INSTANCE_METHODS.join(' :')} instance methods")
89
94
  end
90
95
 
91
96
  def file_exists
92
- return if upload_file_name.nil?
93
-
94
- uri = URI.parse(upload_file_name)
95
- return unless uri.scheme.nil? || uri.scheme == "file"
96
- return if File.exist?(upload_file_name)
97
+ # Only check for file existence when it is a local file
98
+ return unless upload_file_name.is_a?(IOStreams::Paths::File)
99
+ if upload_file_name.to_s == ""
100
+ return errors.add(:upload_file_name, "Upload file name can't be blank.")
101
+ end
97
102
 
103
+ return if upload_file_name.exist?
98
104
  errors.add(:upload_file_name, "Upload file: #{upload_file_name} does not exist.")
105
+ rescue NotImplementedError
106
+ nil
107
+ end
108
+
109
# Validation: every key in `properties` must be assignable on the target job class.
#
# - Keys for which the job class has a public `<key>=` writer are accepted.
# - `:input_categories` / `:output_categories` keys without such a writer are
#   checked one level deeper: each key of each supplied category must have a
#   public writer on the matching category class.
# - Any other key adds an "Unknown Property" error on `:properties`.
#
# Does nothing when the job class cannot be resolved.
def job_has_properties
  target_class = job_class
  return unless target_class

  properties.each_key do |name|
    next if target_class.public_method_defined?("#{name}=".to_sym)

    # NOTE(review): only Symbol keys match here; String keys such as
    # "input_categories" fall through to the generic error below. Confirm intended.
    unless %i[output_categories input_categories].include?(name)
      errors.add(
        :properties,
        "Unknown Property: Attempted to set a value for #{name.inspect} which is not allowed on the job #{job_class_name}"
      )
      next
    end

    category_class = name == :input_categories ? RocketJob::Category::Input : RocketJob::Category::Output
    properties[name].each do |category|
      category.each_pair do |attribute, _value|
        next if category_class.public_method_defined?("#{attribute}=".to_sym)

        errors.add(
          :properties,
          "Unknown Property in #{name}: Attempted to set a value for #{attribute}.#{name} which is not allowed on the job #{job_class_name}"
        )
      end
    end
  end
end
100
137
  end
101
138
  end
@@ -0,0 +1,68 @@
1
module RocketJob
  # Mongo collection whose documents are looked up by their `id` value.
  class LookupCollection < Mongo::Collection
    # Rapidly upload individual records in batches.
    #
    # Operates directly on a Mongo Collection to avoid the overhead of creating Mongoid objects
    # for each and every row.
    #
    # Parameters
    #   batch_size: [Integer]
    #     Number of records to buffer before they are written with one `insert_many` call.
    #     Default: 10_000
    #
    # Yields the uploader, to which records (Hashes with an id key) are appended with `<<`.
    #
    # Returns [Integer] the number of records appended.
    #
    # Example:
    #   lookup_collection(:my_lookup).upload do |io|
    #     io << {id: 123, data: "first record"}
    #     io << {id: 124, data: "second record"}
    #   end
    #
    #   lookup_collection(:my_lookup).lookup(123)
    def upload(batch_size: 10_000, &block)
      # The uploader inserts into this collection, so it must be given `self`.
      BatchUploader.upload(self, batch_size: batch_size, &block)
    end

    # Looks up the value at the specified id.
    # Returns [nil] if no record was found with the supplied id.
    def lookup(id)
      find(id: id).first
    end

    # Internal class for uploading records in batches.
    #
    # It is not preceded by `private`: that keyword only affects method
    # definitions and has no effect on a nested class.
    class BatchUploader
      attr_reader :record_count

      # Creates an uploader for `collection`, yields it to the caller's block,
      # and returns the number of records appended.
      # Buffered records are always flushed, even when the block raises.
      def self.upload(collection, **args)
        writer = new(collection, **args)
        yield(writer)
        writer.record_count
      ensure
        writer&.close
      end

      def initialize(collection, batch_size:)
        @collection   = collection
        @batch_size   = batch_size
        @record_count = 0
        @documents    = []
      end

      # Buffers one record, writing the buffer out once it holds `batch_size` records.
      #
      # Raises ArgumentError when the record is not a Hash, or has none of the
      # keys :id, "id" or "_id".
      #
      # Returns self so that calls can be chained.
      def <<(record)
        raise(ArgumentError, "Record must be a Hash") unless record.is_a?(Hash)
        raise(ArgumentError, "Record must include an :id key") unless record.key?(:id) || record.key?("id") || record.key?("_id")

        @documents << record
        @record_count += 1
        flush if @documents.size >= @batch_size

        self
      end

      # Writes out any records still held in the buffer.
      # Safe to call more than once: the buffer is emptied after each write.
      def close
        flush
      end

      private

      # Inserts the buffered records with a single call and empties the buffer.
      def flush
        return if @documents.empty?

        @collection.insert_many(@documents)
        @documents.clear
      end
    end
  end
end
@@ -37,12 +37,10 @@ module RocketJob
37
37
  # arrives, then the current job will complete the current slices and process
38
38
  # the new higher priority job
39
39
  field :priority, type: Integer, default: 50, class_attribute: true, user_editable: true, copy_on_restart: true
40
+ validates_inclusion_of :priority, in: 1..100
40
41
 
41
42
  # When the job completes destroy it from both the database and the UI
42
- field :destroy_on_complete, type: Boolean, default: true, class_attribute: true, copy_on_restart: true
43
-
44
- # Whether to store the results from this job
45
- field :collect_output, type: Boolean, default: false, class_attribute: true
43
+ field :destroy_on_complete, type: Mongoid::Boolean, default: true, class_attribute: true, copy_on_restart: true
46
44
 
47
45
  # Run this job no earlier than this time
48
46
  field :run_at, type: Time, user_editable: true
@@ -54,14 +52,15 @@ module RocketJob
54
52
  # Can be used to reduce log noise, especially during high volume calls
55
53
  # For debugging a single job can be logged at a low level such as :trace
56
54
  # Levels supported: :trace, :debug, :info, :warn, :error, :fatal
57
- field :log_level, type: Symbol, class_attribute: true, user_editable: true, copy_on_restart: true
55
+ field :log_level, type: Mongoid::StringifiedSymbol, class_attribute: true, user_editable: true, copy_on_restart: true
56
+ validates_inclusion_of :log_level, in: SemanticLogger::LEVELS + [nil]
58
57
 
59
58
  #
60
59
  # Read-only attributes
61
60
  #
62
61
 
63
62
  # Current state, as set by the state machine. Do not modify this value directly.
64
- field :state, type: Symbol, default: :queued
63
+ field :state, type: Mongoid::StringifiedSymbol, default: :queued
65
64
 
66
65
  # When the job was created
67
66
  field :created_at, type: Time, default: -> { Time.now }
@@ -89,17 +88,12 @@ module RocketJob
89
88
  # Store the last exception for this job
90
89
  embeds_one :exception, class_name: "RocketJob::JobException"
91
90
 
92
- # Store the Hash result from this job if collect_output is true,
93
- # and the job returned actually returned a Hash, otherwise nil
94
- # Not applicable to SlicedJob jobs, since its output is stored in a
95
- # separate collection
96
- field :result, type: Hash
97
-
91
+ # Used when workers fetch jobs to work on.
98
92
  index({state: 1, priority: 1, _id: 1}, background: true)
93
+ # Used by Mission Control to display completed jobs sorted by completion.
94
+ index({completed_at: 1}, background: true)
99
95
 
100
96
  validates_presence_of :state, :failure_count, :created_at
101
- validates :priority, inclusion: 1..100
102
- validates :log_level, inclusion: SemanticLogger::LEVELS + [nil]
103
97
  end
104
98
 
105
99
  module ClassMethods
@@ -155,14 +149,8 @@ module RocketJob
155
149
 
156
150
  # Scope for queued jobs that can run now
157
151
  # I.e. Queued jobs excluding scheduled jobs
158
- if Mongoid::VERSION.to_f >= 7.1
159
- def queued_now
160
- queued.and(RocketJob::Job.where(run_at: nil).or(:run_at.lte => Time.now))
161
- end
162
- else
163
- def queued_now
164
- queued.or({run_at: nil}, :run_at.lte => Time.now)
165
- end
152
+ def queued_now
153
+ queued.and(RocketJob::Job.where(run_at: nil).or(:run_at.lte => Time.now))
166
154
  end
167
155
 
168
156
  # Defines all the fields that are accessible on the Document
@@ -183,43 +171,30 @@ module RocketJob
183
171
  #
184
172
  # @return [ Field ] The generated field
185
173
  def field(name, options)
186
- if options.delete(:user_editable) == true
187
- self.user_editable_fields += [name.to_sym] unless user_editable_fields.include?(name.to_sym)
174
+ if (options.delete(:user_editable) == true) && !user_editable_fields.include?(name.to_sym)
175
+ self.user_editable_fields += [name.to_sym]
188
176
  end
177
+
189
178
  if options.delete(:class_attribute) == true
190
179
  class_attribute(name, instance_accessor: false)
191
180
  public_send("#{name}=", options[:default]) if options.key?(:default)
192
181
  options[:default] = -> { self.class.public_send(name) }
193
182
  end
194
- if options.delete(:copy_on_restart) == true
195
- self.rocket_job_restart_attributes += [name.to_sym] unless rocket_job_restart_attributes.include?(name.to_sym)
183
+
184
+ if (options.delete(:copy_on_restart) == true) && !rocket_job_restart_attributes.include?(name.to_sym)
185
+ self.rocket_job_restart_attributes += [name.to_sym]
196
186
  end
197
- super(name, options)
198
- end
199
187
 
200
- # DEPRECATED
201
- def rocket_job
202
- warn "Replace calls to .rocket_job with calls to set class instance variables. For example: self.priority = 50"
203
- yield(self)
188
+ super(name, options)
204
189
  end
205
190
 
206
- # DEPRECATED
207
- def public_rocket_job_properties(*args)
208
- warn "Replace calls to .public_rocket_job_properties by adding `user_editable: true` option to the field declaration in #{name} for: #{args.inspect}"
209
- self.user_editable_fields += args.collect(&:to_sym)
191
+ # Builds this job instance from the supplied properties hash.
192
+ # Overridden by batch to support child objects.
193
+ def from_properties(properties)
194
+ new(properties)
210
195
  end
211
196
  end
212
197
 
213
- # Returns [true|false] whether to collect nil results from running this batch
214
- def collect_nil_output?
215
- collect_output? ? (collect_nil_output == true) : false
216
- end
217
-
218
- # Returns [true|false] whether to collect the results from running this batch
219
- def collect_output?
220
- collect_output == true
221
- end
222
-
223
198
  # Returns [Float] the number of seconds the job has taken
224
199
  # - Elapsed seconds to process the job from when a worker first started working on it
225
200
  # until now if still running, or until it was completed
@@ -282,7 +257,6 @@ module RocketJob
282
257
  # Returns [Hash] status of this job
283
258
  def as_json
284
259
  attrs = serializable_hash(methods: %i[seconds duration])
285
- attrs.delete("result") unless collect_output?
286
260
  attrs.delete("failure_count") unless failure_count.positive?
287
261
  if queued?
288
262
  attrs.delete("started_at")
@@ -319,16 +293,17 @@ module RocketJob
319
293
  h = as_json
320
294
  h.delete("seconds")
321
295
  h.dup.each_pair do |k, v|
322
- if v.is_a?(Time)
296
+ case v
297
+ when Time
323
298
  h[k] = v.in_time_zone(time_zone).to_s
324
- elsif v.is_a?(BSON::ObjectId)
299
+ when BSON::ObjectId
325
300
  h[k] = v.to_s
326
301
  end
327
302
  end
328
303
  h
329
304
  end
330
305
 
331
- # Returns [Boolean] whether the worker runs on a particular server.
306
+ # Returns [true|false] whether the worker runs on a particular server.
332
307
  def worker_on_server?(server_name)
333
308
  return false unless worker_name.present? && server_name.present?
334
309