rocketjob 5.4.0.beta1 → 6.0.0.rc2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (60) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +19 -5
  3. data/bin/rocketjob_batch_perf +1 -1
  4. data/bin/rocketjob_perf +1 -1
  5. data/lib/rocket_job/batch.rb +3 -0
  6. data/lib/rocket_job/batch/categories.rb +341 -0
  7. data/lib/rocket_job/batch/io.rb +128 -60
  8. data/lib/rocket_job/batch/model.rb +20 -68
  9. data/lib/rocket_job/batch/performance.rb +19 -7
  10. data/lib/rocket_job/batch/statistics.rb +34 -12
  11. data/lib/rocket_job/batch/tabular.rb +2 -0
  12. data/lib/rocket_job/batch/tabular/input.rb +8 -6
  13. data/lib/rocket_job/batch/tabular/output.rb +4 -2
  14. data/lib/rocket_job/batch/throttle_running_workers.rb +8 -17
  15. data/lib/rocket_job/batch/worker.rb +27 -24
  16. data/lib/rocket_job/category/base.rb +78 -0
  17. data/lib/rocket_job/category/input.rb +110 -0
  18. data/lib/rocket_job/category/output.rb +25 -0
  19. data/lib/rocket_job/cli.rb +25 -17
  20. data/lib/rocket_job/dirmon_entry.rb +22 -12
  21. data/lib/rocket_job/event.rb +1 -1
  22. data/lib/rocket_job/extensions/iostreams/path.rb +32 -0
  23. data/lib/rocket_job/extensions/mongoid/contextual/mongo.rb +2 -2
  24. data/lib/rocket_job/extensions/mongoid/factory.rb +4 -12
  25. data/lib/rocket_job/extensions/mongoid/stringified_symbol.rb +50 -0
  26. data/lib/rocket_job/extensions/psych/yaml_tree.rb +8 -0
  27. data/lib/rocket_job/extensions/rocket_job_adapter.rb +2 -2
  28. data/lib/rocket_job/jobs/dirmon_job.rb +2 -2
  29. data/lib/rocket_job/jobs/housekeeping_job.rb +7 -7
  30. data/lib/rocket_job/jobs/on_demand_batch_job.rb +15 -6
  31. data/lib/rocket_job/jobs/on_demand_job.rb +1 -2
  32. data/lib/rocket_job/jobs/performance_job.rb +3 -1
  33. data/lib/rocket_job/jobs/re_encrypt/relational_job.rb +103 -96
  34. data/lib/rocket_job/jobs/upload_file_job.rb +44 -8
  35. data/lib/rocket_job/lookup_collection.rb +69 -0
  36. data/lib/rocket_job/plugins/job/model.rb +25 -50
  37. data/lib/rocket_job/plugins/job/throttle.rb +2 -2
  38. data/lib/rocket_job/plugins/job/throttle_running_jobs.rb +12 -4
  39. data/lib/rocket_job/plugins/job/worker.rb +2 -7
  40. data/lib/rocket_job/plugins/restart.rb +12 -5
  41. data/lib/rocket_job/plugins/state_machine.rb +2 -1
  42. data/lib/rocket_job/plugins/throttle_dependent_jobs.rb +38 -0
  43. data/lib/rocket_job/ractor_worker.rb +42 -0
  44. data/lib/rocket_job/server/model.rb +1 -1
  45. data/lib/rocket_job/sliced.rb +15 -70
  46. data/lib/rocket_job/sliced/bzip2_output_slice.rb +2 -2
  47. data/lib/rocket_job/sliced/input.rb +1 -1
  48. data/lib/rocket_job/sliced/slice.rb +5 -13
  49. data/lib/rocket_job/sliced/slices.rb +14 -2
  50. data/lib/rocket_job/sliced/writer/output.rb +33 -45
  51. data/lib/rocket_job/subscribers/server.rb +1 -1
  52. data/lib/rocket_job/thread_worker.rb +46 -0
  53. data/lib/rocket_job/throttle_definitions.rb +7 -1
  54. data/lib/rocket_job/version.rb +1 -1
  55. data/lib/rocket_job/worker.rb +21 -55
  56. data/lib/rocket_job/worker_pool.rb +5 -7
  57. data/lib/rocketjob.rb +53 -43
  58. metadata +35 -26
  59. data/lib/rocket_job/extensions/mongoid/remove_warnings.rb +0 -12
  60. data/lib/rocket_job/jobs/on_demand_batch_tabular_job.rb +0 -28
@@ -0,0 +1,50 @@
1
+ # frozen_string_literal: true
2
+
3
+ # A class which sends values to the database as Strings but returns them to the user as Symbols.
4
+ module Mongoid
5
+ class StringifiedSymbol
6
+ class << self
7
+ # Convert the object from its mongo friendly ruby type to this type.
8
+ #
9
+ # @example Demongoize the object.
10
+ # Mongoid::StringifiedSymbol.demongoize(object)
11
+ #
12
+ # @param [ Object ] object The object to demongoize.
13
+ #
14
+ # @return [ Symbol ] The object.
15
+ #
16
+ # @api private
17
+ def demongoize(object)
18
+ if object.nil?
19
+ object
20
+ else
21
+ object.to_s.to_sym
22
+ end
23
+ end
24
+
25
+ # Turn the object from the ruby type we deal with to a Mongo friendly
26
+ # type.
27
+ #
28
+ # @example Mongoize the object.
29
+ # Mongoid::StringifiedSymbol.mongoize(:my_symbol)
30
+ #
31
+ # @param [ Object ] object The object to mongoize.
32
+ #
33
+ # @return [ String ] The object mongoized.
34
+ #
35
+ # @api private
36
+ def mongoize(object)
37
+ if object.nil?
38
+ object
39
+ else
40
+ object.to_s
41
+ end
42
+ end
43
+
44
+ # @api private
45
+ def evolve(object)
46
+ mongoize(object)
47
+ end
48
+ end
49
+ end
50
+ end
@@ -0,0 +1,8 @@
1
+ require "psych/visitors/yaml_tree"
2
+
3
+ class Psych::Visitors::YAMLTree
4
+ # Serialize an IOStreams::Path as a string
5
+ def visit_IOStreams_Path(o)
6
+ visit_String(o.to_s)
7
+ end
8
+ end
@@ -55,13 +55,13 @@ module ActiveJob
55
55
  # - Completed jobs will not appear in completed since the Active Job adapter
56
56
  # uses the default Rocket Job `destroy_on_completion` of `false`.
57
57
  class RocketJobAdapter
58
- def self.enqueue(active_job) #:nodoc:
58
+ def self.enqueue(active_job)
59
59
  job = RocketJob::Jobs::ActiveJob.create!(active_job_params(active_job))
60
60
  active_job.provider_job_id = job.id.to_s if active_job.respond_to?(:provider_job_id=)
61
61
  job
62
62
  end
63
63
 
64
- def self.enqueue_at(active_job, timestamp) #:nodoc:
64
+ def self.enqueue_at(active_job, timestamp)
65
65
  params = active_job_params(active_job)
66
66
  params[:run_at] = Time.at(timestamp).utc
67
67
 
@@ -18,7 +18,7 @@ module RocketJob
18
18
  # file name of the archived file is passed into the job as either
19
19
  # `upload_file_name` or `full_file_name`.
20
20
 
21
- # Note:
21
+ # Notes:
22
22
  # - Jobs that do not implement #upload _must_ have either `upload_file_name` or `full_file_name` as an attribute.
23
23
  #
24
24
  # With RocketJob Pro, the file is automatically uploaded into the job itself
@@ -82,7 +82,7 @@ module RocketJob
82
82
  key = iopath.to_s.tr(".", "_")
83
83
  previous_size = previous_file_names[key]
84
84
  # Check every few minutes for a file size change before trying to process the file.
85
- size = check_file(entry, iopath, previous_size)
85
+ size = check_file(entry, iopath, previous_size)
86
86
  new_file_names[key] = size if size
87
87
  end
88
88
  end
@@ -35,7 +35,7 @@ module RocketJob
35
35
  self.cron_schedule = "*/15 * * * * UTC"
36
36
 
37
37
  # Whether to destroy zombie servers automatically
38
- field :destroy_zombies, type: Boolean, default: true, user_editable: true, copy_on_restart: true
38
+ field :destroy_zombies, type: Mongoid::Boolean, default: true, user_editable: true, copy_on_restart: true
39
39
 
40
40
  # Retention intervals in seconds.
41
41
  # Set to nil to retain everything.
@@ -54,12 +54,12 @@ module RocketJob
54
54
  RocketJob::Job.paused.where(completed_at: {"$lte" => paused_retention.seconds.ago}).destroy_all if paused_retention
55
55
  RocketJob::Job.queued.where(created_at: {"$lte" => queued_retention.seconds.ago}).destroy_all if queued_retention
56
56
 
57
- if destroy_zombies
58
- # Cleanup zombie servers
59
- RocketJob::Server.destroy_zombies
60
- # Requeue jobs where the worker is in the zombie state and its server has gone away
61
- RocketJob::ActiveWorker.requeue_zombies
62
- end
57
+ return unless destroy_zombies
58
+
59
+ # Cleanup zombie servers
60
+ RocketJob::Server.destroy_zombies
61
+ # Requeue jobs where the worker is in the zombie state and its server has gone away
62
+ RocketJob::ActiveWorker.requeue_zombies
63
63
  end
64
64
  end
65
65
  end
@@ -31,16 +31,17 @@
31
31
  # job.perform_now
32
32
  # job.cleanup!
33
33
  #
34
- # By default output is not collected, add the option `collect_output: true` to collect output.
34
+ # By default output is not collected, call the method `#collect_output` to collect output.
35
35
  #
36
36
  # Example:
37
37
  # job = RocketJob::Jobs::OnDemandBatchJob.new(
38
38
  # description: 'Fix data',
39
39
  # code: code,
40
40
  # throttle_running_workers: 5,
41
- # priority: 30,
42
- # collect_output: true
41
+ # priority: 30
43
42
  # )
43
+ # job.collect_output
44
+ # job.save!
44
45
  #
45
46
  # Example: Move the upload operation into a before_batch.
46
47
  # upload_code = <<-CODE
@@ -95,10 +96,18 @@ module RocketJob
95
96
  before_batch :run_before_code
96
97
  after_batch :run_after_code
97
98
 
99
+ # Make this job collect its output
100
+ # :nils [true|false]
101
+ # Whether to skip the output from `code` when it is nil
102
+ # Default: false
103
+ def collect_output(nils: false)
104
+ self.output_categories = [RocketJob::Category::Output.new(nils: nils)]
105
+ end
106
+
98
107
  private
99
108
 
100
109
  def load_perform_code
101
- instance_eval("def perform(row)\n#{code}\nend")
110
+ instance_eval("def perform(row)\n#{code}\nend", __FILE__, __LINE__)
102
111
  end
103
112
 
104
113
  def run_before_code
@@ -118,13 +127,13 @@ module RocketJob
118
127
  def validate_before_code
119
128
  return if before_code.nil?
120
129
 
121
- validate_field(:before_code) { instance_eval("def __before_code\n#{before_code}\nend") }
130
+ validate_field(:before_code) { instance_eval("def __before_code\n#{before_code}\nend", __FILE__, __LINE__) }
122
131
  end
123
132
 
124
133
  def validate_after_code
125
134
  return if after_code.nil?
126
135
 
127
- validate_field(:after_code) { instance_eval("def __after_code\n#{after_code}\nend") }
136
+ validate_field(:after_code) { instance_eval("def __after_code\n#{after_code}\nend", __FILE__, __LINE__) }
128
137
  end
129
138
 
130
139
  def validate_field(field)
@@ -38,12 +38,11 @@
38
38
  #
39
39
  # Example: Retain output:
40
40
  # code = <<~CODE
41
- # {'value' => data['a'] * data['b']}
41
+ # data['result'] = data['a'] * data['b']
42
42
  # CODE
43
43
  #
44
44
  # RocketJob::Jobs::OnDemandJob.create!(
45
45
  # code: code,
46
- # collect_output: true,
47
46
  # data: {'a' => 10, 'b' => 2}
48
47
  # )
49
48
  #
@@ -6,9 +6,11 @@ module RocketJob
6
6
  # Define the job's default attributes
7
7
  self.description = "Performance Test"
8
8
  self.priority = 5
9
- self.slice_size = 100
10
9
  self.destroy_on_complete = false
11
10
 
11
+ input_category slice_size: 100
12
+ output_category
13
+
12
14
  # No operation, just return the supplied line (record)
13
15
  def perform(line)
14
16
  line
@@ -1,6 +1,3 @@
1
- require "active_record"
2
- require "sync_attr"
3
-
4
1
  # Batch Worker to Re-encrypt all encrypted fields in MySQL that start with `encrypted_`.
5
2
  #
6
3
  # Run in Rails console:
@@ -11,116 +8,126 @@ require "sync_attr"
11
8
  # * This job will find any column in the database that starts with `encrypted_`.
12
9
  # * This means that temporary or other tables not part of the application tables will also be processed.
13
10
  # * Since it automatically finds and re-encrypts any column, new columns are handled without any manual intervention.
14
- module RocketJob
15
- module Jobs
16
- module ReEncrypt
17
- class RelationalJob < RocketJob::Job
18
- include RocketJob::Batch
19
-
20
- self.slice_size = 1000
21
- self.priority = 30
22
- self.destroy_on_complete = false
23
- self.compress = true
24
- self.throttle_running_jobs = 1
25
- self.throttle_running_workers = 10
26
-
27
- # Name of the table being re-encrypted
28
- field :table_name, type: String
29
-
30
- # Limit the number of records to re-encrypt in test environments
31
- field :limit, type: Integer
32
-
33
- validates_presence_of :table_name
34
- before_batch :upload_records
35
-
36
- # Returns [Hash] of table names with each entry being an array
37
- # of columns that start with encrypted_
38
- sync_cattr_reader :encrypted_columns do
39
- h = {}
40
- connection.tables.each do |table|
41
- columns = connection.columns(table)
42
- columns.each do |column|
43
- if column.name.start_with?("encrypted_")
44
- add_column = column.name
45
- (h[table] ||= []) << add_column if add_column
11
+ if defined?(ActiveRecord) && defined?(SyncAttr)
12
+ require "active_record"
13
+ require "sync_attr"
14
+
15
+ module RocketJob
16
+ module Jobs
17
+ module ReEncrypt
18
+ class RelationalJob < RocketJob::Job
19
+ include RocketJob::Batch
20
+
21
+ self.priority = 30
22
+ self.destroy_on_complete = false
23
+ self.throttle_running_jobs = 1
24
+ self.throttle_running_workers = 10
25
+
26
+ input_category slice_size: 1_000
27
+
28
+ # Name of the table being re-encrypted
29
+ field :table_name, type: String
30
+
31
+ # Limit the number of records to re-encrypt in test environments
32
+ field :limit, type: Integer
33
+
34
+ validates_presence_of :table_name
35
+ before_batch :upload_records
36
+
37
+ # Returns [Hash] of table names with each entry being an array
38
+ # of columns that start with encrypted_
39
+ sync_cattr_reader :encrypted_columns do
40
+ h = {}
41
+ connection.tables.each do |table|
42
+ columns = connection.columns(table)
43
+ columns.each do |column|
44
+ if column.name.start_with?("encrypted_")
45
+ add_column = column.name
46
+ (h[table] ||= []) << add_column if add_column
47
+ end
46
48
  end
47
49
  end
50
+ h
48
51
  end
49
- h
50
- end
51
52
 
52
- # Re-encrypt all `encrypted_` columns in the relational database.
53
- # Queues a Job for each table that needs re-encryption.
54
- def self.start(**args)
55
- encrypted_columns.keys.collect do |table|
56
- create!(table_name: table, description: table, **args)
53
+ # Re-encrypt all `encrypted_` columns in the relational database.
54
+ # Queues a Job for each table that needs re-encryption.
55
+ def self.start(**args)
56
+ encrypted_columns.keys.collect do |table|
57
+ create!(table_name: table, description: table, **args)
58
+ end
57
59
  end
58
- end
59
60
 
60
- # Re-encrypt all encrypted columns for the named table.
61
- # Does not use AR models since we do not have models for all tables.
62
- def perform(range)
63
- start_id, end_id = range
61
+ # Re-encrypt all encrypted columns for the named table.
62
+ # Does not use AR models since we do not have models for all tables.
63
+ def perform(range)
64
+ start_id, end_id = range
64
65
 
65
- columns = self.class.encrypted_columns[table_name]
66
- unless columns&.size&.positive?
67
- logger.error "No columns for table: #{table_name} from #{start_id} to #{end_id}"
68
- return
69
- end
66
+ columns = self.class.encrypted_columns[table_name]
67
+ unless columns&.size&.positive?
68
+ logger.error "No columns for table: #{table_name} from #{start_id} to #{end_id}"
69
+ return
70
+ end
70
71
 
71
- logger.info "Processing: #{table_name} from #{start_id} to #{end_id}"
72
- sql = "select id, #{columns.join(',')} from #{quoted_table_name} where id >= #{start_id} and id <= #{end_id}"
73
-
74
- # Use AR to fetch all the records
75
- self.class.connection.select_rows(sql).each do |row|
76
- row = row.unshift(nil)
77
- index = 1
78
- sql = "update #{quoted_table_name} set "
79
- updates = []
80
- columns.collect do |column|
81
- index += 1
82
- value = row[index]
83
- # Prevent re-encryption
84
- unless value.blank?
85
- new_value = re_encrypt(value)
86
- updates << "#{column} = \"#{new_value}\"" if new_value != value
72
+ logger.info "Processing: #{table_name} from #{start_id} to #{end_id}"
73
+ sql = "select id, #{columns.join(',')} from #{quoted_table_name} where id >= #{start_id} and id <= #{end_id}"
74
+
75
+ # Use AR to fetch all the records
76
+ self.class.connection.select_rows(sql).each do |row|
77
+ row.unshift(nil)
78
+ index = 1
79
+ sql = "update #{quoted_table_name} set "
80
+ updates = []
81
+ columns.collect do |column|
82
+ index += 1
83
+ value = row[index]
84
+ # Prevent re-encryption
85
+ unless value.blank?
86
+ new_value = re_encrypt(value)
87
+ updates << "#{column} = \"#{new_value}\"" if new_value != value
88
+ end
89
+ end
90
+ if updates.size.positive?
91
+ sql << updates.join(", ")
92
+ sql << " where id=#{row[1]}"
93
+ logger.trace sql
94
+ self.class.connection.execute sql
95
+ else
96
+ logger.trace { "Skipping empty values #{table_name}:#{row[1]}" }
87
97
  end
88
- end
89
- if updates.size.positive?
90
- sql << updates.join(", ")
91
- sql << " where id=#{row[1]}"
92
- logger.trace sql
93
- self.class.connection.execute sql
94
- else
95
- logger.trace { "Skipping empty values #{table_name}:#{row[1]}" }
96
98
  end
97
99
  end
98
- end
99
100
 
100
- # Returns a database connection.
101
- #
102
- # Override this method to support other ways of obtaining a thread specific database connection.
103
- def self.connection
104
- ActiveRecord::Base.connection
105
- end
101
+ # Returns a database connection.
102
+ #
103
+ # Override this method to support other ways of obtaining a thread specific database connection.
104
+ def self.connection
105
+ ActiveRecord::Base.connection
106
+ end
106
107
 
107
- private
108
+ private
108
109
 
109
- def quoted_table_name
110
- @quoted_table_name ||= self.class.connection.quote_table_name(table_name)
111
- end
110
+ def quoted_table_name
111
+ @quoted_table_name ||= self.class.connection.quote_table_name(table_name)
112
+ end
112
113
 
113
- def re_encrypt(encrypted_value)
114
- return encrypted_value if (encrypted_value == "") || encrypted_value.nil?
114
+ def re_encrypt(encrypted_value)
115
+ return encrypted_value if (encrypted_value == "") || encrypted_value.nil?
115
116
 
116
- SymmetricEncryption.encrypt(SymmetricEncryption.decrypt(encrypted_value))
117
- end
117
+ SymmetricEncryption.encrypt(SymmetricEncryption.decrypt(encrypted_value))
118
+ end
118
119
 
119
- # Upload range to re-encrypt all rows in the specified table.
120
- def upload_records
121
- start_id = self.class.connection.select_value("select min(id) from #{quoted_table_name}").to_i
122
- last_id = self.class.connection.select_value("select max(id) from #{quoted_table_name}").to_i
123
- self.record_count = last_id.positive? ? (input.upload_integer_range_in_reverse_order(start_id, last_id) * slice_size) : 0
120
+ # Upload range to re-encrypt all rows in the specified table.
121
+ def upload_records
122
+ start_id = self.class.connection.select_value("select min(id) from #{quoted_table_name}").to_i
123
+ last_id = self.class.connection.select_value("select max(id) from #{quoted_table_name}").to_i
124
+ self.record_count =
125
+ if last_id.positive?
126
+ input.upload_integer_range_in_reverse_order(start_id, last_id) * input_category.slice_size
127
+ else
128
+ 0
129
+ end
130
+ end
124
131
  end
125
132
  end
126
133
  end
@@ -19,7 +19,7 @@ module RocketJob
19
19
  field :properties, type: Hash, default: {}, user_editable: true
20
20
 
21
21
  # File to upload
22
- field :upload_file_name, type: String, user_editable: true
22
+ field :upload_file_name, type: IOStreams::Path, user_editable: true
23
23
 
24
24
  # The original Input file name.
25
25
  # Used by #upload to extract the IOStreams when present.
@@ -33,10 +33,11 @@ module RocketJob
33
33
  validate :job_is_a_rocket_job
34
34
  validate :job_implements_upload
35
35
  validate :file_exists
36
+ validate :job_has_properties
36
37
 
37
38
  # Create the job and upload the file into it.
38
39
  def perform
39
- job = job_class.new(properties)
40
+ job = job_class.from_properties(properties)
40
41
  job.id = job_id if job_id
41
42
  upload_file(job)
42
43
  job.save!
@@ -66,7 +67,10 @@ module RocketJob
66
67
  elsif job.respond_to?(:full_file_name=)
67
68
  job.full_file_name = upload_file_name
68
69
  else
69
- raise(ArgumentError, "Model #{job_class_name} must implement '#upload', or have attribute 'upload_file_name' or 'full_file_name'")
70
+ raise(
71
+ ArgumentError,
72
+ "Model #{job_class_name} must implement '#upload', or have attribute 'upload_file_name' or 'full_file_name'"
73
+ )
70
74
  end
71
75
  end
72
76
 
@@ -85,17 +89,49 @@ module RocketJob
85
89
  klass = job_class
86
90
  return if klass.nil? || klass.instance_methods.any? { |m| VALID_INSTANCE_METHODS.include?(m) }
87
91
 
88
- errors.add(:job_class_name, "#{job_class} must implement any one of: :#{VALID_INSTANCE_METHODS.join(' :')} instance methods")
92
+ errors.add(:job_class_name,
93
+ "#{job_class} must implement any one of: :#{VALID_INSTANCE_METHODS.join(' :')} instance methods")
89
94
  end
90
95
 
91
96
  def file_exists
92
- return if upload_file_name.nil?
97
+ # Only check for file existence when it is a local file
98
+ return unless upload_file_name.is_a?(IOStreams::Paths::File)
99
+ return errors.add(:upload_file_name, "Upload file name can't be blank.") if upload_file_name.to_s == ""
93
100
 
94
- uri = URI.parse(upload_file_name)
95
- return unless uri.scheme.nil? || uri.scheme == "file"
96
- return if File.exist?(upload_file_name)
101
+ return if upload_file_name.exist?
97
102
 
98
103
  errors.add(:upload_file_name, "Upload file: #{upload_file_name} does not exist.")
104
+ rescue NotImplementedError
105
+ nil
106
+ end
107
+
108
+ def job_has_properties
109
+ klass = job_class
110
+ return unless klass
111
+
112
+ properties.each_pair do |k, _v|
113
+ next if klass.public_method_defined?("#{k}=".to_sym)
114
+
115
+ if %i[output_categories input_categories].include?(k)
116
+ category_class = k == :input_categories ? RocketJob::Category::Input : RocketJob::Category::Output
117
+ properties[k].each do |category|
118
+ category.each_pair do |key, _value|
119
+ next if category_class.public_method_defined?("#{key}=".to_sym)
120
+
121
+ errors.add(
122
+ :properties,
123
+ "Unknown Property in #{k}: Attempted to set a value for #{key}.#{k} which is not allowed on the job #{job_class_name}"
124
+ )
125
+ end
126
+ end
127
+ next
128
+ end
129
+
130
+ errors.add(
131
+ :properties,
132
+ "Unknown Property: Attempted to set a value for #{k.inspect} which is not allowed on the job #{job_class_name}"
133
+ )
134
+ end
99
135
  end
100
136
  end
101
137
  end