rocketjob 5.3.0 → 5.4.0.beta2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fe39da29017eca601c104ca9d7c4fa90f903c32cbf3822e064cbd4a7f0780272
4
- data.tar.gz: ce81cfb0d3a0ac4dbfe4ebe7c549077e56ca52d19ca41a2fda150561ecfeedd7
3
+ metadata.gz: df527427b1abf142761ba3be11df6f146708763a371c345c1a2c4a2d5d160f73
4
+ data.tar.gz: 2cf7a11dc3b82fd5fceb147991eb3dfa448d454c40264a96a2e9318cd2783e10
5
5
  SHA512:
6
- metadata.gz: fa49046d738a5064da99363e0d854253d17c375ab1e75b095c118139d9081781b9e4177bc03cf70abd4edb906b43cdc033b96234bd5864b5f9959e81f1421074
7
- data.tar.gz: 5e67742618a4ec874a1c43649d8619cec007380f8db84a3a81f584ec6f9896be0e14e062e69955158732b32e419a8ac7bfab9eef394c983a9f484a9c5c04c934
6
+ metadata.gz: 8af8995b77274fad9d791a9598191ca0dc1f28dcc13f1391d91accc39f193bb9a15a1447526cc550312d2160925276a271c89a9eb2be5927199d5ba8b96cfa1d
7
+ data.tar.gz: e0f48e2b522eae0a470ce1fa0af9daa02900a446990e52bbe472880818fde220b15575e365135cae2843c4aceeac8680a8f316f70a7961cdf788eb856f6771d3
@@ -18,7 +18,7 @@ module RocketJob
18
18
  raise "Category #{category.inspect}, must be registered in input_categories: #{input_categories.inspect}"
19
19
  end
20
20
 
21
- (@inputs ||= {})[category] ||= RocketJob::Sliced::Input.new(**rocket_job_io_slice_arguments("inputs", category))
21
+ (@inputs ||= {})[category] ||= RocketJob::Sliced.factory(:input, category, self)
22
22
  end
23
23
 
24
24
  # Returns [RocketJob::Sliced::Output] output collection for holding output slices
@@ -34,7 +34,7 @@ module RocketJob
34
34
  raise "Category #{category.inspect}, must be registered in output_categories: #{output_categories.inspect}"
35
35
  end
36
36
 
37
- (@outputs ||= {})[category] ||= RocketJob::Sliced::Output.new(**rocket_job_io_slice_arguments("outputs", category))
37
+ (@outputs ||= {})[category] ||= RocketJob::Sliced.factory(:output, category, self)
38
38
  end
39
39
 
40
40
  # Upload the supplied file, io, IOStreams::Path, or IOStreams::Stream.
@@ -355,8 +355,18 @@ module RocketJob
355
355
 
356
356
  return output(category).download(header_line: header_line, &block) if block
357
357
 
358
- IOStreams.new(stream).writer(:line, **args) do |io|
359
- output(category).download(header_line: header_line) { |record| io << record }
358
+ output_collection = output(category)
359
+
360
+ if output_collection.binary?
361
+ IOStreams.new(stream).stream(:none).writer(**args) do |io|
362
+ raise(ArgumenError, "A `header_line` is not supported with binary output collections") if header_line
363
+
364
+ output_collection.download { |record| io << record[:binary] }
365
+ end
366
+ else
367
+ IOStreams.new(stream).writer(:line, **args) do |io|
368
+ output_collection.download(header_line: header_line) { |record| io << record }
369
+ end
360
370
  end
361
371
  end
362
372
 
@@ -393,21 +403,6 @@ module RocketJob
393
403
  RocketJob::Sliced::Writer::Output.collect(self, input_slice) { |writer| writer << result }
394
404
  end
395
405
  end
396
-
397
- private
398
-
399
- def rocket_job_io_slice_arguments(collection_type, category)
400
- collection_name = "rocket_job.#{collection_type}.#{id}"
401
- collection_name << ".#{category}" unless category == :main
402
-
403
- args = {collection_name: collection_name, slice_size: slice_size}
404
- if encrypt
405
- args[:slice_class] = Sliced::EncryptedSlice
406
- elsif compress
407
- args[:slice_class] = Sliced::CompressedSlice
408
- end
409
- args
410
- end
411
406
  end
412
407
  end
413
408
  end
@@ -44,12 +44,12 @@ module RocketJob
44
44
  # Compress uploaded records.
45
45
  # The fields are not affected in any way, only the data stored in the
46
46
  # records and results collections will be compressed
47
- field :compress, type: Boolean, default: false, class_attribute: true
47
+ field :compress, type: Object, default: false, class_attribute: true
48
48
 
49
49
  # Encrypt uploaded records.
50
50
  # The fields are not affected in any way, only the data stored in the
51
51
  # records and results collections will be encrypted
52
- field :encrypt, type: Boolean, default: false, class_attribute: true
52
+ field :encrypt, type: Object, default: false, class_attribute: true
53
53
 
54
54
  #
55
55
  # Values that jobs can also update during processing
@@ -12,6 +12,7 @@ module RocketJob
12
12
  included do
13
13
  field :tabular_input_header, type: Array, class_attribute: true, user_editable: true
14
14
  field :tabular_input_format, type: Symbol, default: :csv, class_attribute: true, user_editable: true
15
+ field :tabular_input_options, type: Hash, class_attribute: true
15
16
 
16
17
  # tabular_input_mode: [:line | :array | :hash]
17
18
  # :line
@@ -53,7 +54,9 @@ module RocketJob
53
54
  input_stream = stream.nil? ? nil : IOStreams.new(stream)
54
55
 
55
56
  if stream && (tabular_input_type == :text)
56
- input_stream.option_or_stream(:encode, encoding: "UTF-8", cleaner: :printable, replace: "")
57
+ # Cannot change the length of fixed width lines
58
+ replace = tabular_input_format == :fixed ? " " : ""
59
+ input_stream.option_or_stream(:encode, encoding: "UTF-8", cleaner: :printable, replace: replace)
57
60
  end
58
61
 
59
62
  # If an input header is not required, then we don't extract it
@@ -96,14 +99,15 @@ module RocketJob
96
99
  allowed_columns: tabular_input_white_list,
97
100
  required_columns: tabular_input_required,
98
101
  skip_unknown: tabular_input_skip_unknown,
99
- format: tabular_input_format
102
+ format: tabular_input_format,
103
+ format_options: tabular_input_options&.deep_symbolize_keys
100
104
  )
101
105
  end
102
106
 
103
107
  def tabular_input_render
104
- unless tabular_input_header.blank? && tabular_input.header?
105
- @rocket_job_input = tabular_input.record_parse(@rocket_job_input)
106
- end
108
+ return if tabular_input_header.blank? && tabular_input.header?
109
+
110
+ @rocket_job_input = tabular_input.record_parse(@rocket_job_input)
107
111
  end
108
112
 
109
113
  # Cleanse custom input header if supplied.
@@ -12,6 +12,7 @@ module RocketJob
12
12
  included do
13
13
  field :tabular_output_header, type: Array, class_attribute: true, user_editable: true, copy_on_restart: true
14
14
  field :tabular_output_format, type: Symbol, default: :csv, class_attribute: true, user_editable: true, copy_on_restart: true
15
+ field :tabular_output_options, type: Hash, class_attribute: true
15
16
 
16
17
  validates_inclusion_of :tabular_output_format, in: IOStreams::Tabular.registered_formats
17
18
 
@@ -31,8 +32,9 @@ module RocketJob
31
32
 
32
33
  # Overrides: `RocketJob::Batch::IO#download` to add the `tabular_output_header`.
33
34
  def download(file_name_or_io = nil, category: :main, **args, &block)
34
- # No header required
35
- return super(file_name_or_io, category: category, **args, &block) unless tabular_output.requires_header?(category)
35
+ unless tabular_output.requires_header?(category)
36
+ return super(file_name_or_io, category: category, **args, &block)
37
+ end
36
38
 
37
39
  header = tabular_output.render_header(category)
38
40
  super(file_name_or_io, header_line: header, category: category, **args, &block)
@@ -43,7 +45,11 @@ module RocketJob
43
45
  # Delimited instance used for this slice, by a single worker (thread)
44
46
  def tabular_output
45
47
  @tabular_output ||= Tabular.new(
46
- main: IOStreams::Tabular.new(columns: tabular_output_header, format: tabular_output_format)
48
+ main: IOStreams::Tabular.new(
49
+ columns: tabular_output_header,
50
+ format: tabular_output_format,
51
+ format_options: tabular_output_options&.deep_symbolize_keys
52
+ )
47
53
  )
48
54
  end
49
55
 
@@ -1,4 +1,5 @@
1
1
  require "active_support/concern"
2
+ require "fugit"
2
3
 
3
4
  module RocketJob
4
5
  module Batch
@@ -43,6 +44,10 @@ module RocketJob
43
44
  field :secondary_duration, type: Integer, class_attribute: true, user_editable: true, copy_on_restart: true
44
45
 
45
46
  define_batch_throttle :throttle_windows_exceeded?, filter: :throttle_filter_id
47
+
48
+ validates_each :primary_schedule, :secondary_schedule do |record, attr, value|
49
+ record.errors.add(attr, "Invalid #{attr}: #{value.inspect}") if value && !Fugit::Cron.new(value)
50
+ end
46
51
  end
47
52
 
48
53
  private
@@ -56,10 +61,10 @@ module RocketJob
56
61
  end
57
62
 
58
63
  def throttle_outside_window?(schedule, duration)
59
- cron = Plugins::Rufus::CronLine.new(schedule)
60
- time = Time.now + 1
64
+ cron = Fugit::Cron.new(schedule)
65
+ time = Time.now.utc + 1
61
66
  # Add 1 second since right now could be the very beginning of the processing window.
62
- previous_time = cron.previous_time(time).to_time
67
+ previous_time = cron.previous_time(time).to_utc_time
63
68
  previous_time + duration < time
64
69
  end
65
70
  end
@@ -46,7 +46,7 @@ module RocketJob
46
46
  next if slice.failed?
47
47
 
48
48
  slice.fail_on_exception!(re_raise_exceptions) { rocket_job_process_slice(slice) }
49
- elsif record_count && rocket_job_batch_complete?(worker.name)
49
+ elsif record_count && fail_on_exception!(re_raise_exceptions) { rocket_job_batch_complete?(worker.name) }
50
50
  return false
51
51
  else
52
52
  logger.debug "No more work available for this job"
@@ -110,8 +110,6 @@ module RocketJob
110
110
  servers
111
111
  end
112
112
 
113
- private
114
-
115
113
  def rocket_job_batch_throttled?(slice, worker)
116
114
  filter = self.class.rocket_job_batch_throttles.matching_filter(self, slice)
117
115
  return false unless filter
@@ -150,7 +148,7 @@ module RocketJob
150
148
  records = slice.records
151
149
 
152
150
  # Skip records already processed, if any.
153
- #slice.processing_record_number ||= 0
151
+ # slice.processing_record_number ||= 0
154
152
  # TODO: Must append to existing output slices before this can be enabled.
155
153
  # if !collect_output && (slice.processing_record_number > 1)
156
154
  # records = records[slice.processing_record_number - 1..-1]
@@ -104,8 +104,6 @@ module RocketJob
104
104
  end
105
105
  end
106
106
 
107
- private
108
-
109
107
  @load_time = Time.now.utc
110
108
  @subscribers = Concurrent::Map.new { Concurrent::Array.new }
111
109
 
@@ -22,8 +22,6 @@ module RocketJob
22
22
  @collection_name = collection_name&.to_sym
23
23
  end
24
24
 
25
- private
26
-
27
25
  module ClassMethods
28
26
  def with_collection(collection_name)
29
27
  all.with_collection(collection_name)
@@ -0,0 +1,12 @@
1
+ require "mongoid/fields/validators/macro"
2
+ require "semantic_logger"
3
+ module RocketJob
4
+ module RemoveMongoidWarnings
5
+ # Remove annoying warnings about Symbols type being deprecated.
6
+ def validate_options(*params)
7
+ SemanticLogger.silence(:error) { super(*params) }
8
+ end
9
+ end
10
+ end
11
+
12
+ ::Mongoid::Fields::Validators::Macro.extend(RocketJob::RemoveMongoidWarnings)
@@ -25,7 +25,7 @@ module RocketJob
25
25
 
26
26
  self.destroy_on_complete = false
27
27
  # Number of times to automatically retry the copy. Set to `0` for no retry attempts.
28
- self.retry_limit = 5
28
+ self.retry_limit = 10
29
29
 
30
30
  # File names in IOStreams URL format.
31
31
  field :source_url, type: String, user_editable: true
@@ -1,8 +1,5 @@
1
- begin
2
- require "active_record"
3
- rescue LoadError
4
- raise 'RocketJob::Jobs::ReEncrypt::RelationalJob uses ActiveRecord to obtain the database connection, please install the gem "activerecord".'
5
- end
1
+ require "active_record"
2
+ require "sync_attr"
6
3
 
7
4
  # Batch Worker to Re-encrypt all encrypted fields in MySQL that start with `encrypted_`.
8
5
  #
@@ -40,7 +40,7 @@ module RocketJob
40
40
  job.id = job_id if job_id
41
41
  upload_file(job)
42
42
  job.save!
43
- rescue StandardError => e
43
+ rescue Exception => e
44
44
  # Prevent partial uploads
45
45
  job&.cleanup! if job.respond_to?(:cleanup!)
46
46
  raise(e)
@@ -1,4 +1,5 @@
1
1
  require "active_support/concern"
2
+ require "fugit"
2
3
 
3
4
  module RocketJob
4
5
  module Plugins
@@ -17,7 +18,9 @@ module RocketJob
17
18
 
18
19
  field :cron_schedule, type: String, class_attribute: true, user_editable: true, copy_on_restart: true
19
20
 
20
- validate :rocket_job_cron_valid
21
+ validates_each :cron_schedule do |record, attr, value|
22
+ record.errors.add(attr, "Invalid cron_schedule: #{value.inspect}") if value && !Fugit::Cron.new(value)
23
+ end
21
24
  before_save :rocket_job_cron_set_run_at
22
25
 
23
26
  private
@@ -42,30 +45,10 @@ module RocketJob
42
45
  end
43
46
  end
44
47
 
45
- # Returns [Time] the next time this job will be scheduled to run at.
46
- #
47
- # Parameters
48
- # time: [Time]
49
- # The next time as of this time.
50
- # Default: Time.now
51
- def rocket_job_cron_next_time(time = Time.now)
52
- RocketJob::Plugins::Rufus::CronLine.new(cron_schedule).next_time(time)
53
- end
54
-
55
- private
56
-
57
48
  def rocket_job_cron_set_run_at
58
- return unless cron_schedule
59
-
60
- self.run_at = rocket_job_cron_next_time if cron_schedule_changed? && !run_at_changed?
61
- end
62
-
63
- def rocket_job_cron_valid
64
- return unless cron_schedule
49
+ return if cron_schedule.nil? || !(cron_schedule_changed? && !run_at_changed?)
65
50
 
66
- RocketJob::Plugins::Rufus::CronLine.new(cron_schedule)
67
- rescue ArgumentError => e
68
- errors.add(:cron_schedule, e.message)
51
+ self.run_at = Fugit::Cron.new(cron_schedule).next_time.to_utc_time
69
52
  end
70
53
  end
71
54
  end
@@ -73,12 +73,13 @@ module RocketJob
73
73
  if failed? || !may_fail?
74
74
  self.exception = JobException.from_exception(e)
75
75
  exception.worker_name = worker_name
76
- save! unless new_record? || destroyed?
77
- elsif new_record? || destroyed?
78
- fail(worker_name, e)
79
76
  else
80
- fail!(worker_name, e)
77
+ fail(worker_name, e)
81
78
  end
79
+
80
+ # Prevent validation failures from failing the job
81
+ save(validate: false) unless new_record? || destroyed?
82
+
82
83
  raise e if re_raise_exceptions
83
84
  end
84
85
  end
@@ -1,4 +1,5 @@
1
1
  require "active_support/concern"
2
+ require "fugit"
2
3
 
3
4
  module RocketJob
4
5
  module Plugins
@@ -47,18 +48,14 @@ module RocketJob
47
48
 
48
49
  validates_presence_of :processing_schedule, :processing_duration
49
50
  validates_each :processing_schedule do |record, attr, value|
50
- begin
51
- RocketJob::Plugins::Rufus::CronLine.new(value)
52
- rescue ArgumentError => e
53
- record.errors.add(attr, e.message)
54
- end
51
+ record.errors.add(attr, "Invalid schedule: #{value.inspect}") unless Fugit::Cron.new(value)
55
52
  end
56
53
  end
57
54
 
58
55
  # Returns [true|false] whether this job is currently inside its processing window
59
56
  def rocket_job_processing_window_active?
60
- time = Time.now
61
- previous_time = rocket_job_processing_schedule.previous_time(time)
57
+ time = Time.now.utc
58
+ previous_time = Fugit::Cron.new(processing_schedule).previous_time(time).to_utc_time
62
59
  # Inside previous processing window?
63
60
  previous_time + processing_duration > time
64
61
  end
@@ -69,17 +66,14 @@ module RocketJob
69
66
  def rocket_job_processing_window_check
70
67
  return if rocket_job_processing_window_active?
71
68
 
72
- logger.warn("Processing window closed before job was processed. Job is re-scheduled to run at: #{rocket_job_processing_schedule.next_time}")
69
+ next_time = Fugit::Cron.new(processing_schedule).next_time.to_utc_time
70
+ logger.warn("Processing window closed before job was processed. Job is re-scheduled to run at: #{next_time}")
73
71
  self.worker_name ||= "inline"
74
72
  requeue!(worker_name)
75
73
  end
76
74
 
77
75
  def rocket_job_processing_window_set_run_at
78
- self.run_at = rocket_job_processing_schedule.next_time unless rocket_job_processing_window_active?
79
- end
80
-
81
- def rocket_job_processing_schedule
82
- RocketJob::Plugins::Rufus::CronLine.new(processing_schedule)
76
+ self.run_at = Fugit::Cron.new(processing_schedule).next_time.to_utc_time unless rocket_job_processing_window_active?
83
77
  end
84
78
  end
85
79
  end
@@ -0,0 +1,91 @@
1
+ module RocketJob
2
+ module Sliced
3
+ autoload :BZip2OutputSlice, "rocket_job/sliced/bzip2_output_slice"
4
+ autoload :CompressedSlice, "rocket_job/sliced/compressed_slice"
5
+ autoload :EncryptedSlice, "rocket_job/sliced/encrypted_slice"
6
+ autoload :Input, "rocket_job/sliced/input"
7
+ autoload :Output, "rocket_job/sliced/output"
8
+ autoload :Slice, "rocket_job/sliced/slice"
9
+ autoload :Slices, "rocket_job/sliced/slices"
10
+ autoload :Store, "rocket_job/sliced/store"
11
+
12
+ module Writer
13
+ autoload :Input, "rocket_job/sliced/writer/input"
14
+ autoload :Output, "rocket_job/sliced/writer/output"
15
+ end
16
+
17
+ # Returns [RocketJob::Sliced::Slices] for the relevant type and category.
18
+ #
19
+ # Supports compress and encrypt with [true|false|Hash] values.
20
+ # When [Hash] they must specify whether they apply to the input or output collection types.
21
+ #
22
+ # Example, compress both input and output collections:
23
+ # class MyJob < RocketJob::Job
24
+ # include RocketJob::Batch
25
+ # self.compress = true
26
+ # end
27
+ #
28
+ # Example, compress just the output collections:
29
+ # class MyJob < RocketJob::Job
30
+ # include RocketJob::Batch
31
+ # self.compress = {output: true}
32
+ # end
33
+ #
34
+ # To use the specialized BZip output compressor, and the regular compressor for the input collections:
35
+ # class MyJob < RocketJob::Job
36
+ # include RocketJob::Batch
37
+ # self.compress = {output: :bzip2, input: true}
38
+ # end
39
+ def self.factory(type, category, job)
40
+ raise(ArgumentError, "Unknown type: #{type.inspect}") unless %i[input output].include?(type)
41
+
42
+ collection_name = "rocket_job.#{type}s.#{job.id}"
43
+ collection_name << ".#{category}" unless category == :main
44
+
45
+ args = {collection_name: collection_name, slice_size: job.slice_size}
46
+ klass = slice_class(type, job)
47
+ args[:slice_class] = klass if klass
48
+
49
+ if type == :input
50
+ RocketJob::Sliced::Input.new(args)
51
+ else
52
+ RocketJob::Sliced::Output.new(args)
53
+ end
54
+ end
55
+
56
+ private
57
+
58
+ # Parses the encrypt and compress options to determine which slice serializer to use.
59
+ # `encrypt` takes priority over any `compress` option.
60
+ def self.slice_class(type, job)
61
+ encrypt = extract_value(type, job.encrypt)
62
+ compress = extract_value(type, job.compress)
63
+
64
+ if encrypt
65
+ case encrypt
66
+ when true
67
+ EncryptedSlice
68
+ else
69
+ raise(ArgumentError, "Unknown job `encrypt` value: #{compress}") unless compress.is_a?(Slices)
70
+ # Returns the supplied class to use for encryption.
71
+ encrypt
72
+ end
73
+ elsif compress
74
+ case compress
75
+ when true
76
+ CompressedSlice
77
+ when :bzip2
78
+ BZip2OutputSlice
79
+ else
80
+ raise(ArgumentError, "Unknown job `compress` value: #{compress}") unless compress.is_a?(Slices)
81
+ # Returns the supplied class to use for compression.
82
+ compress
83
+ end
84
+ end
85
+ end
86
+
87
+ def self.extract_value(type, value)
88
+ value.is_a?(Hash) ? value[type] : value
89
+ end
90
+ end
91
+ end