rocketjob 5.3.0 → 5.4.0.beta2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fe39da29017eca601c104ca9d7c4fa90f903c32cbf3822e064cbd4a7f0780272
4
- data.tar.gz: ce81cfb0d3a0ac4dbfe4ebe7c549077e56ca52d19ca41a2fda150561ecfeedd7
3
+ metadata.gz: df527427b1abf142761ba3be11df6f146708763a371c345c1a2c4a2d5d160f73
4
+ data.tar.gz: 2cf7a11dc3b82fd5fceb147991eb3dfa448d454c40264a96a2e9318cd2783e10
5
5
  SHA512:
6
- metadata.gz: fa49046d738a5064da99363e0d854253d17c375ab1e75b095c118139d9081781b9e4177bc03cf70abd4edb906b43cdc033b96234bd5864b5f9959e81f1421074
7
- data.tar.gz: 5e67742618a4ec874a1c43649d8619cec007380f8db84a3a81f584ec6f9896be0e14e062e69955158732b32e419a8ac7bfab9eef394c983a9f484a9c5c04c934
6
+ metadata.gz: 8af8995b77274fad9d791a9598191ca0dc1f28dcc13f1391d91accc39f193bb9a15a1447526cc550312d2160925276a271c89a9eb2be5927199d5ba8b96cfa1d
7
+ data.tar.gz: e0f48e2b522eae0a470ce1fa0af9daa02900a446990e52bbe472880818fde220b15575e365135cae2843c4aceeac8680a8f316f70a7961cdf788eb856f6771d3
@@ -18,7 +18,7 @@ module RocketJob
18
18
  raise "Category #{category.inspect}, must be registered in input_categories: #{input_categories.inspect}"
19
19
  end
20
20
 
21
- (@inputs ||= {})[category] ||= RocketJob::Sliced::Input.new(**rocket_job_io_slice_arguments("inputs", category))
21
+ (@inputs ||= {})[category] ||= RocketJob::Sliced.factory(:input, category, self)
22
22
  end
23
23
 
24
24
  # Returns [RocketJob::Sliced::Output] output collection for holding output slices
@@ -34,7 +34,7 @@ module RocketJob
34
34
  raise "Category #{category.inspect}, must be registered in output_categories: #{output_categories.inspect}"
35
35
  end
36
36
 
37
- (@outputs ||= {})[category] ||= RocketJob::Sliced::Output.new(**rocket_job_io_slice_arguments("outputs", category))
37
+ (@outputs ||= {})[category] ||= RocketJob::Sliced.factory(:output, category, self)
38
38
  end
39
39
 
40
40
  # Upload the supplied file, io, IOStreams::Path, or IOStreams::Stream.
@@ -355,8 +355,18 @@ module RocketJob
355
355
 
356
356
  return output(category).download(header_line: header_line, &block) if block
357
357
 
358
- IOStreams.new(stream).writer(:line, **args) do |io|
359
- output(category).download(header_line: header_line) { |record| io << record }
358
+ output_collection = output(category)
359
+
360
+ if output_collection.binary?
361
+ IOStreams.new(stream).stream(:none).writer(**args) do |io|
362
+ raise(ArgumenError, "A `header_line` is not supported with binary output collections") if header_line
363
+
364
+ output_collection.download { |record| io << record[:binary] }
365
+ end
366
+ else
367
+ IOStreams.new(stream).writer(:line, **args) do |io|
368
+ output_collection.download(header_line: header_line) { |record| io << record }
369
+ end
360
370
  end
361
371
  end
362
372
 
@@ -393,21 +403,6 @@ module RocketJob
393
403
  RocketJob::Sliced::Writer::Output.collect(self, input_slice) { |writer| writer << result }
394
404
  end
395
405
  end
396
-
397
- private
398
-
399
- def rocket_job_io_slice_arguments(collection_type, category)
400
- collection_name = "rocket_job.#{collection_type}.#{id}"
401
- collection_name << ".#{category}" unless category == :main
402
-
403
- args = {collection_name: collection_name, slice_size: slice_size}
404
- if encrypt
405
- args[:slice_class] = Sliced::EncryptedSlice
406
- elsif compress
407
- args[:slice_class] = Sliced::CompressedSlice
408
- end
409
- args
410
- end
411
406
  end
412
407
  end
413
408
  end
@@ -44,12 +44,12 @@ module RocketJob
44
44
  # Compress uploaded records.
45
45
  # The fields are not affected in any way, only the data stored in the
46
46
  # records and results collections will be compressed
47
- field :compress, type: Boolean, default: false, class_attribute: true
47
+ field :compress, type: Object, default: false, class_attribute: true
48
48
 
49
49
  # Encrypt uploaded records.
50
50
  # The fields are not affected in any way, only the data stored in the
51
51
  # records and results collections will be encrypted
52
- field :encrypt, type: Boolean, default: false, class_attribute: true
52
+ field :encrypt, type: Object, default: false, class_attribute: true
53
53
 
54
54
  #
55
55
  # Values that jobs can also update during processing
@@ -12,6 +12,7 @@ module RocketJob
12
12
  included do
13
13
  field :tabular_input_header, type: Array, class_attribute: true, user_editable: true
14
14
  field :tabular_input_format, type: Symbol, default: :csv, class_attribute: true, user_editable: true
15
+ field :tabular_input_options, type: Hash, class_attribute: true
15
16
 
16
17
  # tabular_input_mode: [:line | :array | :hash]
17
18
  # :line
@@ -53,7 +54,9 @@ module RocketJob
53
54
  input_stream = stream.nil? ? nil : IOStreams.new(stream)
54
55
 
55
56
  if stream && (tabular_input_type == :text)
56
- input_stream.option_or_stream(:encode, encoding: "UTF-8", cleaner: :printable, replace: "")
57
+ # Cannot change the length of fixed width lines
58
+ replace = tabular_input_format == :fixed ? " " : ""
59
+ input_stream.option_or_stream(:encode, encoding: "UTF-8", cleaner: :printable, replace: replace)
57
60
  end
58
61
 
59
62
  # If an input header is not required, then we don't extract it
@@ -96,14 +99,15 @@ module RocketJob
96
99
  allowed_columns: tabular_input_white_list,
97
100
  required_columns: tabular_input_required,
98
101
  skip_unknown: tabular_input_skip_unknown,
99
- format: tabular_input_format
102
+ format: tabular_input_format,
103
+ format_options: tabular_input_options&.deep_symbolize_keys
100
104
  )
101
105
  end
102
106
 
103
107
  def tabular_input_render
104
- unless tabular_input_header.blank? && tabular_input.header?
105
- @rocket_job_input = tabular_input.record_parse(@rocket_job_input)
106
- end
108
+ return if tabular_input_header.blank? && tabular_input.header?
109
+
110
+ @rocket_job_input = tabular_input.record_parse(@rocket_job_input)
107
111
  end
108
112
 
109
113
  # Cleanse custom input header if supplied.
@@ -12,6 +12,7 @@ module RocketJob
12
12
  included do
13
13
  field :tabular_output_header, type: Array, class_attribute: true, user_editable: true, copy_on_restart: true
14
14
  field :tabular_output_format, type: Symbol, default: :csv, class_attribute: true, user_editable: true, copy_on_restart: true
15
+ field :tabular_output_options, type: Hash, class_attribute: true
15
16
 
16
17
  validates_inclusion_of :tabular_output_format, in: IOStreams::Tabular.registered_formats
17
18
 
@@ -31,8 +32,9 @@ module RocketJob
31
32
 
32
33
  # Overrides: `RocketJob::Batch::IO#download` to add the `tabular_output_header`.
33
34
  def download(file_name_or_io = nil, category: :main, **args, &block)
34
- # No header required
35
- return super(file_name_or_io, category: category, **args, &block) unless tabular_output.requires_header?(category)
35
+ unless tabular_output.requires_header?(category)
36
+ return super(file_name_or_io, category: category, **args, &block)
37
+ end
36
38
 
37
39
  header = tabular_output.render_header(category)
38
40
  super(file_name_or_io, header_line: header, category: category, **args, &block)
@@ -43,7 +45,11 @@ module RocketJob
43
45
  # Delimited instance used for this slice, by a single worker (thread)
44
46
  def tabular_output
45
47
  @tabular_output ||= Tabular.new(
46
- main: IOStreams::Tabular.new(columns: tabular_output_header, format: tabular_output_format)
48
+ main: IOStreams::Tabular.new(
49
+ columns: tabular_output_header,
50
+ format: tabular_output_format,
51
+ format_options: tabular_output_options&.deep_symbolize_keys
52
+ )
47
53
  )
48
54
  end
49
55
 
@@ -1,4 +1,5 @@
1
1
  require "active_support/concern"
2
+ require "fugit"
2
3
 
3
4
  module RocketJob
4
5
  module Batch
@@ -43,6 +44,10 @@ module RocketJob
43
44
  field :secondary_duration, type: Integer, class_attribute: true, user_editable: true, copy_on_restart: true
44
45
 
45
46
  define_batch_throttle :throttle_windows_exceeded?, filter: :throttle_filter_id
47
+
48
+ validates_each :primary_schedule, :secondary_schedule do |record, attr, value|
49
+ record.errors.add(attr, "Invalid #{attr}: #{value.inspect}") if value && !Fugit::Cron.new(value)
50
+ end
46
51
  end
47
52
 
48
53
  private
@@ -56,10 +61,10 @@ module RocketJob
56
61
  end
57
62
 
58
63
  def throttle_outside_window?(schedule, duration)
59
- cron = Plugins::Rufus::CronLine.new(schedule)
60
- time = Time.now + 1
64
+ cron = Fugit::Cron.new(schedule)
65
+ time = Time.now.utc + 1
61
66
  # Add 1 second since right now could be the very beginning of the processing window.
62
- previous_time = cron.previous_time(time).to_time
67
+ previous_time = cron.previous_time(time).to_utc_time
63
68
  previous_time + duration < time
64
69
  end
65
70
  end
@@ -46,7 +46,7 @@ module RocketJob
46
46
  next if slice.failed?
47
47
 
48
48
  slice.fail_on_exception!(re_raise_exceptions) { rocket_job_process_slice(slice) }
49
- elsif record_count && rocket_job_batch_complete?(worker.name)
49
+ elsif record_count && fail_on_exception!(re_raise_exceptions) { rocket_job_batch_complete?(worker.name) }
50
50
  return false
51
51
  else
52
52
  logger.debug "No more work available for this job"
@@ -110,8 +110,6 @@ module RocketJob
110
110
  servers
111
111
  end
112
112
 
113
- private
114
-
115
113
  def rocket_job_batch_throttled?(slice, worker)
116
114
  filter = self.class.rocket_job_batch_throttles.matching_filter(self, slice)
117
115
  return false unless filter
@@ -150,7 +148,7 @@ module RocketJob
150
148
  records = slice.records
151
149
 
152
150
  # Skip records already processed, if any.
153
- #slice.processing_record_number ||= 0
151
+ # slice.processing_record_number ||= 0
154
152
  # TODO: Must append to existing output slices before this can be enabled.
155
153
  # if !collect_output && (slice.processing_record_number > 1)
156
154
  # records = records[slice.processing_record_number - 1..-1]
@@ -104,8 +104,6 @@ module RocketJob
104
104
  end
105
105
  end
106
106
 
107
- private
108
-
109
107
  @load_time = Time.now.utc
110
108
  @subscribers = Concurrent::Map.new { Concurrent::Array.new }
111
109
 
@@ -22,8 +22,6 @@ module RocketJob
22
22
  @collection_name = collection_name&.to_sym
23
23
  end
24
24
 
25
- private
26
-
27
25
  module ClassMethods
28
26
  def with_collection(collection_name)
29
27
  all.with_collection(collection_name)
@@ -0,0 +1,12 @@
1
+ require "mongoid/fields/validators/macro"
2
+ require "semantic_logger"
3
module RocketJob
  # Mixin that wraps `validate_options` so that it runs inside
  # `SemanticLogger.silence(:error)`.
  module RemoveMongoidWarnings
    # Remove annoying warnings about Symbols type being deprecated.
    #
    # Forwards every positional argument unchanged to the original
    # implementation via `super` and returns whatever the silenced block returns.
    def validate_options(*params)
      SemanticLogger.silence(:error) do
        super(*params)
      end
    end
  end
end
11
+
12
+ ::Mongoid::Fields::Validators::Macro.extend(RocketJob::RemoveMongoidWarnings)
@@ -25,7 +25,7 @@ module RocketJob
25
25
 
26
26
  self.destroy_on_complete = false
27
27
  # Number of times to automatically retry the copy. Set to `0` for no retry attempts.
28
- self.retry_limit = 5
28
+ self.retry_limit = 10
29
29
 
30
30
  # File names in IOStreams URL format.
31
31
  field :source_url, type: String, user_editable: true
@@ -1,8 +1,5 @@
1
- begin
2
- require "active_record"
3
- rescue LoadError
4
- raise 'RocketJob::Jobs::ReEncrypt::RelationalJob uses ActiveRecord to obtain the database connection, please install the gem "activerecord".'
5
- end
1
+ require "active_record"
2
+ require "sync_attr"
6
3
 
7
4
  # Batch Worker to Re-encrypt all encrypted fields in MySQL that start with `encrypted_`.
8
5
  #
@@ -40,7 +40,7 @@ module RocketJob
40
40
  job.id = job_id if job_id
41
41
  upload_file(job)
42
42
  job.save!
43
- rescue StandardError => e
43
+ rescue Exception => e
44
44
  # Prevent partial uploads
45
45
  job&.cleanup! if job.respond_to?(:cleanup!)
46
46
  raise(e)
@@ -1,4 +1,5 @@
1
1
  require "active_support/concern"
2
+ require "fugit"
2
3
 
3
4
  module RocketJob
4
5
  module Plugins
@@ -17,7 +18,9 @@ module RocketJob
17
18
 
18
19
  field :cron_schedule, type: String, class_attribute: true, user_editable: true, copy_on_restart: true
19
20
 
20
- validate :rocket_job_cron_valid
21
+ validates_each :cron_schedule do |record, attr, value|
22
+ record.errors.add(attr, "Invalid cron_schedule: #{value.inspect}") if value && !Fugit::Cron.new(value)
23
+ end
21
24
  before_save :rocket_job_cron_set_run_at
22
25
 
23
26
  private
@@ -42,30 +45,10 @@ module RocketJob
42
45
  end
43
46
  end
44
47
 
45
- # Returns [Time] the next time this job will be scheduled to run at.
46
- #
47
- # Parameters
48
- # time: [Time]
49
- # The next time as of this time.
50
- # Default: Time.now
51
- def rocket_job_cron_next_time(time = Time.now)
52
- RocketJob::Plugins::Rufus::CronLine.new(cron_schedule).next_time(time)
53
- end
54
-
55
- private
56
-
57
48
  def rocket_job_cron_set_run_at
58
- return unless cron_schedule
59
-
60
- self.run_at = rocket_job_cron_next_time if cron_schedule_changed? && !run_at_changed?
61
- end
62
-
63
- def rocket_job_cron_valid
64
- return unless cron_schedule
49
+ return if cron_schedule.nil? || !(cron_schedule_changed? && !run_at_changed?)
65
50
 
66
- RocketJob::Plugins::Rufus::CronLine.new(cron_schedule)
67
- rescue ArgumentError => e
68
- errors.add(:cron_schedule, e.message)
51
+ self.run_at = Fugit::Cron.new(cron_schedule).next_time.to_utc_time
69
52
  end
70
53
  end
71
54
  end
@@ -73,12 +73,13 @@ module RocketJob
73
73
  if failed? || !may_fail?
74
74
  self.exception = JobException.from_exception(e)
75
75
  exception.worker_name = worker_name
76
- save! unless new_record? || destroyed?
77
- elsif new_record? || destroyed?
78
- fail(worker_name, e)
79
76
  else
80
- fail!(worker_name, e)
77
+ fail(worker_name, e)
81
78
  end
79
+
80
+ # Prevent validation failures from failing the job
81
+ save(validate: false) unless new_record? || destroyed?
82
+
82
83
  raise e if re_raise_exceptions
83
84
  end
84
85
  end
@@ -1,4 +1,5 @@
1
1
  require "active_support/concern"
2
+ require "fugit"
2
3
 
3
4
  module RocketJob
4
5
  module Plugins
@@ -47,18 +48,14 @@ module RocketJob
47
48
 
48
49
  validates_presence_of :processing_schedule, :processing_duration
49
50
  validates_each :processing_schedule do |record, attr, value|
50
- begin
51
- RocketJob::Plugins::Rufus::CronLine.new(value)
52
- rescue ArgumentError => e
53
- record.errors.add(attr, e.message)
54
- end
51
+ record.errors.add(attr, "Invalid schedule: #{value.inspect}") unless Fugit::Cron.new(value)
55
52
  end
56
53
  end
57
54
 
58
55
  # Returns [true|false] whether this job is currently inside its processing window
59
56
  def rocket_job_processing_window_active?
60
- time = Time.now
61
- previous_time = rocket_job_processing_schedule.previous_time(time)
57
+ time = Time.now.utc
58
+ previous_time = Fugit::Cron.new(processing_schedule).previous_time(time).to_utc_time
62
59
  # Inside previous processing window?
63
60
  previous_time + processing_duration > time
64
61
  end
@@ -69,17 +66,14 @@ module RocketJob
69
66
  def rocket_job_processing_window_check
70
67
  return if rocket_job_processing_window_active?
71
68
 
72
- logger.warn("Processing window closed before job was processed. Job is re-scheduled to run at: #{rocket_job_processing_schedule.next_time}")
69
+ next_time = Fugit::Cron.new(processing_schedule).next_time.to_utc_time
70
+ logger.warn("Processing window closed before job was processed. Job is re-scheduled to run at: #{next_time}")
73
71
  self.worker_name ||= "inline"
74
72
  requeue!(worker_name)
75
73
  end
76
74
 
77
75
  def rocket_job_processing_window_set_run_at
78
- self.run_at = rocket_job_processing_schedule.next_time unless rocket_job_processing_window_active?
79
- end
80
-
81
- def rocket_job_processing_schedule
82
- RocketJob::Plugins::Rufus::CronLine.new(processing_schedule)
76
+ self.run_at = Fugit::Cron.new(processing_schedule).next_time.to_utc_time unless rocket_job_processing_window_active?
83
77
  end
84
78
  end
85
79
  end
@@ -0,0 +1,91 @@
1
module RocketJob
  # Namespace for the sliced input / output collections and their slice serializers.
  # Every member is registered with `autoload`, so it is only required on first reference.
  module Sliced
    autoload :BZip2OutputSlice, "rocket_job/sliced/bzip2_output_slice"
    autoload :CompressedSlice, "rocket_job/sliced/compressed_slice"
    autoload :EncryptedSlice, "rocket_job/sliced/encrypted_slice"
    autoload :Input, "rocket_job/sliced/input"
    autoload :Output, "rocket_job/sliced/output"
    autoload :Slice, "rocket_job/sliced/slice"
    autoload :Slices, "rocket_job/sliced/slices"
    autoload :Store, "rocket_job/sliced/store"

    module Writer
      autoload :Input, "rocket_job/sliced/writer/input"
      autoload :Output, "rocket_job/sliced/writer/output"
    end

    # Returns [RocketJob::Sliced::Input | RocketJob::Sliced::Output] for the relevant type and category.
    #
    # Parameters
    #   type: [:input | :output]
    #     Which kind of collection to build.
    #   category: [Symbol]
    #     Appended to the collection name, unless it is `:main`.
    #   job:
    #     Must respond to `id`, `slice_size`, `encrypt` and `compress`.
    #
    # Raises ArgumentError when `type` is not `:input` or `:output`,
    # or when the job's `encrypt` / `compress` value is not recognized.
    #
    # Supports compress and encrypt with [true|false|Hash] values.
    # When [Hash] they must specify whether they apply to the input or output collection types.
    #
    # Example, compress both input and output collections:
    #   class MyJob < RocketJob::Job
    #     include RocketJob::Batch
    #     self.compress = true
    #   end
    #
    # Example, compress just the output collections:
    #   class MyJob < RocketJob::Job
    #     include RocketJob::Batch
    #     self.compress = {output: true}
    #   end
    #
    # To use the specialized BZip output compressor, and the regular compressor for the input collections:
    #   class MyJob < RocketJob::Job
    #     include RocketJob::Batch
    #     self.compress = {output: :bzip2, input: true}
    #   end
    def self.factory(type, category, job)
      raise(ArgumentError, "Unknown type: #{type.inspect}") unless %i[input output].include?(type)

      collection_name = "rocket_job.#{type}s.#{job.id}"
      collection_name << ".#{category}" unless category == :main

      args  = {collection_name: collection_name, slice_size: job.slice_size}
      klass = slice_class(type, job)
      # Only override the collection's own default serializer when one was selected.
      args[:slice_class] = klass if klass

      # Pass as keyword arguments: a positional Hash is not implicitly
      # converted to keywords on Ruby 3 and later.
      if type == :input
        RocketJob::Sliced::Input.new(**args)
      else
        RocketJob::Sliced::Output.new(**args)
      end
    end

    # Parses the encrypt and compress options to determine which slice serializer to use.
    # `encrypt` takes priority over any `compress` option.
    #
    # Returns nil when neither option applies to the supplied collection type.
    def self.slice_class(type, job)
      encrypt  = extract_value(type, job.encrypt)
      compress = extract_value(type, job.compress)

      if encrypt
        case encrypt
        when true
          EncryptedSlice
        else
          # Validate the `encrypt` value itself, not the unrelated `compress` value.
          # NOTE(review): `is_a?(Slices)` tests for an instance of Slices, yet the value is
          # described as a class to use for serialization; confirm the intended check.
          raise(ArgumentError, "Unknown job `encrypt` value: #{encrypt}") unless encrypt.is_a?(Slices)

          # Returns the supplied class to use for encryption.
          encrypt
        end
      elsif compress
        case compress
        when true
          CompressedSlice
        when :bzip2
          BZip2OutputSlice
        else
          # NOTE(review): same `is_a?(Slices)` question as for `encrypt` above.
          raise(ArgumentError, "Unknown job `compress` value: #{compress}") unless compress.is_a?(Slices)

          # Returns the supplied class to use for compression.
          compress
        end
      end
    end

    # When `value` is a Hash, returns its entry for `type` (:input or :output),
    # otherwise returns `value` unchanged.
    def self.extract_value(type, value)
      value.is_a?(Hash) ? value[type] : value
    end

    # A bare `private` does not apply to `def self.` methods, so hide the helpers explicitly.
    private_class_method :slice_class, :extract_value
  end
end