rocketjob 5.2.0 → 5.4.0.beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +2 -2
  3. data/lib/rocket_job/batch.rb +1 -0
  4. data/lib/rocket_job/batch/io.rb +14 -19
  5. data/lib/rocket_job/batch/model.rb +2 -2
  6. data/lib/rocket_job/batch/tabular/input.rb +9 -5
  7. data/lib/rocket_job/batch/tabular/output.rb +9 -3
  8. data/lib/rocket_job/batch/throttle.rb +1 -1
  9. data/lib/rocket_job/batch/throttle_running_workers.rb +1 -1
  10. data/lib/rocket_job/batch/throttle_windows.rb +72 -0
  11. data/lib/rocket_job/batch/worker.rb +2 -8
  12. data/lib/rocket_job/event.rb +0 -2
  13. data/lib/rocket_job/extensions/mongoid/clients/options.rb +0 -2
  14. data/lib/rocket_job/extensions/mongoid/remove_warnings.rb +12 -0
  15. data/lib/rocket_job/jobs/copy_file_job.rb +1 -1
  16. data/lib/rocket_job/jobs/re_encrypt/relational_job.rb +2 -5
  17. data/lib/rocket_job/jobs/upload_file_job.rb +1 -1
  18. data/lib/rocket_job/plugins/cron.rb +6 -23
  19. data/lib/rocket_job/plugins/job/throttle.rb +1 -1
  20. data/lib/rocket_job/plugins/job/throttle_running_jobs.rb +1 -1
  21. data/lib/rocket_job/plugins/job/worker.rb +5 -4
  22. data/lib/rocket_job/plugins/processing_window.rb +7 -13
  23. data/lib/rocket_job/sliced.rb +91 -0
  24. data/lib/rocket_job/sliced/bzip2_output_slice.rb +43 -0
  25. data/lib/rocket_job/sliced/input.rb +3 -3
  26. data/lib/rocket_job/sliced/slice.rb +6 -0
  27. data/lib/rocket_job/sliced/slices.rb +6 -0
  28. data/lib/rocket_job/subscribers/server.rb +9 -3
  29. data/lib/rocket_job/supervisor.rb +3 -1
  30. data/lib/rocket_job/version.rb +1 -1
  31. data/lib/rocket_job/worker_pool.rb +1 -0
  32. data/lib/rocketjob.rb +7 -20
  33. metadata +27 -11
  34. data/lib/rocket_job/plugins/rufus/cron_line.rb +0 -520
  35. data/lib/rocket_job/plugins/rufus/zo_time.rb +0 -524
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6715f9b19c04ee07962197db8e635e9e5e74b1e1dbd9e3add5239d235c5dd218
4
- data.tar.gz: 5c501260e0e328691f668325ff48e88ea5da86f49f6a739343b5bf24569aee75
3
+ metadata.gz: 55ab6ca2b3f76cdb4ddf679a2d3e88e1d6a6f3106a69349129a267b629ef4a53
4
+ data.tar.gz: 4e0d07878fb4265179b4a270650cc9b89ca4bca55f1d8f9a3451cb3064062c35
5
5
  SHA512:
6
- metadata.gz: db9890e678c9e2cb97c6f8faf25e8115f5946017ed3dc0fc3f208a62c2765fabb2c5f7673c9850b6202c61bd312c0e535944609e21e3dbbfd514eff2a50ffa0a
7
- data.tar.gz: ede6fbe558cb428c3291bba1584d32dffcc65c6889ab90753e1a79ce33713410b071e2db44bf5007119ba0a83867e2d42485f1b16bfa4070b0a36e7ca89bbf13
6
+ metadata.gz: fdc7ae3827d987404f431410cb81161fbfa269dfe7575a4a86a6abf362c59c6acd0d2c7e9856273ced396eeaf20e50561f1be10a7de0d5c4ae45e5648d15d083
7
+ data.tar.gz: 7bb9e9ac90569e78e135293efcd357a0d75037b5417f5f530f18ae3f17f44acf28d7de5b4aeef880a0315117f2992623958a4fb4c9df93ca7273369ac052e759
data/README.md CHANGED
@@ -3,9 +3,9 @@
3
3
 
4
4
  Ruby's missing batch system
5
5
 
6
- Checkout http://rocketjob.io/
6
+ Checkout https://rocketjob.io/
7
7
 
8
- ![Rocket Job](http://rocketjob.io/images/rocket/rocket-icon-512x512.png)
8
+ ![Rocket Job](https://rocketjob.io/images/rocket/rocket-icon-512x512.png)
9
9
 
10
10
  ## Documentation
11
11
 
@@ -24,6 +24,7 @@ module RocketJob
24
24
  autoload :LowerPriority, "rocket_job/batch/lower_priority"
25
25
  autoload :Performance, "rocket_job/batch/performance"
26
26
  autoload :Statistics, "rocket_job/batch/statistics"
27
+ autoload :ThrottleWindows, "rocket_job/batch/throttle_windows"
27
28
  autoload :Result, "rocket_job/batch/result"
28
29
  autoload :Results, "rocket_job/batch/results"
29
30
  autoload :Tabular, "rocket_job/batch/tabular"
@@ -18,7 +18,7 @@ module RocketJob
18
18
  raise "Category #{category.inspect}, must be registered in input_categories: #{input_categories.inspect}"
19
19
  end
20
20
 
21
- (@inputs ||= {})[category] ||= RocketJob::Sliced::Input.new(rocket_job_io_slice_arguments("inputs", category))
21
+ (@inputs ||= {})[category] ||= RocketJob::Sliced.factory(:input, category, self)
22
22
  end
23
23
 
24
24
  # Returns [RocketJob::Sliced::Output] output collection for holding output slices
@@ -34,7 +34,7 @@ module RocketJob
34
34
  raise "Category #{category.inspect}, must be registered in output_categories: #{output_categories.inspect}"
35
35
  end
36
36
 
37
- (@outputs ||= {})[category] ||= RocketJob::Sliced::Output.new(rocket_job_io_slice_arguments("outputs", category))
37
+ (@outputs ||= {})[category] ||= RocketJob::Sliced.factory(:output, category, self)
38
38
  end
39
39
 
40
40
  # Upload the supplied file, io, IOStreams::Path, or IOStreams::Stream.
@@ -355,8 +355,18 @@ module RocketJob
355
355
 
356
356
  return output(category).download(header_line: header_line, &block) if block
357
357
 
358
- IOStreams.new(stream).writer(:line, **args) do |io|
359
- output(category).download(header_line: header_line) { |record| io << record }
358
+ output_collection = output(category)
359
+
360
+ if output_collection.binary?
361
+ IOStreams.new(stream).stream(:none).writer(**args) do |io|
362
+ raise(ArgumentError, "A `header_line` is not supported with binary output collections") if header_line
363
+
364
+ output_collection.download { |record| io << record[:binary] }
365
+ end
366
+ else
367
+ IOStreams.new(stream).writer(:line, **args) do |io|
368
+ output_collection.download(header_line: header_line) { |record| io << record }
369
+ end
360
370
  end
361
371
  end
362
372
 
@@ -393,21 +403,6 @@ module RocketJob
393
403
  RocketJob::Sliced::Writer::Output.collect(self, input_slice) { |writer| writer << result }
394
404
  end
395
405
  end
396
-
397
- private
398
-
399
- def rocket_job_io_slice_arguments(collection_type, category)
400
- collection_name = "rocket_job.#{collection_type}.#{id}"
401
- collection_name << ".#{category}" unless category == :main
402
-
403
- args = {collection_name: collection_name, slice_size: slice_size}
404
- if encrypt
405
- args[:slice_class] = Sliced::EncryptedSlice
406
- elsif compress
407
- args[:slice_class] = Sliced::CompressedSlice
408
- end
409
- args
410
- end
411
406
  end
412
407
  end
413
408
  end
@@ -44,12 +44,12 @@ module RocketJob
44
44
  # Compress uploaded records.
45
45
  # The fields are not affected in any way, only the data stored in the
46
46
  # records and results collections will be compressed
47
- field :compress, type: Boolean, default: false, class_attribute: true
47
+ field :compress, type: Object, default: false, class_attribute: true
48
48
 
49
49
  # Encrypt uploaded records.
50
50
  # The fields are not affected in any way, only the data stored in the
51
51
  # records and results collections will be encrypted
52
- field :encrypt, type: Boolean, default: false, class_attribute: true
52
+ field :encrypt, type: Object, default: false, class_attribute: true
53
53
 
54
54
  #
55
55
  # Values that jobs can also update during processing
@@ -12,6 +12,7 @@ module RocketJob
12
12
  included do
13
13
  field :tabular_input_header, type: Array, class_attribute: true, user_editable: true
14
14
  field :tabular_input_format, type: Symbol, default: :csv, class_attribute: true, user_editable: true
15
+ field :tabular_input_options, type: Hash, class_attribute: true
15
16
 
16
17
  # tabular_input_mode: [:line | :array | :hash]
17
18
  # :line
@@ -53,7 +54,9 @@ module RocketJob
53
54
  input_stream = stream.nil? ? nil : IOStreams.new(stream)
54
55
 
55
56
  if stream && (tabular_input_type == :text)
56
- input_stream.option_or_stream(:encode, encoding: "UTF-8", cleaner: :printable, replace: "")
57
+ # Cannot change the length of fixed width lines
58
+ replace = tabular_input_format == :fixed ? " " : ""
59
+ input_stream.option_or_stream(:encode, encoding: "UTF-8", cleaner: :printable, replace: replace)
57
60
  end
58
61
 
59
62
  # If an input header is not required, then we don't extract it
@@ -96,14 +99,15 @@ module RocketJob
96
99
  allowed_columns: tabular_input_white_list,
97
100
  required_columns: tabular_input_required,
98
101
  skip_unknown: tabular_input_skip_unknown,
99
- format: tabular_input_format
102
+ format: tabular_input_format,
103
+ format_options: tabular_input_options&.deep_symbolize_keys
100
104
  )
101
105
  end
102
106
 
103
107
  def tabular_input_render
104
- unless tabular_input_header.blank? && tabular_input.header?
105
- @rocket_job_input = tabular_input.record_parse(@rocket_job_input)
106
- end
108
+ return if tabular_input_header.blank? && tabular_input.header?
109
+
110
+ @rocket_job_input = tabular_input.record_parse(@rocket_job_input)
107
111
  end
108
112
 
109
113
  # Cleanse custom input header if supplied.
@@ -12,6 +12,7 @@ module RocketJob
12
12
  included do
13
13
  field :tabular_output_header, type: Array, class_attribute: true, user_editable: true, copy_on_restart: true
14
14
  field :tabular_output_format, type: Symbol, default: :csv, class_attribute: true, user_editable: true, copy_on_restart: true
15
+ field :tabular_output_options, type: Hash, class_attribute: true
15
16
 
16
17
  validates_inclusion_of :tabular_output_format, in: IOStreams::Tabular.registered_formats
17
18
 
@@ -31,8 +32,9 @@ module RocketJob
31
32
 
32
33
  # Overrides: `RocketJob::Batch::IO#download` to add the `tabular_output_header`.
33
34
  def download(file_name_or_io = nil, category: :main, **args, &block)
34
- # No header required
35
- return super(file_name_or_io, category: category, **args, &block) unless tabular_output.requires_header?(category)
35
+ unless tabular_output.requires_header?(category)
36
+ return super(file_name_or_io, category: category, **args, &block)
37
+ end
36
38
 
37
39
  header = tabular_output.render_header(category)
38
40
  super(file_name_or_io, header_line: header, category: category, **args, &block)
@@ -43,7 +45,11 @@ module RocketJob
43
45
  # Delimited instance used for this slice, by a single worker (thread)
44
46
  def tabular_output
45
47
  @tabular_output ||= Tabular.new(
46
- main: IOStreams::Tabular.new(columns: tabular_output_header, format: tabular_output_format)
48
+ main: IOStreams::Tabular.new(
49
+ columns: tabular_output_header,
50
+ format: tabular_output_format,
51
+ format_options: tabular_output_options&.deep_symbolize_keys
52
+ )
47
53
  )
48
54
  end
49
55
 
@@ -6,7 +6,7 @@ module RocketJob
6
6
  #
7
7
  # Example:
8
8
  # # Do not run any slices for this job when the MySQL slave delay exceeds 5 minutes.
9
- # class MyJob < RocketJob
9
+ # class MyJob < RocketJob::Job
10
10
  # include RocketJob::Batch
11
11
  #
12
12
  # # Define a custom mysql throttle
@@ -5,7 +5,7 @@ module RocketJob
5
5
  # Throttle the number of slices of a specific batch job that are processed at the same time.
6
6
  #
7
7
  # Example:
8
- # class MyJob < RocketJob
8
+ # class MyJob < RocketJob::Job
9
9
  # include RocketJob::Batch
10
10
  #
11
11
  # # Maximum number of slices to process at the same time for each running instance.
@@ -0,0 +1,72 @@
1
+ require "active_support/concern"
2
+ require "fugit"
3
+
4
+ module RocketJob
5
+ module Batch
6
+ # For a batch job that can run over a long period of time it can be useful
7
+ # to prevent its slices from being processed outside a predefined processing window.
8
+ #
9
+ # This plugin supports up to 2 different processing windows.
10
+ #
11
+ # For example, do not run this job during business hours.
12
+ # Allow it to run from 5pm until 8am the following day Mon through Fri.
13
+ #
14
+ # class AfterHoursJob < RocketJob::Job
15
+ # include RocketJob::Batch
16
+ # include RocketJob::Batch::ThrottleWindows
17
+ #
18
+ # # Monday through Thursday the job can start processing at 5pm Eastern.
19
+ # self.primary_schedule = "0 17 * * 1-4 America/New_York"
20
+ # # Slices are allowed to run until 8am the following day, which is 15 hours long:
21
+ # self.primary_duration = 15.hours
22
+ #
23
+ # # The slices for this job can run all weekend long, starting Friday at 5pm Eastern.
24
+ # self.secondary_schedule = "0 17 * * 5 America/New_York"
25
+ # # Slices are allowed to run until 8am on Monday morning, which is 63 hours long:
26
+ # self.secondary_duration = 63.hours
27
+ # end
28
+ #
29
+ # Notes:
30
+ # * These schedules do not affect when the job is started, completed, or when `before_batch` or
31
+ # `after_batch` processing is performed. It only limits when individual slices are processed.
32
+ module ThrottleWindows
33
+ extend ActiveSupport::Concern
34
+
35
+ included do
36
+ # Beginning of the primary schedule. In cron format, see Scheduled Jobs `cron_schedule` for examples.
37
+ field :primary_schedule, type: String, class_attribute: true, user_editable: true, copy_on_restart: true
38
+ # Duration in seconds of the primary window.
39
+ field :primary_duration, type: Integer, class_attribute: true, user_editable: true, copy_on_restart: true
40
+
41
+ # Beginning of the secondary schedule. In cron format, see Scheduled Jobs `cron_schedule` for examples.
42
+ field :secondary_schedule, type: String, class_attribute: true, user_editable: true, copy_on_restart: true
43
+ # Duration in seconds of the secondary window.
44
+ field :secondary_duration, type: Integer, class_attribute: true, user_editable: true, copy_on_restart: true
45
+
46
+ define_batch_throttle :throttle_windows_exceeded?, filter: :throttle_filter_id
47
+
48
+ validates_each :primary_schedule, :secondary_schedule do |record, attr, value|
49
+ record.errors.add(attr, "Invalid #{attr}: #{value.inspect}") if value && !Fugit::Cron.new(value)
50
+ end
51
+ end
52
+
53
+ private
54
+
55
+ def throttle_windows_exceeded?
56
+ exceeded = primary_schedule && primary_duration && throttle_outside_window?(primary_schedule, primary_duration)
57
+ if exceeded && secondary_schedule && secondary_duration
58
+ exceeded = throttle_outside_window?(secondary_schedule, secondary_duration)
59
+ end
60
+ exceeded
61
+ end
62
+
63
+ def throttle_outside_window?(schedule, duration)
64
+ cron = Fugit::Cron.new(schedule)
65
+ time = Time.now.utc + 1
66
+ # Add 1 second since right now could be the very beginning of the processing window.
67
+ previous_time = cron.previous_time(time).to_utc_time
68
+ previous_time + duration < time
69
+ end
70
+ end
71
+ end
72
+ end
@@ -28,10 +28,6 @@ module RocketJob
28
28
  #
29
29
  # If an exception was thrown the entire slice of records is marked as failed.
30
30
  #
31
- # If the mongo_ha gem has been loaded, then the connection to mongo is
32
- # automatically re-established and the job will resume anytime a
33
- # Mongo connection failure occurs.
34
- #
35
31
  # Thread-safe, can be called by multiple threads at the same time
36
32
  def rocket_job_work(worker, re_raise_exceptions = false)
37
33
  raise "Job must be started before calling #rocket_job_work" unless running?
@@ -50,7 +46,7 @@ module RocketJob
50
46
  next if slice.failed?
51
47
 
52
48
  slice.fail_on_exception!(re_raise_exceptions) { rocket_job_process_slice(slice) }
53
- elsif record_count && rocket_job_batch_complete?(worker.name)
49
+ elsif record_count && fail_on_exception!(re_raise_exceptions) { rocket_job_batch_complete?(worker.name) }
54
50
  return false
55
51
  else
56
52
  logger.debug "No more work available for this job"
@@ -114,8 +110,6 @@ module RocketJob
114
110
  servers
115
111
  end
116
112
 
117
- private
118
-
119
113
  def rocket_job_batch_throttled?(slice, worker)
120
114
  filter = self.class.rocket_job_batch_throttles.matching_filter(self, slice)
121
115
  return false unless filter
@@ -154,7 +148,7 @@ module RocketJob
154
148
  records = slice.records
155
149
 
156
150
  # Skip records already processed, if any.
157
- #slice.processing_record_number ||= 0
151
+ # slice.processing_record_number ||= 0
158
152
  # TODO: Must append to existing output slices before this can be enabled.
159
153
  # if !collect_output && (slice.processing_record_number > 1)
160
154
  # records = records[slice.processing_record_number - 1..-1]
@@ -104,8 +104,6 @@ module RocketJob
104
104
  end
105
105
  end
106
106
 
107
- private
108
-
109
107
  @load_time = Time.now.utc
110
108
  @subscribers = Concurrent::Map.new { Concurrent::Array.new }
111
109
 
@@ -22,8 +22,6 @@ module RocketJob
22
22
  @collection_name = collection_name&.to_sym
23
23
  end
24
24
 
25
- private
26
-
27
25
  module ClassMethods
28
26
  def with_collection(collection_name)
29
27
  all.with_collection(collection_name)
@@ -0,0 +1,12 @@
1
+ require "mongoid/fields/validators/macro"
2
+ require "semantic_logger"
3
+ module RocketJob
4
+ module RemoveMongoidWarnings
5
+ # Remove annoying warnings about Symbols type being deprecated.
6
+ def validate_options(*params)
7
+ SemanticLogger.silence(:error) { super(*params) }
8
+ end
9
+ end
10
+ end
11
+
12
+ ::Mongoid::Fields::Validators::Macro.extend(RocketJob::RemoveMongoidWarnings)
@@ -25,7 +25,7 @@ module RocketJob
25
25
 
26
26
  self.destroy_on_complete = false
27
27
  # Number of times to automatically retry the copy. Set to `0` for no retry attempts.
28
- self.retry_limit = 5
28
+ self.retry_limit = 10
29
29
 
30
30
  # File names in IOStreams URL format.
31
31
  field :source_url, type: String, user_editable: true
@@ -1,8 +1,5 @@
1
- begin
2
- require "active_record"
3
- rescue LoadError
4
- raise 'RocketJob::Jobs::ReEncrypt::RelationalJob uses ActiveRecord to obtain the database connection, please install the gem "activerecord".'
5
- end
1
+ require "active_record"
2
+ require "sync_attr"
6
3
 
7
4
  # Batch Worker to Re-encrypt all encrypted fields in MySQL that start with `encrypted_`.
8
5
  #
@@ -40,7 +40,7 @@ module RocketJob
40
40
  job.id = job_id if job_id
41
41
  upload_file(job)
42
42
  job.save!
43
- rescue StandardError => e
43
+ rescue Exception => e
44
44
  # Prevent partial uploads
45
45
  job&.cleanup! if job.respond_to?(:cleanup!)
46
46
  raise(e)
@@ -1,4 +1,5 @@
1
1
  require "active_support/concern"
2
+ require "fugit"
2
3
 
3
4
  module RocketJob
4
5
  module Plugins
@@ -17,7 +18,9 @@ module RocketJob
17
18
 
18
19
  field :cron_schedule, type: String, class_attribute: true, user_editable: true, copy_on_restart: true
19
20
 
20
- validate :rocket_job_cron_valid
21
+ validates_each :cron_schedule do |record, attr, value|
22
+ record.errors.add(attr, "Invalid cron_schedule: #{value.inspect}") if value && !Fugit::Cron.new(value)
23
+ end
21
24
  before_save :rocket_job_cron_set_run_at
22
25
 
23
26
  private
@@ -42,30 +45,10 @@ module RocketJob
42
45
  end
43
46
  end
44
47
 
45
- # Returns [Time] the next time this job will be scheduled to run at.
46
- #
47
- # Parameters
48
- # time: [Time]
49
- # The next time as of this time.
50
- # Default: Time.now
51
- def rocket_job_cron_next_time(time = Time.now)
52
- RocketJob::Plugins::Rufus::CronLine.new(cron_schedule).next_time(time)
53
- end
54
-
55
- private
56
-
57
48
  def rocket_job_cron_set_run_at
58
- return unless cron_schedule
59
-
60
- self.run_at = rocket_job_cron_next_time if cron_schedule_changed? && !run_at_changed?
61
- end
62
-
63
- def rocket_job_cron_valid
64
- return unless cron_schedule
49
+ return if cron_schedule.nil? || !(cron_schedule_changed? && !run_at_changed?)
65
50
 
66
- RocketJob::Plugins::Rufus::CronLine.new(cron_schedule)
67
- rescue ArgumentError => e
68
- errors.add(:cron_schedule, e.message)
51
+ self.run_at = Fugit::Cron.new(cron_schedule).next_time.to_utc_time
69
52
  end
70
53
  end
71
54
  end
@@ -7,7 +7,7 @@ module RocketJob
7
7
  #
8
8
  # Example:
9
9
  # # Do not run this job when the MySQL slave delay exceeds 5 minutes.
10
- # class MyJob < RocketJob
10
+ # class MyJob < RocketJob::Job
11
11
  # # Define a custom mysql throttle
12
12
  # # Prevents all jobs of this class from running on the current server.
13
13
  # define_throttle :mysql_throttle_exceeded?
@@ -6,7 +6,7 @@ module RocketJob
6
6
  # Throttle the number of jobs of a specific class that are processed at the same time.
7
7
  #
8
8
  # Example:
9
- # class MyJob < RocketJob
9
+ # class MyJob < RocketJob::Job
10
10
  # # Maximum number of jobs of this class to process at the same time.
11
11
  # self.throttle_running_jobs = 25
12
12
  #
@@ -73,12 +73,13 @@ module RocketJob
73
73
  if failed? || !may_fail?
74
74
  self.exception = JobException.from_exception(e)
75
75
  exception.worker_name = worker_name
76
- save! unless new_record? || destroyed?
77
- elsif new_record? || destroyed?
78
- fail(worker_name, e)
79
76
  else
80
- fail!(worker_name, e)
77
+ fail(worker_name, e)
81
78
  end
79
+
80
+ # Prevent validation failures from failing the job
81
+ save(validate: false) unless new_record? || destroyed?
82
+
82
83
  raise e if re_raise_exceptions
83
84
  end
84
85
  end