rocketjob 5.2.0 → 5.4.0.beta1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +2 -2
  3. data/lib/rocket_job/batch.rb +1 -0
  4. data/lib/rocket_job/batch/io.rb +14 -19
  5. data/lib/rocket_job/batch/model.rb +2 -2
  6. data/lib/rocket_job/batch/tabular/input.rb +9 -5
  7. data/lib/rocket_job/batch/tabular/output.rb +9 -3
  8. data/lib/rocket_job/batch/throttle.rb +1 -1
  9. data/lib/rocket_job/batch/throttle_running_workers.rb +1 -1
  10. data/lib/rocket_job/batch/throttle_windows.rb +72 -0
  11. data/lib/rocket_job/batch/worker.rb +2 -8
  12. data/lib/rocket_job/event.rb +0 -2
  13. data/lib/rocket_job/extensions/mongoid/clients/options.rb +0 -2
  14. data/lib/rocket_job/extensions/mongoid/remove_warnings.rb +12 -0
  15. data/lib/rocket_job/jobs/copy_file_job.rb +1 -1
  16. data/lib/rocket_job/jobs/re_encrypt/relational_job.rb +2 -5
  17. data/lib/rocket_job/jobs/upload_file_job.rb +1 -1
  18. data/lib/rocket_job/plugins/cron.rb +6 -23
  19. data/lib/rocket_job/plugins/job/throttle.rb +1 -1
  20. data/lib/rocket_job/plugins/job/throttle_running_jobs.rb +1 -1
  21. data/lib/rocket_job/plugins/job/worker.rb +5 -4
  22. data/lib/rocket_job/plugins/processing_window.rb +7 -13
  23. data/lib/rocket_job/sliced.rb +91 -0
  24. data/lib/rocket_job/sliced/bzip2_output_slice.rb +43 -0
  25. data/lib/rocket_job/sliced/input.rb +3 -3
  26. data/lib/rocket_job/sliced/slice.rb +6 -0
  27. data/lib/rocket_job/sliced/slices.rb +6 -0
  28. data/lib/rocket_job/subscribers/server.rb +9 -3
  29. data/lib/rocket_job/supervisor.rb +3 -1
  30. data/lib/rocket_job/version.rb +1 -1
  31. data/lib/rocket_job/worker_pool.rb +1 -0
  32. data/lib/rocketjob.rb +7 -20
  33. metadata +27 -11
  34. data/lib/rocket_job/plugins/rufus/cron_line.rb +0 -520
  35. data/lib/rocket_job/plugins/rufus/zo_time.rb +0 -524
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6715f9b19c04ee07962197db8e635e9e5e74b1e1dbd9e3add5239d235c5dd218
4
- data.tar.gz: 5c501260e0e328691f668325ff48e88ea5da86f49f6a739343b5bf24569aee75
3
+ metadata.gz: 55ab6ca2b3f76cdb4ddf679a2d3e88e1d6a6f3106a69349129a267b629ef4a53
4
+ data.tar.gz: 4e0d07878fb4265179b4a270650cc9b89ca4bca55f1d8f9a3451cb3064062c35
5
5
  SHA512:
6
- metadata.gz: db9890e678c9e2cb97c6f8faf25e8115f5946017ed3dc0fc3f208a62c2765fabb2c5f7673c9850b6202c61bd312c0e535944609e21e3dbbfd514eff2a50ffa0a
7
- data.tar.gz: ede6fbe558cb428c3291bba1584d32dffcc65c6889ab90753e1a79ce33713410b071e2db44bf5007119ba0a83867e2d42485f1b16bfa4070b0a36e7ca89bbf13
6
+ metadata.gz: fdc7ae3827d987404f431410cb81161fbfa269dfe7575a4a86a6abf362c59c6acd0d2c7e9856273ced396eeaf20e50561f1be10a7de0d5c4ae45e5648d15d083
7
+ data.tar.gz: 7bb9e9ac90569e78e135293efcd357a0d75037b5417f5f530f18ae3f17f44acf28d7de5b4aeef880a0315117f2992623958a4fb4c9df93ca7273369ac052e759
data/README.md CHANGED
@@ -3,9 +3,9 @@
3
3
 
4
4
  Ruby's missing batch system
5
5
 
6
- Checkout http://rocketjob.io/
6
+ Checkout https://rocketjob.io/
7
7
 
8
- ![Rocket Job](http://rocketjob.io/images/rocket/rocket-icon-512x512.png)
8
+ ![Rocket Job](https://rocketjob.io/images/rocket/rocket-icon-512x512.png)
9
9
 
10
10
  ## Documentation
11
11
 
@@ -24,6 +24,7 @@ module RocketJob
24
24
  autoload :LowerPriority, "rocket_job/batch/lower_priority"
25
25
  autoload :Performance, "rocket_job/batch/performance"
26
26
  autoload :Statistics, "rocket_job/batch/statistics"
27
+ autoload :ThrottleWindows, "rocket_job/batch/throttle_windows"
27
28
  autoload :Result, "rocket_job/batch/result"
28
29
  autoload :Results, "rocket_job/batch/results"
29
30
  autoload :Tabular, "rocket_job/batch/tabular"
@@ -18,7 +18,7 @@ module RocketJob
18
18
  raise "Category #{category.inspect}, must be registered in input_categories: #{input_categories.inspect}"
19
19
  end
20
20
 
21
- (@inputs ||= {})[category] ||= RocketJob::Sliced::Input.new(rocket_job_io_slice_arguments("inputs", category))
21
+ (@inputs ||= {})[category] ||= RocketJob::Sliced.factory(:input, category, self)
22
22
  end
23
23
 
24
24
  # Returns [RocketJob::Sliced::Output] output collection for holding output slices
@@ -34,7 +34,7 @@ module RocketJob
34
34
  raise "Category #{category.inspect}, must be registered in output_categories: #{output_categories.inspect}"
35
35
  end
36
36
 
37
- (@outputs ||= {})[category] ||= RocketJob::Sliced::Output.new(rocket_job_io_slice_arguments("outputs", category))
37
+ (@outputs ||= {})[category] ||= RocketJob::Sliced.factory(:output, category, self)
38
38
  end
39
39
 
40
40
  # Upload the supplied file, io, IOStreams::Path, or IOStreams::Stream.
@@ -355,8 +355,18 @@ module RocketJob
355
355
 
356
356
  return output(category).download(header_line: header_line, &block) if block
357
357
 
358
- IOStreams.new(stream).writer(:line, **args) do |io|
359
- output(category).download(header_line: header_line) { |record| io << record }
358
+ output_collection = output(category)
359
+
360
+ if output_collection.binary?
361
+ IOStreams.new(stream).stream(:none).writer(**args) do |io|
362
+ raise(ArgumenError, "A `header_line` is not supported with binary output collections") if header_line
363
+
364
+ output_collection.download { |record| io << record[:binary] }
365
+ end
366
+ else
367
+ IOStreams.new(stream).writer(:line, **args) do |io|
368
+ output_collection.download(header_line: header_line) { |record| io << record }
369
+ end
360
370
  end
361
371
  end
362
372
 
@@ -393,21 +403,6 @@ module RocketJob
393
403
  RocketJob::Sliced::Writer::Output.collect(self, input_slice) { |writer| writer << result }
394
404
  end
395
405
  end
396
-
397
- private
398
-
399
- def rocket_job_io_slice_arguments(collection_type, category)
400
- collection_name = "rocket_job.#{collection_type}.#{id}"
401
- collection_name << ".#{category}" unless category == :main
402
-
403
- args = {collection_name: collection_name, slice_size: slice_size}
404
- if encrypt
405
- args[:slice_class] = Sliced::EncryptedSlice
406
- elsif compress
407
- args[:slice_class] = Sliced::CompressedSlice
408
- end
409
- args
410
- end
411
406
  end
412
407
  end
413
408
  end
@@ -44,12 +44,12 @@ module RocketJob
44
44
  # Compress uploaded records.
45
45
  # The fields are not affected in any way, only the data stored in the
46
46
  # records and results collections will be compressed
47
- field :compress, type: Boolean, default: false, class_attribute: true
47
+ field :compress, type: Object, default: false, class_attribute: true
48
48
 
49
49
  # Encrypt uploaded records.
50
50
  # The fields are not affected in any way, only the data stored in the
51
51
  # records and results collections will be encrypted
52
- field :encrypt, type: Boolean, default: false, class_attribute: true
52
+ field :encrypt, type: Object, default: false, class_attribute: true
53
53
 
54
54
  #
55
55
  # Values that jobs can also update during processing
@@ -12,6 +12,7 @@ module RocketJob
12
12
  included do
13
13
  field :tabular_input_header, type: Array, class_attribute: true, user_editable: true
14
14
  field :tabular_input_format, type: Symbol, default: :csv, class_attribute: true, user_editable: true
15
+ field :tabular_input_options, type: Hash, class_attribute: true
15
16
 
16
17
  # tabular_input_mode: [:line | :array | :hash]
17
18
  # :line
@@ -53,7 +54,9 @@ module RocketJob
53
54
  input_stream = stream.nil? ? nil : IOStreams.new(stream)
54
55
 
55
56
  if stream && (tabular_input_type == :text)
56
- input_stream.option_or_stream(:encode, encoding: "UTF-8", cleaner: :printable, replace: "")
57
+ # Cannot change the length of fixed width lines
58
+ replace = tabular_input_format == :fixed ? " " : ""
59
+ input_stream.option_or_stream(:encode, encoding: "UTF-8", cleaner: :printable, replace: replace)
57
60
  end
58
61
 
59
62
  # If an input header is not required, then we don't extract it
@@ -96,14 +99,15 @@ module RocketJob
96
99
  allowed_columns: tabular_input_white_list,
97
100
  required_columns: tabular_input_required,
98
101
  skip_unknown: tabular_input_skip_unknown,
99
- format: tabular_input_format
102
+ format: tabular_input_format,
103
+ format_options: tabular_input_options&.deep_symbolize_keys
100
104
  )
101
105
  end
102
106
 
103
107
  def tabular_input_render
104
- unless tabular_input_header.blank? && tabular_input.header?
105
- @rocket_job_input = tabular_input.record_parse(@rocket_job_input)
106
- end
108
+ return if tabular_input_header.blank? && tabular_input.header?
109
+
110
+ @rocket_job_input = tabular_input.record_parse(@rocket_job_input)
107
111
  end
108
112
 
109
113
  # Cleanse custom input header if supplied.
@@ -12,6 +12,7 @@ module RocketJob
12
12
  included do
13
13
  field :tabular_output_header, type: Array, class_attribute: true, user_editable: true, copy_on_restart: true
14
14
  field :tabular_output_format, type: Symbol, default: :csv, class_attribute: true, user_editable: true, copy_on_restart: true
15
+ field :tabular_output_options, type: Hash, class_attribute: true
15
16
 
16
17
  validates_inclusion_of :tabular_output_format, in: IOStreams::Tabular.registered_formats
17
18
 
@@ -31,8 +32,9 @@ module RocketJob
31
32
 
32
33
  # Overrides: `RocketJob::Batch::IO#download` to add the `tabular_output_header`.
33
34
  def download(file_name_or_io = nil, category: :main, **args, &block)
34
- # No header required
35
- return super(file_name_or_io, category: category, **args, &block) unless tabular_output.requires_header?(category)
35
+ unless tabular_output.requires_header?(category)
36
+ return super(file_name_or_io, category: category, **args, &block)
37
+ end
36
38
 
37
39
  header = tabular_output.render_header(category)
38
40
  super(file_name_or_io, header_line: header, category: category, **args, &block)
@@ -43,7 +45,11 @@ module RocketJob
43
45
  # Delimited instance used for this slice, by a single worker (thread)
44
46
  def tabular_output
45
47
  @tabular_output ||= Tabular.new(
46
- main: IOStreams::Tabular.new(columns: tabular_output_header, format: tabular_output_format)
48
+ main: IOStreams::Tabular.new(
49
+ columns: tabular_output_header,
50
+ format: tabular_output_format,
51
+ format_options: tabular_output_options&.deep_symbolize_keys
52
+ )
47
53
  )
48
54
  end
49
55
 
@@ -6,7 +6,7 @@ module RocketJob
6
6
  #
7
7
  # Example:
8
8
  # # Do not run any slices for this job when the MySQL slave delay exceeds 5 minutes.
9
- # class MyJob < RocketJob
9
+ # class MyJob < RocketJob::Job
10
10
  # include RocketJob::Batch
11
11
  #
12
12
  # # Define a custom mysql throttle
@@ -5,7 +5,7 @@ module RocketJob
5
5
  # Throttle the number of slices of a specific batch job that are processed at the same time.
6
6
  #
7
7
  # Example:
8
- # class MyJob < RocketJob
8
+ # class MyJob < RocketJob::Job
9
9
  # include RocketJob::Batch
10
10
  #
11
11
  # # Maximum number of slices to process at the same time for each running instance.
@@ -0,0 +1,72 @@
1
+ require "active_support/concern"
2
+ require "fugit"
3
+
4
+ module RocketJob
5
+ module Batch
6
+ # For a batch job that can run over a long period of time it can be useful
7
+ # to prevent its slices from being processed outside a predefined processing window.
8
+ #
9
+ # This plugin supports up to 2 different processing windows.
10
+ #
11
+ # For example, do not run this job during business hours.
12
+ # Allow it to run from 5pm until 8am the following day Mon through Fri.
13
+ #
14
+ # class AfterHoursJob < RocketJob::Job
15
+ # include RocketJob::Batch
16
+ # include RocketJob::Batch::ThrottleWindows
17
+ #
18
+ # # Monday through Thursday the job can start processing at 5pm Eastern.
19
+ # self.primary_schedule = "0 17 * * 1-4 America/New_York"
20
+ # # Slices are allowed to run until 8am the following day, which is 15 hours long:
21
+ # self.primary_duration = 15.hours
22
+ #
23
+ # # The slices for this job can run all weekend long, starting Friday at 5pm Eastern.
24
+ # self.secondary_schedule = "0 17 * * 5 America/New_York"
25
+ # # Slices are allowed to run until 8am on Monday morning, which is 63 hours long:
26
+ # self.secondary_duration = 63.hours
27
+ # end
28
+ #
29
+ # Notes:
30
+ # * These schedules do not affect when the job is started, completed, or when `before_batch` or
31
+ # `after_batch` processing is performed. It only limits when individual slices are processed.
32
+ module ThrottleWindows
33
+ extend ActiveSupport::Concern
34
+
35
+ included do
36
+ # Beginning of the primary schedule. In cron format, see Scheduled Jobs `cron_schedule` for examples.
37
+ field :primary_schedule, type: String, class_attribute: true, user_editable: true, copy_on_restart: true
38
+ # Duration in seconds of the primary window.
39
+ field :primary_duration, type: Integer, class_attribute: true, user_editable: true, copy_on_restart: true
40
+
41
+ # Beginning of the secondary schedule. In cron format, see Scheduled Jobs `cron_schedule` for examples.
42
+ field :secondary_schedule, type: String, class_attribute: true, user_editable: true, copy_on_restart: true
43
+ # Duration in seconds of the secondary window.
44
+ field :secondary_duration, type: Integer, class_attribute: true, user_editable: true, copy_on_restart: true
45
+
46
+ define_batch_throttle :throttle_windows_exceeded?, filter: :throttle_filter_id
47
+
48
+ validates_each :primary_schedule, :secondary_schedule do |record, attr, value|
49
+ record.errors.add(attr, "Invalid #{attr}: #{value.inspect}") if value && !Fugit::Cron.new(value)
50
+ end
51
+ end
52
+
53
+ private
54
+
55
+ def throttle_windows_exceeded?
56
+ exceeded = primary_schedule && primary_duration && throttle_outside_window?(primary_schedule, primary_duration)
57
+ if exceeded && secondary_schedule && secondary_duration
58
+ exceeded = throttle_outside_window?(secondary_schedule, secondary_duration)
59
+ end
60
+ exceeded
61
+ end
62
+
63
+ def throttle_outside_window?(schedule, duration)
64
+ cron = Fugit::Cron.new(schedule)
65
+ time = Time.now.utc + 1
66
+ # Add 1 second since right now could be the very beginning of the processing window.
67
+ previous_time = cron.previous_time(time).to_utc_time
68
+ previous_time + duration < time
69
+ end
70
+ end
71
+ end
72
+ end
@@ -28,10 +28,6 @@ module RocketJob
28
28
  #
29
29
  # If an exception was thrown the entire slice of records is marked as failed.
30
30
  #
31
- # If the mongo_ha gem has been loaded, then the connection to mongo is
32
- # automatically re-established and the job will resume anytime a
33
- # Mongo connection failure occurs.
34
- #
35
31
  # Thread-safe, can be called by multiple threads at the same time
36
32
  def rocket_job_work(worker, re_raise_exceptions = false)
37
33
  raise "Job must be started before calling #rocket_job_work" unless running?
@@ -50,7 +46,7 @@ module RocketJob
50
46
  next if slice.failed?
51
47
 
52
48
  slice.fail_on_exception!(re_raise_exceptions) { rocket_job_process_slice(slice) }
53
- elsif record_count && rocket_job_batch_complete?(worker.name)
49
+ elsif record_count && fail_on_exception!(re_raise_exceptions) { rocket_job_batch_complete?(worker.name) }
54
50
  return false
55
51
  else
56
52
  logger.debug "No more work available for this job"
@@ -114,8 +110,6 @@ module RocketJob
114
110
  servers
115
111
  end
116
112
 
117
- private
118
-
119
113
  def rocket_job_batch_throttled?(slice, worker)
120
114
  filter = self.class.rocket_job_batch_throttles.matching_filter(self, slice)
121
115
  return false unless filter
@@ -154,7 +148,7 @@ module RocketJob
154
148
  records = slice.records
155
149
 
156
150
  # Skip records already processed, if any.
157
- #slice.processing_record_number ||= 0
151
+ # slice.processing_record_number ||= 0
158
152
  # TODO: Must append to existing output slices before this can be enabled.
159
153
  # if !collect_output && (slice.processing_record_number > 1)
160
154
  # records = records[slice.processing_record_number - 1..-1]
@@ -104,8 +104,6 @@ module RocketJob
104
104
  end
105
105
  end
106
106
 
107
- private
108
-
109
107
  @load_time = Time.now.utc
110
108
  @subscribers = Concurrent::Map.new { Concurrent::Array.new }
111
109
 
@@ -22,8 +22,6 @@ module RocketJob
22
22
  @collection_name = collection_name&.to_sym
23
23
  end
24
24
 
25
- private
26
-
27
25
  module ClassMethods
28
26
  def with_collection(collection_name)
29
27
  all.with_collection(collection_name)
@@ -0,0 +1,12 @@
1
+ require "mongoid/fields/validators/macro"
2
+ require "semantic_logger"
3
+ module RocketJob
4
+ module RemoveMongoidWarnings
5
+ # Remove annoying warnings about the Symbol type being deprecated.
6
+ def validate_options(*params)
7
+ SemanticLogger.silence(:error) { super(*params) }
8
+ end
9
+ end
10
+ end
11
+
12
+ ::Mongoid::Fields::Validators::Macro.extend(RocketJob::RemoveMongoidWarnings)
@@ -25,7 +25,7 @@ module RocketJob
25
25
 
26
26
  self.destroy_on_complete = false
27
27
  # Number of times to automatically retry the copy. Set to `0` for no retry attempts.
28
- self.retry_limit = 5
28
+ self.retry_limit = 10
29
29
 
30
30
  # File names in IOStreams URL format.
31
31
  field :source_url, type: String, user_editable: true
@@ -1,8 +1,5 @@
1
- begin
2
- require "active_record"
3
- rescue LoadError
4
- raise 'RocketJob::Jobs::ReEncrypt::RelationalJob uses ActiveRecord to obtain the database connection, please install the gem "activerecord".'
5
- end
1
+ require "active_record"
2
+ require "sync_attr"
6
3
 
7
4
  # Batch Worker to Re-encrypt all encrypted fields in MySQL that start with `encrypted_`.
8
5
  #
@@ -40,7 +40,7 @@ module RocketJob
40
40
  job.id = job_id if job_id
41
41
  upload_file(job)
42
42
  job.save!
43
- rescue StandardError => e
43
+ rescue Exception => e
44
44
  # Prevent partial uploads
45
45
  job&.cleanup! if job.respond_to?(:cleanup!)
46
46
  raise(e)
@@ -1,4 +1,5 @@
1
1
  require "active_support/concern"
2
+ require "fugit"
2
3
 
3
4
  module RocketJob
4
5
  module Plugins
@@ -17,7 +18,9 @@ module RocketJob
17
18
 
18
19
  field :cron_schedule, type: String, class_attribute: true, user_editable: true, copy_on_restart: true
19
20
 
20
- validate :rocket_job_cron_valid
21
+ validates_each :cron_schedule do |record, attr, value|
22
+ record.errors.add(attr, "Invalid cron_schedule: #{value.inspect}") if value && !Fugit::Cron.new(value)
23
+ end
21
24
  before_save :rocket_job_cron_set_run_at
22
25
 
23
26
  private
@@ -42,30 +45,10 @@ module RocketJob
42
45
  end
43
46
  end
44
47
 
45
- # Returns [Time] the next time this job will be scheduled to run at.
46
- #
47
- # Parameters
48
- # time: [Time]
49
- # The next time as of this time.
50
- # Default: Time.now
51
- def rocket_job_cron_next_time(time = Time.now)
52
- RocketJob::Plugins::Rufus::CronLine.new(cron_schedule).next_time(time)
53
- end
54
-
55
- private
56
-
57
48
  def rocket_job_cron_set_run_at
58
- return unless cron_schedule
59
-
60
- self.run_at = rocket_job_cron_next_time if cron_schedule_changed? && !run_at_changed?
61
- end
62
-
63
- def rocket_job_cron_valid
64
- return unless cron_schedule
49
+ return if cron_schedule.nil? || !(cron_schedule_changed? && !run_at_changed?)
65
50
 
66
- RocketJob::Plugins::Rufus::CronLine.new(cron_schedule)
67
- rescue ArgumentError => e
68
- errors.add(:cron_schedule, e.message)
51
+ self.run_at = Fugit::Cron.new(cron_schedule).next_time.to_utc_time
69
52
  end
70
53
  end
71
54
  end
@@ -7,7 +7,7 @@ module RocketJob
7
7
  #
8
8
  # Example:
9
9
  # # Do not run this job when the MySQL slave delay exceeds 5 minutes.
10
- # class MyJob < RocketJob
10
+ # class MyJob < RocketJob::Job
11
11
  # # Define a custom mysql throttle
12
12
  # # Prevents all jobs of this class from running on the current server.
13
13
  # define_throttle :mysql_throttle_exceeded?
@@ -6,7 +6,7 @@ module RocketJob
6
6
  # Throttle the number of jobs of a specific class that are processed at the same time.
7
7
  #
8
8
  # Example:
9
- # class MyJob < RocketJob
9
+ # class MyJob < RocketJob::Job
10
10
  # # Maximum number of jobs of this class to process at the same time.
11
11
  # self.throttle_running_jobs = 25
12
12
  #
@@ -73,12 +73,13 @@ module RocketJob
73
73
  if failed? || !may_fail?
74
74
  self.exception = JobException.from_exception(e)
75
75
  exception.worker_name = worker_name
76
- save! unless new_record? || destroyed?
77
- elsif new_record? || destroyed?
78
- fail(worker_name, e)
79
76
  else
80
- fail!(worker_name, e)
77
+ fail(worker_name, e)
81
78
  end
79
+
80
+ # Prevent validation failures from failing the job
81
+ save(validate: false) unless new_record? || destroyed?
82
+
82
83
  raise e if re_raise_exceptions
83
84
  end
84
85
  end