RubyGems - rocketjob - Versions diffs - 6.0.0.rc3 → 6.0.0 - Mend

rocketjob 6.0.0.rc3 → 6.0.0

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (35)

checksums.yaml +4 -4
data/README.md +26 -0
data/lib/rocket_job/batch/categories.rb +24 -20
data/lib/rocket_job/batch/io.rb +128 -128
data/lib/rocket_job/batch/worker.rb +14 -12
data/lib/rocket_job/category/base.rb +10 -7
data/lib/rocket_job/category/input.rb +61 -1
data/lib/rocket_job/category/output.rb +9 -0
data/lib/rocket_job/dirmon_entry.rb +1 -1
data/lib/rocket_job/jobs/conversion_job.rb +21 -17
data/lib/rocket_job/jobs/dirmon_job.rb +24 -35
data/lib/rocket_job/jobs/housekeeping_job.rb +4 -5
data/lib/rocket_job/jobs/on_demand_batch_job.rb +7 -5
data/lib/rocket_job/jobs/on_demand_job.rb +2 -2
data/lib/rocket_job/jobs/upload_file_job.rb +4 -0
data/lib/rocket_job/plugins/cron.rb +60 -20
data/lib/rocket_job/plugins/job/persistence.rb +36 -0
data/lib/rocket_job/plugins/restart.rb +3 -110
data/lib/rocket_job/plugins/state_machine.rb +2 -2
data/lib/rocket_job/plugins/throttle_dependent_jobs.rb +1 -2
data/lib/rocket_job/sliced/bzip2_output_slice.rb +18 -19
data/lib/rocket_job/sliced/compressed_slice.rb +3 -6
data/lib/rocket_job/sliced/encrypted_bzip2_output_slice.rb +49 -0
data/lib/rocket_job/sliced/encrypted_slice.rb +4 -6
data/lib/rocket_job/sliced/input.rb +42 -54
data/lib/rocket_job/sliced/slice.rb +7 -3
data/lib/rocket_job/sliced/slices.rb +12 -9
data/lib/rocket_job/sliced/writer/input.rb +46 -18
data/lib/rocket_job/sliced.rb +1 -19
data/lib/rocket_job/version.rb +1 -1
data/lib/rocketjob.rb +2 -2
metadata +8 -10
data/lib/rocket_job/batch/tabular/input.rb +0 -133
data/lib/rocket_job/batch/tabular/output.rb +0 -67
data/lib/rocket_job/batch/tabular.rb +0 -58

data/lib/rocket_job/sliced/slice.rb CHANGED Viewed

@@ -95,9 +95,13 @@ module RocketJob
       end
       # Returns whether this is a specialized binary slice for creating binary data from each slice
-      # that is then just downloaded as-is into output files.
-      def self.binary?
-        false
+      # that is downloaded without conversion into output files.
+      def self.binary_format
+      end
+      # For binary formats only, format the supplied records into the binary format for this slice
+      def self.to_binary(_records)
+        raise NotImplementedError
       end
       # `records` array has special handling so that it can be modified in place instead of having

data/lib/rocket_job/sliced/slices.rb CHANGED Viewed

@@ -42,12 +42,6 @@ module RocketJob
         slice
       end
-      # Returns whether this collection contains specialized binary slices for creating binary data from each slice
-      # that is then just downloaded as-is into output files.
-      def binary?
-        slice_class.binary?
-      end
       # Returns output slices in the order of their id
       # which is usually the order in which they were written.
       def each(&block)
@@ -96,6 +90,11 @@ module RocketJob
         slice
       end
+      def insert_many(slices)
+        documents = slices.collect(&:as_document)
+        all.collection.insert_many(documents)
+      end
       # Append to an existing slice if already present
       def append(slice, input_slice)
         existing_slice = all.where(id: input_slice.id).first
@@ -111,9 +110,13 @@ module RocketJob
       # Index for find_and_modify only if it is not already present
       def create_indexes
-        all.collection.indexes.create_one(state: 1, _id: 1) if all.collection.indexes.none? { |i| i["name"] == "state_1__id_1" }
-      rescue Mongo::Error::OperationFailure
-        all.collection.indexes.create_one(state: 1, _id: 1)
+        missing =
+          begin
+            all.collection.indexes.none? { |i| i["name"] == "state_1__id_1" }
+          rescue Mongo::Error::OperationFailure
+            true
+          end
+        all.collection.indexes.create_one({state: 1, _id: 1}, unique: true) if missing
       end
       # Forward additional methods.

data/lib/rocket_job/sliced/writer/input.rb CHANGED Viewed

@@ -12,43 +12,71 @@ module RocketJob
         #     Block to call on the first line only, instead of storing in the slice.
         #     Useful for extracting the header row
         #     Default: nil
-        def self.collect(input, **args)
-          writer = new(input, **args)
+        #
+        #   slice_size: [Integer]
+        #     Override the slice size when uploading for example ranges, where slice is the size
+        #     of the range itself.
+        #
+        #   slice_batch_size: [Integer]
+        #     The number of slices to batch up and to bulk load.
+        #     For smaller slices this significantly improves upload performance.
+        #     Note: If `slice_batch_size` is too high, it can exceed the maximum BSON block size.
+        def self.collect(data_store, **args)
+          writer = new(data_store, **args)
           yield(writer)
           writer.record_count
         ensure
-          writer&.close
+          writer&.flush
         end
-        def initialize(input, on_first: nil)
-          @on_first      = on_first
-          @batch_count   = 0
-          @record_count  = 0
-          @input         = input
-          @record_number = 1
-          @slice         = @input.new(first_record_number: @record_number)
+        def initialize(data_store, on_first: nil, slice_size: nil, slice_batch_size: nil)
+          @on_first         = on_first
+          @record_count     = 0
+          @data_store       = data_store
+          @slice_size       = slice_size || @data_store.slice_size
+          @slice_batch_size = slice_batch_size || 20
+          @batch            = []
+          @batch_count      = 0
+          new_slice
         end
         def <<(line)
-          @record_number += 1
           if @on_first
             @on_first.call(line)
             @on_first = nil
             return self
           end
           @slice << line
-          @batch_count  += 1
           @record_count += 1
-          if @batch_count >= @input.slice_size
-            @input.insert(@slice)
-            @batch_count = 0
-            @slice       = @input.new(first_record_number: @record_number)
+          if @slice.size >= @slice_size
+            save_slice
+            new_slice
           end
           self
         end
-        def close
-          @input.insert(@slice) if @slice.size.positive?
+        def flush
+          if @slice_batch_size
+            @batch << @slice if @slice.size.positive?
+            @data_store.insert_many(@batch)
+            @batch       = []
+            @batch_count = 0
+          elsif @slice.size.positive?
+            @data_store.insert(@slice)
+          end
+        end
+        def new_slice
+          @slice = @data_store.new(first_record_number: @record_count + 1)
+        end
+        def save_slice
+          return flush unless @slice_batch_size
+          @batch_count += 1
+          return flush if @batch_count >= @slice_batch_size
+          @batch << @slice
         end
       end
     end

data/lib/rocket_job/sliced.rb CHANGED Viewed

@@ -2,6 +2,7 @@ module RocketJob
   module Sliced
     autoload :BZip2OutputSlice, "rocket_job/sliced/bzip2_output_slice"
     autoload :CompressedSlice, "rocket_job/sliced/compressed_slice"
+    autoload :EncryptedBZip2OutputSlice, "rocket_job/sliced/encrypted_bzip2_output_slice"
     autoload :EncryptedSlice, "rocket_job/sliced/encrypted_slice"
     autoload :Input, "rocket_job/sliced/input"
     autoload :Output, "rocket_job/sliced/output"
@@ -13,24 +14,5 @@ module RocketJob
       autoload :Input, "rocket_job/sliced/writer/input"
       autoload :Output, "rocket_job/sliced/writer/output"
     end
-    # Returns [RocketJob::Sliced::Slices] for the relevant direction and category.
-    def self.factory(direction, category, job)
-      collection_name = "rocket_job.#{direction}s.#{job.id}"
-      collection_name << ".#{category.name}" unless category.name == :main
-      case direction
-      when :input
-        RocketJob::Sliced::Input.new(
-          collection_name: collection_name,
-          slice_class:     category.serializer_class,
-          slice_size:      category.slice_size
-        )
-      when :output
-        RocketJob::Sliced::Output.new(collection_name: collection_name, slice_class: category.serializer_class)
-      else
-        raise(ArgumentError, "Unknown direction: #{direction.inspect}")
-      end
-    end
   end
 end

data/lib/rocket_job/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module RocketJob
-  VERSION = "6.0.0.rc3".freeze
+  VERSION = "6.0.0".freeze
 end

data/lib/rocketjob.rb CHANGED Viewed

@@ -63,7 +63,6 @@ module RocketJob
     autoload :Cron,                  "rocket_job/plugins/cron"
     autoload :Document,              "rocket_job/plugins/document"
     autoload :ProcessingWindow,      "rocket_job/plugins/processing_window"
-    autoload :Restart,               "rocket_job/plugins/restart"
     autoload :Retry,                 "rocket_job/plugins/retry"
     autoload :Singleton,             "rocket_job/plugins/singleton"
     autoload :StateMachine,          "rocket_job/plugins/state_machine"
@@ -73,11 +72,12 @@ module RocketJob
   module Jobs
     autoload :ActiveJob,             "rocket_job/jobs/active_job"
+    autoload :ConversionJob,         "rocket_job/jobs/conversion_job"
     autoload :CopyFileJob,           "rocket_job/jobs/copy_file_job"
     autoload :DirmonJob,             "rocket_job/jobs/dirmon_job"
+    autoload :HousekeepingJob,       "rocket_job/jobs/housekeeping_job"
     autoload :OnDemandBatchJob,      "rocket_job/jobs/on_demand_batch_job"
     autoload :OnDemandJob,           "rocket_job/jobs/on_demand_job"
-    autoload :HousekeepingJob,       "rocket_job/jobs/housekeeping_job"
     autoload :PerformanceJob,        "rocket_job/jobs/performance_job"
     autoload :SimpleJob,             "rocket_job/jobs/simple_job"
     autoload :UploadFileJob,         "rocket_job/jobs/upload_file_job"

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rocketjob
 version: !ruby/object:Gem::Version
-  version: 6.0.0.rc3
+  version: 6.0.0
 platform: ruby
 authors:
 - Reid Morrison
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2021-06-23 00:00:00.000000000 Z
+date: 2021-08-25 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: aasm
@@ -58,14 +58,14 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '1.6'
+        version: '1.9'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '1.6'
+        version: '1.9'
 - !ruby/object:Gem::Dependency
   name: mongoid
   requirement: !ruby/object:Gem::Requirement
@@ -134,9 +134,6 @@ files:
 - lib/rocket_job/batch/results.rb
 - lib/rocket_job/batch/state_machine.rb
 - lib/rocket_job/batch/statistics.rb
-- lib/rocket_job/batch/tabular.rb
-- lib/rocket_job/batch/tabular/input.rb
-- lib/rocket_job/batch/tabular/output.rb
 - lib/rocket_job/batch/throttle.rb
 - lib/rocket_job/batch/throttle_running_workers.rb
 - lib/rocket_job/batch/throttle_windows.rb
@@ -198,6 +195,7 @@ files:
 - lib/rocket_job/sliced.rb
 - lib/rocket_job/sliced/bzip2_output_slice.rb
 - lib/rocket_job/sliced/compressed_slice.rb
+- lib/rocket_job/sliced/encrypted_bzip2_output_slice.rb
 - lib/rocket_job/sliced/encrypted_slice.rb
 - lib/rocket_job/sliced/input.rb
 - lib/rocket_job/sliced/output.rb
@@ -233,11 +231,11 @@ required_ruby_version: !ruby/object:Gem::Requirement
       version: '2.5'
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
-  - - ">"
+  - - ">="
     - !ruby/object:Gem::Version
-      version: 1.3.1
+      version: '0'
 requirements: []
-rubygems_version: 3.2.15
+rubygems_version: 3.2.22
 signing_key:
 specification_version: 4
 summary: Ruby's missing batch processing system.

data/lib/rocket_job/batch/tabular/input.rb DELETED Viewed

@@ -1,133 +0,0 @@
-require "active_support/concern"
-module RocketJob
-  module Batch
-    class Tabular
-      # @deprecated
-      module Input
-        extend ActiveSupport::Concern
-        included do
-          warn "#{name} is using RocketJob::Batch::Tabular::Input which is deprecated"
-          field :tabular_input_header, type: Array, class_attribute: true, user_editable: true
-          field :tabular_input_format, type: Mongoid::StringifiedSymbol, default: :csv, class_attribute: true, user_editable: true
-          field :tabular_input_options, type: Hash, class_attribute: true
-          # tabular_input_mode: [:line | :array | :hash]
-          #   :line
-          #     Uploads the file a line (String) at a time for processing by workers.
-          #   :array
-          #     Parses each line from the file as an Array and uploads each array for processing by workers.
-          #   :hash
-          #     Parses each line from the file into a Hash and uploads each hash for processing by workers.
-          #   See IOStreams#each.
-          field :tabular_input_mode, type: Mongoid::StringifiedSymbol, default: :line, class_attribute: true, user_editable: true, copy_on_restart: true
-          validates_inclusion_of :tabular_input_format, in: IOStreams::Tabular.registered_formats
-          validates_inclusion_of :tabular_input_mode, in: %i[line array hash row record]
-          validate :tabular_input_header_present
-          class_attribute :tabular_input_white_list
-          class_attribute :tabular_input_required
-          class_attribute :tabular_input_skip_unknown
-          # Cleanse all uploaded data by removing non-printable characters
-          # and any characters that cannot be converted to UTF-8
-          class_attribute :tabular_input_type
-          self.tabular_input_white_list   = nil
-          self.tabular_input_required     = nil
-          self.tabular_input_skip_unknown = true
-          self.tabular_input_type         = :text
-          before_perform :tabular_input_render
-        end
-        # Extract the header line during the upload.
-        #
-        # Overrides: RocketJob::Batch::IO#upload
-        #
-        # Notes:
-        # - When supplying a block the header must be set manually
-        def upload(stream = nil, **args, &block)
-          input_stream = stream.nil? ? nil : IOStreams.new(stream)
-          if stream && (tabular_input_type == :text)
-            # Cannot change the length of fixed width lines
-            replace = tabular_input_format == :fixed ? " " : ""
-            input_stream.option_or_stream(:encode, encoding: "UTF-8", cleaner: :printable, replace: replace)
-          end
-          # If an input header is not required, then we don't extract it'
-          return super(input_stream, stream_mode: tabular_input_mode, **args, &block) unless tabular_input.header?
-          # If the header is already set then it is not expected in the file
-          if tabular_input_header.present?
-            tabular_input_cleanse_header
-            return super(input_stream, stream_mode: tabular_input_mode, **args, &block)
-          end
-          case tabular_input_mode
-          when :line
-            parse_header = lambda do |line|
-              tabular_input.parse_header(line)
-              tabular_input_cleanse_header
-              self.tabular_input_header = tabular_input.header.columns
-            end
-            super(input_stream, on_first: parse_header, stream_mode: :line, **args, &block)
-          when :array, :row
-            set_header = lambda do |row|
-              tabular_input.header.columns = row
-              tabular_input_cleanse_header
-              self.tabular_input_header = tabular_input.header.columns
-            end
-            super(input_stream, on_first: set_header, stream_mode: :array, **args, &block)
-          when :hash, :record
-            super(input_stream, stream_mode: :hash, **args, &block)
-          else
-            raise(ArgumentError, "Invalid tabular_input_mode: #{stream_mode.inspect}")
-          end
-        end
-        private
-        # Shared instance used for this slice, by a single worker (thread)
-        def tabular_input
-          @tabular_input ||= IOStreams::Tabular.new(
-            columns:          tabular_input_header,
-            allowed_columns:  tabular_input_white_list,
-            required_columns: tabular_input_required,
-            skip_unknown:     tabular_input_skip_unknown,
-            format:           tabular_input_format,
-            format_options:   tabular_input_options&.deep_symbolize_keys
-          )
-        end
-        def tabular_input_render
-          return if tabular_input_header.blank? && tabular_input.header?
-          @rocket_job_input = tabular_input.record_parse(@rocket_job_input)
-        end
-        # Cleanse custom input header if supplied.
-        def tabular_input_cleanse_header
-          ignored_columns = tabular_input.header.cleanse!
-          logger.warn("Stripped out invalid columns from custom header", ignored_columns) unless ignored_columns.empty?
-          self.tabular_input_header = tabular_input.header.columns
-        end
-        def tabular_input_header_present
-          if tabular_input_header.present? ||
-             !tabular_input.header? ||
-             (tabular_input_mode == :hash || tabular_input_mode == :record)
-            return
-          end
-          errors.add(:tabular_input_header, "is required when tabular_input_format is #{tabular_input_format.inspect}")
-        end
-      end
-    end
-  end
-end

data/lib/rocket_job/batch/tabular/output.rb DELETED Viewed

@@ -1,67 +0,0 @@
-require "active_support/concern"
-module RocketJob
-  module Batch
-    class Tabular
-      # For the simple case where all `output_categories` have the same format,
-      # If multiple output categories are used with different formats, then use IOStreams::Tabular directly
-      # instead of this plugin.
-      module Output
-        extend ActiveSupport::Concern
-        included do
-          warn "#{name} is using RocketJob::Batch::Tabular::Output which is deprecated"
-          field :tabular_output_header, type: Array, class_attribute: true, user_editable: true, copy_on_restart: true
-          field :tabular_output_format, type: Mongoid::StringifiedSymbol, default: :csv, class_attribute: true, user_editable: true, copy_on_restart: true
-          field :tabular_output_options, type: Hash, class_attribute: true
-          validates_inclusion_of :tabular_output_format, in: IOStreams::Tabular.registered_formats
-          after_perform :tabular_output_render
-        end
-        # Clear out cached tabular_output any time header or format is changed.
-        def tabular_output_header=(tabular_output_header)
-          super(tabular_output_header)
-          @tabular_output = nil
-        end
-        def tabular_output_format=(tabular_output_format)
-          super(tabular_output_format)
-          @tabular_output = nil
-        end
-        # Overrides: `RocketJob::Batch::IO#download` to add the `tabular_output_header`.
-        def download(file_name_or_io = nil, category: :main, **args, &block)
-          unless tabular_output.requires_header?(category)
-            return super(file_name_or_io, category: category, **args, &block)
-          end
-          header = tabular_output.render_header(category)
-          super(file_name_or_io, header_line: header, category: category, **args, &block)
-        end
-        private
-        # Delimited instance used for this slice, by a single worker (thread)
-        def tabular_output
-          @tabular_output ||= Tabular.new(
-            main: IOStreams::Tabular.new(
-              columns:        tabular_output_header,
-              format:         tabular_output_format,
-              format_options: tabular_output_options&.deep_symbolize_keys
-            )
-          )
-        end
-        # Render the output from the perform.
-        def tabular_output_render
-          return unless output_categories.present?
-          @rocket_job_output = tabular_output.render(@rocket_job_output)
-        end
-      end
-    end
-  end
-end