RubyGems - rocketjob - Versions diffs - 5.4.1 → 6.0.0 - Mend

rocketjob 5.4.1 → 6.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (67) hide show

checksums.yaml +4 -4
data/README.md +175 -5
data/bin/rocketjob_batch_perf +1 -1
data/bin/rocketjob_perf +1 -1
data/lib/rocket_job/batch/categories.rb +345 -0
data/lib/rocket_job/batch/io.rb +174 -106
data/lib/rocket_job/batch/model.rb +20 -68
data/lib/rocket_job/batch/performance.rb +19 -7
data/lib/rocket_job/batch/statistics.rb +34 -12
data/lib/rocket_job/batch/throttle_running_workers.rb +2 -6
data/lib/rocket_job/batch/worker.rb +31 -26
data/lib/rocket_job/batch.rb +3 -1
data/lib/rocket_job/category/base.rb +81 -0
data/lib/rocket_job/category/input.rb +170 -0
data/lib/rocket_job/category/output.rb +34 -0
data/lib/rocket_job/cli.rb +25 -17
data/lib/rocket_job/dirmon_entry.rb +23 -13
data/lib/rocket_job/event.rb +1 -1
data/lib/rocket_job/extensions/iostreams/path.rb +32 -0
data/lib/rocket_job/extensions/mongoid/contextual/mongo.rb +2 -2
data/lib/rocket_job/extensions/mongoid/factory.rb +4 -12
data/lib/rocket_job/extensions/mongoid/stringified_symbol.rb +50 -0
data/lib/rocket_job/extensions/psych/yaml_tree.rb +8 -0
data/lib/rocket_job/extensions/rocket_job_adapter.rb +2 -2
data/lib/rocket_job/jobs/conversion_job.rb +43 -0
data/lib/rocket_job/jobs/dirmon_job.rb +25 -36
data/lib/rocket_job/jobs/housekeeping_job.rb +11 -12
data/lib/rocket_job/jobs/on_demand_batch_job.rb +24 -11
data/lib/rocket_job/jobs/on_demand_job.rb +3 -4
data/lib/rocket_job/jobs/performance_job.rb +3 -1
data/lib/rocket_job/jobs/re_encrypt/relational_job.rb +103 -96
data/lib/rocket_job/jobs/upload_file_job.rb +48 -8
data/lib/rocket_job/lookup_collection.rb +69 -0
data/lib/rocket_job/plugins/cron.rb +60 -20
data/lib/rocket_job/plugins/job/model.rb +25 -50
data/lib/rocket_job/plugins/job/persistence.rb +36 -0
data/lib/rocket_job/plugins/job/throttle.rb +2 -2
data/lib/rocket_job/plugins/job/throttle_running_jobs.rb +1 -1
data/lib/rocket_job/plugins/job/worker.rb +2 -7
data/lib/rocket_job/plugins/restart.rb +3 -103
data/lib/rocket_job/plugins/state_machine.rb +4 -3
data/lib/rocket_job/plugins/throttle_dependent_jobs.rb +37 -0
data/lib/rocket_job/ractor_worker.rb +42 -0
data/lib/rocket_job/server/model.rb +1 -1
data/lib/rocket_job/sliced/bzip2_output_slice.rb +18 -19
data/lib/rocket_job/sliced/compressed_slice.rb +3 -6
data/lib/rocket_job/sliced/encrypted_bzip2_output_slice.rb +49 -0
data/lib/rocket_job/sliced/encrypted_slice.rb +4 -6
data/lib/rocket_job/sliced/input.rb +42 -54
data/lib/rocket_job/sliced/slice.rb +12 -16
data/lib/rocket_job/sliced/slices.rb +26 -11
data/lib/rocket_job/sliced/writer/input.rb +46 -18
data/lib/rocket_job/sliced/writer/output.rb +33 -45
data/lib/rocket_job/sliced.rb +1 -74
data/lib/rocket_job/subscribers/server.rb +1 -1
data/lib/rocket_job/thread_worker.rb +46 -0
data/lib/rocket_job/throttle_definitions.rb +7 -1
data/lib/rocket_job/version.rb +1 -1
data/lib/rocket_job/worker.rb +21 -55
data/lib/rocket_job/worker_pool.rb +5 -7
data/lib/rocketjob.rb +53 -43
metadata +36 -28
data/lib/rocket_job/batch/tabular/input.rb +0 -131
data/lib/rocket_job/batch/tabular/output.rb +0 -65
data/lib/rocket_job/batch/tabular.rb +0 -56
data/lib/rocket_job/extensions/mongoid/remove_warnings.rb +0 -12
data/lib/rocket_job/jobs/on_demand_batch_tabular_job.rb +0 -28

data/lib/rocket_job/batch/performance.rb CHANGED Viewed

@@ -22,12 +22,15 @@ module RocketJob
         count_running_workers
         puts "Loading job with #{count} records/lines"
-        args = {log_level: :warn, slice_size: slice_size}
-        if defined?(::RocketJob)
-          args[:compress] = compress
-          args[:encrypt]  = encrypt
+        job                           = RocketJob::Jobs::PerformanceJob.new(log_level: :warn)
+        job.input_category.slice_size = slice_size
+        if encrypt
+          job.input_category.serializer  = :encrypt
+          job.output_category.serializer = :encrypt
+        elsif !compress
+          job.input_category.serializer  = :none
+          job.output_category.serializer = :none
         end
-        job = RocketJob::Jobs::PerformanceJob.new(args)
         job.upload do |writer|
           count.times { |i| writer << i }
         end
@@ -37,7 +40,15 @@ module RocketJob
         sleep 3 until job.reload.completed?
         duration = job.completed_at - job.started_at
-        {count: count, duration: duration, records_per_second: (count.to_f / duration).round(3), workers: workers, servers: servers, compress: compress, encrypt: encrypt}
+        {
+          count:              count,
+          duration:           duration,
+          records_per_second: (count.to_f / duration).round(3),
+          workers:            workers,
+          servers:            servers,
+          compress:           compress,
+          encrypt:            encrypt
+        }
       end
       # Export the Results hash to a CSV file
@@ -60,7 +71,8 @@ module RocketJob
           o.on("-m", "--mongo MONGO_CONFIG_FILE_NAME", "Location of mongoid.yml config file") do |arg|
             self.mongo_config = arg
           end
-          o.on("-e", "--environment ENVIRONMENT", "The environment to run the app on (Default: RAILS_ENV || RACK_ENV || development)") do |arg|
+          o.on("-e", "--environment ENVIRONMENT",
+               "The environment to run the app on (Default: RAILS_ENV || RACK_ENV || development)") do |arg|
             self.environment = arg
           end
           o.on("-z", "--compress", "Turn on compression") do

data/lib/rocket_job/batch/statistics.rb CHANGED Viewed

@@ -2,7 +2,11 @@ require "active_support/concern"
 module RocketJob
   module Batch
-    # Allow statistics to be gathered while a batch job is running
+    # Allow statistics to be gathered while a batch job is running.
+    #
+    # Notes:
+    # - Statistics for successfully processed records within a slice are saved.
+    # - Statistics gathered during a perform that then results in an exception are discarded.
     module Statistics
       extend ActiveSupport::Concern
@@ -45,34 +49,52 @@ module RocketJob
           last  = paths.pop
           return unless last
-          target = paths.inject(in_memory) { |target, key| target.key?(key) ? target[key] : target[key] = Hash.new(0) }
-          target[last] += increment
+          last_target = paths.inject(in_memory) do |target, sub_key|
+            target.key?(sub_key) ? target[sub_key] : target[sub_key] = Hash.new(0)
+          end
+          last_target[last] += increment
         end
       end
       included do
         field :statistics, type: Hash, default: -> { Hash.new(0) }
-        around_slice :statistics_capture
+        around_slice :rocket_job_statistics_capture
+        after_perform :rocket_job_statistics_commit
       end
       # Increment a statistic
       def statistics_inc(key, increment = 1)
         return if key.nil? || key == ""
-        # Being called within tests outside of a perform
-        @slice_statistics ||= Stats.new(new_record? ? statistics : nil)
-        key.is_a?(Hash) ? @slice_statistics.inc(key) : @slice_statistics.inc_key(key, increment)
+        (@rocket_job_perform_statistics ||= []) << (key.is_a?(Hash) ? key : [key, increment])
       end
       private
-      # Capture the number of successful and failed tradelines
-      # as well as those with notices and alerts.
-      def statistics_capture
-        @slice_statistics = Stats.new(new_record? ? statistics : nil)
+      def rocket_job_statistics_capture
+        @rocket_job_perform_statistics = nil
+        @rocket_job_slice_statistics   = nil
         yield
-        collection.update_one({_id: id}, {"$inc" => @slice_statistics.stats}) unless @slice_statistics.empty?
+      ensure
+        if @rocket_job_slice_statistics && !@rocket_job_slice_statistics.empty?
+          collection.update_one({_id: id}, {"$inc" => @rocket_job_slice_statistics.stats})
+        end
+      end
+      def rocket_job_slice_statistics
+        @rocket_job_slice_statistics ||= Stats.new(new_record? ? statistics : nil)
+      end
+      # Apply stats gathered during the perform to the slice level stats
+      def rocket_job_statistics_commit
+        return unless @rocket_job_perform_statistics
+        @rocket_job_perform_statistics.each do |key|
+          key.is_a?(Hash) ? rocket_job_slice_statistics.inc(key) : rocket_job_slice_statistics.inc_key(*key)
+        end
+        @rocket_job_perform_statistics = nil
       end
       # Overrides RocketJob::Batch::Logger#rocket_job_batch_log_payload

data/lib/rocket_job/batch/throttle_running_workers.rb CHANGED Viewed

@@ -37,15 +37,11 @@ module RocketJob
         validates :throttle_running_workers, numericality: {greater_than_or_equal_to: 0}, allow_nil: true
         define_batch_throttle :throttle_running_workers_exceeded?, filter: :throttle_filter_id
-        # Deprecated. For backward compatibility.
-        alias_method :throttle_running_slices, :throttle_running_workers
-        alias_method :throttle_running_slices=, :throttle_running_workers=
       end
       private
-      # Returns [Boolean] whether the throttle for this job has been exceeded
+      # Returns [true|false] whether the throttle for this job has been exceeded
       def throttle_running_workers_exceeded?(slice)
         return false unless throttle_running_workers&.positive?
@@ -57,7 +53,7 @@ module RocketJob
       # Allows another job with a higher priority to start even though this one is running already
       # @overrides RocketJob::Plugins::Job::ThrottleRunningJobs#throttle_running_jobs_base_query
       def throttle_running_jobs_base_query
-        query = super
+        query                = super
         query[:priority.lte] = priority if throttle_running_workers&.positive?
         query
       end

data/lib/rocket_job/batch/worker.rb CHANGED Viewed

@@ -23,9 +23,6 @@ module RocketJob
       #
       # Slices are destroyed after their records are successfully processed
       #
-      # Results are stored in the output collection if `collect_output?`
-      # `nil` results from workers are kept if `collect_nil_output`
-      #
       # If an exception was thrown the entire slice of records is marked as failed.
       #
       # Thread-safe, can be called by multiple threads at the same time
@@ -40,7 +37,8 @@ module RocketJob
         SemanticLogger.named_tagged(job: id.to_s) do
           until worker.shutdown?
-            if slice = input.next_slice(worker.name)
+            slice = input.next_slice(worker.name)
+            if slice
               # Grab a slice before checking the throttle to reduce concurrency race condition.
               return true if slice.fail_on_exception!(re_raise_exceptions) { rocket_job_batch_throttled?(slice, worker) }
               next if slice.failed?
@@ -69,6 +67,8 @@ module RocketJob
       # Returns [Integer] the number of records processed in the slice
       #
       # Note: The slice will be removed from processing when this method completes
+      #
+      # @deprecated Please open a ticket if you need this behavior.
       def work_first_slice(&block)
         raise "#work_first_slice can only be called from within before_batch callbacks" unless sub_state == :before
@@ -97,8 +97,8 @@ module RocketJob
         servers = []
         case sub_state
         when :before, :after
-          unless server_name && !worker_on_server?(server_name)
-            servers << ActiveWorker.new(worker_name, started_at, self) if running?
+          if running? && (server_name.nil? || worker_on_server?(server_name))
+            servers << ActiveWorker.new(worker_name, started_at, self)
           end
         when :processing
           query = input.running
@@ -143,19 +143,23 @@ module RocketJob
       # Perform individual slice without callbacks
       def rocket_job_perform_slice(slice, &block)
-        count = 0
-        RocketJob::Sliced::Writer::Output.collect(self, slice) do |writer|
-          records = slice.records
-          # Skip records already processed, if any.
-          # slice.processing_record_number ||= 0
-          # TODO: Must append to existing output slices before this can be enabled.
-          # if !collect_output && (slice.processing_record_number > 1)
-          #   records = records[slice.processing_record_number - 1..-1]
-          # end
-          # Until the changes above have been implemented, reprocess all records in the slice.
-          slice.processing_record_number = 0
+        slice.processing_record_number ||= 0
+        append                         = false
+        # Skip processed records in this slice if it has no output categories.
+        records =
+          if slice.processing_record_number.to_i > 1
+            append = true
+            logger.info("Resuming previously incomplete slice from record number #{slice.processing_record_number}")
+            slice.records[slice.processing_record_number - 1..-1]
+          else
+            # Reprocess all records in this slice.
+            slice.processing_record_number = 0
+            slice.records
+          end
+        count = 0
+        RocketJob::Sliced::Writer::Output.collect(self, input_slice: slice, append: append) do |writer|
           records.each do |record|
             slice.processing_record_number += 1
             SemanticLogger.named_tagged(record: slice.current_record_number) do
@@ -174,8 +178,8 @@ module RocketJob
         return block_given? ? yield(record) : perform(record) if _perform_callbacks.empty?
         # @rocket_job_input and @rocket_job_output can be modified by before/around callbacks
-        @rocket_job_input         = record
-        @rocket_job_output        = nil
+        @rocket_job_input  = record
+        @rocket_job_output = nil
         run_callbacks(:perform) do
           @rocket_job_output =
@@ -186,9 +190,9 @@ module RocketJob
             end
         end
-        @rocket_job_input         = nil
-        result                    = @rocket_job_output
-        @rocket_job_output        = nil
+        @rocket_job_input  = nil
+        result             = @rocket_job_output
+        @rocket_job_output = nil
         result
       end
@@ -244,7 +248,7 @@ module RocketJob
         unless new_record?
           # Fail job iff no other worker has already finished it
           # Must set write concern to at least 1 since we need the nModified back
-          result = self.class.with(write: {w: 1}) do |query|
+          result   = self.class.with(write: {w: 1}) do |query|
             query.
               where(id: id, state: :running, sub_state: :processing).
               update({"$set" => {state: :failed, worker_name: worker_name}})
@@ -305,11 +309,12 @@ module RocketJob
       # Run Batch before and after callbacks
       def rocket_job_batch_callbacks(worker)
         # If this is the first worker to pickup this job
-        if sub_state == :before
+        case sub_state
+        when :before
           rocket_job_batch_run_before_callbacks
           # Check for 0 record jobs
           rocket_job_batch_complete?(worker.name) if running?
-        elsif sub_state == :after
+        when sub_state == :after
           rocket_job_batch_run_after_callbacks
         end
       end

data/lib/rocket_job/batch.rb CHANGED Viewed

@@ -7,6 +7,8 @@ require "rocket_job/batch/state_machine"
 require "rocket_job/batch/throttle"
 require "rocket_job/batch/throttle_running_workers"
 require "rocket_job/batch/worker"
+# Ensure after_perform is run first and #upload override is after IO#upload is defined.
+require "rocket_job/batch/categories"
 module RocketJob
   module Batch
@@ -17,6 +19,7 @@ module RocketJob
     include Callbacks
     include Logger
     include Worker
+    include Categories
     include Throttle
     include ThrottleRunningWorkers
     include IO
@@ -27,6 +30,5 @@ module RocketJob
     autoload :ThrottleWindows, "rocket_job/batch/throttle_windows"
     autoload :Result, "rocket_job/batch/result"
     autoload :Results, "rocket_job/batch/results"
-    autoload :Tabular, "rocket_job/batch/tabular"
   end
 end

data/lib/rocket_job/category/base.rb ADDED Viewed

@@ -0,0 +1,81 @@
+require "active_support/concern"
+module RocketJob
+  module Category
+    # Define the layout for each category of input or output data
+    module Base
+      extend ActiveSupport::Concern
+      included do
+        field :name, type: ::Mongoid::StringifiedSymbol, default: :main
+        # Whether to compress, encrypt, or use the bzip2 serialization for data in this category.
+        field :serializer, type: ::Mongoid::StringifiedSymbol, default: :compress
+        # The header columns when the file does not include a header row.
+        # Note:
+        # - All column names must be strings so that it can be serialized into MongoDB.
+        field :columns, type: Array
+        # On an input collection `format` specifies the format of the input data so that it can be
+        # transformed into a Hash when passed into the `#perform` method.
+        #
+        # On an output collection `format` specifies the format to transform the output hash into.
+        #
+        # `:auto` it uses the `file_name` on this category to determine the format.
+        # `nil` no transformation is performed on the data returned by the `#perform` method.
+        # Any other format supported by IOStreams, for example: csv, :hash, :array, :json, :psv, :fixed
+        #
+        # Default: `nil`
+        field :format, type: ::Mongoid::StringifiedSymbol
+        validates_inclusion_of :format, in: [nil, :auto] + IOStreams::Tabular.registered_formats
+        # Any specialized format specific options. For example, `:fixed` format requires a `:layout`.
+        field :format_options, type: Hash
+        # When `:format` is not supplied the file name can be used to infer the required format.
+        # Optional.
+        # Default: nil
+        field :file_name, type: IOStreams::Path
+      end
+      # Return which slice serializer class to use that matches the current options.
+      def serializer_class
+        case serializer
+        when :none
+          Sliced::Slice
+        when :compress
+          Sliced::CompressedSlice
+        when :encrypt
+          Sliced::EncryptedSlice
+        when :bzip2, :bz2
+          Sliced::BZip2OutputSlice
+        when :encrypted_bz2
+          Sliced::EncryptedBZip2OutputSlice
+        else
+          raise(ArgumentError, "serialize: #{serializer.inspect} must be :none, :compress, :encrypt, :bz2, or :encrypted_bz2")
+        end
+      end
+      def tabular
+        @tabular ||= IOStreams::Tabular.new(
+          columns:        columns,
+          format:         format == :auto ? nil : format,
+          format_options: format_options&.deep_symbolize_keys,
+          file_name:      file_name
+        )
+      end
+      # Returns [true|false] whether this category has the attributes defined for tabular to work.
+      def tabular?
+        format.present?
+      end
+      def build_collection_name(direction, job)
+        collection_name = "rocket_job.#{direction}s.#{job.id}"
+        collection_name << ".#{name}" unless name == :main
+        collection_name
+      end
+    end
+  end
+end

data/lib/rocket_job/category/input.rb ADDED Viewed

@@ -0,0 +1,170 @@
+module RocketJob
+  module Category
+    # Define the layout for each category of input or output data
+    class Input
+      include SemanticLogger::Loggable
+      include Plugins::Document
+      include Category::Base
+      embedded_in :job, class_name: "RocketJob::Job", inverse_of: :input_categories
+      # Slice size for this input collection
+      field :slice_size, type: Integer, default: 100
+      validates_presence_of :slice_size
+      #
+      # The fields below only apply if the field `format` has been set:
+      #
+      # List of columns to allow.
+      # Default: nil ( Allow all columns )
+      # Note:
+      #   When supplied any columns that are rejected will be returned in the cleansed columns
+      #   as nil so that they can be ignored during processing.
+      field :allowed_columns, type: Array
+      # List of columns that must be present, otherwise an Exception is raised.
+      field :required_columns, type: Array
+      # Whether to skip unknown columns in the uploaded file.
+      # Ignores any column that was not found in the `allowed_columns` list.
+      #
+      # false:
+      #   Raises IOStreams::Tabular::InvalidHeader when a column is supplied that is not in `allowed_columns`.
+      # true:
+      #   Ignore additional columns in a file that are not listed in `allowed_columns`
+      #   Job processing will skip the additional columns entirely as if they were not supplied at all.
+      #   A warning is logged with the names of the columns that were ignored.
+      #   The `columns` field will list all skipped columns with a nil value so that downstream workers
+      #   know to ignore those columns.
+      #
+      # Notes:
+      # - Only applicable when `allowed_columns` has been set.
+      # - Recommended to leave as `false` otherwise a misspelled column can result in missed columns.
+      field :skip_unknown, type: ::Mongoid::Boolean, default: false
+      validates_inclusion_of :skip_unknown, in: [true, false]
+      # When `#upload` is called with a file_name, it uploads the file using any of the following approaches:
+      # :line
+      #   Uploads the file a line (String) at a time for processing by workers.
+      #   This is the default behavior and is the most performant since it leaves the parsing of each line
+      #   up to the workers themselves.
+      # :array
+      #   Parses each line from the file as an Array and uploads each array for processing by workers.
+      #   Every line in the input file is parsed and converted into an array before uploading.
+      #   This approach ensures that the entire file is valid before starting to process it.
+      #   Ideal for when files may contain invalid lines.
+      #   Not recommended for large files since the CSV or other parsing is performed sequentially during the
+      #   upload process.
+      # :hash
+      #   Parses each line from the file into a Hash and uploads each hash for processing by workers.
+      #   Similar to :array above in that the entire file is parsed before processing is started.
+      #   Slightly less efficient than :array since it stores every record as a hash with both the key and value.
+      #
+      # Recommend using :array when the entire file must be parsed/validated before processing is started, and
+      # upload time is not important.
+      # See IOStreams#each for more details.
+      field :mode, type: ::Mongoid::StringifiedSymbol, default: :line
+      validates_inclusion_of :mode, in: %i[line array hash]
+      # When reading tabular input data (e.g. CSV, PSV) the header is automatically cleansed.
+      # This removes issues when the input header varies in case and other small ways. See IOStreams::Tabular
+      # Currently Supported:
+      #   :default
+      #     Each column is cleansed as follows:
+      #     - Leading and trailing whitespace is stripped.
+      #     - All characters converted to lower case.
+      #     - Spaces and '-' are converted to '_'.
+      #     - All characters except for letters, digits, and '_' are stripped.
+      #   :none
+      #     Do not cleanse the column names supplied in the header row.
+      #
+      # Note: Submit a ticket if you have other cleansers that you want added.
+      field :header_cleanser, type: ::Mongoid::StringifiedSymbol, default: :default
+      validates :header_cleanser, inclusion: %i[default none]
+      validates_inclusion_of :serializer, in: %i[none compress encrypt]
+      # Cleanses the header column names when `header_cleanser` is `:default`.
+      # Returns without doing anything for any other `header_cleanser` value.
+      #
+      # Logs a warning with the list returned by `tabular.header.cleanse!` when that list is not empty,
+      # then replaces `columns` with the header columns held by tabular.
+      def cleanse_header!
+        return unless header_cleanser == :default
+        ignored_columns = tabular.header.cleanse!
+        logger.warn("Stripped out invalid columns from custom header", ignored_columns) unless ignored_columns.empty?
+        self.columns = tabular.header.columns
+      end
+      def tabular
+        @tabular ||= IOStreams::Tabular.new(
+          columns:          columns,
+          format:           format == :auto ? nil : format,
+          format_options:   format_options&.deep_symbolize_keys,
+          file_name:        file_name,
+          allowed_columns:  allowed_columns,
+          required_columns: required_columns,
+          skip_unknown:     skip_unknown
+        )
+      end
+      def data_store(job)
+        RocketJob::Sliced::Input.new(
+          collection_name: build_collection_name(:input, job),
+          slice_class:     serializer_class,
+          slice_size:      slice_size
+        )
+      end
+      # Returns [IOStreams::Path] of file to upload.
+      # Auto-detects file format from file name when format is :auto.
+      def upload_path(stream = nil, original_file_name: nil)
+        unless stream || file_name
+          raise(ArgumentError, "Either supply a file name to upload, or set input_collection.file_name first")
+        end
+        path           = IOStreams.new(stream || file_name)
+        path.file_name = original_file_name if original_file_name
+        self.file_name = path.file_name
+        # Auto detect the format based on the upload file name if present.
+        if format == :auto
+          self.format = path.format || :csv
+          # Rebuild tabular with new values.
+          @tabular = nil
+        end
+        # Remove non-printable characters from tabular input formats.
+        if tabular?
+          # Cannot change the length of fixed width lines.
+          replace = format == :fixed ? " " : ""
+          path.option_or_stream(:encode, encoding: "UTF-8", cleaner: :printable, replace: replace)
+        end
+        path
+      end
+      # Return a lambda to extract the header row from the uploaded file.
+      def extract_header_callback(on_first)
+        return on_first unless tabular? && tabular.header?
+        case mode
+        when :line
+          lambda do |line|
+            tabular.parse_header(line)
+            cleanse_header!
+            self.columns = tabular.header.columns
+            # Call chained on_first if present
+            on_first&.call(line)
+          end
+        when :array
+          lambda do |row|
+            tabular.header.columns = row
+            cleanse_header!
+            self.columns = category.tabular.header.columns
+            # Call chained on_first if present
+            on_first&.call(line)
+          end
+        end
+      end
+    end
+  end
+end