RubyGems - bulk-processor - Versions diffs - 0.2.0 → 0.3.0 - Mend

bulk-processor 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

checksums.yaml +4 -4
data/README.md +59 -28
data/lib/bulk_processor/csv_processor/no_op_handler.rb +15 -0
data/lib/bulk_processor/csv_processor/no_op_post_processor.rb +16 -0
data/lib/bulk_processor/csv_processor/result.rb +20 -0
data/lib/bulk_processor/csv_processor/row_processor.rb +61 -0
data/lib/bulk_processor/csv_processor.rb +36 -21
data/lib/bulk_processor/stream_encoder.rb +1 -1
data/lib/bulk_processor/validated_csv.rb +7 -6
data/lib/bulk_processor/version.rb +1 -1
metadata +6 -3
data/lib/bulk_processor/no_op_handler.rb +0 -12

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 56580f10538cc75b8fbfb8006248905cb0cfeb71
-  data.tar.gz: 3ce3bc03cf878836f5a768a3be0fad169bfabdc6
+  metadata.gz: d7b6dc445bf46f3f35477510449e5f56aed3f854
+  data.tar.gz: ef53879ba52375923ba2b55460b5fb217665d68c
 SHA512:
-  metadata.gz: 1c4fa2fdf92ec038c73c6eec886a682e9cb82f8fdb1ef0ade84a0eefa0eb1fffbd29cd510c3c555c6456b5ee7e600c2e52999031169dfbd773a73a593ee543ee
-  data.tar.gz: b32c0129b95fa2a44e7d2dca5455bca2a60137aac6d0f97b260b60e90d8c79d28acd4e91a12f9d9a6b14e0a1ac4735d505b31bea182499f14c9c19991fbfa930
+  metadata.gz: a4b9727d06824b5cf68789a4f3fb00d5b039d079201c33f4f68bfdb9ee720a9517df2e78d12614dcf1f1ca2673e7580dfd914b8963d3b463747026ce4681fec6
+  data.tar.gz: 67c87c2517515fd5912d05f494ce2d3454409cdb9417466a112b48be00ab9a4d9cfb1b72fe57f4139403bbdc9980803f5e3140405efdae5c1c1ab54339f95e42

data/README.md CHANGED Viewed

@@ -52,12 +52,12 @@ class PetCSVProcessor
     ['favorite_toy', 'talents']
   end
-  def initialize(records, payload:)
+  def initialize(csv, payload:)
     # Assign instance variables and do any other setup
   end
   def start
-    # Process the records
+    # Process the CSV
   end
 end
 ```
@@ -66,7 +66,7 @@ To account for a common use case, a base `BulkProcessor::CSVProcessor` class is
 though it must be explicitly required. This base class can be subclassed to build a CSV processor.
 This base class implements the initializer and `#start` methods and returns an empty set for `.optional_columns`.
-The `#start` method iterates over each record, processes it using a `RowProcessor`,
+The `#start` method iterates over each row, processes it using a `RowProcessor`,
 accumulates the results, which are passed off to a `Handler`. An example
 implementation could look like:
@@ -95,56 +95,76 @@ class PetCSVProcessor < BulkProcessor::CSVProcessor
     PetRowProcessor
   end
+  # @return [PostProcessor] a class that implements the PostProcessor role
+  def self.post_processor_class
+    PetPostProcessor
+  end
   # @return [Handler] a class that implements the Handler role
   def self.handler_class
     PetHandler
   end
 end
+```
-class PetRowProcessor
-  def initialize(record, payload:)
-    # Assign instance variables and do any other setup
-  end
+```ruby
+class PetRowProcessor < BulkProcessor::CSVProcessor::RowProcessor
   # Process the row, e.g. create a new record in the DB, send an email, etc
   def process!
-    pet = Pet.new(record)
+    pet = Pet.new(row)
     if pet.save
-      @success = true
+      self.successful = true
     else
-      @messages = pet.errors.full_messages
+      messages.concat(pet.errors.full_messages)
     end
   end
-  # @return [true|false] true iff the item was processed completely
-  def success?
-    @success == true
+  # Setting these allows us to identify error messages by these key/values for
+  # a row, rather than using the row number
+  def primary_keys
+    ['species', 'name']
   end
+end
+```
-  # @return [Array<String>] list of messages for this item to pass back to the
-  #   completion handler.
-  def messages
-    @messages || []
+```ruby
+class PetPostProcessor
+  attr_reader :results
+  def initialize(row_processors)
+    # Assign instance variables and do any other setup
+  end
+  def start
+    cat_count = 0
+    @results = []
+    row_processors.each do |row_processor|
+      cat_count += 1 if row_processor.cat?
+    end
+    if cat_count > 2
+      @results << BulkProcessor::CSVProcessor::Result.new(messages: ['Too many cats!'],
+                                                          successful: false)
+    end
   end
 end
+```
+```ruby
 class PetHandler
   # @param payload [Hash] the payload passed into 'BulkProcessor.process', can
   #   be used to pass metadata around, e.g. the email address to send a
   #   completion report to
-  # @param successes [Hash<Fixnum, Array<String>>] keys are all successfully
-  #   processed rows, indexed from 0 (row 1 in the CSV is index 0 in this hash)
-  #   The values are arrays of messages the item processor generated for the row
-  #   (may be empty), e.g. { 0 => [], 1 => ['pet ID = 22 created'] }
-  # @param errors [Hash<Fixnum, Array<String>>] similar structure to successes,
-  #   but rows that were not completed successfully.
-  def initialize(payload:, successes:, errors:)
+  # @param results [Array<BulkProcessor::CSVProcessor::Result>] results
+  #   for processing the rows (there will be one per row in the CSV plus zero
+  #   or more from post-processing)
+  def initialize(payload:, results:)
     # Assign instance variables and do any other setup
   end
   # Notify the owner that their pets were processed
   def complete!
-    OwnerMailer.competed(successes, errors)
+    OwnerMailer.completed(results, payload)
   end
   # Notify the owner that processing failed
@@ -152,7 +172,7 @@ class PetHandler
   # @param fatal_error [StandardError] if nil, then all rows were processed,
   #   else the error that was raised is passed in here
   def fail!(fatal_error)
-    OwnerMailer.failed(fatal_error)
+    OwnerMailer.failed(fatal_error, payload)
   end
 end
 ```
@@ -163,7 +183,7 @@ Putting it all together
 processor = BulkProcessor.new(
               stream: file_stream,
               processor_class: PetCSVProcessor,
-              payload: {recipient: current_user.email}
+              payload: { recipient: current_user.email }
             )
 if processor.start
   # The job has been enqueued, go get a coffee and wait
@@ -173,6 +193,17 @@ else
 end
 ```
+### BulkProcessor::CSVProcessor::Result
+The result instances passed from BulkProcessor::CSVProcessor to the Handler
+respond to the following messages:
+* `#messages [Array<String>]` - zero or more messages generated when processing the row
+* `#row_num [Fixnum|nil]` - the CSV row number (starting with 2) or nil if result is from post-processing
+* `#primary_attributes [Hash]` - a set of values that can be used to identify which row the messages are for.
+You must override `#primary_keys` to use this.
+* `#successful?` - true iff the processing happened with no errors
 ## Development
 After checking out the repo, run `bin/setup` to install dependencies. Then, run `bin/console` for an interactive prompt that will allow you to experiment.

data/lib/bulk_processor/csv_processor/no_op_handler.rb ADDED Viewed

@@ -0,0 +1,15 @@
+class BulkProcessor
+  class CSVProcessor
+    # A null object implementation of the Handler role
+    class NoOpHandler
+      def initialize(payload:, results:)
+      end
+      def complete!
+      end
+      def fail!(fatal_error)
+      end
+    end
+  end
+end

data/lib/bulk_processor/csv_processor/no_op_post_processor.rb ADDED Viewed

@@ -0,0 +1,16 @@
+class BulkProcessor
+  class CSVProcessor
+    # A null object implementation of the PostProcessor role
+    class NoOpPostProcessor
+      def initialize(row_processors)
+      end
+      def start
+      end
+      def results
+        []
+      end
+    end
+  end
+end

data/lib/bulk_processor/csv_processor/result.rb ADDED Viewed

@@ -0,0 +1,20 @@
+class BulkProcessor
+  class CSVProcessor
+    # A container for messages generated by processing that need to be passed
+    # back to the handler.
+    class Result
+      attr_reader :messages, :primary_attributes, :row_num
+      def initialize(messages:, successful:, row_num: nil, primary_attributes: nil)
+        @messages = messages
+        @successful = successful
+        @row_num = row_num
+        @primary_attributes = primary_attributes
+      end
+      def successful?
+        @successful
+      end
+    end
+  end
+end

data/lib/bulk_processor/csv_processor/row_processor.rb ADDED Viewed

@@ -0,0 +1,61 @@
+class BulkProcessor
+  class CSVProcessor
+    # An abstract implementation of the RowProcessor role. This class implements
+    # `#result` by returning a `Result` instance. To subclass, just implement
+    # `#process!` to handle the row.
+    #
+    # The row will be considered a failure by default. After a row is successfully
+    # processed, set `self.successful = true`. If there are any messages that
+    # should be passed back to the Handler, add them to the `#messages` array.
+    #
+    # You can optionally override `#primary_keys` so that the result returned
+    # has more natural identifiers than just the row number. For example, if you
+    # set this to ['species', 'name'] (for the PetRowProcessor example from
+    # the README), the result would have `#primary_attributes` like
+    #
+    #  { 'species' => 'dog', 'name' => 'Fido' }
+    #
+    class RowProcessor
+      attr_reader :messages
+      def initialize(row, row_num:, payload:)
+        @row = row
+        @row_num = row_num
+        @payload = payload
+        @successful = false
+        @messages = []
+      end
+      def process!
+        raise NotImplementedError,
+              "#{self.class.name} must implement #{__method__}"
+      end
+      def successful?
+        @successful
+      end
+      def result
+        Result.new(messages: messages, row_num: row_num,
+                   primary_attributes: primary_attrs, successful: @successful)
+      end
+      private
+      attr_reader :row, :row_num, :payload
+      attr_writer :successful
+      # Override this with an array of column names that can be used to uniquely
+      # identify a row, if you'd prefer to not identify rows by row number
+      def primary_keys
+        []
+      end
+      # @return [Hash<String, String>] the set of primary keys and their values
+      #   for this row
+      def primary_attrs
+        row.slice(*primary_keys)
+      end
+    end
+  end
+end

data/lib/bulk_processor/csv_processor.rb CHANGED Viewed

@@ -1,4 +1,7 @@
-require_relative 'no_op_handler'
+require_relative 'csv_processor/no_op_handler'
+require_relative 'csv_processor/no_op_post_processor'
+require_relative 'csv_processor/result'
+require_relative 'csv_processor/row_processor'
 class BulkProcessor
   # An abstract implementation of the CSVProcessor role. Provides
@@ -9,7 +12,7 @@ class BulkProcessor
   #
   # The common use case covered by this class' implementation of `#start` is
   #
-  #   1. Iteratively process each record
+  #   1. Iteratively process each row
   #   2. Accumulate the results (did the processing succeed? what were the error
   #      messages?)
   #   3. Send the results to an instance of the Handler role.
@@ -26,6 +29,12 @@ class BulkProcessor
   # The `required_columns` method must still be implemented in a subclass
   #
   class CSVProcessor
+    # Since the first data row in a CSV is row 2, but will have index 0 in
+    # the items array, we need to offset the index by 2 when we add a row
+    # identifier to all error messages.
+    FIRST_ROW_OFFSET = 2
+    private_constant :FIRST_ROW_OFFSET
     # @return [RowProcessor] a class that implements the RowProcessor interface
     def self.row_processor_class
       raise NotImplementedError,
@@ -37,6 +46,11 @@ class BulkProcessor
       NoOpHandler
     end
+    # @return [PostProcessor] a class that implements the PostProcessor role
+    def self.post_processor_class
+      NoOpPostProcessor
+    end
     # @return [Array<String>] column headers that must be present
     def self.required_columns
       raise NotImplementedError,
@@ -51,27 +65,22 @@ class BulkProcessor
       []
     end
-    def initialize(records, payload: {})
-      @records = records
+    def initialize(csv, payload: {})
       @payload = payload
-      @successes = {}
-      @errors = {}
+      @row_processors = csv.map.with_index(&method(:row_processor))
+      @results = []
     end
-    # Iteratively process each record, accumulate the results, and pass those
-    # off to the handler. If an unrescued error is raised for any record,
-    # processing will halt for all remaining records and the `#fail!` will be
+    # Iteratively process each row, accumulate the results, and pass those
+    # off to the handler. If an unrescued error is raised for any row,
+    # processing will halt for all remaining rows and `#fail!` will be
     # invoked on the handler.
     def start
-      records.each_with_index do |record, index|
-        processor = row_processor(record)
+      row_processors.each do |processor|
         processor.process!
-        if processor.success?
-          successes[index] = processor.messages
-        else
-          errors[index] = processor.messages
-        end
+        results << processor.result
       end
+      post_processes
       handler.complete!
     rescue Exception => exception
       handler.fail!(exception)
@@ -84,15 +93,21 @@ class BulkProcessor
     private
-    attr_reader :records, :payload, :successes, :errors
+    attr_reader :row_processors, :payload, :results
     def handler
-      self.class.handler_class.new(payload: payload, successes: successes,
-                                   errors: errors)
+      self.class.handler_class.new(payload: payload, results: results)
+    end
+    def row_processor(row, index)
+      row_num = index + FIRST_ROW_OFFSET
+      self.class.row_processor_class.new(row, row_num: row_num, payload: payload)
     end
-    def row_processor(record)
-      self.class.row_processor_class.new(record, payload: payload)
+    def post_processes
+      post_processor = self.class.post_processor_class.new(row_processors)
+      post_processor.start
+      results.concat(post_processor.results)
     end
   end
 end

data/lib/bulk_processor/stream_encoder.rb CHANGED Viewed

@@ -2,7 +2,7 @@ class BulkProcessor
   # Force encode a stream into UTF-8 by removing invalid and undefined
   # characters.
   class StreamEncoder
-    ENCODING_OPTIONS = { undef: :replace, invalid: :replace, replace: '' }
+    ENCODING_OPTIONS = { undef: :replace, invalid: :replace, replace: '' }.freeze
     private_constant :ENCODING_OPTIONS
     def initialize(stream)

data/lib/bulk_processor/validated_csv.rb CHANGED Viewed

@@ -3,19 +3,20 @@ require 'csv'
 class BulkProcessor
   # A Wrapper on CSV that validates column headers.
   class ValidatedCSV
-    PARSING_OPTIONS  = { headers: true, header_converters: :downcase }
+    PARSING_OPTIONS  = { headers: true, header_converters: :downcase }.freeze
     private_constant :PARSING_OPTIONS
     # This cryptic message usually just means that the header row contains a
     # blank field; in ruby ~> 2.1.5 It is the error message for a NoMethodError
     # raised when parsing a CSV.
-    BAD_HEADERS_ERROR_MSG = "undefined method `encode' for nil:NilClass"
+    BAD_HEADERS_ERROR_MSG = "undefined method `encode' for nil:NilClass".freeze
     private_constant :BAD_HEADERS_ERROR_MSG
-    MISSING_COLUMN_MESSAGE = 'Missing or malformed column header, is one of them blank?'
+    MISSING_COLUMN_MESSAGE =
+      'Missing or malformed column header, is one of them blank?'.freeze
     private_constant :MISSING_COLUMN_MESSAGE
-    attr_reader :errors, :records
+    attr_reader :errors
     def initialize(stream, required_headers, optional_headers)
       @stream = stream
@@ -33,11 +34,11 @@ class BulkProcessor
       @errors = []
       if missing_headers.any?
-        errors << "Missing required column(s): #{missing_headers.join(', ')}"
+        errors << "Missing required column(s): #{missing_headers.join(', ')}".freeze
       end
       if extra_headers.any?
-        errors << "Unrecognized column(s) found: #{extra_headers.join(', ')}"
+        errors << "Unrecognized column(s) found: #{extra_headers.join(', ')}".freeze
       end
       if csv.headers.any? { |header| header.nil? || header.strip == '' }

data/lib/bulk_processor/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 class BulkProcessor
-  VERSION = '0.2.0'
+  VERSION = '0.3.0'.freeze
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: bulk-processor
 version: !ruby/object:Gem::Version
-  version: 0.2.0
+  version: 0.3.0
 platform: ruby
 authors:
 - Tom Collier, Justin Richard
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-01-15 00:00:00.000000000 Z
+date: 2016-01-21 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: activejob
@@ -103,8 +103,11 @@ files:
 - lib/bulk_processor.rb
 - lib/bulk_processor/config.rb
 - lib/bulk_processor/csv_processor.rb
+- lib/bulk_processor/csv_processor/no_op_handler.rb
+- lib/bulk_processor/csv_processor/no_op_post_processor.rb
+- lib/bulk_processor/csv_processor/result.rb
+- lib/bulk_processor/csv_processor/row_processor.rb
 - lib/bulk_processor/job.rb
-- lib/bulk_processor/no_op_handler.rb
 - lib/bulk_processor/stream_encoder.rb
 - lib/bulk_processor/validated_csv.rb
 - lib/bulk_processor/version.rb

data/lib/bulk_processor/no_op_handler.rb DELETED Viewed

@@ -1,12 +0,0 @@
-class BulkProcessor
-  class NoOpHandler
-    def initialize(payload:, successes:, errors:)
-    end
-    def complete!
-    end
-    def fail!(fatal_error)
-    end
-  end
-end