RubyGems - csv_decision - Versions diffs - 0.0.1 → 0.0.2 - Mend

csv_decision 0.0.1 → 0.0.2

Files changed (48) hide show

checksums.yaml +4 -4
data/.coveralls.yml +2 -0
data/.rubocop.yml +16 -4
data/.travis.yml +10 -0
data/CHANGELOG.md +2 -0
data/Gemfile +3 -0
data/LICENSE +21 -0
data/README.md +133 -19
data/benchmark.rb +143 -0
data/csv_decision.gemspec +8 -6
data/lib/csv_decision.rb +18 -4
data/lib/csv_decision/columns.rb +69 -0
data/lib/csv_decision/data.rb +31 -16
data/lib/csv_decision/decide.rb +47 -0
data/lib/csv_decision/decision.rb +105 -0
data/lib/csv_decision/header.rb +143 -8
data/lib/csv_decision/input.rb +49 -0
data/lib/csv_decision/load.rb +31 -0
data/lib/csv_decision/matchers.rb +131 -0
data/lib/csv_decision/matchers/numeric.rb +37 -0
data/lib/csv_decision/matchers/pattern.rb +76 -0
data/lib/csv_decision/matchers/range.rb +76 -0
data/lib/csv_decision/options.rb +80 -50
data/lib/csv_decision/parse.rb +77 -23
data/lib/csv_decision/scan_row.rb +68 -0
data/lib/csv_decision/table.rb +34 -6
data/spec/csv_decision/columns_spec.rb +86 -0
data/spec/csv_decision/data_spec.rb +16 -3
data/spec/csv_decision/decision_spec.rb +30 -0
data/spec/csv_decision/input_spec.rb +54 -0
data/spec/csv_decision/load_spec.rb +28 -0
data/spec/csv_decision/matchers/numeric_spec.rb +84 -0
data/spec/csv_decision/matchers/pattern_spec.rb +183 -0
data/spec/csv_decision/matchers/range_spec.rb +132 -0
data/spec/csv_decision/options_spec.rb +67 -0
data/spec/csv_decision/parse_spec.rb +2 -3
data/spec/csv_decision/simple_example_spec.rb +45 -0
data/spec/csv_decision/table_spec.rb +151 -0
data/spec/data/invalid/invalid_header1.csv +4 -0
data/spec/data/invalid/invalid_header2.csv +4 -0
data/spec/data/invalid/invalid_header3.csv +4 -0
data/spec/data/invalid/invalid_header4.csv +4 -0
data/spec/data/valid/options_in_file1.csv +5 -0
data/spec/data/valid/options_in_file2.csv +5 -0
data/spec/data/valid/simple_example.csv +10 -0
data/spec/data/valid/valid.csv +4 -4
data/spec/spec_helper.rb +6 -0
metadata +89 -12

data/lib/csv_decision/columns.rb ADDED Viewed

@@ -0,0 +1,69 @@
+# frozen_string_literal: true
+# CSV Decision: CSV based Ruby decision tables.
+# Created December 2017 by Brett Vickers
+# See LICENSE and README.md for details.
+module CSVDecision
+  # Dictionary of all this table's columns - inputs, outputs etc.
+  # Built from the table's header row when the instance is created.
+  class Columns
+    # Value object used for column dictionary entries:
+    # the column name plus a text-only flag.
+    Entry = Struct.new(:name, :text_only)
+    # Value object used for columns with defaults
+    Default = Struct.new(:name, :function, :default_if)
+    # Dictionary of all data columns.
+    # Note that the key of each hash is the header cell's array column index.
+    # Note that input and output columns can be interspersed and need not have unique names.
+    class Dictionary
+      # Input columns, keyed by column index
+      attr_accessor :ins
+      # Output columns, keyed by column index
+      attr_accessor :outs
+      # Path for the input hash (planned feature)
+      attr_accessor :path
+      # Columns that require defaults to be set (planned feature)
+      attr_accessor :defaults
+      # Start with every collection as an empty hash.
+      def initialize
+        @ins = {}
+        @outs = {}
+        # Path for the input hash - optional
+        @path = {}
+        # Hash of columns that require defaults to be set
+        @defaults = {}
+      end
+    end
+    # Dictionary of all data columns
+    # NOTE(review): stays nil when #initialize gets no header row back, in which
+    # case #ins and #outs raise NoMethodError.
+    attr_reader :dictionary
+    # Input columns
+    #
+    # @return [Hash] the dictionary's ins hash
+    def ins
+      @dictionary.ins
+    end
+    # Output columns
+    #
+    # @return [Hash] the dictionary's outs hash
+    def outs
+      @dictionary.outs
+    end
+    # Input columns with defaults specified (planned feature)
+    # def defaults
+    #   @dictionary.defaults
+    # end
+    # Input hash path (planned feature)
+    # def path
+    #   @dictionary.path
+    # end
+    # @param table [#rows] table whose first row is treated as the header row;
+    #   that row is removed from table.rows as a side effect
+    def initialize(table)
+      # If a column does not have a valid header cell, then it's empty of data.
+      # Return the stripped header row, removing it from the data array.
+      row = Header.strip_empty_columns(rows: table.rows)
+      # Build a dictionary of all valid data columns from the header row.
+      @dictionary = Header.dictionary(row: row) if row
+      # Freeze this instance only; freeze is shallow, so the object referenced
+      # by @dictionary is not itself frozen by this call.
+      freeze
+    end
+  end
+end

data/lib/csv_decision/data.rb CHANGED Viewed

@@ -10,21 +10,32 @@ module CSVDecision
   # Methods to load data from a file, CSV string or array of arrays
   module Data
+    CSV_OPTIONS = { encoding: 'UTF-8', skip_blanks: true }.freeze
     # Parse the input data which may either be a file path name, CSV string or
     # array of arrays. Strips out empty columns/rows and comment cells
-    def self.to_array(data:, options: { force_encoding: 'UTF-8', ascii_only?: true })
-      strip_rows(data: data_array(data), options: options)
+    # @param data [Pathname, File, String, Array<Array>] any input accepted by data_array
+    # @return [Array<Array<String>>] the rows returned by strip_rows
+    def self.to_array(data:)
+      strip_rows(data: data_array(data))
+    end
+    # Is the input a file-like object, i.e. a Pathname or a File?
+    #
+    # @param input [Object]
+    # @return [true, false]
+    def self.input_file?(input)
+      input.is_a?(Pathname) || input.is_a?(File)
     end
-    # TODO: strip empty columns
-    def self.strip_columns(_data:, _empty_cols:); end
+    # Delete the given columns from every row of data, in place.
+    #
+    # @param data [Array<Array>] rows to modify; each row array is mutated
+    # @param empty_columns [Array<Integer>] column indices to delete, expected in
+    #   ascending order (as produced by Header.empty_columns?); this array is
+    #   also mutated in place by the index adjustment below
+    # @return [Array<Integer>] the adjusted empty_columns array
+    def self.strip_columns(data:, empty_columns:)
+      # Adjust column indices: as we delete columns, the rest shift to the left by 1
+      empty_columns.map!.with_index { |col, index| col - index }
+      # Delete all empty columns from the array of arrays
+      empty_columns.each { |col| data.each_index { |row| data[row].delete_at(col) } }
+    end
     # Parse the input data which may either be a file path name, CSV string or
     # array of arrays
     def self.data_array(input)
-      return CSV.read(input) if input.is_a?(Pathname)
+      return CSV.read(input, CSV_OPTIONS) if input_file?(input)
       return input.deep_dup if input.is_a?(Array) && input[0].is_a?(Array)
-      return CSV.parse(input) if input.is_a?(String)
+      return CSV.parse(input, CSV_OPTIONS) if input.is_a?(String)
       raise ArgumentError,
             "#{input.class} input invalid; " \
@@ -32,10 +43,10 @@ module CSVDecision
     end
     private_class_method :data_array
-    def self.strip_rows(data:, options:)
+    def self.strip_rows(data:)
       rows = []
       data.each do |row|
-        row = strip_cells(row: row, options: options)
+        row = strip_cells(row: row)
         rows << row if row.find { |cell| cell != '' }
       end
       rows
@@ -45,15 +56,19 @@ module CSVDecision
     # Strip cells of leading/trailing spaces; treat comments as an empty cell.
     # Non string values treated as empty cells.
     # Non-ascii strings treated as empty cells by default.
-    def self.strip_cells(row:, options:)
-      row.map! do |cell|
-        next '' unless cell.is_a?(String)
-        cell = options[:force_encoding] ? cell.force_encoding(options[:force_encoding]) : cell
-        next '' if options[:ascii_only?] && !cell.ascii_only?
-        next '' if cell.lstrip[0] == COMMENT_CHARACTER
-        cell.strip
-      end
+    # @param row [Array] row whose cells are replaced in place (map!)
+    # @return [Array<String>] the same row array, with every cell normalised by strip_cell
+    def self.strip_cells(row:)
+      row.map! { |cell| strip_cell(cell) }
     end
     private_class_method :strip_cells
+    # Normalise a single cell.
+    #
+    # @param cell [Object] raw cell value
+    # @return [String] '' for non-strings, non-ASCII strings and comment cells;
+    #   otherwise the cell stripped of leading/trailing whitespace
+    def self.strip_cell(cell)
+      return '' unless cell.is_a?(String)
+      # force_encoding changes the encoding tag of the string itself (no
+      # transcoding), so the caller's string object is affected too.
+      cell = cell.force_encoding('UTF-8')
+      return '' unless cell.ascii_only?
+      # A cell whose first non-blank character is COMMENT_CHARACTER is a comment.
+      return '' if cell.lstrip[0] == COMMENT_CHARACTER
+      cell.strip
+    end
+    private_class_method :strip_cell
   end
 end

data/lib/csv_decision/decide.rb ADDED Viewed

@@ -0,0 +1,47 @@
+# frozen_string_literal: true
+# CSV Decision: CSV based Ruby decision tables.
+# Created December 2017 by Brett Vickers
+# See LICENSE and README.md for details.
+module CSVDecision
+  # Main module for searching the decision table looking for one or more matches
+  module Decide
+    # Main method for making decisions.
+    #
+    # @param table [CSVDecision::Table]
+    # @param input [Hash] - input hash (keys may or may not be symbolized)
+    # @param symbolize_keys [true, false] - set to true if keys are symbolized and it's
+    #   OK to mutate the input hash. Otherwise a copy of the input hash is symbolized.
+    # @return [CSVDecision::Decision] the decision object returned by Decision#scan;
+    #   call #result on it to obtain the result hash
+    def self.decide(table:, input:, symbolize_keys:)
+      # Parse and transform the hash supplied as input
+      parsed_input = Input.parse(table: table, input: input, symbolize_keys: symbolize_keys)
+      # The decision object collects the results of the search and
+      # calculates the final result
+      decision = Decision.new(table: table, input: parsed_input)
+      # table_scan(table: table, input: parsed_input, decision: decision)
+      decision.scan(table: table, input: parsed_input)
+    end
+    # Does the given table row match the input?
+    # Constant cells are checked first; procs are only evaluated when the
+    # constants match and the scan row actually has procs.
+    #
+    # @param row [Array] data row being tested
+    # @param input [Hash] parsed input; input[:scan_cols] is passed to the constants match
+    # @param scan_row [#match_constants?, #procs, #match_procs?] scan data for this row
+    # @return [Object] false if the constants do not match, true if there are
+    #   no procs, otherwise whatever scan_row.match_procs? returns
+    def self.matches?(row:, input:, scan_row:)
+      match = scan_row.match_constants?(row: row, scan_cols: input[:scan_cols])
+      return false unless match
+      return true if scan_row.procs.empty?
+      scan_row.match_procs?(row: row, input: input)
+    end
+    # Call a matcher's function with the arguments it expects.
+    #
+    # @param proc [#function, #type] matcher wrapper
+    # @param value [Object] value passed as the function's first argument
+    # @param hash [Hash] input hash
+    # @return [Object] whatever the function returns
+    def self.eval_matcher(proc:, value:, hash:)
+      function = proc.function
+      # A symbol expression just needs to be passed the input hash
+      return function[hash] if proc.type == :expression
+      # All other procs can take one or two args
+      function.arity == 1 ? function[value] : function[value, hash]
+    end
+  end
+end

data/lib/csv_decision/decision.rb ADDED Viewed

@@ -0,0 +1,105 @@
+# frozen_string_literal: true
+# CSV Decision: CSV based Ruby decision tables.
+# Created December 2017 by Brett Vickers
+# See LICENSE and README.md for details.
+module CSVDecision
+  # Accumulate the matching row(s) and calculate the final result
+  class Decision
+    # @param table [CSVDecision::Table] read for options[:first_match],
+    #   columns.outs and outs_functions
+    # @param input [Hash] parsed input; input[:hash] is copied when the table
+    #   has output functions
+    def initialize(table:, input:)
+      @result = {}
+      # Relevant table attributes
+      @first_match = table.options[:first_match]
+      @outs = table.columns.outs
+      @outs_functions = table.outs_functions
+      # Partial result always includes the input hash for calculating output functions
+      @partial_result = input[:hash].dup if @outs_functions
+      @row_picked = nil
+      # In first-match mode the accumulate attributes below are never set.
+      return if @first_match
+      # Extra attributes for the accumulate option
+      @rows_picked = []
+      @multi_result = nil
+    end
+    # Is the result set empty? That is, nothing matched?
+    #
+    # @return [true, false]
+    def empty?
+      return @row_picked.nil? if @first_match
+      @rows_picked.empty?
+    end
+    # Did at least one row match? Negation of #empty?.
+    #
+    # @return [true, false]
+    def exist?
+      !empty?
+    end
+    # Result of the search.
+    #
+    # @return [Hash, nil] empty hash if nothing matched, otherwise the
+    #   accumulated result hash.
+    #   NOTE(review): when the table has output functions this returns nil -
+    #   no output function evaluation is visible in this class.
+    def result
+      return {} if empty?
+      return final_result unless @outs_functions
+      nil
+    end
+    # Scan the table's rows in order, adding each matching row to the result.
+    # Stops as soon as a row scan reports that it is done.
+    #
+    # @param table [CSVDecision::Table] supplies scan_rows and yields
+    #   (row, index) pairs from #each
+    # @param input [Hash] parsed input passed on to the matchers
+    # @return [self]
+    def scan(table:, input:)
+      scan_rows = table.scan_rows
+      table.each do |row, index|
+        done = row_scan(input: input, row: row, scan_row: scan_rows[index])
+        return self if done
+      end
+      self
+    end
+    # Add a matching row to the result.
+    #
+    # @param row [Array] matched data row, indexed by column
+    # @return [Object] truthy when scanning should stop (first-match mode),
+    #   false when accumulating
+    def add(row)
+      return add_first_match(row) if @first_match
+      # Accumulate output rows
+      @rows_picked << row
+      @outs.each_pair do |col, column|
+        accumulate_outs(column_name: column.name, cell: row[col])
+      end
+      # Not done
+      false
+    end
+    private
+    # Merge one output cell into @result: the first value is stored as is,
+    # a second value turns the entry into a two-element array, and later
+    # values are appended to that array.
+    def accumulate_outs(column_name:, cell:)
+      current = @result[column_name]
+      case current
+      when nil
+        @result[column_name] = cell
+      when Array
+        @result[column_name] << cell
+      else
+        @result[column_name] = [current, cell]
+        @multi_result ||= true
+      end
+    end
+    # Test one row and add it to the result if it matches.
+    #
+    # @return [Object, nil] nil when the row does not match, otherwise the
+    #   return value of #add
+    def row_scan(input:, row:, scan_row:)
+      return unless Decide.matches?(row: row, input: input, scan_row: scan_row)
+      add(row)
+    end
+    # @return [Hash] the accumulated result hash
+    def final_result
+      @result
+    end
+    # Record the first matching row and copy its output cells into @result.
+    # The value returned is each_pair's return value (for a Hash, the receiver
+    # itself), which is truthy and so tells #scan to stop.
+    def add_first_match(row)
+      @row_picked = row
+      # Common case is just copying output column values to the final result
+      @outs.each_pair { |col, column| @result[column.name] = row[col] }
+    end
+  end
+end

data/lib/csv_decision/header.rb CHANGED Viewed

@@ -4,20 +4,155 @@
 # Created December 2017 by Brett Vickers
 # See LICENSE and README.md for details.
 module CSVDecision
-  # Parse the CSV file's header row
-  class Header
-    # Column header looks like IN :col_name or if:
-    COLUMN_TYPE = %r{\A(in|out|in/text|out/text)|set\s*:\s*(\S?.*)\z}i
+  # Parse the CSV file's header row. These methods are only required at table load time.
+  module Header
+    # Column header looks like IN :col_name or cond:
+    COLUMN_TYPE = %r{
+      \A(?<type>in|out|in/text|out/text|set|set/nil|set/blank|path|cond|if)
+      \s*:\s*(?<name>\S?.*)\z
+    }xi
+    # These column types do not need a name
+    COLUMN_TYPE_ANONYMOUS = Set.new(%i[path if cond]).freeze
+    # More lenient than a Ruby method name -
+    # any spaces will have been replaced with underscores
+    COLUMN_NAME = %r{\A\w[\w:/!?]*\z}
+    # Does this row contain a recognisable header cell?
+    #
+    # @param row [Array<String>]
+    # @return [String, nil] the first cell matching COLUMN_TYPE (truthy),
+    #   or nil if no cell matches
     def self.row?(row)
       row.find { |cell| cell.match(COLUMN_TYPE) }
     end
-    # Parse the input data which may either be a path name, CSV string or array of arrays
-    def self.parse(table:, options: {})
-      header = Header.new
+    # Strip empty columns from all data rows.
+    # The first row is treated as the header: columns whose header cell is ''
+    # are deleted from every row, then the header row is removed from rows.
+    #
+    # @param rows [Array<Array<String>>] mutated in place
+    # @return [Array<String>, nil] the header row shifted off rows
+    #   (nil if rows is empty)
+    def self.strip_empty_columns(rows:)
+      empty_cols = empty_columns?(row: rows.first)
+      Data.strip_columns(data: rows, empty_columns: empty_cols) unless empty_cols.empty?
+      # Remove the header row from the data array.
+      rows.shift
+    end
+    # Classify and build a dictionary of all input and output columns.
+    #
+    # @param row [Array<String>] - the header row after removing any empty columns.
+    # @return [Columns::Dictionary] - column dictionary whose ins and outs hashes
+    #   are keyed by column index.
+    def self.dictionary(row:)
+      dictionary = Columns::Dictionary.new
+      row.each_with_index do |cell, index|
+        dictionary = parse_cell(cell: cell, index: index, dictionary: dictionary)
+      end
+      dictionary
+    end
+    # Split a header cell into its column type and column name.
+    #
+    # @param cell [String] header cell text
+    # @return [Array(Symbol, Symbol)] downcased column type and formatted column
+    #   name (the name is nil for an anonymous column type)
+    # @raise [CellValidationError] if the cell does not match COLUMN_TYPE or the
+    #   name is invalid; the message is prefixed with the offending cell
+    def self.header_column?(cell:)
+      match = COLUMN_TYPE.match(cell)
+      raise CellValidationError, 'column name is not well formed' unless match
+      column_type = match['type']&.downcase&.to_sym
+      column_name = column_name(type: column_type, name: match['name'])
+      [column_type, column_name]
+    rescue CellValidationError => exp
+      raise CellValidationError,
+            "header column '#{cell}' is not valid as the #{exp.message}"
+    end
+    private_class_method :header_column?
+    # Array of all empty column indices.
+    #
+    # @param row [Array<String>, nil] header row
+    # @return [Array<Integer>] indices of cells equal to '' (empty array if row is nil)
+    def self.empty_columns?(row:)
+      result = []
+      row&.each_with_index { |cell, index| result << index if cell == '' }
+      result
+    end
+    private_class_method :empty_columns?
+    # Validate and format the name part of a header cell.
+    #
+    # @param type [Symbol] column type
+    # @param name [String] raw name text captured from the header cell
+    # @return [Symbol, nil] formatted name, or nil when the name is blank and
+    #   the column type is allowed to be anonymous
+    # @raise [CellValidationError] if the name is blank for any other column type
+    def self.column_name(type:, name:)
+      # NOTE(review): present? is not core Ruby - presumably ActiveSupport; confirm.
+      return format_column_name(name) if name.present?
+      return if COLUMN_TYPE_ANONYMOUS.member?(type)
+      raise CellValidationError, 'column name is missing'
+    end
+    # Strip the name, replace spaces with underscores and convert it to a symbol.
+    #
+    # @param name [String]
+    # @return [Symbol]
+    # @raise [CellValidationError] if the result does not match COLUMN_NAME
+    def self.format_column_name(name)
+      # In a double-quoted string "\s" is a space character.
+      column_name = name.strip.tr("\s", '_')
+      return column_name.to_sym if COLUMN_NAME.match(column_name)
+      raise CellValidationError, "column name '#{name}' contains invalid characters"
+    end
+    # Returns the normalized column type, along with an indication if
+    # the column is text only
+    #
+    # NOTE(review): any type other than in/text, cond and out/text is passed
+    # through unchanged, so set, set/nil, set/blank, path and if (all accepted
+    # by COLUMN_TYPE) reach dictionary_entry and hit its "internal error" raise.
+    def self.column_type(type)
+      case type
+      when :'in/text'
+        [:in, true]
+      when :cond
+        [:in, false]
+      when :'out/text'
+        [:out, true]
+      # Column may turn out to be text-only, or not
+      else
+        [type, nil]
+      end
+    end
+    # Parse one header cell and record it in the dictionary.
+    #
+    # @param cell [String] header cell text
+    # @param index [Integer] column index of the cell
+    # @param dictionary [Columns::Dictionary]
+    # @return [Columns::Dictionary] the dictionary passed in, updated
+    def self.parse_cell(cell:, index:, dictionary:)
+      column_type, column_name = header_column?(cell: cell)
+      type, text_only = Header.column_type(column_type)
-      header.freeze
+      dictionary_entry(dictionary: dictionary,
+                       type: type,
+                       entry: Columns::Entry.new(column_name, text_only),
+                       index: index)
     end
+    private_class_method :parse_cell
+    # Store the entry under its column index in the ins or outs hash.
+    #
+    # @return [Columns::Dictionary] the dictionary passed in
+    # @raise [RuntimeError] for any type other than :in or :out
+    def self.dictionary_entry(dictionary:, type:, entry:, index:)
+      case type
+      # Header column that has a function for setting the value (planned feature)
+      # when :set, :'set/nil', :'set/blank'
+      #   # Default function will set the input value unconditionally or conditionally
+      #   dictionary.defaults[index] =
+      #     Columns::Default.new(entry.name, nil, default_if(type))
+      #
+      #   # Treat set: as an in: column
+      #   dictionary.ins[index] = entry
+      when :in
+        dictionary.ins[index] = entry
+      when :out
+        dictionary.outs[index] = entry
+      else
+        raise "internal error - column type #{type} not recognised"
+      end
+      dictionary
+    end
+    private_class_method :dictionary_entry
+    # def self.default_if(type)
+    #   return nil if type == :set
+    #   return :nil? if type == :'set/nil'
+    #   :blank?
+    # end
+    # private_class_method :default_if
   end
 end