csv_decision 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/.coveralls.yml +2 -0
  3. data/.rubocop.yml +16 -4
  4. data/.travis.yml +10 -0
  5. data/CHANGELOG.md +2 -0
  6. data/Gemfile +3 -0
  7. data/LICENSE +21 -0
  8. data/README.md +133 -19
  9. data/benchmark.rb +143 -0
  10. data/csv_decision.gemspec +8 -6
  11. data/lib/csv_decision.rb +18 -4
  12. data/lib/csv_decision/columns.rb +69 -0
  13. data/lib/csv_decision/data.rb +31 -16
  14. data/lib/csv_decision/decide.rb +47 -0
  15. data/lib/csv_decision/decision.rb +105 -0
  16. data/lib/csv_decision/header.rb +143 -8
  17. data/lib/csv_decision/input.rb +49 -0
  18. data/lib/csv_decision/load.rb +31 -0
  19. data/lib/csv_decision/matchers.rb +131 -0
  20. data/lib/csv_decision/matchers/numeric.rb +37 -0
  21. data/lib/csv_decision/matchers/pattern.rb +76 -0
  22. data/lib/csv_decision/matchers/range.rb +76 -0
  23. data/lib/csv_decision/options.rb +80 -50
  24. data/lib/csv_decision/parse.rb +77 -23
  25. data/lib/csv_decision/scan_row.rb +68 -0
  26. data/lib/csv_decision/table.rb +34 -6
  27. data/spec/csv_decision/columns_spec.rb +86 -0
  28. data/spec/csv_decision/data_spec.rb +16 -3
  29. data/spec/csv_decision/decision_spec.rb +30 -0
  30. data/spec/csv_decision/input_spec.rb +54 -0
  31. data/spec/csv_decision/load_spec.rb +28 -0
  32. data/spec/csv_decision/matchers/numeric_spec.rb +84 -0
  33. data/spec/csv_decision/matchers/pattern_spec.rb +183 -0
  34. data/spec/csv_decision/matchers/range_spec.rb +132 -0
  35. data/spec/csv_decision/options_spec.rb +67 -0
  36. data/spec/csv_decision/parse_spec.rb +2 -3
  37. data/spec/csv_decision/simple_example_spec.rb +45 -0
  38. data/spec/csv_decision/table_spec.rb +151 -0
  39. data/spec/data/invalid/invalid_header1.csv +4 -0
  40. data/spec/data/invalid/invalid_header2.csv +4 -0
  41. data/spec/data/invalid/invalid_header3.csv +4 -0
  42. data/spec/data/invalid/invalid_header4.csv +4 -0
  43. data/spec/data/valid/options_in_file1.csv +5 -0
  44. data/spec/data/valid/options_in_file2.csv +5 -0
  45. data/spec/data/valid/simple_example.csv +10 -0
  46. data/spec/data/valid/valid.csv +4 -4
  47. data/spec/spec_helper.rb +6 -0
  48. metadata +89 -12
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
+ # CSV Decision: CSV based Ruby decision tables.
4
+ # Created December 2017 by Brett Vickers
5
+ # See LICENSE and README.md for details.
6
+ module CSVDecision
7
+ # Dictionary of all this table's columns - inputs, outputs etc.
8
+ class Columns
9
+ # Value object used for column dictionary entries
10
+ Entry = Struct.new(:name, :text_only)
11
+
12
+ # Value object used for columns with defaults
13
+ Default = Struct.new(:name, :function, :default_if)
14
+
15
+ # Dictionary of all data columns.
16
+ # Note that the key of each hash is the header cell's array column index.
17
+ # Note that input and output columns can be interspersed and need not have unique names.
18
+ class Dictionary
19
+ attr_accessor :ins
20
+ attr_accessor :outs
21
+ attr_accessor :path
22
+ attr_accessor :defaults
23
+
24
+ def initialize
25
+ @ins = {}
26
+ @outs = {}
27
+
28
+ # Path for the input hash - optional
29
+ @path = {}
30
+ # Hash of columns that require defaults to be set
31
+ @defaults = {}
32
+ end
33
+ end
34
+
35
+ # Dictionary of all data columns
36
+ attr_reader :dictionary
37
+
38
+ # Input columns
39
+ def ins
40
+ @dictionary.ins
41
+ end
42
+
43
+ # Output columns
44
+ def outs
45
+ @dictionary.outs
46
+ end
47
+
48
+ # Input columns with defaults specified (planned feature)
49
+ # def defaults
50
+ # @dictionary.defaults
51
+ # end
52
+
53
+ # Input hash path (planned feature)
54
+ # def path
55
+ # @dictionary.path
56
+ # end
57
+
58
+ def initialize(table)
59
+ # If a column does not have a valid header cell, then it's empty of data.
60
+ # Return the stripped header row, removing it from the data array.
61
+ row = Header.strip_empty_columns(rows: table.rows)
62
+
63
+ # Build a dictionary of all valid data columns from the header row.
64
+ @dictionary = Header.dictionary(row: row) if row
65
+
66
+ freeze
67
+ end
68
+ end
69
+ end
@@ -10,21 +10,32 @@ module CSVDecision
10
10
 
11
11
  # Methods to load data from a file, CSV string or array of arrays
12
12
  module Data
13
+ CSV_OPTIONS = { encoding: 'UTF-8', skip_blanks: true }.freeze
14
+
13
15
  # Parse the input data which may either be a file path name, CSV string or
14
16
  # array of arrays. Strips out empty columns/rows and comment cells
15
- def self.to_array(data:, options: { force_encoding: 'UTF-8', ascii_only?: true })
16
- strip_rows(data: data_array(data), options: options)
17
+ def self.to_array(data:)
18
+ strip_rows(data: data_array(data))
19
+ end
20
+
21
+ def self.input_file?(input)
22
+ input.is_a?(Pathname) || input.is_a?(File)
17
23
  end
18
24
 
19
- # TODO: strip empty columns
20
- def self.strip_columns(_data:, _empty_cols:); end
25
+ def self.strip_columns(data:, empty_columns:)
26
+ # Adjust the column indices: as each column is deleted, the remaining ones shift left by 1.
27
+ empty_columns.map!.with_index { |col, index| col - index }
28
+
29
+ # Delete all empty columns from the array of arrays
30
+ empty_columns.each { |col| data.each_index { |row| data[row].delete_at(col) } }
31
+ end
21
32
 
22
33
  # Parse the input data which may either be a file path name, CSV string or
23
34
  # array of arrays
24
35
  def self.data_array(input)
25
- return CSV.read(input) if input.is_a?(Pathname)
36
+ return CSV.read(input, CSV_OPTIONS) if input_file?(input)
26
37
  return input.deep_dup if input.is_a?(Array) && input[0].is_a?(Array)
27
- return CSV.parse(input) if input.is_a?(String)
38
+ return CSV.parse(input, CSV_OPTIONS) if input.is_a?(String)
28
39
 
29
40
  raise ArgumentError,
30
41
  "#{input.class} input invalid; " \
@@ -32,10 +43,10 @@ module CSVDecision
32
43
  end
33
44
  private_class_method :data_array
34
45
 
35
- def self.strip_rows(data:, options:)
46
+ def self.strip_rows(data:)
36
47
  rows = []
37
48
  data.each do |row|
38
- row = strip_cells(row: row, options: options)
49
+ row = strip_cells(row: row)
39
50
  rows << row if row.find { |cell| cell != '' }
40
51
  end
41
52
  rows
@@ -45,15 +56,19 @@ module CSVDecision
45
56
  # Strip cells of leading/trailing spaces; treat comments as an empty cell.
46
57
  # Non string values treated as empty cells.
47
58
  # Non-ASCII strings are treated as empty cells.
48
- def self.strip_cells(row:, options:)
49
- row.map! do |cell|
50
- next '' unless cell.is_a?(String)
51
- cell = options[:force_encoding] ? cell.force_encoding(options[:force_encoding]) : cell
52
- next '' if options[:ascii_only?] && !cell.ascii_only?
53
- next '' if cell.lstrip[0] == COMMENT_CHARACTER
54
- cell.strip
55
- end
59
+ def self.strip_cells(row:)
60
+ row.map! { |cell| strip_cell(cell) }
56
61
  end
57
62
  private_class_method :strip_cells
63
+
64
+ def self.strip_cell(cell)
65
+ return '' unless cell.is_a?(String)
66
+ cell = cell.force_encoding('UTF-8')
67
+ return '' unless cell.ascii_only?
68
+ return '' if cell.lstrip[0] == COMMENT_CHARACTER
69
+
70
+ cell.strip
71
+ end
72
+ private_class_method :strip_cell
58
73
  end
59
74
  end
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: true
2
+
3
+ # CSV Decision: CSV based Ruby decision tables.
4
+ # Created December 2017 by Brett Vickers
5
+ # See LICENSE and README.md for details.
6
+ module CSVDecision
7
+ # Main module for searching the decision table looking for one or more matches
8
+ module Decide
9
+ # Main method for making decisions.
10
+ #
11
+ # @param table [CSVDecision::Table]
12
+ # @param input [Hash] - input hash (keys may or may not be symbolized)
13
+ # @param symbolize_keys [true, false] - set to true if keys are symbolized and it's
14
+ # OK to mutate the input hash. Otherwise a copy of the input hash is symbolized.
15
+ # @return [Hash]
16
+ def self.decide(table:, input:, symbolize_keys:)
17
+ # Parse and transform the hash supplied as input
18
+ parsed_input = Input.parse(table: table, input: input, symbolize_keys: symbolize_keys)
19
+
20
+ # The decision object collects the results of the search and
21
+ # calculates the final result
22
+ decision = Decision.new(table: table, input: parsed_input)
23
+
24
+ # table_scan(table: table, input: parsed_input, decision: decision)
25
+ decision.scan(table: table, input: parsed_input)
26
+ end
27
+
28
+ def self.matches?(row:, input:, scan_row:)
29
+ match = scan_row.match_constants?(row: row, scan_cols: input[:scan_cols])
30
+ return false unless match
31
+
32
+ return true if scan_row.procs.empty?
33
+
34
+ scan_row.match_procs?(row: row, input: input)
35
+ end
36
+
37
+ def self.eval_matcher(proc:, value:, hash:)
38
+ function = proc.function
39
+
40
+ # A symbol expression just needs to be passed the input hash
41
+ return function[hash] if proc.type == :expression
42
+
43
+ # All other procs can take one or two args
44
+ function.arity == 1 ? function[value] : function[value, hash]
45
+ end
46
+ end
47
+ end
@@ -0,0 +1,105 @@
1
+ # frozen_string_literal: true
2
+
3
+ # CSV Decision: CSV based Ruby decision tables.
4
+ # Created December 2017 by Brett Vickers
5
+ # See LICENSE and README.md for details.
6
+ module CSVDecision
7
+ # Accumulate the matching row(s) and calculate the final result
8
+ class Decision
9
+ def initialize(table:, input:)
10
+ @result = {}
11
+
12
+ # Relevant table attributes
13
+ @first_match = table.options[:first_match]
14
+ @outs = table.columns.outs
15
+ @outs_functions = table.outs_functions
16
+
17
+ # Partial result always includes the input hash for calculating output functions
18
+ @partial_result = input[:hash].dup if @outs_functions
19
+
20
+ @row_picked = nil
21
+ return if @first_match
22
+
23
+ # Extra attributes for the accumulate option
24
+ @rows_picked = []
25
+ @multi_result = nil
26
+ end
27
+
28
+ # Is the result set empty? That is, nothing matched?
29
+ def empty?
30
+ return @row_picked.nil? if @first_match
31
+ @rows_picked.empty?
32
+ end
33
+
34
+ def exist?
35
+ !empty?
36
+ end
37
+
38
+ def result
39
+ return {} if empty?
40
+ return final_result unless @outs_functions
41
+
42
+ nil
43
+ end
44
+
45
+ def scan(table:, input:)
46
+ scan_rows = table.scan_rows
47
+
48
+ table.each do |row, index|
49
+ done = row_scan(input: input, row: row, scan_row: scan_rows[index])
50
+
51
+ return self if done
52
+ end
53
+
54
+ self
55
+ end
56
+
57
+ def add(row)
58
+ return add_first_match(row) if @first_match
59
+
60
+ # Accumulate output rows
61
+ @rows_picked << row
62
+ @outs.each_pair do |col, column|
63
+ accumulate_outs(column_name: column.name, cell: row[col])
64
+ end
65
+
66
+ # Not done
67
+ false
68
+ end
69
+
70
+ private
71
+
72
+ def accumulate_outs(column_name:, cell:)
73
+ current = @result[column_name]
74
+
75
+ case current
76
+ when nil
77
+ @result[column_name] = cell
78
+
79
+ when Array
80
+ @result[column_name] << cell
81
+
82
+ else
83
+ @result[column_name] = [current, cell]
84
+ @multi_result ||= true
85
+ end
86
+ end
87
+
88
+ def row_scan(input:, row:, scan_row:)
89
+ return unless Decide.matches?(row: row, input: input, scan_row: scan_row)
90
+
91
+ add(row)
92
+ end
93
+
94
+ def final_result
95
+ @result
96
+ end
97
+
98
+ def add_first_match(row)
99
+ @row_picked = row
100
+
101
+ # Common case is just copying output column values to the final result
102
+ @outs.each_pair { |col, column| @result[column.name] = row[col] }
103
+ end
104
+ end
105
+ end
@@ -4,20 +4,155 @@
4
4
  # Created December 2017 by Brett Vickers
5
5
  # See LICENSE and README.md for details.
6
6
  module CSVDecision
7
- # Parse the CSV file's header row
8
- class Header
9
- # Column header looks like IN :col_name or if:
10
- COLUMN_TYPE = %r{\A(in|out|in/text|out/text)|set\s*:\s*(\S?.*)\z}i
7
+ # Parse the CSV file's header row. These methods are only required at table load time.
8
+ module Header
9
+ # Column header looks like IN :col_name or cond:
10
+ COLUMN_TYPE = %r{
11
+ \A(?<type>in|out|in/text|out/text|set|set/nil|set/blank|path|cond|if)
12
+ \s*:\s*(?<name>\S?.*)\z
13
+ }xi
11
14
 
15
+ # These column types do not need a name
16
+ COLUMN_TYPE_ANONYMOUS = Set.new(%i[path if cond]).freeze
17
+
18
+ # More lenient than a Ruby method name -
19
+ # any spaces will have been replaced with underscores
20
+ COLUMN_NAME = %r{\A\w[\w:/!?]*\z}
21
+
22
+ # Does this row contain a recognisable header cell?
23
+ #
24
+ # @param row [Array<String>]
25
+ # @return [String, nil] - the first cell that looks like a column header, or nil if none does.
12
26
  def self.row?(row)
13
27
  row.find { |cell| cell.match(COLUMN_TYPE) }
14
28
  end
15
29
 
16
- # Parse the input data which may either be a path name, CSV string or array of arrays
17
- def self.parse(table:, options: {})
18
- header = Header.new
30
+ # Strip empty columns from all data rows.
31
+ #
32
+ # @param rows [Array<Array<String>>]
33
+ # @return [Array<String>, nil] - the header row with any empty columns removed; it is
34
+ # also shifted off the rows array, which is modified in place.
35
+ def self.strip_empty_columns(rows:)
36
+ empty_cols = empty_columns?(row: rows.first)
37
+ Data.strip_columns(data: rows, empty_columns: empty_cols) unless empty_cols.empty?
38
+
39
+ # Remove the header row from the data array.
40
+ rows.shift
41
+ end
42
+
43
+ # Classify and build a dictionary of all input and output columns.
44
+ #
45
+ # @param row [Array<String>] - the header row after removing any empty columns.
46
+ # @return [Columns::Dictionary] - column dictionary; its ins and outs are hashes keyed by column index.
47
+ def self.dictionary(row:)
48
+ dictionary = Columns::Dictionary.new
49
+
50
+ row.each_with_index do |cell, index|
51
+ dictionary = parse_cell(cell: cell, index: index, dictionary: dictionary)
52
+ end
53
+
54
+ dictionary
55
+ end
56
+
57
+ def self.header_column?(cell:)
58
+ match = COLUMN_TYPE.match(cell)
59
+ raise CellValidationError, 'column name is not well formed' unless match
60
+
61
+ column_type = match['type']&.downcase&.to_sym
62
+ column_name = column_name(type: column_type, name: match['name'])
63
+
64
+ [column_type, column_name]
65
+ rescue CellValidationError => exp
66
+ raise CellValidationError,
67
+ "header column '#{cell}' is not valid as the #{exp.message}"
68
+ end
69
+ private_class_method :header_column?
70
+
71
+ # Array of all empty column indices.
72
+ def self.empty_columns?(row:)
73
+ result = []
74
+ row&.each_with_index { |cell, index| result << index if cell == '' }
75
+
76
+ result
77
+ end
78
+ private_class_method :empty_columns?
79
+
80
+ def self.column_name(type:, name:)
81
+ return format_column_name(name) if name.present?
82
+ return if COLUMN_TYPE_ANONYMOUS.member?(type)
83
+
84
+ raise CellValidationError, 'column name is missing'
85
+ end
86
+
87
+ def self.format_column_name(name)
88
+ column_name = name.strip.tr("\s", '_')
89
+
90
+ return column_name.to_sym if COLUMN_NAME.match(column_name)
91
+
92
+ raise CellValidationError, "column name '#{name}' contains invalid characters"
93
+ end
94
+
95
+ # Returns the normalized column type, along with an indication if
96
+ # the column is text only
97
+ def self.column_type(type)
98
+ case type
99
+ when :'in/text'
100
+ [:in, true]
101
+
102
+ when :cond
103
+ [:in, false]
104
+
105
+ when :'out/text'
106
+ [:out, true]
107
+
108
+ # Column may turn out to be text-only, or not
109
+ else
110
+ [type, nil]
111
+ end
112
+ end
113
+
114
+ def self.parse_cell(cell:, index:, dictionary:)
115
+ column_type, column_name = header_column?(cell: cell)
116
+
117
+ type, text_only = Header.column_type(column_type)
19
118
 
20
- header.freeze
119
+ dictionary_entry(dictionary: dictionary,
120
+ type: type,
121
+ entry: Columns::Entry.new(column_name, text_only),
122
+ index: index)
21
123
  end
124
+ private_class_method :parse_cell
125
+
126
+ def self.dictionary_entry(dictionary:, type:, entry:, index:)
127
+ case type
128
+ # Header column that has a function for setting the value (planned feature)
129
+ # when :set, :'set/nil', :'set/blank'
130
+ # # Default function will set the input value unconditionally or conditionally
131
+ # dictionary.defaults[index] =
132
+ # Columns::Default.new(entry.name, nil, default_if(type))
133
+ #
134
+ # # Treat set: as an in: column
135
+ # dictionary.ins[index] = entry
136
+
137
+ when :in
138
+ dictionary.ins[index] = entry
139
+
140
+ when :out
141
+ dictionary.outs[index] = entry
142
+
143
+ else
144
+ raise "internal error - column type #{type} not recognised"
145
+ end
146
+
147
+ dictionary
148
+ end
149
+ private_class_method :dictionary_entry
150
+
151
+ # def self.default_if(type)
152
+ # return nil if type == :set
153
+ # return :nil? if type == :'set/nil'
154
+ # :blank?
155
+ # end
156
+ # private_class_method :default_if
22
157
  end
23
158
  end