csv_decision 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/.coveralls.yml +2 -0
  3. data/.rubocop.yml +16 -4
  4. data/.travis.yml +10 -0
  5. data/CHANGELOG.md +2 -0
  6. data/Gemfile +3 -0
  7. data/LICENSE +21 -0
  8. data/README.md +133 -19
  9. data/benchmark.rb +143 -0
  10. data/csv_decision.gemspec +8 -6
  11. data/lib/csv_decision.rb +18 -4
  12. data/lib/csv_decision/columns.rb +69 -0
  13. data/lib/csv_decision/data.rb +31 -16
  14. data/lib/csv_decision/decide.rb +47 -0
  15. data/lib/csv_decision/decision.rb +105 -0
  16. data/lib/csv_decision/header.rb +143 -8
  17. data/lib/csv_decision/input.rb +49 -0
  18. data/lib/csv_decision/load.rb +31 -0
  19. data/lib/csv_decision/matchers.rb +131 -0
  20. data/lib/csv_decision/matchers/numeric.rb +37 -0
  21. data/lib/csv_decision/matchers/pattern.rb +76 -0
  22. data/lib/csv_decision/matchers/range.rb +76 -0
  23. data/lib/csv_decision/options.rb +80 -50
  24. data/lib/csv_decision/parse.rb +77 -23
  25. data/lib/csv_decision/scan_row.rb +68 -0
  26. data/lib/csv_decision/table.rb +34 -6
  27. data/spec/csv_decision/columns_spec.rb +86 -0
  28. data/spec/csv_decision/data_spec.rb +16 -3
  29. data/spec/csv_decision/decision_spec.rb +30 -0
  30. data/spec/csv_decision/input_spec.rb +54 -0
  31. data/spec/csv_decision/load_spec.rb +28 -0
  32. data/spec/csv_decision/matchers/numeric_spec.rb +84 -0
  33. data/spec/csv_decision/matchers/pattern_spec.rb +183 -0
  34. data/spec/csv_decision/matchers/range_spec.rb +132 -0
  35. data/spec/csv_decision/options_spec.rb +67 -0
  36. data/spec/csv_decision/parse_spec.rb +2 -3
  37. data/spec/csv_decision/simple_example_spec.rb +45 -0
  38. data/spec/csv_decision/table_spec.rb +151 -0
  39. data/spec/data/invalid/invalid_header1.csv +4 -0
  40. data/spec/data/invalid/invalid_header2.csv +4 -0
  41. data/spec/data/invalid/invalid_header3.csv +4 -0
  42. data/spec/data/invalid/invalid_header4.csv +4 -0
  43. data/spec/data/valid/options_in_file1.csv +5 -0
  44. data/spec/data/valid/options_in_file2.csv +5 -0
  45. data/spec/data/valid/simple_example.csv +10 -0
  46. data/spec/data/valid/valid.csv +4 -4
  47. data/spec/spec_helper.rb +6 -0
  48. metadata +89 -12
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
+ # CSV Decision: CSV based Ruby decision tables.
4
+ # Created December 2017 by Brett Vickers
5
+ # See LICENSE and README.md for details.
6
+ module CSVDecision
7
+ # Dictionary of all this table's columns - inputs, outputs etc.
8
class Columns
  # Dictionary entry describing one column: its name and whether it is text only.
  Entry = Struct.new(:name, :text_only)

  # Describes a column that has a default: name, function and default condition.
  Default = Struct.new(:name, :function, :default_if)

  # Holds one hash per kind of column.
  # Each hash is keyed by the header cell's column index in the row array.
  # Input and output columns may be interspersed and their names need not be unique.
  class Dictionary
    attr_accessor :ins, :outs, :path, :defaults

    # Every hash starts out empty.
    def initialize
      @ins = {}
      @outs = {}
      @path = {}     # optional path for the input hash
      @defaults = {} # columns that require defaults to be set
    end
  end

  # The column dictionary; never assigned (so nil) when no header row was found.
  attr_reader :dictionary

  # @return [Hash] the input columns
  def ins
    @dictionary.ins
  end

  # @return [Hash] the output columns
  def outs
    @dictionary.outs
  end

  # Planned feature - input columns with defaults specified:
  #   def defaults
  #     @dictionary.defaults
  #   end

  # Planned feature - input hash path:
  #   def path
  #     @dictionary.path
  #   end

  # Builds the dictionary from the table's header row, then freezes this object.
  #
  # @param table [#rows] table whose rows array starts with the header row;
  #   the header row is removed from that array as a side effect.
  def initialize(table)
    # Columns without a valid header cell carry no data, so they are stripped first;
    # the call hands back the header row and takes it out of the data rows.
    header = Header.strip_empty_columns(rows: table.rows)

    # Only a table that actually has a header row gets a dictionary.
    @dictionary = Header.dictionary(row: header) if header

    freeze
  end
end
69
+ end
@@ -10,21 +10,32 @@ module CSVDecision
10
10
 
11
11
  # Methods to load data from a file, CSV string or array of arrays
12
12
  module Data
13
+ CSV_OPTIONS = { encoding: 'UTF-8', skip_blanks: true }.freeze
14
+
13
15
  # Parse the input data which may either be a file path name, CSV string or
14
16
  # array of arrays. Strips out empty columns/rows and comment cells
15
- def self.to_array(data:, options: { force_encoding: 'UTF-8', ascii_only?: true })
16
- strip_rows(data: data_array(data), options: options)
17
def self.to_array(data:)
  # First turn whatever was supplied into an array of arrays ...
  parsed = data_array(data)

  # ... then clean its cells and drop the rows that end up empty.
  strip_rows(data: parsed)
end
20
+
21
# Is the input something to be read from disk?
#
# @param input [Object]
# @return [true, false] true only for Pathname and File objects.
def self.input_file?(input)
  case input
  when Pathname, File then true
  else false
  end
end
18
24
 
19
- # TODO: strip empty columns
20
- def self.strip_columns(_data:, _empty_cols:); end
25
# Delete the given columns from every row of the data array, in place.
#
# @param data [Array<Array>] rows to prune; each row array is mutated.
# @param empty_columns [Array<Integer>] indices of the columns to delete.
#   This array is only read - it is never modified, so it may be frozen or reused.
# @return [Array<Integer>] the empty_columns argument.
def self.strip_columns(data:, empty_columns:)
  # Work from the highest index down: a deletion only shifts the columns to its
  # right, which have already been handled, so no index needs adjusting and the
  # order (or repetition) of the supplied indices does not matter.
  empty_columns.uniq.sort.reverse_each do |col|
    data.each { |row| row.delete_at(col) }
  end

  empty_columns
end
21
32
 
22
33
  # Parse the input data which may either be a file path name, CSV string or
23
34
  # array of arrays
24
35
  def self.data_array(input)
25
- return CSV.read(input) if input.is_a?(Pathname)
36
+ return CSV.read(input, CSV_OPTIONS) if input_file?(input)
26
37
  return input.deep_dup if input.is_a?(Array) && input[0].is_a?(Array)
27
- return CSV.parse(input) if input.is_a?(String)
38
+ return CSV.parse(input, CSV_OPTIONS) if input.is_a?(String)
28
39
 
29
40
  raise ArgumentError,
30
41
  "#{input.class} input invalid; " \
@@ -32,10 +43,10 @@ module CSVDecision
32
43
  end
33
44
  private_class_method :data_array
34
45
 
35
- def self.strip_rows(data:, options:)
46
+ def self.strip_rows(data:)
36
47
  rows = []
37
48
  data.each do |row|
38
- row = strip_cells(row: row, options: options)
49
+ row = strip_cells(row: row)
39
50
  rows << row if row.find { |cell| cell != '' }
40
51
  end
41
52
  rows
@@ -45,15 +56,19 @@ module CSVDecision
45
56
  # Strip cells of leading/trailing spaces; treat comments as an empty cell.
46
57
  # Non string values treated as empty cells.
47
58
  # Non-ascii strings treated as empty cells by default.
48
- def self.strip_cells(row:, options:)
49
- row.map! do |cell|
50
- next '' unless cell.is_a?(String)
51
- cell = options[:force_encoding] ? cell.force_encoding(options[:force_encoding]) : cell
52
- next '' if options[:ascii_only?] && !cell.ascii_only?
53
- next '' if cell.lstrip[0] == COMMENT_CHARACTER
54
- cell.strip
55
- end
59
def self.strip_cells(row:)
  # Every cell is swapped, in place, for its cleaned-up version; the same row
  # array is handed back.
  row.map!(&method(:strip_cell))
end
57
62
  private_class_method :strip_cells
63
+
64
# Clean up a single cell.
#
# @param cell [Object] raw cell value.
# @return [String] the cell stripped of leading/trailing whitespace, or '' when the
#   cell is not a String, is not ASCII-only once read as UTF-8, or is a comment
#   (its first non-blank character is COMMENT_CHARACTER).
def self.strip_cell(cell)
  return '' unless cell.is_a?(String)

  # force_encoding re-tags its receiver, so only ever apply it to a copy: the
  # caller's string stays as it was and frozen strings do not raise FrozenError.
  text = cell.encoding == Encoding::UTF_8 ? cell : cell.dup.force_encoding('UTF-8')
  return '' unless text.ascii_only?
  return '' if text.lstrip[0] == COMMENT_CHARACTER

  text.strip
end
72
+ private_class_method :strip_cell
58
73
  end
59
74
  end
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: true
2
+
3
+ # CSV Decision: CSV based Ruby decision tables.
4
+ # Created December 2017 by Brett Vickers
5
+ # See LICENSE and README.md for details.
6
+ module CSVDecision
7
+ # Main module for searching the decision table looking for one or more matches
8
module Decide
  # Entry point for making a decision.
  #
  # @param table [CSVDecision::Table]
  # @param input [Hash] - input hash (keys may or may not be symbolized)
  # @param symbolize_keys [true, false] - set to true if keys are symbolized and it's
  #   OK to mutate the input hash. Otherwise a copy of the input hash is symbolized.
  # @return [Object] whatever Decision#scan returns for the parsed input.
  def self.decide(table:, input:, symbolize_keys:)
    # The raw input hash is parsed and transformed first.
    parsed_input = Input.parse(table: table, input: input, symbolize_keys: symbolize_keys)

    # A decision object gathers the matching rows while the table is scanned.
    Decision.new(table: table, input: parsed_input)
            .scan(table: table, input: parsed_input)
  end

  # Does the row match the input?
  #
  # The constants are checked first; the procs are only consulted when the
  # constants matched and the scan row has any procs.
  def self.matches?(row:, input:, scan_row:)
    return false unless scan_row.match_constants?(row: row, scan_cols: input[:scan_cols])

    if scan_row.procs.empty?
      true
    else
      scan_row.match_procs?(row: row, input: input)
    end
  end

  # Call a matcher's function with the arguments it expects.
  #
  # An :expression proc receives only the input hash. Any other proc receives
  # the value alone when its arity is 1, otherwise the value and the hash.
  def self.eval_matcher(proc:, value:, hash:)
    callable = proc.function

    arguments =
      if proc.type == :expression
        [hash]
      elsif callable.arity == 1
        [value]
      else
        [value, hash]
      end

    callable[*arguments]
  end
end
47
+ end
@@ -0,0 +1,105 @@
1
+ # frozen_string_literal: true
2
+
3
+ # CSV Decision: CSV based Ruby decision tables.
4
+ # Created December 2017 by Brett Vickers
5
+ # See LICENSE and README.md for details.
6
+ module CSVDecision
7
+ # Accumulate the matching row(s) and calculate the final result
8
class Decision
  # @param table [#options, #columns, #outs_functions] the decision table.
  # @param input [Hash] parsed input; its :hash entry is only read when the
  #   table has output functions.
  def initialize(table:, input:)
    @result = {}

    # Table attributes needed while scanning
    @first_match = table.options[:first_match]
    @outs = table.columns.outs
    @outs_functions = table.outs_functions

    # With output functions, the partial result starts as a copy of the input hash
    @partial_result = input[:hash].dup if @outs_functions

    @row_picked = nil

    # The remaining state is only needed when accumulating several rows
    unless @first_match
      @rows_picked = []
      @multi_result = nil
    end
  end

  # True when nothing has matched so far.
  def empty?
    @first_match ? @row_picked.nil? : @rows_picked.empty?
  end

  # True when at least one row has matched.
  def exist?
    !empty?
  end

  # @return [Hash, nil] an empty hash when nothing matched; nil when the table
  #   has output functions; otherwise the collected output values.
  def result
    if empty?
      {}
    elsif @outs_functions
      nil
    else
      final_result
    end
  end

  # Walk the table, stopping as soon as a row reports that the scan is done.
  #
  # @return [Decision] self
  def scan(table:, input:)
    row_matchers = table.scan_rows

    table.each do |row, index|
      break if row_scan(input: input, row: row, scan_row: row_matchers[index])
    end

    self
  end

  # Record a matching row.
  #
  # @return [Object] truthy when scanning can stop (first match mode), false when
  #   accumulating.
  def add(row)
    return add_first_match(row) if @first_match

    # Accumulate mode: remember the row and fold its outputs into the result
    @rows_picked << row
    @outs.each do |col, column|
      accumulate_outs(column_name: column.name, cell: row[col])
    end

    # Keep scanning
    false
  end

  private

  # First value is stored bare; a second value turns the entry into an array,
  # which later values are appended to.
  def accumulate_outs(column_name:, cell:)
    existing = @result[column_name]

    if existing.nil?
      @result[column_name] = cell
    elsif existing.is_a?(Array)
      existing << cell
    else
      @result[column_name] = [existing, cell]
      @multi_result ||= true
    end
  end

  # Adds the row when it matches; returns nil when it does not.
  def row_scan(input:, row:, scan_row:)
    add(row) if Decide.matches?(row: row, input: input, scan_row: scan_row)
  end

  def final_result
    @result
  end

  # Copy the row's output cells straight into the result.
  def add_first_match(row)
    @row_picked = row

    @outs.each { |col, column| @result[column.name] = row[col] }
  end
end
105
+ end
@@ -4,20 +4,155 @@
4
4
  # Created December 2017 by Brett Vickers
5
5
  # See LICENSE and README.md for details.
6
6
  module CSVDecision
7
- # Parse the CSV file's header row
8
- class Header
9
- # Column header looks like IN :col_name or if:
10
- COLUMN_TYPE = %r{\A(in|out|in/text|out/text)|set\s*:\s*(\S?.*)\z}i
7
+ # Parse the CSV file's header row. These methods are only required at table load time.
8
+ module Header
9
+ # Column header looks like IN :col_name or cond:
10
+ COLUMN_TYPE = %r{
11
+ \A(?<type>in|out|in/text|out/text|set|set/nil|set/blank|path|cond|if)
12
+ \s*:\s*(?<name>\S?.*)\z
13
+ }xi
11
14
 
15
+ # These column types do not need a name
16
+ COLUMN_TYPE_ANONYMOUS = Set.new(%i[path if cond]).freeze
17
+
18
+ # More lenient than a Ruby method name -
19
+ # any spaces will have been replaced with underscores
20
+ COLUMN_NAME = %r{\A\w[\w:/!?]*\z}
21
+
22
+ # Does this row contain a recognisable header cell?
23
+ #
24
+ # @param row [Array<String>]
25
+ # @return [true, false]
12
26
def self.row?(row)
  # any? yields a real boolean, as the @return tag promises; the truthiness seen
  # by callers is the same as when the matching cell itself was returned.
  row.any? { |cell| cell.match(COLUMN_TYPE) }
end
15
29
 
16
- # Parse the input data which may either be a path name, CSV string or array of arrays
17
- def self.parse(table:, options: {})
18
- header = Header.new
30
+ # Strip empty columns from all data rows.
31
+ #
32
+ # @param rows [Array<Array<String>>]
33
+ # @return [Array<String>, nil] - the header row, shifted off the front of rows once any
34
+ # empty columns have been deleted from every row; nil when rows is empty.
35
def self.strip_empty_columns(rows:)
  # A column is empty when its cell in the first (header) row is blank.
  blank_indices = empty_columns?(row: rows.first)
  Data.strip_columns(data: rows, empty_columns: blank_indices) if blank_indices.any?

  # Take the header row out of the data array and hand it back.
  rows.shift
end
42
+
43
+ # Classify and build a dictionary of all input and output columns.
44
+ #
45
+ # @param row [Array<String>] - the header row after removing any empty columns.
46
+ # @return [Columns::Dictionary] - column dictionary holding the input and output column hashes.
47
def self.dictionary(row:)
  # Thread one dictionary through every header cell; each step returns the
  # dictionary to be used for the next cell.
  row.each_with_index.reduce(Columns::Dictionary.new) do |dict, (cell, index)|
    parse_cell(cell: cell, index: index, dictionary: dict)
  end
end
56
+
57
# Split a header cell into its column type and column name.
#
# @param cell [String] header cell text.
# @return [Array] two elements: the down-cased type as a symbol, then the column name.
# @raise [CellValidationError] if the cell is not a well formed header or its name is
#   rejected; the message always quotes the offending cell.
def self.header_column?(cell:)
  parts = COLUMN_TYPE.match(cell)
  raise CellValidationError, 'column name is not well formed' unless parts

  type = parts['type']&.downcase&.to_sym
  [type, column_name(type: type, name: parts['name'])]
rescue CellValidationError => error
  # Re-raise with the whole header cell prepended to the specific complaint.
  raise CellValidationError,
        "header column '#{cell}' is not valid as the #{error.message}"
end
69
+ private_class_method :header_column?
70
+
71
+ # Array of all empty column indices.
72
def self.empty_columns?(row:)
  # No row at all means there are no empty columns to report.
  return [] if row.nil?

  # Collect the index of every cell that is exactly the empty string.
  row.each_with_index.each_with_object([]) do |(cell, index), blanks|
    blanks << index if cell == ''
  end
end
78
+ private_class_method :empty_columns?
79
+
80
# Work out the column name for a header cell.
#
# @param type [Symbol] column type taken from the header cell.
# @param name [String, nil] raw name text taken from the header cell.
# @return [Symbol, nil] the formatted name; nil for a nameless column whose type is
#   listed in COLUMN_TYPE_ANONYMOUS.
# @raise [CellValidationError] if the name is missing and the type requires one.
def self.column_name(type:, name:)
  return format_column_name(name) if name.present?

  # Only the anonymous column types may go without a name.
  raise CellValidationError, 'column name is missing' unless COLUMN_TYPE_ANONYMOUS.member?(type)

  nil
end
86
+
87
# Turn raw header name text into a symbol.
#
# @param name [String] raw name text.
# @return [Symbol] the name with surrounding whitespace removed and spaces
#   replaced by underscores.
# @raise [CellValidationError] if the result does not match COLUMN_NAME.
def self.format_column_name(name)
  candidate = name.strip.tr("\s", '_')

  unless COLUMN_NAME.match(candidate)
    raise CellValidationError, "column name '#{name}' contains invalid characters"
  end

  candidate.to_sym
end
94
+
95
+ # Returns the normalized column type, along with an indication if
96
+ # the column is text only
97
def self.column_type(type)
  # The header types that map onto a plain :in or :out column with a known
  # text-only flag.
  known = {
    :'in/text' => [:in, true],
    :cond => [:in, false],
    :'out/text' => [:out, true]
  }

  # Any other type is passed through; whether it is text only is not yet known.
  known.fetch(type) { [type, nil] }
end
113
+
114
# Parse one header cell and record its column in the dictionary.
#
# @param cell [String] header cell text.
# @param index [Integer] the cell's position in the header row.
# @param dictionary [Columns::Dictionary] dictionary built so far.
# @return [Columns::Dictionary] the dictionary handed back by dictionary_entry.
def self.parse_cell(cell:, index:, dictionary:)
  raw_type, name = header_column?(cell: cell)

  # Collapse the header type to its normalized form and its text-only flag.
  normalized_type, text_only = Header.column_type(raw_type)
  entry = Columns::Entry.new(name, text_only)

  dictionary_entry(dictionary: dictionary, type: normalized_type, entry: entry, index: index)
end
124
+ private_class_method :parse_cell
125
+
126
# File a column entry under the right hash of the dictionary.
#
# @param dictionary [Columns::Dictionary] dictionary to update.
# @param type [Symbol] normalized column type; only :in and :out are handled.
# @param entry [Columns::Entry] the entry to store.
# @param index [Integer] column index, used as the hash key.
# @return [Columns::Dictionary] the same dictionary.
# @raise [RuntimeError] for any other column type.
def self.dictionary_entry(dictionary:, type:, entry:, index:)
  # Planned feature - header columns with a function for setting the value:
  #   when :set, :'set/nil', :'set/blank'
  #     # Default function will set the input value unconditionally or conditionally
  #     dictionary.defaults[index] =
  #       Columns::Default.new(entry.name, nil, default_if(type))
  #
  #     # Treat set: as an in: column
  #     dictionary.ins[index] = entry
  target =
    if type == :in
      dictionary.ins
    elsif type == :out
      dictionary.outs
    else
      raise "internal error - column type #{type} not recognised"
    end

  target[index] = entry
  dictionary
end
149
+ private_class_method :dictionary_entry
150
+
151
+ # def self.default_if(type)
152
+ # return nil if type == :set
153
+ # return :nil? if type == :'set/nil'
154
+ # :blank?
155
+ # end
156
+ # private_class_method :default_if
22
157
  end
23
158
  end