csv_decision 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/.coveralls.yml +2 -0
  3. data/.rubocop.yml +16 -4
  4. data/.travis.yml +10 -0
  5. data/CHANGELOG.md +2 -0
  6. data/Gemfile +3 -0
  7. data/LICENSE +21 -0
  8. data/README.md +133 -19
  9. data/benchmark.rb +143 -0
  10. data/csv_decision.gemspec +8 -6
  11. data/lib/csv_decision.rb +18 -4
  12. data/lib/csv_decision/columns.rb +69 -0
  13. data/lib/csv_decision/data.rb +31 -16
  14. data/lib/csv_decision/decide.rb +47 -0
  15. data/lib/csv_decision/decision.rb +105 -0
  16. data/lib/csv_decision/header.rb +143 -8
  17. data/lib/csv_decision/input.rb +49 -0
  18. data/lib/csv_decision/load.rb +31 -0
  19. data/lib/csv_decision/matchers.rb +131 -0
  20. data/lib/csv_decision/matchers/numeric.rb +37 -0
  21. data/lib/csv_decision/matchers/pattern.rb +76 -0
  22. data/lib/csv_decision/matchers/range.rb +76 -0
  23. data/lib/csv_decision/options.rb +80 -50
  24. data/lib/csv_decision/parse.rb +77 -23
  25. data/lib/csv_decision/scan_row.rb +68 -0
  26. data/lib/csv_decision/table.rb +34 -6
  27. data/spec/csv_decision/columns_spec.rb +86 -0
  28. data/spec/csv_decision/data_spec.rb +16 -3
  29. data/spec/csv_decision/decision_spec.rb +30 -0
  30. data/spec/csv_decision/input_spec.rb +54 -0
  31. data/spec/csv_decision/load_spec.rb +28 -0
  32. data/spec/csv_decision/matchers/numeric_spec.rb +84 -0
  33. data/spec/csv_decision/matchers/pattern_spec.rb +183 -0
  34. data/spec/csv_decision/matchers/range_spec.rb +132 -0
  35. data/spec/csv_decision/options_spec.rb +67 -0
  36. data/spec/csv_decision/parse_spec.rb +2 -3
  37. data/spec/csv_decision/simple_example_spec.rb +45 -0
  38. data/spec/csv_decision/table_spec.rb +151 -0
  39. data/spec/data/invalid/invalid_header1.csv +4 -0
  40. data/spec/data/invalid/invalid_header2.csv +4 -0
  41. data/spec/data/invalid/invalid_header3.csv +4 -0
  42. data/spec/data/invalid/invalid_header4.csv +4 -0
  43. data/spec/data/valid/options_in_file1.csv +5 -0
  44. data/spec/data/valid/options_in_file2.csv +5 -0
  45. data/spec/data/valid/simple_example.csv +10 -0
  46. data/spec/data/valid/valid.csv +4 -4
  47. data/spec/spec_helper.rb +6 -0
  48. metadata +89 -12
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'ice_nine'
4
+ require 'ice_nine/core_ext/object'
5
+
6
+ # CSV Decision: CSV based Ruby decision tables.
7
+ # Created December 2017 by Brett Vickers
8
+ # See LICENSE and README.md for details.
9
module CSVDecision
  # Parse the input hash supplied for a decision table lookup.
  module Input
    # Validate the input hash and collect the value of every input column.
    #
    # @param table [Object] decision table; only +table.columns.ins+ is read here
    #   (presumably a CSVDecision::Table - confirm against the caller)
    # @param input [Hash] non-empty input hash
    # @param symbolize_keys [Boolean] when true the keys are deep-symbolized
    #   and the resulting hash is frozen
    # @return [Hash] +{ hash: <input hash>, scan_cols: { col => value } }+
    # @raise [ArgumentError] if input is not a non-empty Hash
    def self.parse(table:, input:, symbolize_keys:)
      validate(input)

      # For safety the default is to symbolize keys and make a copy of the hash.
      # However, if this is turned off assume keys are symbolized and use the
      # caller's hash as is.
      # TODO: Is it OK to mutate the hash in this case?
      input = input.deep_symbolize_keys if symbolize_keys

      parsed_input = parse_input(table: table, input: input)

      # Only the hash produced by deep_symbolize_keys is frozen; when
      # symbolize_keys is off the caller's own hash is left unfrozen.
      parsed_input[:hash].freeze if symbolize_keys

      parsed_input
    end

    # @param input [Object] candidate input
    # @return [nil]
    # @raise [ArgumentError] unless input is a non-empty Hash
    def self.validate(input)
      return if input.is_a?(Hash) && !input.empty?
      raise ArgumentError, 'input must be a non-empty hash'
    end
    private_class_method :validate

    # Look up each input column's value by column name.
    # A column whose name is missing from the input maps to nil.
    #
    # @param table [Object] see {parse}
    # @param input [Hash] input hash, read by +column.name+
    # @return [Hash] +{ hash: input, scan_cols: { col => value } }+
    def self.parse_input(table:, input:)
      scan_cols = table.columns.ins.each_with_object({}) do |(col, column), values|
        values[col] = input[column.name]
      end

      { hash: input, scan_cols: scan_cols }
    end
    private_class_method :parse_input
  end
end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ # CSV Decision: CSV based Ruby decision tables.
4
+ # Created December 2017 by Brett Vickers
5
+ # See LICENSE and README.md for details.
6
module CSVDecision
  # Load all the CSV files located in the designated folder path.
  #
  # @param path [Pathname] - directory containing CSV files
  # @param options [Hash] - supplied options hash for table creation
  # @return [Hash] - frozen hash keyed by each file's base name (as a Symbol);
  #   values are whatever CSVDecision.parse returns (presumably CSVDecision::Table)
  # @raise [ArgumentError] if path is not a Pathname or is not a directory
  def self.load(path, options = {})
    Load.path(path: path, options: options)
  end

  # Load all CSV files located in the specified folder.
  module Load
    # File name pattern (shell glob syntax) selecting the files to load.
    CSV_GLOB = '*.csv'

    # Parse every CSV file found directly inside the folder.
    #
    # @param path [Pathname] - directory containing CSV files
    # @param options [Hash] - options passed through to CSVDecision.parse
    # @return [Hash] - frozen hash of parse results keyed by table name
    # @raise [ArgumentError] if path is not a Pathname or is not a directory
    def self.path(path:, options:)
      raise ArgumentError, 'path argument must be a Pathname' unless path.is_a?(Pathname)
      raise ArgumentError, 'path argument not a valid folder' unless path.directory?

      csv_files(path).each_with_object({}) do |file, tables|
        table_name = file.basename('.csv').to_s.to_sym
        tables[table_name] = CSVDecision.parse(file, options)
      end.freeze
    end

    # List the CSV files in the folder, sorted by path.
    #
    # The pattern is matched against each entry's base name only, so glob
    # metacharacters occurring in the folder path itself (for example
    # "data[1]") are never interpreted as part of the pattern. Sorting makes
    # the order of the resulting hash independent of the file system.
    # Entries that are not files (such as a sub-folder named "x.csv") are skipped.
    #
    # @param path [Pathname] - existing directory
    # @return [Array<Pathname>]
    def self.csv_files(path)
      path.children
          .select { |child| File.fnmatch?(CSV_GLOB, child.basename.to_s) && child.file? }
          .sort
    end
    private_class_method :csv_files
  end
end
@@ -0,0 +1,131 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'values'
4
+
5
+ # CSV Decision: CSV based Ruby decision tables.
6
+ # Created December 2017 by Brett Vickers
7
+ # See LICENSE and README.md for details.
8
+ module CSVDecision
9
+ # Value object for a cell proc
10
+ Proc = Value.new(:type, :function)
11
+
12
+ # Value object for a data row indicating which columns are constants versus procs.
13
+ # ScanRow = Struct.new(:constants, :procs) do
14
+ # def scan_columns(columns:, matchers:, row:)
15
+ # columns.each_pair do |col, column|
16
+ # # Empty cell matches everything, and so never needs to be scanned
17
+ # next if row[col] == ''
18
+ #
19
+ # # If the column is text only then no special matchers need be invoked
20
+ # next constants << col if column.text_only
21
+ #
22
+ # # Need to scan the cell against all matchers
23
+ # row[col] = scan_cell(col: col, matchers: matchers, cell: row[col])
24
+ # end
25
+ # end
26
+ #
27
+ # def match_constants?(row:, scan_cols:)
28
+ # constants.each do |col|
29
+ # value = scan_cols.fetch(col, [])
30
+ # # This only happens if the column is indexed
31
+ # next if value == []
32
+ # return false unless row[col] == value
33
+ # end
34
+ #
35
+ # true
36
+ # end
37
+ #
38
+ # def match_procs?(row:, input:)
39
+ # hash = input[:hash]
40
+ # scan_cols = input[:scan_cols]
41
+ #
42
+ # procs.each do |col|
43
+ # return false unless Decide.eval_matcher(proc: row[col],
44
+ # value: scan_cols[col],
45
+ # hash: hash)
46
+ # end
47
+ #
48
+ # true
49
+ # end
50
+ #
51
+ # private
52
+ #
53
+ # def scan_cell(col:, matchers:, cell:)
54
+ # # Scan the cell against all the matchers
55
+ # proc = Matchers.scan(matchers: matchers, cell: cell)
56
+ #
57
+ # if proc
58
+ # procs << col
59
+ # return proc
60
+ # end
61
+ #
62
+ # constants << col
63
+ # cell
64
+ # end
65
+ # end
66
+
67
# Methods to assign a matcher to data cells
module Matchers
  # Negation sign for ranges and functions
  NEGATE = '!'

  # All regular expressions used for matching are anchored
  #
  # @param value [String] regular expression source
  # @return [Regexp] frozen regexp that must match the whole string
  def self.regexp(value)
    Regexp.new("\\A(#{value})\\z").freeze
  end

  # Regular expression used to recognise a numeric string with or without a decimal point.
  # The named group +decimal+ captures the decimal point, or '' for an integer.
  NUMERIC = '[-+]?\d*(?<decimal>\.?)\d+'
  NUMERIC_RE = regexp(NUMERIC)

  # Validate a numeric value and convert it to an Integer or BigDecimal if a valid string.
  #
  # @param value [nil, String, Integer, BigDecimal]
  # @return [nil, Integer, BigDecimal] nil for anything that is not an Integer,
  #   a BigDecimal or a numeric string (this includes Float values)
  def self.numeric(value)
    return value if value.is_a?(Integer) || value.is_a?(BigDecimal)
    return unless value.is_a?(String)

    to_numeric(value)
  end

  # Validate a numeric string and convert it to an Integer or BigDecimal.
  #
  # @param value [String]
  # @return [nil, Integer, BigDecimal] nil if the string is not numeric
  def self.to_numeric(value)
    return unless (match = NUMERIC_RE.match(value))
    return value.to_i if match['decimal'] == ''

    # Kernel#BigDecimal is used because BigDecimal.new no longer exists in
    # current bigdecimal releases. NUMERIC_RE is anchored and requires digits
    # after any decimal point, so the string can never end with a bare '.'.
    BigDecimal(value)
  end

  # Parse the supplied input columns for the row supplied using an array of matchers.
  #
  # @param columns [Hash] - Input columns hash
  # @param matchers [Array]
  # @param row [Array]
  # @return [ScanRow] the scan row after it has scanned the columns
  def self.parse(columns:, matchers:, row:)
    # The scan row records which columns need only a simple constant match
    # and which columns require special matchers.
    scan_row = ScanRow.new
    scan_row.scan_columns(columns: columns, matchers: matchers, row: row)

    scan_row
  end

  # Try each matcher in turn against the cell.
  #
  # @param matchers [Array<#matches?>]
  # @param cell [Object] cell value passed to each matcher
  # @return [Object, false] the first truthy result of +matches?+, else false
  def self.scan(matchers:, cell:)
    matchers.each do |matcher|
      matched = matcher.matches?(cell)
      return matched if matched
    end

    # Must be a simple constant
    false
  end
end
131
+ end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ # CSV Decision: CSV based Ruby decision tables.
4
+ # Created December 2017 by Brett Vickers
5
+ # See LICENSE and README.md for details.
6
+ module CSVDecision
7
+ # Methods to assign a matcher to data cells
8
+ module Matchers
9
# Match cell against a Ruby-like numeric comparison
class Numeric
  # A comparison operator followed by the value to compare against.
  COMPARISON = /\A(?<comparator><=|>=|<|>|!=)\s*(?<value>\S.*)\z/

  # One two-argument proc per operator. The first argument is the number
  # taken from the cell, the second is the input value being tested. An input
  # that Matchers.numeric cannot convert gives nil, because of the safe
  # navigation call.
  COMPARATORS = %w[> >= < <= !=].each_with_object({}) do |operator, table|
    table[operator] = proc do |numeric_cell, value|
      Matchers.numeric(value)&.public_send(operator, numeric_cell)
    end
  end.freeze

  # This matcher does not need access to the options hash
  def initialize(_options = nil); end

  # @param cell [String] data cell contents
  # @return [false, Object] false unless the cell is an operator followed by
  #   a number; otherwise the value built by Proc.with, wrapping the
  #   comparison proc curried with the cell's number
  def matches?(cell)
    parsed = COMPARISON.match(cell)
    threshold = parsed && Matchers.numeric(parsed['value'])
    return false unless threshold

    comparison = COMPARATORS[parsed['comparator']].curry[threshold]
    Proc.with(type: :proc, function: comparison)
  end
end
36
+ end
37
+ end
@@ -0,0 +1,76 @@
1
+ # frozen_string_literal: true
2
+
3
+ # CSV Decision: CSV based Ruby decision tables.
4
+ # Created December 2017 by Brett Vickers
5
+ # See LICENSE and README.md for details.
6
+ module CSVDecision
7
+ # Methods to assign a matcher to data cells
8
+ module Matchers
9
# Match cell against a regular expression pattern
class Pattern
  # A comparator (=~, !~ or !=) followed by a value.
  EXPLICIT_COMPARISON = /\A(?<comparator>=~|!~|!=)\s*(?<value>\S.*)\z/
  # The same, except that the comparator may be omitted.
  IMPLICIT_COMPARISON = /\A(?<comparator>=~|!~|!=)?\s*(?<value>\S.*)\z/

  # Does the input value match the pattern?
  #
  # Regexp#match raises TypeError for values such as an Integer, so anything
  # that is not a String or Symbol is treated as not matching (as nil is).
  #
  # @param pattern [Regexp]
  # @param value [Object] input value being tested
  # @return [Boolean]
  def self.pattern_match?(pattern, value)
    return false unless value.is_a?(String) || value.is_a?(Symbol)

    !pattern.match(value).nil?
  end
  private_class_method :pattern_match?

  # One two-argument proc per comparator; the first argument is the pattern
  # (a plain String for '!='), the second is the input value being tested.
  PATTERN_LAMBDAS = {
    '!=' => proc { |pattern, value| pattern != value }.freeze,
    '=~' => proc { |pattern, value| pattern_match?(pattern, value) }.freeze,
    '!~' => proc { |pattern, value| !pattern_match?(pattern, value) }.freeze
  }.freeze

  # Split a cell into a comparator and a value.
  #
  # @param cell [String] data cell contents
  # @param explicit [Boolean] true if the comparator is mandatory
  # @return [Array, false, nil] see {parse}; false if the cell does not have
  #   the required form
  def self.regexp?(cell:, explicit:)
    # By default a regexp pattern must use an explicit comparator.
    # EXPLICIT_COMPARISON only matches when a comparator is present, so the
    # comparator can be nil only when the implicit form is used.
    comparison = explicit ? EXPLICIT_COMPARISON : IMPLICIT_COMPARISON
    match = comparison.match(cell)
    return false if match.nil?

    parse(comparator: match['comparator'], value: match['value'])
  end

  # @param comparator [String, nil] '=~', '!~', '!=' or nil if omitted
  # @param value [String] text following the comparator
  # @return [Array, false, nil] +[comparator, value]+, where the comparator
  #   is nil if none was given and the value contains no non-word character;
  #   false for a blank value; nil for a symbol name
  def self.parse(comparator:, value:)
    return false if value.blank?

    # We cannot do a regexp comparison against a symbol name.
    # (Maybe we should add this feature?)
    return if value[0] == ':'

    # If no comparator then the implicit option must be on
    comparator = regexp_implicit(value) if comparator.nil?

    [comparator, value]
  end

  # @param value [String]
  # @return [String, nil] '=~' if the value contains a non-word character
  def self.regexp_implicit(value)
    # rubocop: disable Style/CaseEquality
    return unless /\W/ === value
    # rubocop: enable Style/CaseEquality

    # Make the implicit comparator explicit
    '=~'
  end

  # @param options [Hash] only the :regexp_implicit key is read
  def initialize(options = {})
    # By default regexp's must have an explicit comparator
    @regexp_explicit = !options[:regexp_implicit]
  end

  # @param cell [String] data cell contents
  # @return [false, Object] false if the cell is not a pattern, otherwise the
  #   value built by Proc.with wrapping the curried comparison proc
  def matches?(cell)
    comparator, value = Pattern.regexp?(cell: cell, explicit: @regexp_explicit)

    # We could not find a regexp pattern - maybe it's a simple string or something else?
    return false unless comparator

    # No need for a regular expression if we have simple string inequality
    pattern = comparator == '!=' ? value : Matchers.regexp(value)

    Proc.with(type: :proc,
              function: PATTERN_LAMBDAS[comparator].curry[pattern].freeze)
  end
end
75
+ end
76
+ end
@@ -0,0 +1,76 @@
1
+ # frozen_string_literal: true
2
+
3
+ # CSV Decision: CSV based Ruby decision tables.
4
+ # Created December 2017 by Brett Vickers
5
+ # See LICENSE and README.md for details.
6
+ module CSVDecision
7
+ # Methods to assign a matcher to data cells
8
+ module Matchers
9
# Match cell against a Ruby-like range
class Range
  # Range types are .. or ...
  TYPE = '(\.\.\.|\.\.)'

  # Build the anchored regexp for an optionally negated range whose two end
  # points both have the form given by +value+.
  def self.range_re(value)
    Matchers.regexp(
      "(?<negate>#{NEGATE}?)\\s*(?<min>#{value})(?<type>#{TYPE})(?<max>#{value})"
    )
  end
  private_class_method :range_re

  NUMERIC_RANGE = range_re(Matchers::NUMERIC)

  # One or more alphanumeric characters
  ALNUM = '[[:alnum:]][[:alnum:]]*'
  ALNUM_RANGE = range_re(ALNUM)

  # Convert an end point with the named Matchers method, if one is given.
  def self.convert(value, method)
    return value unless method

    Matchers.send(method, value)
  end
  private_class_method :convert

  # @param match [MatchData] result of matching one of the range regexps
  # @param coerce [Symbol, nil] Matchers method used to convert the end points
  # @return [Array] the negation flag followed by the Ruby range
  def self.range(match, coerce: nil)
    lower = convert(match['min'], coerce)
    upper = convert(match['max'], coerce)
    exclusive = match['type'] == '...'

    [match['negate'] == Matchers::NEGATE, ::Range.new(lower, upper, exclusive)]
  end

  # Lambda testing an input value, converted by Matchers.numeric, against the range.
  def self.numeric_range(negate, range)
    if negate
      ->(value) { !range.include?(Matchers.numeric(value)) }
    else
      ->(value) { range.include?(Matchers.numeric(value)) }
    end
  end
  private_class_method :numeric_range

  # Lambda testing an input value, as supplied, against the range.
  def self.alnum_range(negate, range)
    if negate
      ->(value) { !range.include?(value) }
    else
      ->(value) { range.include?(value) }
    end
  end
  private_class_method :alnum_range

  # @param match [MatchData] result of matching one of the range regexps
  # @param coerce [Symbol, nil] given for numeric ranges only
  # @return [Object] value built by Proc.with wrapping the frozen range test
  def self.proc(match:, coerce: nil)
    negate, span = range(match, coerce: coerce)
    builder = coerce ? :numeric_range : :alnum_range

    Proc.with(type: :proc, function: Range.send(builder, negate, span).freeze)
  end

  # This matcher does not need access to the options hash
  def initialize(_options = nil); end

  # @param cell [String] data cell contents
  # @return [false, Object] false if the cell is not a range
  def matches?(cell)
    # Numeric ranges take precedence over alphanumeric ones.
    numeric = NUMERIC_RANGE.match(cell)
    return Range.proc(match: numeric, coerce: :to_numeric) if numeric

    alnum = ALNUM_RANGE.match(cell)
    return Range.proc(match: alnum) if alnum

    false
  end
end
75
+ end
76
+ end
@@ -4,82 +4,112 @@
4
4
  # Created December 2017 by Brett Vickers
5
5
  # See LICENSE and README.md for details.
6
6
  module CSVDecision
7
- VALID_OPTIONS = %i[
8
- force_encoding
9
- ascii_only?
10
- first_match
11
- regexp_implict
12
- text_only
13
- index
14
- tables
7
+ # Specialized cell value matchers beyond simple string compares.
8
+ # By default all these matchers are tried in the specified order.
9
+ DEFAULT_MATCHERS = [
10
+ Matchers::Range,
11
+ Matchers::Numeric,
12
+ Matchers::Pattern
15
13
  ].freeze
16
14
 
17
- OPTION_DEFAULTS = {
18
- force_encoding: 'UTF-8',
19
- ascii_only?: true,
15
+ # All valid options with their default values.
16
+ VALID_OPTIONS = {
20
17
  first_match: true,
21
- regexp_implict: false,
22
- text_only: false
18
+ regexp_implicit: false,
19
+ text_only: false,
20
+ matchers: DEFAULT_MATCHERS
23
21
  }.freeze
24
22
 
25
- CSV_OPTION_NAMES = %i[first_match accumulate regexp_implict text_only].freeze
23
+ # These options may appear in the CSV file before the header row.
24
+ # Convert them to a normalized option key value pair.
25
+ CSV_OPTION_NAMES = {
26
+ first_match: [:first_match, true],
27
+ accumulate: [:first_match, false],
28
+ regexp_implicit: [:regexp_implicit, true],
29
+ text_only: [:text_only, true]
30
+ }.freeze
26
31
 
27
- # Parse the CSV file and create a new decision table object
28
- class Options
29
- def self.default(options)
30
- result = options.deep_dup
32
+ # Validate and normalize the options hash supplied.
33
+ module Options
34
+ # Validate options and supply default values for any options not explicitly set.
35
+ #
36
+ # @param options [Hash] - input options hash supplied
37
+ # @return [Hash] - options hash filled in with all required default values
38
+ def self.normalize(options)
39
+ validate(options)
40
+ default(options)
41
+ end
31
42
 
32
- # Default any missing options that have defaults defined
33
- OPTION_DEFAULTS.each_pair do |key, value|
34
- next if result.key?(key)
35
- result[key] = value
36
- end
43
+ # Read any options supplied in the CSV file placed before the header row.
44
+ #
45
+ # @param rows [Array<Array<String>>] - table data rows.
46
+ # @param options [Hash] - input options hash built so far
47
+ # @return [Hash] - options hash overridden with any option values in the CSV file
48
+ def self.from_csv(rows:, options:)
49
+ row = rows.first
50
+ return options if row.nil?
37
51
 
38
- result
39
- end
52
+ # Have we hit the header row?
53
+ return options if Header.row?(row)
40
54
 
41
- def self.cell?(cell)
42
- return false if cell == ''
55
+ # Scan each cell looking for valid option values
56
+ options = scan_cells(row: row, options: options)
43
57
 
44
- key = cell.downcase.to_sym
45
- return [key, true] if CSV_OPTION_NAMES.include?(key)
58
+ rows.shift
59
+ from_csv(rows: rows, options: options)
46
60
  end
47
61
 
48
- def self.valid?(options)
49
- invalid_options = options.keys - VALID_OPTIONS
62
+ def self.scan_cells(row:, options:)
63
+ # Scan each cell looking for valid option values
64
+ row.each do |cell|
65
+ next if cell == ''
50
66
 
51
- return true if invalid_options.empty?
67
+ key, value = option?(cell)
68
+ options[key] = value if key
69
+ end
52
70
 
53
- raise ArgumentError, "invalid option(s) supplied: #{invalid_options.inspect}"
71
+ options
54
72
  end
73
+ private_class_method :scan_cells
55
74
 
56
- def self.from_csv(table, attributes)
57
- row = table.rows.first
58
- return attributes unless row
75
+ def self.default(options)
76
+ result = options.dup
59
77
 
60
- return attributes if Header.row?(row)
78
+ # The user may override the list of matchers to be used
79
+ result[:matchers] = matchers(result)
61
80
 
62
- row.each do |cell|
63
- key, value = Options.cell?(cell)
64
- attributes[key] = value if key
81
+ # Supply any missing options with default values
82
+ VALID_OPTIONS.each_pair do |key, value|
83
+ next if result.key?(key)
84
+ result[key] = value
65
85
  end
66
86
 
67
- table.rows.shift
68
- from_csv(table, attributes)
87
+ result
69
88
  end
89
+ private_class_method :default
70
90
 
71
- attr_accessor :attributes
91
+ def self.matchers(options)
92
+ return [] if options.key?(:matchers) && !options[:matchers]
93
+ return [] if options[:text_only]
94
+ return DEFAULT_MATCHERS unless options.key?(:matchers)
72
95
 
73
- def initialize(options)
74
- Options.valid?(options)
75
- @attributes = Options.default(options)
96
+ options[:matchers]
76
97
  end
98
+ private_class_method :matchers
77
99
 
78
- def from_csv(table)
79
- # Options on the CSV file override the ones passed in to the method
80
- @attributes = Options.from_csv(table, @attributes)
100
+ def self.option?(cell)
101
+ key = cell.downcase.to_sym
102
+ return CSV_OPTION_NAMES[key] if CSV_OPTION_NAMES.key?(key)
103
+ end
104
+ private_class_method :option?
105
+
106
+ def self.validate(options)
107
+ invalid_options = options.keys - VALID_OPTIONS.keys
81
108
 
82
- self
109
+ return if invalid_options.empty?
110
+
111
+ raise ArgumentError, "invalid option(s) supplied: #{invalid_options.inspect}"
83
112
  end
113
+ private_class_method :validate
84
114
  end
85
115
  end