csv_decision 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.coveralls.yml +2 -0
- data/.rubocop.yml +16 -4
- data/.travis.yml +10 -0
- data/CHANGELOG.md +2 -0
- data/Gemfile +3 -0
- data/LICENSE +21 -0
- data/README.md +133 -19
- data/benchmark.rb +143 -0
- data/csv_decision.gemspec +8 -6
- data/lib/csv_decision.rb +18 -4
- data/lib/csv_decision/columns.rb +69 -0
- data/lib/csv_decision/data.rb +31 -16
- data/lib/csv_decision/decide.rb +47 -0
- data/lib/csv_decision/decision.rb +105 -0
- data/lib/csv_decision/header.rb +143 -8
- data/lib/csv_decision/input.rb +49 -0
- data/lib/csv_decision/load.rb +31 -0
- data/lib/csv_decision/matchers.rb +131 -0
- data/lib/csv_decision/matchers/numeric.rb +37 -0
- data/lib/csv_decision/matchers/pattern.rb +76 -0
- data/lib/csv_decision/matchers/range.rb +76 -0
- data/lib/csv_decision/options.rb +80 -50
- data/lib/csv_decision/parse.rb +77 -23
- data/lib/csv_decision/scan_row.rb +68 -0
- data/lib/csv_decision/table.rb +34 -6
- data/spec/csv_decision/columns_spec.rb +86 -0
- data/spec/csv_decision/data_spec.rb +16 -3
- data/spec/csv_decision/decision_spec.rb +30 -0
- data/spec/csv_decision/input_spec.rb +54 -0
- data/spec/csv_decision/load_spec.rb +28 -0
- data/spec/csv_decision/matchers/numeric_spec.rb +84 -0
- data/spec/csv_decision/matchers/pattern_spec.rb +183 -0
- data/spec/csv_decision/matchers/range_spec.rb +132 -0
- data/spec/csv_decision/options_spec.rb +67 -0
- data/spec/csv_decision/parse_spec.rb +2 -3
- data/spec/csv_decision/simple_example_spec.rb +45 -0
- data/spec/csv_decision/table_spec.rb +151 -0
- data/spec/data/invalid/invalid_header1.csv +4 -0
- data/spec/data/invalid/invalid_header2.csv +4 -0
- data/spec/data/invalid/invalid_header3.csv +4 -0
- data/spec/data/invalid/invalid_header4.csv +4 -0
- data/spec/data/valid/options_in_file1.csv +5 -0
- data/spec/data/valid/options_in_file2.csv +5 -0
- data/spec/data/valid/simple_example.csv +10 -0
- data/spec/data/valid/valid.csv +4 -4
- data/spec/spec_helper.rb +6 -0
- metadata +89 -12
@@ -0,0 +1,69 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# CSV Decision: CSV based Ruby decision tables.
|
4
|
+
# Created December 2017 by Brett Vickers
|
5
|
+
# See LICENSE and README.md for details.
|
6
|
+
module CSVDecision
  # Dictionary of all this table's columns - inputs, outputs etc.
  class Columns
    # Value object used for column dictionary entries.
    Entry = Struct.new(:name, :text_only)

    # Value object used for columns with defaults.
    Default = Struct.new(:name, :function, :default_if)

    # Dictionary of all data columns.
    # Note that the key of each hash is the header cell's array column index.
    # Note that input and output columns can be interspersed and need not have unique names.
    class Dictionary
      attr_accessor :ins
      attr_accessor :outs
      attr_accessor :path
      attr_accessor :defaults

      # Start with every category of column empty.
      def initialize
        @ins = {}
        @outs = {}

        # Path for the input hash - optional
        @path = {}

        # Hash of columns that require defaults to be set
        @defaults = {}
      end
    end

    # Dictionary of all data columns.
    # Is nil when the table had no header row to build it from.
    #
    # @return [Dictionary, nil]
    attr_reader :dictionary

    # Input columns, keyed by column index.
    #
    # @return [Hash] empty when no dictionary could be built.
    def ins
      @dictionary ? @dictionary.ins : {}
    end

    # Output columns, keyed by column index.
    #
    # @return [Hash] empty when no dictionary could be built.
    def outs
      @dictionary ? @dictionary.outs : {}
    end

    # Input columns with defaults specified (planned feature)
    # def defaults
    #   @dictionary.defaults
    # end

    # Input hash path (planned feature)
    # def path
    #   @dictionary.path
    # end

    # Build the column dictionary from the table's header row and freeze this object.
    #
    # @param table [#rows] object whose +rows+ array starts with the header row;
    #   the header row is removed from that array as a side effect.
    def initialize(table)
      # If a column does not have a valid header cell, then it's empty of data.
      # Return the stripped header row, removing it from the data array.
      row = Header.strip_empty_columns(rows: table.rows)

      # Build a dictionary of all valid data columns from the header row.
      # With no header row the dictionary stays nil; #ins and #outs cope with that.
      @dictionary = Header.dictionary(row: row) if row

      freeze
    end
  end
end
|
data/lib/csv_decision/data.rb
CHANGED
@@ -10,21 +10,32 @@ module CSVDecision
|
|
10
10
|
|
11
11
|
# Methods to load data from a file, CSV string or array of arrays
|
12
12
|
module Data
|
13
|
+
CSV_OPTIONS = { encoding: 'UTF-8', skip_blanks: true }.freeze
|
14
|
+
|
13
15
|
# Parse the input data which may either be a file path name, CSV string or
|
14
16
|
# array of arrays. Strips out empty columns/rows and comment cells
|
15
|
-
def self.to_array(data
|
16
|
-
strip_rows(data: data_array(data)
|
17
|
+
# Load the input into an array of arrays and strip it of unusable rows.
#
# @param data [Object] anything accepted by +data_array+.
# @return [Array<Array<String>>] the result of +strip_rows+ applied to the loaded data.
def self.to_array(data:)
  loaded = data_array(data)
  strip_rows(data: loaded)
end
|
20
|
+
|
21
|
+
# Is the input a file reference rather than CSV text or an array?
#
# @param input [Object]
# @return [true, false] true only for Pathname and File objects.
def self.input_file?(input)
  input.is_a?(Pathname) || input.is_a?(File)
end
|
18
24
|
|
19
|
-
|
20
|
-
|
25
|
+
# Delete the given columns from every row of the data array, in place.
#
# @param data [Array<Array>] rows to strip; each row is mutated.
# @param empty_columns [Array<Integer>] indices of the columns to delete,
#   expressed relative to the rows as they are before any deletion.
# @return [Array<Integer>] the +empty_columns+ array, left unmodified.
def self.strip_columns(data:, empty_columns:)
  # Delete from the right-most column backwards: removing a column only shifts
  # the columns to its right, so the indices still to be processed stay valid
  # and the caller's index array never needs rewriting.
  empty_columns.uniq.sort.reverse_each do |col|
    data.each { |row| row.delete_at(col) }
  end

  empty_columns
end
|
21
32
|
|
22
33
|
# Parse the input data which may either be a file path name, CSV string or
|
23
34
|
# array of arrays
|
24
35
|
def self.data_array(input)
|
25
|
-
return CSV.read(input) if
|
36
|
+
return CSV.read(input, CSV_OPTIONS) if input_file?(input)
|
26
37
|
return input.deep_dup if input.is_a?(Array) && input[0].is_a?(Array)
|
27
|
-
return CSV.parse(input) if input.is_a?(String)
|
38
|
+
return CSV.parse(input, CSV_OPTIONS) if input.is_a?(String)
|
28
39
|
|
29
40
|
raise ArgumentError,
|
30
41
|
"#{input.class} input invalid; " \
|
@@ -32,10 +43,10 @@ module CSVDecision
|
|
32
43
|
end
|
33
44
|
private_class_method :data_array
|
34
45
|
|
35
|
-
def self.strip_rows(data
|
46
|
+
def self.strip_rows(data:)
|
36
47
|
rows = []
|
37
48
|
data.each do |row|
|
38
|
-
row = strip_cells(row: row
|
49
|
+
row = strip_cells(row: row)
|
39
50
|
rows << row if row.find { |cell| cell != '' }
|
40
51
|
end
|
41
52
|
rows
|
@@ -45,15 +56,19 @@ module CSVDecision
|
|
45
56
|
# Strip cells of leading/trailing spaces; treat comments as an empty cell.
|
46
57
|
# Non string values treated as empty cells.
|
47
58
|
# Non-ascii strings treated as empty cells by default.
|
48
|
-
def self.strip_cells(row
|
49
|
-
row.map!
|
50
|
-
next '' unless cell.is_a?(String)
|
51
|
-
cell = options[:force_encoding] ? cell.force_encoding(options[:force_encoding]) : cell
|
52
|
-
next '' if options[:ascii_only?] && !cell.ascii_only?
|
53
|
-
next '' if cell.lstrip[0] == COMMENT_CHARACTER
|
54
|
-
cell.strip
|
55
|
-
end
|
59
|
+
# Normalise every cell of a row via +strip_cell+.
#
# @param row [Array] the row to clean; it is modified in place.
# @return [Array<String>] the same row object, now holding the cleaned cells.
def self.strip_cells(row:)
  row.map! { |cell| strip_cell(cell) }
end
|
57
62
|
private_class_method :strip_cells
|
63
|
+
|
64
|
+
# Normalise a single cell value.
#
# @param cell [Object] raw cell value.
# @return [String] the cell stripped of leading/trailing whitespace, or '' when the
#   cell is not a String, is not pure ASCII, or starts with COMMENT_CHARACTER.
def self.strip_cell(cell)
  return '' unless cell.is_a?(String)

  # Re-tag a copy of the bytes as UTF-8 rather than the caller's own string:
  # String#force_encoding mutates its receiver and raises on a frozen string.
  text = cell.dup.force_encoding('UTF-8')
  return '' unless text.ascii_only?
  return '' if text.lstrip[0] == COMMENT_CHARACTER

  text.strip
end
|
72
|
+
private_class_method :strip_cell
|
58
73
|
end
|
59
74
|
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# CSV Decision: CSV based Ruby decision tables.
|
4
|
+
# Created December 2017 by Brett Vickers
|
5
|
+
# See LICENSE and README.md for details.
|
6
|
+
module CSVDecision
  # Main module for searching the decision table looking for one or more matches
  module Decide
    # Main method for making decisions.
    #
    # @param table [CSVDecision::Table]
    # @param input [Hash] - input hash (keys may or may not be symbolized)
    # @param symbolize_keys [true, false] - set to true if keys are symbolized and it's
    #   OK to mutate the input hash. Otherwise a copy of the input hash is symbolized.
    # @return [CSVDecision::Decision] the decision object, as returned by its scan of the table.
    def self.decide(table:, input:, symbolize_keys:)
      # Parse and transform the hash supplied as input
      parsed_input = Input.parse(table: table, input: input, symbolize_keys: symbolize_keys)

      # The decision object collects the results of the search and
      # calculates the final result
      decision = Decision.new(table: table, input: parsed_input)

      decision.scan(table: table, input: parsed_input)
    end

    # Does the table row match the parsed input?
    #
    # @param row [Array] the table data row being tested.
    # @param input [Hash] parsed input; its +:scan_cols+ entry is passed to the constant match.
    # @param scan_row [#match_constants?, #procs, #match_procs?] matcher object for this row.
    # @return [Object] false when the constants do not match, true when they match and
    #   there are no procs to run, otherwise whatever +scan_row.match_procs?+ returns.
    def self.matches?(row:, input:, scan_row:)
      constants_match = scan_row.match_constants?(row: row, scan_cols: input[:scan_cols])
      return false unless constants_match

      # Nothing further to evaluate once the constants have matched
      return true if scan_row.procs.empty?

      scan_row.match_procs?(row: row, input: input)
    end

    # Call a matcher function with the arguments it expects.
    #
    # @param proc [#function, #type] wrapper holding the callable and its type.
    # @param value [Object] the input value for the cell being matched.
    # @param hash [Hash] the input hash.
    # @return [Object] whatever the function returns.
    def self.eval_matcher(proc:, value:, hash:)
      function = proc.function

      # A symbol expression just needs to be passed the input hash
      return function[hash] if proc.type == :expression

      # All other procs can take one or two args
      function.arity == 1 ? function[value] : function[value, hash]
    end
  end
end
|
@@ -0,0 +1,105 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# CSV Decision: CSV based Ruby decision tables.
|
4
|
+
# Created December 2017 by Brett Vickers
|
5
|
+
# See LICENSE and README.md for details.
|
6
|
+
module CSVDecision
  # Accumulate the matching row(s) and calculate the final result
  class Decision
    # @param table [#options, #columns, #outs_functions] the decision table being searched.
    # @param input [Hash] parsed input; its +:hash+ entry is copied when output functions are present.
    def initialize(table:, input:)
      @result = {}

      # Relevant table attributes
      @first_match = table.options[:first_match]
      @outs = table.columns.outs
      @outs_functions = table.outs_functions

      # Partial result always includes the input hash for calculating output functions
      @partial_result = input[:hash].dup if @outs_functions

      @row_picked = nil
      return if @first_match

      # Extra attributes for the accumulate option
      @rows_picked = []
      @multi_result = nil
    end

    # Is the result set empty? That is, nothing matched?
    #
    # @return [true, false]
    def empty?
      return @row_picked.nil? if @first_match

      @rows_picked.empty?
    end

    # Did at least one row match?
    #
    # @return [true, false]
    def exist?
      !empty?
    end

    # The decision's result hash.
    #
    # @return [Hash, nil] an empty hash when nothing matched, otherwise the output
    #   column values keyed by column name. Returns nil when the table has output
    #   functions - NOTE(review): evaluation of output functions does not appear to
    #   be implemented in this class yet; confirm before relying on it.
    def result
      return {} if empty?
      return final_result unless @outs_functions

      nil
    end

    # Scan the table's rows, adding each matching row to this decision.
    # Stops early as soon as a row reports that the search is done.
    #
    # @param table [#scan_rows, #each] table yielding each data row with its index.
    # @param input [Hash] parsed input.
    # @return [self]
    def scan(table:, input:)
      scan_rows = table.scan_rows

      table.each do |row, index|
        done = row_scan(input: input, row: row, scan_row: scan_rows[index])

        return self if done
      end

      self
    end

    # Add a matched row to the result.
    #
    # @param row [Array] the matching data row.
    # @return [true, false] true when the search is done (first-match mode),
    #   false when further rows should still be scanned (accumulate mode).
    def add(row)
      return add_first_match(row) if @first_match

      # Accumulate output rows
      @rows_picked << row
      @outs.each_pair do |col, column|
        accumulate_outs(column_name: column.name, cell: row[col])
      end

      # Not done
      false
    end

    private

    # Merge one output cell into the result: the first value is stored as is,
    # and a second and later values turn the entry into an array of all values.
    def accumulate_outs(column_name:, cell:)
      current = @result[column_name]

      case current
      when nil
        @result[column_name] = cell

      when Array
        @result[column_name] << cell

      else
        @result[column_name] = [current, cell]
        @multi_result ||= true
      end
    end

    # Add the row to the decision if it matches the input.
    # Returns nil for a non-matching row, otherwise the "done" flag from #add.
    def row_scan(input:, row:, scan_row:)
      return unless Decide.matches?(row: row, input: input, scan_row: scan_row)

      add(row)
    end

    def final_result
      @result
    end

    # Record the first matching row and signal that the search is complete.
    def add_first_match(row)
      @row_picked = row

      # Common case is just copying output column values to the final result
      @outs.each_pair { |col, column| @result[column.name] = row[col] }

      # Done - stated explicitly rather than relying on each_pair's truthy return value
      true
    end
  end
end
|
data/lib/csv_decision/header.rb
CHANGED
@@ -4,20 +4,155 @@
|
|
4
4
|
# Created December 2017 by Brett Vickers
|
5
5
|
# See LICENSE and README.md for details.
|
6
6
|
module CSVDecision
  # Parse the CSV file's header row. These methods are only required at table load time.
  module Header
    # Column header looks like IN :col_name or cond:
    COLUMN_TYPE = %r{
      \A(?<type>in|out|in/text|out/text|set|set/nil|set/blank|path|cond|if)
      \s*:\s*(?<name>\S?.*)\z
    }xi

    # These column types do not need a name
    COLUMN_TYPE_ANONYMOUS = Set.new(%i[path if cond]).freeze

    # More lenient than a Ruby method name -
    # any spaces will have been replaced with underscores
    COLUMN_NAME = %r{\A\w[\w:/!?]*\z}

    # Does this row contain a recognisable header cell?
    #
    # @param row [Array<String>]
    # @return [true, false]
    def self.row?(row)
      row.any? { |cell| cell.match(COLUMN_TYPE) }
    end

    # Strip empty columns from all data rows, then remove the header row.
    # A column is empty when its cell in the first row is the empty string.
    #
    # @param rows [Array<Array<String>>] - all rows, header first; modified in place.
    # @return [Array<String>, nil] - the header row after removing any empty columns,
    #   or nil if there are no rows.
    def self.strip_empty_columns(rows:)
      empty_cols = empty_columns?(row: rows.first)
      Data.strip_columns(data: rows, empty_columns: empty_cols) unless empty_cols.empty?

      # Remove the header row from the data array.
      rows.shift
    end

    # Classify and build a dictionary of all input and output columns.
    #
    # @param row [Array<String>] - the header row after removing any empty columns.
    # @return [Columns::Dictionary] - column dictionary keyed by column index.
    # @raise [CellValidationError] if a header cell is not valid.
    def self.dictionary(row:)
      dictionary = Columns::Dictionary.new

      row.each_with_index do |cell, index|
        dictionary = parse_cell(cell: cell, index: index, dictionary: dictionary)
      end

      dictionary
    end

    # Split a header cell into its column type and column name.
    # Any validation failure is re-raised with the offending cell quoted in the message.
    def self.header_column?(cell:)
      match = COLUMN_TYPE.match(cell)
      raise CellValidationError, 'column name is not well formed' unless match

      # The type group always takes part in a successful match
      column_type = match['type'].downcase.to_sym
      column_name = column_name(type: column_type, name: match['name'])

      [column_type, column_name]
    rescue CellValidationError => error
      raise CellValidationError,
            "header column '#{cell}' is not valid as the #{error.message}"
    end
    private_class_method :header_column?

    # Array of all empty column indices.
    def self.empty_columns?(row:)
      result = []
      row&.each_with_index { |cell, index| result << index if cell == '' }

      result
    end
    private_class_method :empty_columns?

    # Validate and format the column's name.
    #
    # @param type [Symbol] column type.
    # @param name [String] raw name text taken from the header cell.
    # @return [Symbol, nil] the formatted name; nil for a nameless column whose type
    #   does not need a name.
    # @raise [CellValidationError] if a name is required but missing, or is invalid.
    def self.column_name(type:, name:)
      return format_column_name(name) if name.present?
      return if COLUMN_TYPE_ANONYMOUS.member?(type)

      raise CellValidationError, 'column name is missing'
    end

    # Strip the name and replace its spaces with underscores.
    #
    # @param name [String]
    # @return [Symbol]
    # @raise [CellValidationError] if the result does not match COLUMN_NAME.
    def self.format_column_name(name)
      column_name = name.strip.tr("\s", '_')

      return column_name.to_sym if COLUMN_NAME.match(column_name)

      raise CellValidationError, "column name '#{name}' contains invalid characters"
    end

    # Returns the normalized column type, along with an indication if
    # the column is text only
    #
    # @param type [Symbol]
    # @return [Array] pair of normalized type and text-only flag (true, false or nil).
    def self.column_type(type)
      case type
      when :'in/text'
        [:in, true]

      when :cond
        [:in, false]

      when :'out/text'
        [:out, true]

      # Column may turn out to be text-only, or not
      else
        [type, nil]
      end
    end

    # Parse one header cell and record it in the dictionary.
    def self.parse_cell(cell:, index:, dictionary:)
      column_type, column_name = header_column?(cell: cell)

      type, text_only = Header.column_type(column_type)

      dictionary_entry(dictionary: dictionary,
                       type: type,
                       entry: Columns::Entry.new(column_name, text_only),
                       index: index)
    end
    private_class_method :parse_cell

    # File the entry under the dictionary's ins or outs, keyed by column index.
    # Any other column type raises, including types COLUMN_TYPE accepts but which
    # are not handled here (set, path, if).
    def self.dictionary_entry(dictionary:, type:, entry:, index:)
      case type
      # Header column that has a function for setting the value (planned feature)
      # when :set, :'set/nil', :'set/blank'
      #   # Default function will set the input value unconditionally or conditionally
      #   dictionary.defaults[index] =
      #     Columns::Default.new(entry.name, nil, default_if(type))
      #
      #   # Treat set: as an in: column
      #   dictionary.ins[index] = entry

      when :in
        dictionary.ins[index] = entry

      when :out
        dictionary.outs[index] = entry

      else
        raise "internal error - column type #{type} not recognised"
      end

      dictionary
    end
    private_class_method :dictionary_entry

    # def self.default_if(type)
    #   return nil if type == :set
    #   return :nil? if type == :'set/nil'
    #   :blank?
    # end
    # private_class_method :default_if
  end
end
|