csv_decision 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.coveralls.yml +2 -0
- data/.rubocop.yml +16 -4
- data/.travis.yml +10 -0
- data/CHANGELOG.md +2 -0
- data/Gemfile +3 -0
- data/LICENSE +21 -0
- data/README.md +133 -19
- data/benchmark.rb +143 -0
- data/csv_decision.gemspec +8 -6
- data/lib/csv_decision.rb +18 -4
- data/lib/csv_decision/columns.rb +69 -0
- data/lib/csv_decision/data.rb +31 -16
- data/lib/csv_decision/decide.rb +47 -0
- data/lib/csv_decision/decision.rb +105 -0
- data/lib/csv_decision/header.rb +143 -8
- data/lib/csv_decision/input.rb +49 -0
- data/lib/csv_decision/load.rb +31 -0
- data/lib/csv_decision/matchers.rb +131 -0
- data/lib/csv_decision/matchers/numeric.rb +37 -0
- data/lib/csv_decision/matchers/pattern.rb +76 -0
- data/lib/csv_decision/matchers/range.rb +76 -0
- data/lib/csv_decision/options.rb +80 -50
- data/lib/csv_decision/parse.rb +77 -23
- data/lib/csv_decision/scan_row.rb +68 -0
- data/lib/csv_decision/table.rb +34 -6
- data/spec/csv_decision/columns_spec.rb +86 -0
- data/spec/csv_decision/data_spec.rb +16 -3
- data/spec/csv_decision/decision_spec.rb +30 -0
- data/spec/csv_decision/input_spec.rb +54 -0
- data/spec/csv_decision/load_spec.rb +28 -0
- data/spec/csv_decision/matchers/numeric_spec.rb +84 -0
- data/spec/csv_decision/matchers/pattern_spec.rb +183 -0
- data/spec/csv_decision/matchers/range_spec.rb +132 -0
- data/spec/csv_decision/options_spec.rb +67 -0
- data/spec/csv_decision/parse_spec.rb +2 -3
- data/spec/csv_decision/simple_example_spec.rb +45 -0
- data/spec/csv_decision/table_spec.rb +151 -0
- data/spec/data/invalid/invalid_header1.csv +4 -0
- data/spec/data/invalid/invalid_header2.csv +4 -0
- data/spec/data/invalid/invalid_header3.csv +4 -0
- data/spec/data/invalid/invalid_header4.csv +4 -0
- data/spec/data/valid/options_in_file1.csv +5 -0
- data/spec/data/valid/options_in_file2.csv +5 -0
- data/spec/data/valid/simple_example.csv +10 -0
- data/spec/data/valid/valid.csv +4 -4
- data/spec/spec_helper.rb +6 -0
- metadata +89 -12
@@ -0,0 +1,69 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# CSV Decision: CSV based Ruby decision tables.
|
4
|
+
# Created December 2017 by Brett Vickers
|
5
|
+
# See LICENSE and README.md for details.
|
6
|
+
module CSVDecision
|
7
|
+
# Dictionary of all this table's columns - inputs, outputs etc.
|
8
|
+
class Columns
  # Value object for a single column dictionary entry.
  Entry = Struct.new(:name, :text_only)

  # Value object for a column that has a default specified.
  Default = Struct.new(:name, :function, :default_if)

  # Dictionary of all data columns.
  # Each hash is keyed by the header cell's array column index.
  # Input and output columns can be interspersed and need not have unique names.
  class Dictionary
    attr_accessor :ins, :outs, :path, :defaults

    def initialize
      # Input and output columns
      @ins = {}
      @outs = {}

      # Optional path for the input hash
      @path = {}

      # Columns that require defaults to be set
      @defaults = {}
    end
  end

  # @return [Columns::Dictionary, nil] dictionary of all data columns;
  #   never assigned (so nil) when the table had no header row.
  attr_reader :dictionary

  # @return [Hash] input columns held by the dictionary.
  def ins
    dictionary.ins
  end

  # @return [Hash] output columns held by the dictionary.
  def outs
    dictionary.outs
  end

  # Input columns with defaults specified (planned feature)
  # def defaults
  #   @dictionary.defaults
  # end

  # Input hash path (planned feature)
  # def path
  #   @dictionary.path
  # end

  # Builds the column dictionary from the table's header row, then freezes self.
  #
  # @param table [#rows] table whose first row is the header row.
  def initialize(table)
    # A column without a valid header cell is treated as empty of data.
    # The call hands back the stripped header row and removes it from table.rows.
    header_row = Header.strip_empty_columns(rows: table.rows)

    # Build a dictionary of all valid data columns from the header row.
    if header_row
      @dictionary = Header.dictionary(row: header_row)
    end

    freeze
  end
end
|
69
|
+
end
|
data/lib/csv_decision/data.rb
CHANGED
@@ -10,21 +10,32 @@ module CSVDecision
|
|
10
10
|
|
11
11
|
# Methods to load data from a file, CSV string or array of arrays
|
12
12
|
module Data
|
13
|
+
CSV_OPTIONS = { encoding: 'UTF-8', skip_blanks: true }.freeze
|
14
|
+
|
13
15
|
# Parse the input data which may either be a file path name, CSV string or
|
14
16
|
# array of arrays. Strips out empty columns/rows and comment cells
|
15
|
-
def self.to_array(data
|
16
|
-
strip_rows(data: data_array(data)
|
17
|
+
# @param data [Object] raw input, handed to data_array for loading.
# @return [Array] the loaded rows after strip_rows has processed them.
def self.to_array(data:)
  loaded_rows = data_array(data)
  strip_rows(data: loaded_rows)
end
|
20
|
+
|
21
|
+
# Is the input something to be read from disk?
#
# @param input [Object] candidate data source.
# @return [true, false] true when input is a Pathname or a File.
def self.input_file?(input)
  case input
  when Pathname, File then true
  else false
  end
end
|
18
24
|
|
19
|
-
|
20
|
-
|
25
|
+
# Delete the given columns from every row of the data array, in place.
#
# The caller's empty_columns array is left untouched (it may be frozen);
# the shifted indices are computed in a new array.
#
# @param data [Array<Array>] array of rows; each row is mutated.
# @param empty_columns [Array<Integer>] indices of the columns to delete,
#   in ascending order.
# @return [Array<Integer>] the column indices adjusted for earlier deletions.
def self.strip_columns(data:, empty_columns:)
  # Each deletion shifts the remaining columns left by one, so the n-th
  # column to be deleted sits n positions to the left of its original index.
  shifted = empty_columns.each_with_index.map { |col, deleted| col - deleted }

  # Delete all empty columns from the array of arrays
  shifted.each { |col| data.each { |row| row.delete_at(col) } }
end
|
21
32
|
|
22
33
|
# Parse the input data which may either be a file path name, CSV string or
|
23
34
|
# array of arrays
|
24
35
|
def self.data_array(input)
|
25
|
-
return CSV.read(input) if
|
36
|
+
return CSV.read(input, CSV_OPTIONS) if input_file?(input)
|
26
37
|
return input.deep_dup if input.is_a?(Array) && input[0].is_a?(Array)
|
27
|
-
return CSV.parse(input) if input.is_a?(String)
|
38
|
+
return CSV.parse(input, CSV_OPTIONS) if input.is_a?(String)
|
28
39
|
|
29
40
|
raise ArgumentError,
|
30
41
|
"#{input.class} input invalid; " \
|
@@ -32,10 +43,10 @@ module CSVDecision
|
|
32
43
|
end
|
33
44
|
private_class_method :data_array
|
34
45
|
|
35
|
-
def self.strip_rows(data
|
46
|
+
def self.strip_rows(data:)
|
36
47
|
rows = []
|
37
48
|
data.each do |row|
|
38
|
-
row = strip_cells(row: row
|
49
|
+
row = strip_cells(row: row)
|
39
50
|
rows << row if row.find { |cell| cell != '' }
|
40
51
|
end
|
41
52
|
rows
|
@@ -45,15 +56,19 @@ module CSVDecision
|
|
45
56
|
# Strip cells of leading/trailing spaces; treat comments as an empty cell.
|
46
57
|
# Non string values treated as empty cells.
|
47
58
|
# Non-ascii strings treated as empty cells by default.
|
48
|
-
def self.strip_cells(row
|
49
|
-
row.map!
|
50
|
-
next '' unless cell.is_a?(String)
|
51
|
-
cell = options[:force_encoding] ? cell.force_encoding(options[:force_encoding]) : cell
|
52
|
-
next '' if options[:ascii_only?] && !cell.ascii_only?
|
53
|
-
next '' if cell.lstrip[0] == COMMENT_CHARACTER
|
54
|
-
cell.strip
|
55
|
-
end
|
59
|
+
# Replace every cell of the row, in place, with its strip_cell value.
#
# @param row [Array] row of cells; mutated.
# @return [Array] the same row object.
def self.strip_cells(row:)
  row.each_with_index { |cell, index| row[index] = strip_cell(cell) }
end
|
57
62
|
private_class_method :strip_cells
|
63
|
+
|
64
|
+
# Normalize a single cell value.
#
# Non-string values, non-ASCII strings and comment cells all become the
# empty string; anything else is returned stripped of leading/trailing
# whitespace.
#
# @param cell [Object] raw cell value; never modified.
# @return [String] the cleaned cell text, or '' for an empty cell.
def self.strip_cell(cell)
  return '' unless cell.is_a?(String)

  # Re-tag a copy as UTF-8: force_encoding modifies its receiver, which
  # would alter the caller's string and raise on a frozen one.
  text = cell.dup.force_encoding('UTF-8')
  return '' unless text.ascii_only?

  stripped = text.strip

  # A cell whose first non-blank character is the comment character is empty.
  return '' if stripped[0] == COMMENT_CHARACTER

  stripped
end
|
72
|
+
private_class_method :strip_cell
|
58
73
|
end
|
59
74
|
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# CSV Decision: CSV based Ruby decision tables.
|
4
|
+
# Created December 2017 by Brett Vickers
|
5
|
+
# See LICENSE and README.md for details.
|
6
|
+
module CSVDecision
|
7
|
+
# Main module for searching the decision table looking for one or more matches
|
8
|
+
module Decide
  # Main method for making decisions.
  #
  # @param table [CSVDecision::Table]
  # @param input [Hash] - input hash (keys may or may not be symbolized)
  # @param symbolize_keys [true, false] - set to true if keys are symbolized and it's
  #   OK to mutate the input hash. Otherwise a copy of the input hash is symbolized.
  # @return [Object] whatever Decision#scan returns for the parsed input.
  def self.decide(table:, input:, symbolize_keys:)
    # Parse and transform the hash supplied as input
    scan_input = Input.parse(table: table, input: input, symbolize_keys: symbolize_keys)

    # The decision object collects the results of the search and
    # calculates the final result
    Decision.new(table: table, input: scan_input).scan(table: table, input: scan_input)
  end

  # Does the row match the input?
  #
  # Constants are checked first; the row's procs are only consulted when
  # the constants match and there is at least one proc.
  #
  # @param row [Object] the table row being tested.
  # @param input [Hash] parsed input; its :scan_cols entry is passed to the scan row.
  # @param scan_row [#match_constants?, #procs, #match_procs?]
  # @return [Object] false when the constants do not match, true when they match
  #   and there are no procs, otherwise the result of scan_row.match_procs?.
  def self.matches?(row:, input:, scan_row:)
    constants_ok = scan_row.match_constants?(row: row, scan_cols: input[:scan_cols])
    return false unless constants_ok

    scan_row.procs.empty? || scan_row.match_procs?(row: row, input: input)
  end

  # Call a matcher's function with the arguments it expects.
  #
  # @param proc [#function, #type] matcher descriptor.
  # @param value [Object] the input value for the column.
  # @param hash [Hash] the input hash.
  # @return [Object] whatever the function returns.
  def self.eval_matcher(proc:, value:, hash:)
    callable = proc.function

    if proc.type == :expression
      # A symbol expression just needs to be passed the input hash
      callable[hash]
    elsif callable.arity == 1
      # All other procs can take one...
      callable[value]
    else
      # ...or two args
      callable[value, hash]
    end
  end
end
|
47
|
+
end
|
@@ -0,0 +1,105 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# CSV Decision: CSV based Ruby decision tables.
|
4
|
+
# Created December 2017 by Brett Vickers
|
5
|
+
# See LICENSE and README.md for details.
|
6
|
+
module CSVDecision
|
7
|
+
# Accumulate the matching row(s) and calculate the final result
|
8
|
+
class Decision
  # @param table [#options, #columns, #outs_functions] the decision table.
  # @param input [Hash] parsed input; input[:hash] is copied when the table
  #   has output functions.
  def initialize(table:, input:)
    @result = {}

    # Relevant table attributes
    @first_match = table.options[:first_match]
    @outs = table.columns.outs
    @outs_functions = table.outs_functions

    # Partial result always includes the input hash for calculating output functions
    @partial_result = input[:hash].dup if @outs_functions

    @row_picked = nil
    return if @first_match

    # Extra attributes for the accumulate option
    @rows_picked = []
    @multi_result = nil
  end

  # Is the result set empty? That is, nothing matched?
  #
  # @return [true, false]
  def empty?
    return @row_picked.nil? if @first_match

    @rows_picked.empty?
  end

  # Did at least one row match?
  #
  # @return [true, false]
  def exist?
    !empty?
  end

  # The decision result.
  #
  # @return [Hash, nil] empty hash when nothing matched; the collected output
  #   values when the table has no output functions; nil otherwise.
  def result
    return {} if empty?
    return final_result unless @outs_functions

    nil
  end

  # Scan the table rows, adding each matching row to the result. Stops at
  # the first row that reports it is done.
  #
  # @param table [#scan_rows, #each] table yielding (row, index) pairs.
  # @param input [Hash] parsed input.
  # @return [self]
  def scan(table:, input:)
    scan_rows = table.scan_rows

    table.each do |row, index|
      done = row_scan(input: input, row: row, scan_row: scan_rows[index])

      return self if done
    end

    self
  end

  # Add a matched row to the result.
  #
  # @param row [Array] the matched table row.
  # @return [true, false] true when the search is finished (first match mode),
  #   false when further rows should still be scanned (accumulate mode).
  def add(row)
    return add_first_match(row) if @first_match

    # Accumulate output rows
    @rows_picked << row
    @outs.each_pair do |col, column|
      accumulate_outs(column_name: column.name, cell: row[col])
    end

    # Not done
    false
  end

  private

  # Merge one output cell into the result: the first value is stored as is,
  # a second value turns the entry into an array, later values are appended.
  def accumulate_outs(column_name:, cell:)
    current = @result[column_name]

    case current
    when nil
      @result[column_name] = cell

    when Array
      @result[column_name] << cell

    else
      @result[column_name] = [current, cell]
      @multi_result ||= true
    end
  end

  # Test one row against the input and add it to the result on a match.
  #
  # @return [true, false] true when scanning should stop.
  def row_scan(input:, row:, scan_row:)
    return false unless Decide.matches?(row: row, input: input, scan_row: scan_row)

    add(row)
  end

  def final_result
    @result
  end

  # Record the first matching row and copy its output cells to the result.
  #
  # @return [true] the search is done. Stated explicitly rather than relying
  #   on the truthiness of the Hash returned by each_pair.
  def add_first_match(row)
    @row_picked = row

    # Common case is just copying output column values to the final result
    @outs.each_pair { |col, column| @result[column.name] = row[col] }

    true
  end
end
|
105
|
+
end
|
data/lib/csv_decision/header.rb
CHANGED
@@ -4,20 +4,155 @@
|
|
4
4
|
# Created December 2017 by Brett Vickers
|
5
5
|
# See LICENSE and README.md for details.
|
6
6
|
module CSVDecision
|
7
|
-
# Parse the CSV file's header row
|
8
|
-
|
9
|
-
# Column header looks like IN :col_name or
|
10
|
-
COLUMN_TYPE = %r{
|
7
|
+
# Parse the CSV file's header row. These methods are only required at table load time.
|
8
|
+
module Header
  # Column header looks like IN :col_name or cond:
  COLUMN_TYPE = %r{
    \A(?<type>in|out|in/text|out/text|set|set/nil|set/blank|path|cond|if)
    \s*:\s*(?<name>\S?.*)\z
  }xi

  # These column types do not need a name
  COLUMN_TYPE_ANONYMOUS = Set.new(%i[path if cond]).freeze

  # More lenient than a Ruby method name -
  # any spaces will have been replaced with underscores
  COLUMN_NAME = %r{\A\w[\w:/!?]*\z}

  # Does this row contain a recognisable header cell?
  #
  # @param row [Array<String>]
  # @return [true, false]
  def self.row?(row)
    row.any? { |cell| cell.match(COLUMN_TYPE) }
  end

  # Strip empty columns from all data rows, then detach the header row.
  # The rows array is modified in place: cells in empty columns are deleted
  # and the first row is removed.
  #
  # @param rows [Array<Array<String>>]
  # @return [Array<String>, nil] - the header row after removing any empty columns,
  #   or nil when rows is empty.
  def self.strip_empty_columns(rows:)
    empty_cols = empty_columns?(row: rows.first)
    Data.strip_columns(data: rows, empty_columns: empty_cols) unless empty_cols.empty?

    # Remove the header row from the data array.
    rows.shift
  end

  # Classify and build a dictionary of all input and output columns.
  #
  # @param row [Array<String>] - the header row after removing any empty columns.
  # @return [Columns::Dictionary] - column dictionary with its ins and outs
  #   keyed by column index.
  # @raise [CellValidationError] if a header cell is not well formed.
  def self.dictionary(row:)
    dictionary = Columns::Dictionary.new

    row.each_with_index do |cell, index|
      dictionary = parse_cell(cell: cell, index: index, dictionary: dictionary)
    end

    dictionary
  end

  # Split a header cell into its column type and column name.
  #
  # @param cell [String] header cell text.
  # @return [Array(Symbol, Symbol)] downcased column type and formatted column
  #   name (the name is nil for anonymous column types without one).
  # @raise [CellValidationError] message is prefixed with the offending cell.
  def self.header_column?(cell:)
    match = COLUMN_TYPE.match(cell)
    raise CellValidationError, 'column name is not well formed' unless match

    # The type group is mandatory in COLUMN_TYPE, so it is always present here.
    type = match['type'].downcase.to_sym
    name = column_name(type: type, name: match['name'])

    [type, name]
  rescue CellValidationError => exp
    raise CellValidationError,
          "header column '#{cell}' is not valid as the #{exp.message}"
  end
  private_class_method :header_column?

  # Array of all empty column indices.
  #
  # @param row [Array<String>, nil] header row.
  # @return [Array<Integer>] indices of the cells equal to '', in ascending order.
  def self.empty_columns?(row:)
    result = []
    row&.each_with_index { |cell, index| result << index if cell == '' }

    result
  end
  private_class_method :empty_columns?

  # Validate and format the name part of a header cell.
  #
  # @param type [Symbol] column type.
  # @param name [String] raw name text captured from the header cell.
  # @return [Symbol, nil] formatted name; nil for a nameless anonymous column type.
  # @raise [CellValidationError] if a name is required but missing, or is invalid.
  def self.column_name(type:, name:)
    return format_column_name(name) if name.present?
    return if COLUMN_TYPE_ANONYMOUS.member?(type)

    raise CellValidationError, 'column name is missing'
  end

  # Strip the name, replace spaces with underscores and convert it to a symbol.
  #
  # @param name [String]
  # @return [Symbol]
  # @raise [CellValidationError] if the result does not match COLUMN_NAME.
  def self.format_column_name(name)
    column_name = name.strip.tr("\s", '_')

    return column_name.to_sym if COLUMN_NAME.match(column_name)

    raise CellValidationError, "column name '#{name}' contains invalid characters"
  end

  # Returns the normalized column type, along with an indication if
  # the column is text only
  #
  # @param type [Symbol] column type taken from the header cell.
  # @return [Array(Symbol, Boolean)] the text-only flag is nil when not yet known.
  def self.column_type(type)
    case type
    when :'in/text'
      [:in, true]

    when :cond
      [:in, false]

    when :'out/text'
      [:out, true]

    # Column may turn out to be text-only, or not
    else
      [type, nil]
    end
  end

  # Parse one header cell and record it in the dictionary under its index.
  #
  # @return [Columns::Dictionary] the updated dictionary.
  def self.parse_cell(cell:, index:, dictionary:)
    column_type, column_name = header_column?(cell: cell)

    type, text_only = Header.column_type(column_type)

    dictionary_entry(dictionary: dictionary,
                     type: type,
                     entry: Columns::Entry.new(column_name, text_only),
                     index: index)
  end
  private_class_method :parse_cell

  # File the entry under the dictionary's ins or outs.
  #
  # @return [Columns::Dictionary] the updated dictionary.
  # @raise [RuntimeError] for any column type other than :in or :out.
  def self.dictionary_entry(dictionary:, type:, entry:, index:)
    case type
    # Header column that has a function for setting the value (planned feature)
    # when :set, :'set/nil', :'set/blank'
    #   # Default function will set the input value unconditionally or conditionally
    #   dictionary.defaults[index] =
    #     Columns::Default.new(entry.name, nil, default_if(type))
    #
    #   # Treat set: as an in: column
    #   dictionary.ins[index] = entry

    when :in
      dictionary.ins[index] = entry

    when :out
      dictionary.outs[index] = entry

    else
      raise "internal error - column type #{type} not recognised"
    end

    dictionary
  end
  private_class_method :dictionary_entry

  # def self.default_if(type)
  #   return nil if type == :set
  #   return :nil? if type == :'set/nil'
  #   :blank?
  # end
  # private_class_method :default_if
end
|
23
158
|
end
|