sycsvpro 0.0.9 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +29 -8
- data/bin/sycsvpro +68 -26
- data/html/Object.html +57 -0
- data/html/README_rdoc.html +18 -16
- data/html/Sycsvpro.html +2 -0
- data/html/Sycsvpro/Aggregator.html +482 -0
- data/html/Sycsvpro/Calculator.html +35 -8
- data/html/Sycsvpro/Counter.html +60 -27
- data/html/Sycsvpro/Filter.html +15 -12
- data/html/created.rid +9 -8
- data/html/index.html +20 -16
- data/html/js/search_index.js +1 -1
- data/html/table_of_contents.html +66 -38
- data/lib/sycsvpro.rb +1 -0
- data/lib/sycsvpro/aggregator.rb +112 -0
- data/lib/sycsvpro/calculator.rb +21 -5
- data/lib/sycsvpro/counter.rb +62 -16
- data/lib/sycsvpro/filter.rb +25 -9
- data/lib/sycsvpro/version.rb +1 -1
- data/spec/sycsvpro/aggregator_spec.rb +55 -0
- data/spec/sycsvpro/calculator_spec.rb +22 -0
- data/spec/sycsvpro/counter_spec.rb +77 -11
- data/sycsvpro.rdoc +17 -16
- metadata +5 -2
data/lib/sycsvpro/aggregator.rb
CHANGED
@@ -0,0 +1,112 @@
|
|
1
|
+
require_relative 'row_filter'
|
2
|
+
require_relative 'column_filter'
|
3
|
+
require_relative 'dsl'
|
4
|
+
|
5
|
+
# Operating csv files
|
6
|
+
module Sycsvpro
|
7
|
+
|
8
|
+
# An Aggregator counts specified row values and adds a sum to the end of the row
|
9
|
+
class Aggregator
|
10
|
+
|
11
|
+
include Dsl
|
12
|
+
|
13
|
+
# infile contains the data that is operated on
|
14
|
+
attr_reader :infile
|
15
|
+
# outfile is the file where the result is written to
|
16
|
+
attr_reader :outfile
|
17
|
+
# file doesn't contain a header
|
18
|
+
attr_reader :headerless
|
19
|
+
# filter that is used for rows
|
20
|
+
attr_reader :row_filter
|
21
|
+
# filter that is used for columns
|
22
|
+
attr_reader :col_filter
|
23
|
+
# values that are aggregated
|
24
|
+
attr_reader :key_values
|
25
|
+
# header of the out file
|
26
|
+
attr_reader :heading
|
27
|
+
# Title of the sum row
|
28
|
+
attr_reader :sum_row_title
|
29
|
+
# row where to add the sums of the columns
|
30
|
+
attr_reader :sum_row
|
31
|
+
# Title of the sum column
|
32
|
+
attr_reader :sum_col_title
|
33
|
+
# column where to add the sum of each row
|
34
|
+
attr_reader :sum_col
|
35
|
+
# sums of the column values
|
36
|
+
attr_reader :sums
|
37
|
+
|
38
|
+
# Creates a new aggregator. Takes as options :infile, :outfile, :headerless, :rows, :cols,
|
39
|
+
# :df (date format) and :sum (scheme for the sum row and the sum column)
|
40
|
+
def initialize(options={})
|
41
|
+
@infile = options[:infile]
|
42
|
+
@outfile = options[:outfile]
|
43
|
+
@headerless = options[:headerless] || false
|
44
|
+
@row_filter = RowFilter.new(options[:rows])
|
45
|
+
@col_filter = ColumnFilter.new(options[:cols], df: options[:df])
|
46
|
+
@key_values = Hash.new(0)
|
47
|
+
@heading = []
|
48
|
+
@sums = Hash.new(0)
|
49
|
+
init_sum_scheme(options[:sum])
|
50
|
+
end
|
51
|
+
|
52
|
+
# Executes the aggregator
|
53
|
+
def execute
|
54
|
+
process_aggregation
|
55
|
+
write_result
|
56
|
+
end
|
57
|
+
|
58
|
+
# Process the aggregation of the key values
|
59
|
+
def process_aggregation
|
60
|
+
File.new(infile).each_with_index do |line, index|
|
61
|
+
result = col_filter.process(row_filter.process(line.chomp, row: index))
|
62
|
+
unless result.nil? or result.empty?
|
63
|
+
if heading.empty? and not headerless
|
64
|
+
heading << result.split(';')
|
65
|
+
next
|
66
|
+
else
|
67
|
+
@sum_col = [result.split(';').size, sum_col].max
|
68
|
+
end
|
69
|
+
key_values[result] += 1
|
70
|
+
sums[sum_col_title] += 1
|
71
|
+
end
|
72
|
+
end
|
73
|
+
heading.flatten!
|
74
|
+
heading[sum_col] = sum_col_title
|
75
|
+
end
|
76
|
+
|
77
|
+
# Writes the aggregation results
|
78
|
+
def write_result
|
79
|
+
sum_line = [sum_row_title]
|
80
|
+
(heading.size - 2).times { sum_line << "" }
|
81
|
+
sum_line << sums[sum_col_title]
|
82
|
+
row = 0;
|
83
|
+
File.open(outfile, 'w') do |out|
|
84
|
+
out.puts sum_line.join(';') if row == sum_row ; row += 1
|
85
|
+
out.puts heading.join(';')
|
86
|
+
key_values.each do |k, v|
|
87
|
+
out.puts sum_line.join(';') if row == sum_row ; row += 1
|
88
|
+
out.puts [k, v].join(';')
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
private
|
94
|
+
|
95
|
+
# Initializes the sum row title and position as well as the sum column title and position
|
96
|
+
def init_sum_scheme(sum_scheme)
|
97
|
+
row_scheme, col_scheme = sum_scheme.split(',') unless sum_scheme.nil?
|
98
|
+
|
99
|
+
unless row_scheme.nil?
|
100
|
+
@sum_row_title, @sum_row = row_scheme.split(':') unless row_scheme.empty?
|
101
|
+
end
|
102
|
+
|
103
|
+
@sum_row.nil? ? @sum_row = 0 : @sum_row = @sum_row.to_i
|
104
|
+
@sum_row_title = 'Total' if @sum_row_title.nil?
|
105
|
+
|
106
|
+
col_scheme.nil? ? @sum_col_title = 'Total' : @sum_col_title = col_scheme
|
107
|
+
@sum_col = 0
|
108
|
+
end
|
109
|
+
|
110
|
+
end
|
111
|
+
|
112
|
+
end
|
data/lib/sycsvpro/calculator.rb
CHANGED
@@ -24,16 +24,20 @@ module Sycsvpro
|
|
24
24
|
attr_reader :header
|
25
25
|
# filter that is used for columns
|
26
26
|
attr_reader :columns
|
27
|
+
# if true add a sum row at the bottom of the out file
|
28
|
+
attr_reader :add_sum_row
|
27
29
|
|
28
30
|
# Creates a new Calculator. Options expects :infile, :outfile, :rows and :columns. Optionally
|
29
31
|
# a header can be provided. The header can be supplemented with additional column names that
|
30
32
|
# are generated due to an arithmetic operation that creates new columns
|
31
33
|
def initialize(options={})
|
32
|
-
@infile
|
33
|
-
@outfile
|
34
|
-
@row_filter
|
35
|
-
@header
|
36
|
-
@
|
34
|
+
@infile = options[:infile]
|
35
|
+
@outfile = options[:outfile]
|
36
|
+
@row_filter = RowFilter.new(options[:rows])
|
37
|
+
@header = Header.new(options[:header])
|
38
|
+
@sum_row = []
|
39
|
+
@add_sum_row = options[:sum] || false
|
40
|
+
@formulae = {}
|
37
41
|
create_calculator(options[:cols])
|
38
42
|
end
|
39
43
|
|
@@ -64,7 +68,19 @@ module Sycsvpro
|
|
64
68
|
@columns[col.to_i] = eval(formula)
|
65
69
|
end
|
66
70
|
out.puts @columns.join(';')
|
71
|
+
|
72
|
+
@columns.each_with_index do |column, index|
|
73
|
+
if @sum_row[index]
|
74
|
+
@sum_row[index] += to_number column
|
75
|
+
else
|
76
|
+
@sum_row[index] = to_number column
|
77
|
+
end
|
78
|
+
end if add_sum_row
|
79
|
+
|
67
80
|
end
|
81
|
+
|
82
|
+
out.puts @sum_row.join(';') if add_sum_row
|
83
|
+
|
68
84
|
end
|
69
85
|
end
|
70
86
|
|
data/lib/sycsvpro/counter.rb
CHANGED
@@ -5,7 +5,7 @@ require_relative 'dsl'
|
|
5
5
|
# Operating csv files
|
6
6
|
module Sycsvpro
|
7
7
|
|
8
|
-
#
|
8
|
+
# Counter counts values and uses the values as column names and uses the count
|
9
9
|
# as the column value
|
10
10
|
class Counter
|
11
11
|
|
@@ -15,8 +15,10 @@ module Sycsvpro
|
|
15
15
|
attr_reader :infile
|
16
16
|
# outfile is the file where the result is written to
|
17
17
|
attr_reader :outfile
|
18
|
-
# values are assigned to the key
|
19
|
-
attr_reader :
|
18
|
+
# values are assigned to the key columns
|
19
|
+
attr_reader :key_columns
|
20
|
+
# key columns headers
|
21
|
+
attr_reader :key_titles
|
20
22
|
# filter that is used for rows
|
21
23
|
attr_reader :row_filter
|
22
24
|
# filter that is used for columns
|
@@ -26,9 +28,11 @@ module Sycsvpro
|
|
26
28
|
# header of the out file
|
27
29
|
attr_reader :heading
|
28
30
|
# Title of the sum row
|
29
|
-
attr_reader :
|
30
|
-
# row where to add the sums of the columns
|
31
|
+
attr_reader :sum_row_title
|
32
|
+
# row where to add the sums of the columns
|
31
33
|
attr_reader :sum_row
|
34
|
+
# Title of the sum column
|
35
|
+
attr_reader :sum_col_title
|
32
36
|
# sums of the column values
|
33
37
|
attr_reader :sums
|
34
38
|
|
@@ -37,59 +41,101 @@ module Sycsvpro
|
|
37
41
|
def initialize(options={})
|
38
42
|
@infile = options[:infile]
|
39
43
|
@outfile = options[:outfile]
|
40
|
-
|
44
|
+
init_key_columns(options[:key])
|
41
45
|
@row_filter = RowFilter.new(options[:rows])
|
42
46
|
@col_filter = ColumnFilter.new(options[:cols], df: options[:df])
|
43
47
|
@key_values = {}
|
44
48
|
@heading = []
|
45
|
-
|
46
|
-
@sum_row = @sum_row.to_i unless @sum_row.nil?
|
49
|
+
init_sum_scheme(options[:sum])
|
47
50
|
@sums = Hash.new(0)
|
48
51
|
end
|
49
52
|
|
50
53
|
# Executes the counter
|
51
54
|
def execute
|
52
|
-
|
55
|
+
process_count
|
53
56
|
write_result
|
54
57
|
end
|
55
58
|
|
56
59
|
# Processes the counting on the in file
|
57
|
-
def
|
60
|
+
def process_count
|
58
61
|
File.new(infile).each_with_index do |line, index|
|
59
62
|
result = col_filter.process(row_filter.process(line.chomp, row: index))
|
60
63
|
unless result.nil? or result.empty?
|
61
|
-
key = unstring(line).split(';')
|
62
|
-
key_value = key_values[key] || key_values[key] = { name: key,
|
64
|
+
key = unstring(line).split(';').values_at(*key_columns)
|
65
|
+
key_value = key_values[key] || key_values[key] = { name: key,
|
66
|
+
elements: Hash.new(0),
|
67
|
+
sum: 0 }
|
63
68
|
result.chomp.split(';').each do |column|
|
64
69
|
heading << column if heading.index(column).nil?
|
65
70
|
key_value[:elements][column] += 1
|
71
|
+
key_value[:sum] += 1
|
66
72
|
sums[column] += 1
|
67
73
|
end
|
68
74
|
end
|
69
75
|
end
|
76
|
+
unless sum_col_title.nil?
|
77
|
+
heading << sum_col_title
|
78
|
+
sums[sum_col_title] = sums.values.inject(:+)
|
79
|
+
end
|
70
80
|
end
|
71
81
|
|
72
|
-
|
82
|
+
# Writes the count results
|
73
83
|
def write_result
|
74
|
-
sum_line = [
|
84
|
+
sum_line = [sum_row_title] + [''] * (key_titles.size - 1)
|
75
85
|
heading.sort.each do |h|
|
76
86
|
sum_line << sums[h]
|
77
87
|
end
|
78
88
|
row = 0;
|
79
89
|
File.open(outfile, 'w') do |out|
|
80
90
|
out.puts sum_line.join(';') if row == sum_row ; row += 1
|
81
|
-
out.puts (
|
91
|
+
out.puts (key_titles + heading.sort).join(';')
|
82
92
|
key_values.each do |k,v|
|
83
93
|
out.puts sum_line.join(';') if row == sum_row ; row += 1
|
84
94
|
line = [k]
|
85
95
|
heading.sort.each do |h|
|
86
|
-
line << v[:elements][h]
|
96
|
+
line << v[:elements][h] unless h == sum_col_title
|
87
97
|
end
|
98
|
+
line << v[:sum] unless sum_col_title.nil?
|
88
99
|
out.puts line.join(';')
|
89
100
|
end
|
90
101
|
end
|
91
102
|
end
|
92
103
|
|
104
|
+
private
|
105
|
+
|
106
|
+
# Initializes the sum row title and position as well as the sum column title
|
107
|
+
def init_sum_scheme(sum_scheme)
|
108
|
+
|
109
|
+
return if sum_scheme.nil?
|
110
|
+
|
111
|
+
re = /(\w+):(\d+)|(\w+)/
|
112
|
+
|
113
|
+
sum_scheme.scan(re).each do |part|
|
114
|
+
if part.compact.size == 2
|
115
|
+
@sum_row_title = part[0]
|
116
|
+
@sum_row = part[1].to_i
|
117
|
+
else
|
118
|
+
@sum_col_title = part[2]
|
119
|
+
end
|
120
|
+
end
|
121
|
+
|
122
|
+
end
|
123
|
+
|
124
|
+
# Initialize the key columns and headers
|
125
|
+
def init_key_columns(key_scheme)
|
126
|
+
|
127
|
+
@key_titles = []
|
128
|
+
@key_columns = []
|
129
|
+
|
130
|
+
keys = key_scheme.scan(/(\d+):(\w+)/)
|
131
|
+
|
132
|
+
keys.each do |key|
|
133
|
+
@key_titles << key[1]
|
134
|
+
@key_columns << key[0].to_i
|
135
|
+
end
|
136
|
+
|
137
|
+
end
|
138
|
+
|
93
139
|
end
|
94
140
|
|
95
141
|
end
|
data/lib/sycsvpro/filter.rb
CHANGED
@@ -30,14 +30,16 @@ module Sycsvpro
|
|
30
30
|
|
31
31
|
# Creates the filters based on the given patterns
|
32
32
|
def method_missing(id, *args, &block)
|
33
|
-
return equal($1, args, block)
|
34
|
-
return equal_type($1, $2, args, block)
|
35
|
-
return range($1, $2, args, block)
|
36
|
-
return range_type($1, $2, $3, args, block)
|
37
|
-
return regex($1, args, block)
|
38
|
-
return col_regex($1, $2, args, block)
|
39
|
-
return date($1, $2, $3, args, block)
|
40
|
-
return date_range($1, $2, $3, args, block)
|
33
|
+
return equal($1, args, block) if id =~ /^(\d+)$/
|
34
|
+
return equal_type($1, $2, args, block) if id =~ /^(s|n|d):(\d+)$/
|
35
|
+
return range($1, $2, args, block) if id =~ /^(\d+)-(\d+)$/
|
36
|
+
return range_type($1, $2, $3, args, block) if id =~ /^(s|n|d):(\d+)-(\d+)$/
|
37
|
+
return regex($1, args, block) if id =~ /^\/(.*)\/$/
|
38
|
+
return col_regex($1, $2, args, block) if id =~ /^(\d+):\/(.*)\/$/
|
39
|
+
return date($1, $2, $3, args, block) if id =~ /^(\d+):(<|=|>)(\d+.\d+.\d+)/
|
40
|
+
return date_range($1, $2, $3, args, block) if id =~ /^(\d+):(\d+.\d+.\d+.)-(\d+.\d+.\d+)$/
|
41
|
+
return number($1, $2, $3, args, block) if id =~ /^(\d+):(<|=|>)(\d+)/
|
42
|
+
return number_range($1, $2, $3, args, block) if id =~ /^(\d):(\d+)-(\d+)/
|
41
43
|
super
|
42
44
|
end
|
43
45
|
|
@@ -54,7 +56,8 @@ module Sycsvpro
|
|
54
56
|
match = false
|
55
57
|
begin
|
56
58
|
match = eval(parameters[:operation].gsub('[value]', value))
|
57
|
-
rescue
|
59
|
+
rescue Exception => e
|
60
|
+
|
58
61
|
end
|
59
62
|
yield column, match
|
60
63
|
end
|
@@ -124,6 +127,19 @@ module Sycsvpro
|
|
124
127
|
pivot["#{start_date}-#{end_date}"] = { col: col, operation: operation }
|
125
128
|
end
|
126
129
|
|
130
|
+
# Adds a number filter
|
131
|
+
def number(col, comparator, number, args, block)
|
132
|
+
comparator = '==' if comparator == '='
|
133
|
+
operation = "[value] #{comparator} #{number}"
|
134
|
+
pivot["#{comparator}#{number}"] = { col: col, operation: operation }
|
135
|
+
end
|
136
|
+
|
137
|
+
# Adds a number range filter
|
138
|
+
def number_range(col, start_number, end_number, arg, block)
|
139
|
+
operation = " #{start_number} <= [value] && [value] <= #{end_number}"
|
140
|
+
pivot["#{start_number}-#{end_number}"] = { col: col, operation: operation }
|
141
|
+
end
|
142
|
+
|
127
143
|
end
|
128
144
|
|
129
145
|
end
|
data/spec/sycsvpro/aggregator_spec.rb
CHANGED
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'sycsvpro/aggregator'
|
2
|
+
|
3
|
+
module Sycsvpro
|
4
|
+
|
5
|
+
describe Aggregator do
|
6
|
+
|
7
|
+
before do
|
8
|
+
@in_file = File.join(File.dirname(__FILE__), "files/in.csv")
|
9
|
+
@out_file = File.join(File.dirname(__FILE__), "files/out.csv")
|
10
|
+
end
|
11
|
+
|
12
|
+
it "should aggregate single column values" do
|
13
|
+
aggregator = Aggregator.new(infile: @in_file, outfile: @out_file, rows: "1-10",
|
14
|
+
cols: "0", sum: "Total:1,Machines", headerless: true)
|
15
|
+
|
16
|
+
aggregator.execute
|
17
|
+
|
18
|
+
result = [ ";Machines",
|
19
|
+
"Total;7",
|
20
|
+
"Fink;2",
|
21
|
+
"Haas;1",
|
22
|
+
"Gent;1",
|
23
|
+
"Rank;1",
|
24
|
+
"Klig;1",
|
25
|
+
"fink;1" ]
|
26
|
+
|
27
|
+
File.open(@out_file).each_with_index do |line, index|
|
28
|
+
line.chomp.should eq result[index]
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
it "should aggregate multiple column values" do
|
33
|
+
aggregator = Aggregator.new(infile: @in_file, outfile: @out_file, rows: "0-10",
|
34
|
+
cols: "0,1", sum: "Total:1,Machines", headerless: false)
|
35
|
+
|
36
|
+
aggregator.execute
|
37
|
+
|
38
|
+
result = [ "customer;contract-number;Machines",
|
39
|
+
"Total;;7",
|
40
|
+
"Fink;1234;2",
|
41
|
+
"Haas;3322;1",
|
42
|
+
"Gent;4323;1",
|
43
|
+
"Rank;3232;1",
|
44
|
+
"Klig;4432;1",
|
45
|
+
"fink;1234;1" ]
|
46
|
+
|
47
|
+
File.open(@out_file).each_with_index do |line, index|
|
48
|
+
line.chomp.should eq result[index]
|
49
|
+
end
|
50
|
+
|
51
|
+
end
|
52
|
+
|
53
|
+
end
|
54
|
+
|
55
|
+
end
|
@@ -40,6 +40,28 @@ module Sycsvpro
|
|
40
40
|
expect(line.chomp).to eq result[index]
|
41
41
|
end
|
42
42
|
end
|
43
|
+
|
44
|
+
it "should sum specified rows" do
|
45
|
+
header = "*,drives,motors"
|
46
|
+
rows = "1-8"
|
47
|
+
cols = "5:c3+c4,6:c3*2"
|
48
|
+
sums = "1,3-5"
|
49
|
+
calculator = Calculator.new(infile: @in_file, outfile: @out_file,
|
50
|
+
header: header, rows: rows, cols: cols, sum: true)
|
51
|
+
calculator.execute
|
52
|
+
|
53
|
+
result = ["customer;machines;controls;contracts;visits;drives;motors",
|
54
|
+
"Fink;2;2;1;1;2;2",
|
55
|
+
"Haas;3;3;1;1.0;2.0;2",
|
56
|
+
"Gent;4;4;1;1;2;2",
|
57
|
+
"Rank;5;5;1;1;2;2",
|
58
|
+
"0;14;14;4;4.0;8;8"]
|
59
|
+
|
60
|
+
File.new(@out_file, 'r').each_with_index do |line, index|
|
61
|
+
expect(line.chomp).to eq result[index]
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
43
65
|
end
|
44
66
|
|
45
67
|
end
|