sycsvpro 0.0.9 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +29 -8
- data/bin/sycsvpro +68 -26
- data/html/Object.html +57 -0
- data/html/README_rdoc.html +18 -16
- data/html/Sycsvpro.html +2 -0
- data/html/Sycsvpro/Aggregator.html +482 -0
- data/html/Sycsvpro/Calculator.html +35 -8
- data/html/Sycsvpro/Counter.html +60 -27
- data/html/Sycsvpro/Filter.html +15 -12
- data/html/created.rid +9 -8
- data/html/index.html +20 -16
- data/html/js/search_index.js +1 -1
- data/html/table_of_contents.html +66 -38
- data/lib/sycsvpro.rb +1 -0
- data/lib/sycsvpro/aggregator.rb +112 -0
- data/lib/sycsvpro/calculator.rb +21 -5
- data/lib/sycsvpro/counter.rb +62 -16
- data/lib/sycsvpro/filter.rb +25 -9
- data/lib/sycsvpro/version.rb +1 -1
- data/spec/sycsvpro/aggregator_spec.rb +55 -0
- data/spec/sycsvpro/calculator_spec.rb +22 -0
- data/spec/sycsvpro/counter_spec.rb +77 -11
- data/sycsvpro.rdoc +17 -16
- metadata +5 -2
data/lib/sycsvpro.rb
CHANGED
data/lib/sycsvpro/aggregator.rb
@@ -0,0 +1,112 @@
|
|
1
|
+
require_relative 'row_filter'
|
2
|
+
require_relative 'column_filter'
|
3
|
+
require_relative 'dsl'
|
4
|
+
|
5
|
+
# Operating csv files
|
6
|
+
module Sycsvpro
|
7
|
+
|
8
|
+
# An Aggregator counts specified row values and adds a sum to the end of the row
|
9
|
+
class Aggregator
|
10
|
+
|
11
|
+
include Dsl
|
12
|
+
|
13
|
+
# infile contains the data that is operated on
|
14
|
+
attr_reader :infile
|
15
|
+
# outfile is the file where the result is written to
|
16
|
+
attr_reader :outfile
|
17
|
+
# file doesn't contain a header
|
18
|
+
attr_reader :headerless
|
19
|
+
# filter that is used for rows
|
20
|
+
attr_reader :row_filter
|
21
|
+
# filter that is used for columns
|
22
|
+
attr_reader :col_filter
|
23
|
+
# values that are aggregated
|
24
|
+
attr_reader :key_values
|
25
|
+
# header of the out file
|
26
|
+
attr_reader :heading
|
27
|
+
# Title of the sum row
|
28
|
+
attr_reader :sum_row_title
|
29
|
+
# row where to add the sums of the columns
|
30
|
+
attr_reader :sum_row
|
31
|
+
# Title of the sum column
|
32
|
+
attr_reader :sum_col_title
|
33
|
+
# column where to add the sum of the row sum
|
34
|
+
attr_reader :sum_col
|
35
|
+
# sums of the column values
|
36
|
+
attr_reader :sums
|
37
|
+
|
38
|
+
# Creates a new aggregator. Takes as attributes infile, outfile, key, rows, cols, date-format
|
39
|
+
# and indicator whether to add a sum row
|
40
|
+
def initialize(options={})
|
41
|
+
@infile = options[:infile]
|
42
|
+
@outfile = options[:outfile]
|
43
|
+
@headerless = options[:headerless] || false
|
44
|
+
@row_filter = RowFilter.new(options[:rows])
|
45
|
+
@col_filter = ColumnFilter.new(options[:cols], df: options[:df])
|
46
|
+
@key_values = Hash.new(0)
|
47
|
+
@heading = []
|
48
|
+
@sums = Hash.new(0)
|
49
|
+
init_sum_scheme(options[:sum])
|
50
|
+
end
|
51
|
+
|
52
|
+
# Executes the aggregator
|
53
|
+
def execute
|
54
|
+
process_aggregation
|
55
|
+
write_result
|
56
|
+
end
|
57
|
+
|
58
|
+
# Process the aggregation of the key values
|
59
|
+
def process_aggregation
|
60
|
+
File.new(infile).each_with_index do |line, index|
|
61
|
+
result = col_filter.process(row_filter.process(line.chomp, row: index))
|
62
|
+
unless result.nil? or result.empty?
|
63
|
+
if heading.empty? and not headerless
|
64
|
+
heading << result.split(';')
|
65
|
+
next
|
66
|
+
else
|
67
|
+
@sum_col = [result.split(';').size, sum_col].max
|
68
|
+
end
|
69
|
+
key_values[result] += 1
|
70
|
+
sums[sum_col_title] += 1
|
71
|
+
end
|
72
|
+
end
|
73
|
+
heading.flatten!
|
74
|
+
heading[sum_col] = sum_col_title
|
75
|
+
end
|
76
|
+
|
77
|
+
# Writes the aggregation results
|
78
|
+
def write_result
|
79
|
+
sum_line = [sum_row_title]
|
80
|
+
(heading.size - 2).times { sum_line << "" }
|
81
|
+
sum_line << sums[sum_col_title]
|
82
|
+
row = 0;
|
83
|
+
File.open(outfile, 'w') do |out|
|
84
|
+
out.puts sum_line.join(';') if row == sum_row ; row += 1
|
85
|
+
out.puts heading.join(';')
|
86
|
+
key_values.each do |k, v|
|
87
|
+
out.puts sum_line.join(';') if row == sum_row ; row += 1
|
88
|
+
out.puts [k, v].join(';')
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
private
|
94
|
+
|
95
|
+
# Initializes the sum row title and position as well as the sum column title and position
|
96
|
+
def init_sum_scheme(sum_scheme)
|
97
|
+
row_scheme, col_scheme = sum_scheme.split(',') unless sum_scheme.nil?
|
98
|
+
|
99
|
+
unless row_scheme.nil?
|
100
|
+
@sum_row_title, @sum_row = row_scheme.split(':') unless row_scheme.empty?
|
101
|
+
end
|
102
|
+
|
103
|
+
@sum_row.nil? ? @sum_row = 0 : @sum_row = @sum_row.to_i
|
104
|
+
@sum_row_title = 'Total' if @sum_row_title.nil?
|
105
|
+
|
106
|
+
col_scheme.nil? ? @sum_col_title = 'Total' : @sum_col_title = col_scheme
|
107
|
+
@sum_col = 0
|
108
|
+
end
|
109
|
+
|
110
|
+
end
|
111
|
+
|
112
|
+
end
|
data/lib/sycsvpro/calculator.rb
CHANGED
@@ -24,16 +24,20 @@ module Sycsvpro
|
|
24
24
|
attr_reader :header
|
25
25
|
# filter that is used for columns
|
26
26
|
attr_reader :columns
|
27
|
+
# if true add a sum row at the bottom of the out file
|
28
|
+
attr_reader :add_sum_row
|
27
29
|
|
28
30
|
# Creates a new Calculator. Options expects :infile, :outfile, :rows and :columns. Optionally
|
29
31
|
# a header can be provided. The header can be supplemented with additional column names that
|
30
32
|
# are generated due to an arithmetic operation that creates new columns
|
31
33
|
def initialize(options={})
|
32
|
-
@infile
|
33
|
-
@outfile
|
34
|
-
@row_filter
|
35
|
-
@header
|
36
|
-
@
|
34
|
+
@infile = options[:infile]
|
35
|
+
@outfile = options[:outfile]
|
36
|
+
@row_filter = RowFilter.new(options[:rows])
|
37
|
+
@header = Header.new(options[:header])
|
38
|
+
@sum_row = []
|
39
|
+
@add_sum_row = options[:sum] || false
|
40
|
+
@formulae = {}
|
37
41
|
create_calculator(options[:cols])
|
38
42
|
end
|
39
43
|
|
@@ -64,7 +68,19 @@ module Sycsvpro
|
|
64
68
|
@columns[col.to_i] = eval(formula)
|
65
69
|
end
|
66
70
|
out.puts @columns.join(';')
|
71
|
+
|
72
|
+
@columns.each_with_index do |column, index|
|
73
|
+
if @sum_row[index]
|
74
|
+
@sum_row[index] += to_number column
|
75
|
+
else
|
76
|
+
@sum_row[index] = to_number column
|
77
|
+
end
|
78
|
+
end if add_sum_row
|
79
|
+
|
67
80
|
end
|
81
|
+
|
82
|
+
out.puts @sum_row.join(';') if add_sum_row
|
83
|
+
|
68
84
|
end
|
69
85
|
end
|
70
86
|
|
data/lib/sycsvpro/counter.rb
CHANGED
@@ -5,7 +5,7 @@ require_relative 'dsl'
|
|
5
5
|
# Operating csv files
|
6
6
|
module Sycsvpro
|
7
7
|
|
8
|
-
#
|
8
|
+
# Counter counts values and uses the values as column names and uses the count
|
9
9
|
# as the column value
|
10
10
|
class Counter
|
11
11
|
|
@@ -15,8 +15,10 @@ module Sycsvpro
|
|
15
15
|
attr_reader :infile
|
16
16
|
# outfile is the file where the result is written to
|
17
17
|
attr_reader :outfile
|
18
|
-
# values are assigned to the key
|
19
|
-
attr_reader :
|
18
|
+
# values are assigned to the key columns
|
19
|
+
attr_reader :key_columns
|
20
|
+
# key columns headers
|
21
|
+
attr_reader :key_titles
|
20
22
|
# filter that is used for rows
|
21
23
|
attr_reader :row_filter
|
22
24
|
# filter that is used for columns
|
@@ -26,9 +28,11 @@ module Sycsvpro
|
|
26
28
|
# header of the out file
|
27
29
|
attr_reader :heading
|
28
30
|
# Title of the sum row
|
29
|
-
attr_reader :
|
30
|
-
# row where to add the sums of the columns
|
31
|
+
attr_reader :sum_row_title
|
32
|
+
# row where to add the sums of the columns
|
31
33
|
attr_reader :sum_row
|
34
|
+
# Title of the sum column
|
35
|
+
attr_reader :sum_col_title
|
32
36
|
# sums of the column values
|
33
37
|
attr_reader :sums
|
34
38
|
|
@@ -37,59 +41,101 @@ module Sycsvpro
|
|
37
41
|
def initialize(options={})
|
38
42
|
@infile = options[:infile]
|
39
43
|
@outfile = options[:outfile]
|
40
|
-
|
44
|
+
init_key_columns(options[:key])
|
41
45
|
@row_filter = RowFilter.new(options[:rows])
|
42
46
|
@col_filter = ColumnFilter.new(options[:cols], df: options[:df])
|
43
47
|
@key_values = {}
|
44
48
|
@heading = []
|
45
|
-
|
46
|
-
@sum_row = @sum_row.to_i unless @sum_row.nil?
|
49
|
+
init_sum_scheme(options[:sum])
|
47
50
|
@sums = Hash.new(0)
|
48
51
|
end
|
49
52
|
|
50
53
|
# Executes the counter
|
51
54
|
def execute
|
52
|
-
|
55
|
+
process_count
|
53
56
|
write_result
|
54
57
|
end
|
55
58
|
|
56
59
|
# Processes the counting on the in file
|
57
|
-
def
|
60
|
+
def process_count
|
58
61
|
File.new(infile).each_with_index do |line, index|
|
59
62
|
result = col_filter.process(row_filter.process(line.chomp, row: index))
|
60
63
|
unless result.nil? or result.empty?
|
61
|
-
key = unstring(line).split(';')
|
62
|
-
key_value = key_values[key] || key_values[key] = { name: key,
|
64
|
+
key = unstring(line).split(';').values_at(*key_columns)
|
65
|
+
key_value = key_values[key] || key_values[key] = { name: key,
|
66
|
+
elements: Hash.new(0),
|
67
|
+
sum: 0 }
|
63
68
|
result.chomp.split(';').each do |column|
|
64
69
|
heading << column if heading.index(column).nil?
|
65
70
|
key_value[:elements][column] += 1
|
71
|
+
key_value[:sum] += 1
|
66
72
|
sums[column] += 1
|
67
73
|
end
|
68
74
|
end
|
69
75
|
end
|
76
|
+
unless sum_col_title.nil?
|
77
|
+
heading << sum_col_title
|
78
|
+
sums[sum_col_title] = sums.values.inject(:+)
|
79
|
+
end
|
70
80
|
end
|
71
81
|
|
72
|
-
|
82
|
+
# Writes the count results
|
73
83
|
def write_result
|
74
|
-
sum_line = [
|
84
|
+
sum_line = [sum_row_title] + [''] * (key_titles.size - 1)
|
75
85
|
heading.sort.each do |h|
|
76
86
|
sum_line << sums[h]
|
77
87
|
end
|
78
88
|
row = 0;
|
79
89
|
File.open(outfile, 'w') do |out|
|
80
90
|
out.puts sum_line.join(';') if row == sum_row ; row += 1
|
81
|
-
out.puts (
|
91
|
+
out.puts (key_titles + heading.sort).join(';')
|
82
92
|
key_values.each do |k,v|
|
83
93
|
out.puts sum_line.join(';') if row == sum_row ; row += 1
|
84
94
|
line = [k]
|
85
95
|
heading.sort.each do |h|
|
86
|
-
line << v[:elements][h]
|
96
|
+
line << v[:elements][h] unless h == sum_col_title
|
87
97
|
end
|
98
|
+
line << v[:sum] unless sum_col_title.nil?
|
88
99
|
out.puts line.join(';')
|
89
100
|
end
|
90
101
|
end
|
91
102
|
end
|
92
103
|
|
104
|
+
private
|
105
|
+
|
106
|
+
# Initializes the sum row title and position as well as the sum column title
|
107
|
+
def init_sum_scheme(sum_scheme)
|
108
|
+
|
109
|
+
return if sum_scheme.nil?
|
110
|
+
|
111
|
+
re = /(\w+):(\d+)|(\w+)/
|
112
|
+
|
113
|
+
sum_scheme.scan(re).each do |part|
|
114
|
+
if part.compact.size == 2
|
115
|
+
@sum_row_title = part[0]
|
116
|
+
@sum_row = part[1].to_i
|
117
|
+
else
|
118
|
+
@sum_col_title = part[2]
|
119
|
+
end
|
120
|
+
end
|
121
|
+
|
122
|
+
end
|
123
|
+
|
124
|
+
# Initialize the key columns and headers
|
125
|
+
def init_key_columns(key_scheme)
|
126
|
+
|
127
|
+
@key_titles = []
|
128
|
+
@key_columns = []
|
129
|
+
|
130
|
+
keys = key_scheme.scan(/(\d+):(\w+)/)
|
131
|
+
|
132
|
+
keys.each do |key|
|
133
|
+
@key_titles << key[1]
|
134
|
+
@key_columns << key[0].to_i
|
135
|
+
end
|
136
|
+
|
137
|
+
end
|
138
|
+
|
93
139
|
end
|
94
140
|
|
95
141
|
end
|
data/lib/sycsvpro/filter.rb
CHANGED
@@ -30,14 +30,16 @@ module Sycsvpro
|
|
30
30
|
|
31
31
|
# Creates the filters based on the given patterns
|
32
32
|
def method_missing(id, *args, &block)
|
33
|
-
return equal($1, args, block)
|
34
|
-
return equal_type($1, $2, args, block)
|
35
|
-
return range($1, $2, args, block)
|
36
|
-
return range_type($1, $2, $3, args, block)
|
37
|
-
return regex($1, args, block)
|
38
|
-
return col_regex($1, $2, args, block)
|
39
|
-
return date($1, $2, $3, args, block)
|
40
|
-
return date_range($1, $2, $3, args, block)
|
33
|
+
return equal($1, args, block) if id =~ /^(\d+)$/
|
34
|
+
return equal_type($1, $2, args, block) if id =~ /^(s|n|d):(\d+)$/
|
35
|
+
return range($1, $2, args, block) if id =~ /^(\d+)-(\d+)$/
|
36
|
+
return range_type($1, $2, $3, args, block) if id =~ /^(s|n|d):(\d+)-(\d+)$/
|
37
|
+
return regex($1, args, block) if id =~ /^\/(.*)\/$/
|
38
|
+
return col_regex($1, $2, args, block) if id =~ /^(\d+):\/(.*)\/$/
|
39
|
+
return date($1, $2, $3, args, block) if id =~ /^(\d+):(<|=|>)(\d+.\d+.\d+)/
|
40
|
+
return date_range($1, $2, $3, args, block) if id =~ /^(\d+):(\d+.\d+.\d+.)-(\d+.\d+.\d+)$/
|
41
|
+
return number($1, $2, $3, args, block) if id =~ /^(\d+):(<|=|>)(\d+)/
|
42
|
+
return number_range($1, $2, $3, args, block) if id =~ /^(\d):(\d+)-(\d+)/
|
41
43
|
super
|
42
44
|
end
|
43
45
|
|
@@ -54,7 +56,8 @@ module Sycsvpro
|
|
54
56
|
match = false
|
55
57
|
begin
|
56
58
|
match = eval(parameters[:operation].gsub('[value]', value))
|
57
|
-
rescue
|
59
|
+
rescue Exception => e
|
60
|
+
|
58
61
|
end
|
59
62
|
yield column, match
|
60
63
|
end
|
@@ -124,6 +127,19 @@ module Sycsvpro
|
|
124
127
|
pivot["#{start_date}-#{end_date}"] = { col: col, operation: operation }
|
125
128
|
end
|
126
129
|
|
130
|
+
# Adds a number filter
|
131
|
+
def number(col, comparator, number, args, block)
|
132
|
+
comparator = '==' if comparator == '='
|
133
|
+
operation = "[value] #{comparator} #{number}"
|
134
|
+
pivot["#{comparator}#{number}"] = { col: col, operation: operation }
|
135
|
+
end
|
136
|
+
|
137
|
+
# Adds a number range filter
|
138
|
+
def number_range(col, start_number, end_number, arg, block)
|
139
|
+
operation = " #{start_number} <= [value] && [value] <= #{end_number}"
|
140
|
+
pivot["#{start_number}-#{end_number}"] = { col: col, operation: operation }
|
141
|
+
end
|
142
|
+
|
127
143
|
end
|
128
144
|
|
129
145
|
end
|
data/lib/sycsvpro/version.rb
CHANGED
data/spec/sycsvpro/aggregator_spec.rb
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'sycsvpro/aggregator'
|
2
|
+
|
3
|
+
module Sycsvpro
|
4
|
+
|
5
|
+
describe Aggregator do
|
6
|
+
|
7
|
+
before do
|
8
|
+
@in_file = File.join(File.dirname(__FILE__), "files/in.csv")
|
9
|
+
@out_file = File.join(File.dirname(__FILE__), "files/out.csv")
|
10
|
+
end
|
11
|
+
|
12
|
+
it "should aggregate single column values" do
|
13
|
+
aggregator = Aggregator.new(infile: @in_file, outfile: @out_file, rows: "1-10",
|
14
|
+
cols: "0", sum: "Total:1,Machines", headerless: true)
|
15
|
+
|
16
|
+
aggregator.execute
|
17
|
+
|
18
|
+
result = [ ";Machines",
|
19
|
+
"Total;7",
|
20
|
+
"Fink;2",
|
21
|
+
"Haas;1",
|
22
|
+
"Gent;1",
|
23
|
+
"Rank;1",
|
24
|
+
"Klig;1",
|
25
|
+
"fink;1" ]
|
26
|
+
|
27
|
+
File.open(@out_file).each_with_index do |line, index|
|
28
|
+
line.chomp.should eq result[index]
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
it "should aggregate multiple column values" do
|
33
|
+
aggregator = Aggregator.new(infile: @in_file, outfile: @out_file, rows: "0-10",
|
34
|
+
cols: "0,1", sum: "Total:1,Machines", headerless: false)
|
35
|
+
|
36
|
+
aggregator.execute
|
37
|
+
|
38
|
+
result = [ "customer;contract-number;Machines",
|
39
|
+
"Total;;7",
|
40
|
+
"Fink;1234;2",
|
41
|
+
"Haas;3322;1",
|
42
|
+
"Gent;4323;1",
|
43
|
+
"Rank;3232;1",
|
44
|
+
"Klig;4432;1",
|
45
|
+
"fink;1234;1" ]
|
46
|
+
|
47
|
+
File.open(@out_file).each_with_index do |line, index|
|
48
|
+
line.chomp.should eq result[index]
|
49
|
+
end
|
50
|
+
|
51
|
+
end
|
52
|
+
|
53
|
+
end
|
54
|
+
|
55
|
+
end
|
data/spec/sycsvpro/calculator_spec.rb
@@ -40,6 +40,28 @@ module Sycsvpro
|
|
40
40
|
expect(line.chomp).to eq result[index]
|
41
41
|
end
|
42
42
|
end
|
43
|
+
|
44
|
+
it "should sum specified rows" do
|
45
|
+
header = "*,drives,motors"
|
46
|
+
rows = "1-8"
|
47
|
+
cols = "5:c3+c4,6:c3*2"
|
48
|
+
sums = "1,3-5"
|
49
|
+
calculator = Calculator.new(infile: @in_file, outfile: @out_file,
|
50
|
+
header: header, rows: rows, cols: cols, sum: true)
|
51
|
+
calculator.execute
|
52
|
+
|
53
|
+
result = ["customer;machines;controls;contracts;visits;drives;motors",
|
54
|
+
"Fink;2;2;1;1;2;2",
|
55
|
+
"Haas;3;3;1;1.0;2.0;2",
|
56
|
+
"Gent;4;4;1;1;2;2",
|
57
|
+
"Rank;5;5;1;1;2;2",
|
58
|
+
"0;14;14;4;4.0;8;8"]
|
59
|
+
|
60
|
+
File.new(@out_file, 'r').each_with_index do |line, index|
|
61
|
+
expect(line.chomp).to eq result[index]
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
43
65
|
end
|
44
66
|
|
45
67
|
end
|