sycsvpro 0.0.9 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/sycsvpro.rb CHANGED
@@ -12,3 +12,4 @@ require 'sycsvpro/script_creator.rb'
12
12
  require 'sycsvpro/script_list.rb'
13
13
  require 'sycsvpro/inserter.rb'
14
14
  require 'sycsvpro/sorter.rb'
15
+ require 'sycsvpro/aggregator.rb'
@@ -0,0 +1,112 @@
1
+ require_relative 'row_filter'
2
+ require_relative 'column_filter'
3
+ require_relative 'dsl'
4
+
5
+ # Operating csv files
6
+ module Sycsvpro
7
+
8
+ # An Aggregator counts specified row values and adds a sum to the end of the row
9
+ class Aggregator
10
+
11
+ include Dsl
12
+
13
+ # infile contains the data that is operated on
14
+ attr_reader :infile
15
+ # outfile is the file where the result is written to
16
+ attr_reader :outfile
17
+ # file doesn't contain a header
18
+ attr_reader :headerless
19
+ # filter that is used for rows
20
+ attr_reader :row_filter
21
+ # filter that is used for columns
22
+ attr_reader :col_filter
23
+ # values that are aggregated
24
+ attr_reader :key_values
25
+ # header of the out file
26
+ attr_reader :heading
27
+ # Title of the sum row
28
+ attr_reader :sum_row_title
29
+ # row where to add the sums of the columns
30
+ attr_reader :sum_row
31
+ # Title of the sum column
32
+ attr_reader :sum_col_title
33
+ # column where to add the sum of the row sum
34
+ attr_reader :sum_col
35
+ # sums of the column values
36
+ attr_reader :sums
37
+
38
+ # Creates a new aggregator. Takes as attributes infile, outfile, key, rows, cols, date-format
39
+ # and indicator whether to add a sum row
40
+ def initialize(options={})
41
+ @infile = options[:infile]
42
+ @outfile = options[:outfile]
43
+ @headerless = options[:headerless] || false
44
+ @row_filter = RowFilter.new(options[:rows])
45
+ @col_filter = ColumnFilter.new(options[:cols], df: options[:df])
46
+ @key_values = Hash.new(0)
47
+ @heading = []
48
+ @sums = Hash.new(0)
49
+ init_sum_scheme(options[:sum])
50
+ end
51
+
52
+ # Executes the aggregator
53
+ def execute
54
+ process_aggregation
55
+ write_result
56
+ end
57
+
58
+ # Process the aggregation of the key values
59
+ def process_aggregation
60
+ File.new(infile).each_with_index do |line, index|
61
+ result = col_filter.process(row_filter.process(line.chomp, row: index))
62
+ unless result.nil? or result.empty?
63
+ if heading.empty? and not headerless
64
+ heading << result.split(';')
65
+ next
66
+ else
67
+ @sum_col = [result.split(';').size, sum_col].max
68
+ end
69
+ key_values[result] += 1
70
+ sums[sum_col_title] += 1
71
+ end
72
+ end
73
+ heading.flatten!
74
+ heading[sum_col] = sum_col_title
75
+ end
76
+
77
+ # Writes the aggration results
78
+ def write_result
79
+ sum_line = [sum_row_title]
80
+ (heading.size - 2).times { sum_line << "" }
81
+ sum_line << sums[sum_col_title]
82
+ row = 0;
83
+ File.open(outfile, 'w') do |out|
84
+ out.puts sum_line.join(';') if row == sum_row ; row += 1
85
+ out.puts heading.join(';')
86
+ key_values.each do |k, v|
87
+ out.puts sum_line.join(';') if row == sum_row ; row += 1
88
+ out.puts [k, v].join(';')
89
+ end
90
+ end
91
+ end
92
+
93
+ private
94
+
95
+ # Initializes the sum row title an positions as well as the sum column title and position
96
+ def init_sum_scheme(sum_scheme)
97
+ row_scheme, col_scheme = sum_scheme.split(',') unless sum_scheme.nil?
98
+
99
+ unless row_scheme.nil?
100
+ @sum_row_title, @sum_row = row_scheme.split(':') unless row_scheme.empty?
101
+ end
102
+
103
+ @sum_row.nil? ? @sum_row = 0 : @sum_row = @sum_row.to_i
104
+ @sum_row_title = 'Total' if @sum_row_title.nil?
105
+
106
+ col_scheme.nil? ? @sum_col_title = 'Total' : @sum_col_title = col_scheme
107
+ @sum_col = 0
108
+ end
109
+
110
+ end
111
+
112
+ end
@@ -24,16 +24,20 @@ module Sycsvpro
24
24
  attr_reader :header
25
25
  # filter that is used for columns
26
26
  attr_reader :columns
27
+ # if true add a sum row at the bottom of the out file
28
+ attr_reader :add_sum_row
27
29
 
28
30
  # Creates a new Calculator. Options expects :infile, :outfile, :rows and :columns. Optionally
29
31
  # a header can be provided. The header can be supplemented with additional column names that
30
32
  # are generated due to a arithmetic operation that creates new columns
31
33
  def initialize(options={})
32
- @infile = options[:infile]
33
- @outfile = options[:outfile]
34
- @row_filter = RowFilter.new(options[:rows])
35
- @header = Header.new(options[:header])
36
- @formulae = {}
34
+ @infile = options[:infile]
35
+ @outfile = options[:outfile]
36
+ @row_filter = RowFilter.new(options[:rows])
37
+ @header = Header.new(options[:header])
38
+ @sum_row = []
39
+ @add_sum_row = options[:sum] || false
40
+ @formulae = {}
37
41
  create_calculator(options[:cols])
38
42
  end
39
43
 
@@ -64,7 +68,19 @@ module Sycsvpro
64
68
  @columns[col.to_i] = eval(formula)
65
69
  end
66
70
  out.puts @columns.join(';')
71
+
72
+ @columns.each_with_index do |column, index|
73
+ if @sum_row[index]
74
+ @sum_row[index] += to_number column
75
+ else
76
+ @sum_row[index] = to_number column
77
+ end
78
+ end if add_sum_row
79
+
67
80
  end
81
+
82
+ out.puts @sum_row.join(';') if add_sum_row
83
+
68
84
  end
69
85
  end
70
86
 
@@ -5,7 +5,7 @@ require_relative 'dsl'
5
5
  # Operating csv files
6
6
  module Sycsvpro
7
7
 
8
- # Creates a new counter that counts values and uses the values as column names and uses the count
8
+ # Counter counts values and uses the values as column names and uses the count
9
9
  # as the column value
10
10
  class Counter
11
11
 
@@ -15,8 +15,10 @@ module Sycsvpro
15
15
  attr_reader :infile
16
16
  # outfile is the file where the result is written to
17
17
  attr_reader :outfile
18
- # values are assigned to the key column
19
- attr_reader :key_column
18
+ # values are assigned to the key columns
19
+ attr_reader :key_columns
20
+ # key columns headers
21
+ attr_reader :key_titles
20
22
  # filter that is used for rows
21
23
  attr_reader :row_filter
22
24
  # filter that is used for columns
@@ -26,9 +28,11 @@ module Sycsvpro
26
28
  # header of the out file
27
29
  attr_reader :heading
28
30
  # Title of the sum row
29
- attr_reader :sum_title
30
- # row where to add the sums of the columns of the sum columns
31
+ attr_reader :sum_row_title
32
+ # row where to add the sums of the columns
31
33
  attr_reader :sum_row
34
+ # Title of the sum column
35
+ attr_reader :sum_col_title
32
36
  # sums of the column values
33
37
  attr_reader :sums
34
38
 
@@ -37,59 +41,101 @@ module Sycsvpro
37
41
  def initialize(options={})
38
42
  @infile = options[:infile]
39
43
  @outfile = options[:outfile]
40
- @key_column = options[:key].to_i
44
+ init_key_columns(options[:key])
41
45
  @row_filter = RowFilter.new(options[:rows])
42
46
  @col_filter = ColumnFilter.new(options[:cols], df: options[:df])
43
47
  @key_values = {}
44
48
  @heading = []
45
- @sum_title, @sum_row = options[:sum].split(':') unless options[:sum].nil?
46
- @sum_row = @sum_row.to_i unless @sum_row.nil?
49
+ init_sum_scheme(options[:sum])
47
50
  @sums = Hash.new(0)
48
51
  end
49
52
 
50
53
  # Executes the counter
51
54
  def execute
52
- process_file
55
+ process_count
53
56
  write_result
54
57
  end
55
58
 
56
59
  # Processes the counting on the in file
57
- def process_file
60
+ def process_count
58
61
  File.new(infile).each_with_index do |line, index|
59
62
  result = col_filter.process(row_filter.process(line.chomp, row: index))
60
63
  unless result.nil? or result.empty?
61
- key = unstring(line).split(';')[key_column]
62
- key_value = key_values[key] || key_values[key] = { name: key, elements: Hash.new(0) }
64
+ key = unstring(line).split(';').values_at(*key_columns)
65
+ key_value = key_values[key] || key_values[key] = { name: key,
66
+ elements: Hash.new(0),
67
+ sum: 0 }
63
68
  result.chomp.split(';').each do |column|
64
69
  heading << column if heading.index(column).nil?
65
70
  key_value[:elements][column] += 1
71
+ key_value[:sum] += 1
66
72
  sums[column] += 1
67
73
  end
68
74
  end
69
75
  end
76
+ unless sum_col_title.nil?
77
+ heading << sum_col_title
78
+ sums[sum_col_title] = sums.values.inject(:+)
79
+ end
70
80
  end
71
81
 
72
- # Writes the results
82
+ # Writes the count results
73
83
  def write_result
74
- sum_line = [sum_title]
84
+ sum_line = [sum_row_title] + [''] * (key_titles.size - 1)
75
85
  heading.sort.each do |h|
76
86
  sum_line << sums[h]
77
87
  end
78
88
  row = 0;
79
89
  File.open(outfile, 'w') do |out|
80
90
  out.puts sum_line.join(';') if row == sum_row ; row += 1
81
- out.puts (["key"] + heading.sort).join(';')
91
+ out.puts (key_titles + heading.sort).join(';')
82
92
  key_values.each do |k,v|
83
93
  out.puts sum_line.join(';') if row == sum_row ; row += 1
84
94
  line = [k]
85
95
  heading.sort.each do |h|
86
- line << v[:elements][h]
96
+ line << v[:elements][h] unless h == sum_col_title
87
97
  end
98
+ line << v[:sum] unless sum_col_title.nil?
88
99
  out.puts line.join(';')
89
100
  end
90
101
  end
91
102
  end
92
103
 
104
+ private
105
+
106
+ # Initializes the sum row title an positions as well as the cum column title
107
+ def init_sum_scheme(sum_scheme)
108
+
109
+ return if sum_scheme.nil?
110
+
111
+ re = /(\w+):(\d+)|(\w+)/
112
+
113
+ sum_scheme.scan(re).each do |part|
114
+ if part.compact.size == 2
115
+ @sum_row_title = part[0]
116
+ @sum_row = part[1].to_i
117
+ else
118
+ @sum_col_title = part[2]
119
+ end
120
+ end
121
+
122
+ end
123
+
124
+ # Initialize the key columns and headers
125
+ def init_key_columns(key_scheme)
126
+
127
+ @key_titles = []
128
+ @key_columns = []
129
+
130
+ keys = key_scheme.scan(/(\d+):(\w+)/)
131
+
132
+ keys.each do |key|
133
+ @key_titles << key[1]
134
+ @key_columns << key[0].to_i
135
+ end
136
+
137
+ end
138
+
93
139
  end
94
140
 
95
141
  end
@@ -30,14 +30,16 @@ module Sycsvpro
30
30
 
31
31
  # Creates the filters based on the given patterns
32
32
  def method_missing(id, *args, &block)
33
- return equal($1, args, block) if id =~ /^(\d+)$/
34
- return equal_type($1, $2, args, block) if id =~ /^(s|n|d):(\d+)$/
35
- return range($1, $2, args, block) if id =~ /^(\d+)-(\d+)$/
36
- return range_type($1, $2, $3, args, block) if id =~ /^(s|n|d):(\d+)-(\d+)$/
37
- return regex($1, args, block) if id =~ /^\/(.*)\/$/
38
- return col_regex($1, $2, args, block) if id =~ /^(\d+):\/(.*)\/$/
39
- return date($1, $2, $3, args, block) if id =~ /^(\d+):(<|=|>)(\d+.\d+.\d+)/
40
- return date_range($1, $2, $3, args, block) if id =~ /^(\d+):(\d+.\d+.\d+.)-(\d+.\d+.\d+)$/
33
+ return equal($1, args, block) if id =~ /^(\d+)$/
34
+ return equal_type($1, $2, args, block) if id =~ /^(s|n|d):(\d+)$/
35
+ return range($1, $2, args, block) if id =~ /^(\d+)-(\d+)$/
36
+ return range_type($1, $2, $3, args, block) if id =~ /^(s|n|d):(\d+)-(\d+)$/
37
+ return regex($1, args, block) if id =~ /^\/(.*)\/$/
38
+ return col_regex($1, $2, args, block) if id =~ /^(\d+):\/(.*)\/$/
39
+ return date($1, $2, $3, args, block) if id =~ /^(\d+):(<|=|>)(\d+.\d+.\d+)/
40
+ return date_range($1, $2, $3, args, block) if id =~ /^(\d+):(\d+.\d+.\d+.)-(\d+.\d+.\d+)$/
41
+ return number($1, $2, $3, args, block) if id =~ /^(\d+):(<|=|>)(\d+)/
42
+ return number_range($1, $2, $3, args, block) if id =~ /^(\d):(\d+)-(\d+)/
41
43
  super
42
44
  end
43
45
 
@@ -54,7 +56,8 @@ module Sycsvpro
54
56
  match = false
55
57
  begin
56
58
  match = eval(parameters[:operation].gsub('[value]', value))
57
- rescue
59
+ rescue Exception => e
60
+
58
61
  end
59
62
  yield column, match
60
63
  end
@@ -124,6 +127,19 @@ module Sycsvpro
124
127
  pivot["#{start_date}-#{end_date}"] = { col: col, operation: operation }
125
128
  end
126
129
 
130
+ # Adds a number filter
131
+ def number(col, comparator, number, args, block)
132
+ comparator = '==' if comparator == '='
133
+ operation = "[value] #{comparator} #{number}"
134
+ pivot["#{comparator}#{number}"] = { col: col, operation: operation }
135
+ end
136
+
137
+ # Adds a number range filter
138
+ def number_range(col, start_number, end_number, arg, block)
139
+ operation = " #{start_number} <= [value] && [value] <= #{end_number}"
140
+ pivot["#{start_number}-#{end_number}"] = { col: col, operation: operation }
141
+ end
142
+
127
143
  end
128
144
 
129
145
  end
@@ -1,5 +1,5 @@
1
1
  # Operating csv files
2
2
  module Sycsvpro
3
3
  # Version number of sycsvpro
4
- VERSION = '0.0.9'
4
+ VERSION = '0.1.0'
5
5
  end
@@ -0,0 +1,55 @@
1
+ require 'sycsvpro/aggregator'
2
+
3
+ module Sycsvpro
4
+
5
+ describe Aggregator do
6
+
7
+ before do
8
+ @in_file = File.join(File.dirname(__FILE__), "files/in.csv")
9
+ @out_file = File.join(File.dirname(__FILE__), "files/out.csv")
10
+ end
11
+
12
+ it "should aggregate single column values" do
13
+ aggregator = Aggregator.new(infile: @in_file, outfile: @out_file, rows: "1-10",
14
+ cols: "0", sum: "Total:1,Machines", headerless: true)
15
+
16
+ aggregator.execute
17
+
18
+ result = [ ";Machines",
19
+ "Total;7",
20
+ "Fink;2",
21
+ "Haas;1",
22
+ "Gent;1",
23
+ "Rank;1",
24
+ "Klig;1",
25
+ "fink;1" ]
26
+
27
+ File.open(@out_file).each_with_index do |line, index|
28
+ line.chomp.should eq result[index]
29
+ end
30
+ end
31
+
32
+ it "should aggregate multiple column values" do
33
+ aggregator = Aggregator.new(infile: @in_file, outfile: @out_file, rows: "0-10",
34
+ cols: "0,1", sum: "Total:1,Machines", headerless: false)
35
+
36
+ aggregator.execute
37
+
38
+ result = [ "customer;contract-number;Machines",
39
+ "Total;;7",
40
+ "Fink;1234;2",
41
+ "Haas;3322;1",
42
+ "Gent;4323;1",
43
+ "Rank;3232;1",
44
+ "Klig;4432;1",
45
+ "fink;1234;1" ]
46
+
47
+ File.open(@out_file).each_with_index do |line, index|
48
+ line.chomp.should eq result[index]
49
+ end
50
+
51
+ end
52
+
53
+ end
54
+
55
+ end
@@ -40,6 +40,28 @@ module Sycsvpro
40
40
  expect(line.chomp).to eq result[index]
41
41
  end
42
42
  end
43
+
44
+ it "should sum specified rows" do
45
+ header = "*,drives,motors"
46
+ rows = "1-8"
47
+ cols = "5:c3+c4,6:c3*2"
48
+ sums = "1,3-5"
49
+ calculator = Calculator.new(infile: @in_file, outfile: @out_file,
50
+ header: header, rows: rows, cols: cols, sum: true)
51
+ calculator.execute
52
+
53
+ result = ["customer;machines;controls;contracts;visits;drives;motors",
54
+ "Fink;2;2;1;1;2;2",
55
+ "Haas;3;3;1;1.0;2.0;2",
56
+ "Gent;4;4;1;1;2;2",
57
+ "Rank;5;5;1;1;2;2",
58
+ "0;14;14;4;4.0;8;8"]
59
+
60
+ File.new(@out_file, 'r').each_with_index do |line, index|
61
+ expect(line.chomp).to eq result[index]
62
+ end
63
+ end
64
+
43
65
  end
44
66
 
45
67
  end