sycsvpro 0.1.4 → 0.1.7

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,159 @@
1
+ # Operating csv files
2
+ module Sycsvpro
3
+
4
+ # Join joins two files based on a join key value.
5
+ # Example
6
+ # File 1 (infile)
7
+ # |Name |ID |
8
+ # |Hank |123|
9
+ # |Frank|234|
10
+ # |Mia |345|
11
+ # |Moira|234|
12
+ #
13
+ # File 2 (source)
14
+ # |Company|Phone|ID|
15
+ # |Siem |4848 |123|
16
+ # |Helo |993 |345|
17
+ # |Wara |3333 |234|
18
+ #
19
+ # File 3 (outfile)
20
+ # |Name |ID |Company|Phone|
21
+ # |Hank |123|Siem |4848 |
22
+ # |Frank|234|Wara |3333 |
23
+ # |Mia |345|Helo |993 |
24
+ # |Moira|234|Wara |3333 |
25
class Join

  include Dsl

  # infile contains the data that is operated on
  attr_reader :infile
  # outfile is the file where the result is written to
  attr_reader :outfile
  # source file from where columns are inserted into infile
  attr_reader :source
  # filter that is used for rows
  attr_reader :row_filter
  # columns to insert
  attr_reader :columns
  # positions where to insert the columns into the infile
  attr_reader :positions
  # header of the outfile
  attr_reader :header
  # indicates whether the infile is headerless
  attr_reader :headerless
  # lookup table where the assigned values are stored at
  attr_reader :lookup_table

  # Creates a Join which can be invoked as follows
  # :call-seq:
  #   Sycsvpro::Join.new(infile:        "in.csv",
  #                      outfile:       "out.csv",
  #                      source:        "source.csv",
  #                      rows:          "1-eof",
  #                      cols:          "0,1",
  #                      pos:           "2,3",
  #                      joins:         "2=1",
  #                      headerless:    true,
  #                      header:        "*",
  #                      insert_header: "Company,Phone").execute
  #
  # infile::        csv file to operate on
  # outfile::       csv file with the result
  # source::        csv file that contains the values to join to infile
  # rows::          rows to consider for operation. Rows that don't match the
  #                 pattern will be skipped for operation
  # cols::          columns to insert from the source to the infile
  # pos::           column positions where to insert the values and the
  #                 insert_header columns. Defaults to the leading positions
  #                 0, 1, ... (one per inserted column)
  # joins::         columns that match in infile and source, given as
  #                 source_column=infile_column
  # headerless::    true if the infile has no header line, so the first line
  #                 is treated as data (default false)
  # header::        Header of the csv file (default '*')
  # insert_header:: column names of the to be inserted values
  #
  # The source file is read completely while the object is constructed in
  # order to build the lookup table.
  def initialize(options = {})
    @infile     = options[:infile]
    @outfile    = options[:outfile]
    @source     = options[:source]
    @row_filter = RowFilter.new(options[:rows], df: options[:df])
    @columns    = options[:cols].split(',').map(&:to_i)
    @positions  = col_positions(options[:pos], @columns)
    # @joins[0] is the key column in the source, @joins[1] the one in infile
    @joins      = options[:joins].split('=').map(&:to_i)
    @headerless = options[:headerless] || false
    @header     = Header.new(options[:header] || '*',
                             pos:    @positions,
                             insert: options[:insert_header])
    create_lookup_table
  end

  # Executes the join. Every non-empty line of the infile that passes the
  # row filter is written to the outfile with the looked-up source values
  # inserted at the configured positions. Keys without a match in the source
  # file yield empty values.
  def execute
    processed_header = headerless ? true : false

    File.open(outfile, 'w') do |out|
      # File.foreach closes the infile once the iteration is finished, the
      # index still counts every physical line (including empty ones)
      File.foreach(infile).with_index do |line, index|
        line = line.chomp

        next if line.empty?

        line = unstring(line).chomp

        unless processed_header
          header_line = header.process(line)
          # puts writes the header object through its string conversion
          out.puts header unless header_line.empty?
          processed_header = true
          next
        end

        next if row_filter.process(line, row: index).nil?

        values = line.split(';')

        key = values[@joins[1]]
        row = lookup_table[:rows][key] || []

        # First open the slots in ascending order, then fill them in the
        # order the columns have been requested
        lookup_table[:pos].sort.each { |p| values.insert(p, "") }
        lookup_table[:pos].each_with_index { |p, i| values[p] = row[i] }

        out.puts values.join(';')
      end
    end
  end

  private

    # Creates a lookup table from the source file values. The join column of
    # the source file is the key, the value is the list of the requested
    # columns. If a key occurs more than once the last occurrence wins.
    def create_lookup_table
      @lookup_table = { pos: positions, rows: {} }

      # File.foreach closes the source file after the last line
      File.foreach(source) do |line|
        next if line.chomp.empty?

        values = unstring(line).chomp.split(';')

        next if values.empty?

        key = values[@joins[0]]
        lookup_table[:rows][key] = columns.map { |i| values[i] }
      end
    end

    # Initializes the column positions where the source file columns have to
    # be inserted. If no column positions are provided the inserted columns
    # are put at the beginning of the row
    def col_positions(pos, cols)
      if pos.nil? || pos.empty?
        (0...cols.size).to_a
      else
        pos.split(',').map(&:to_i)
      end
    end

end
158
+
159
+ end
@@ -3,12 +3,28 @@ require_relative 'header'
3
3
  require_relative 'dsl'
4
4
  require 'date'
5
5
 
6
+ # Operating csv files
6
7
  module Sycsvpro
7
8
 
9
+ # Extracts values from a csv file and allows associating values with key
10
+ # values. Columns can be created dynamically based on the content of columns.
11
+ # Example:
12
+ # File 1 (infile)
13
+ # Date | Order-Type | Revenue
14
+ # 01.01.2013 | AZ | 22.50
15
+ # 13.04.2014 | BZ | 33.40
16
+ # 16.12.2014 | CZ | 12.80
17
+ #
18
+ # File 2 (outfile)
19
+ # Year | AZ | BZ | CZ | Total
20
+ # 2013 | 22.50 | | | 22.50
21
+ # 2014 | | 33.40 | 12.80 | 46.20
8
22
  class Table
9
23
 
10
24
  include Dsl
11
25
 
26
+ # Regex to split parameters
27
+ COL_SPLITTER = /,(?=[\w +]*:)/
12
28
  # infile contains the data that is operated on
13
29
  attr_reader :infile
14
30
  # outfile is the file where the result is written to
@@ -34,21 +50,41 @@ module Sycsvpro
34
50
  # header: "Year,c6,c1",
35
51
  # key: "c0=~/\\.(\\d{4})/,c6",
36
52
  # cols: "Value:+n1,c2+c3:+n1",
37
- # nf: "DE").execute
53
+ # nf: "DE",
54
+ # pr: "2",
55
+ # sum: "TOP:Value,c2+c3").execute
56
+ #
57
+ # infile:: csv file to operate on
58
+ # outfile:: csv file with the result
59
+ # df:: date format
60
+ # nf:: number format of number values. "DE" e.g. is 1.000,00 whereas
61
+ # US is 1,000.00
62
+ # pr:: precision of number values. Default 2
63
+ # rows:: rows to consider for operation. Rows that don't match the pattern
64
+ # will be skipped for operation
65
+ # header:: Header of the csv file
66
+ # key:: Values located at value 0 and subsequent columns
67
+ # cols:: Values added to columns based on an operation or assignment
68
+ # sum:: sum row at specified position top or eof
38
69
  def initialize(options = {})
39
70
  @infile = options[:infile]
40
71
  @outfile = options[:outfile]
41
72
  @date_format = options[:df] || "%Y-%m-%d"
42
73
  @row_filter = RowFilter.new(options[:rows], df: options[:df])
43
74
  @header = Header.new(options[:header])
44
- @keys = options[:key].split(',')
45
- @cols = options[:cols].split(',')
75
+ @keys = split_by_comma_regex(options[:key]) #options[:key].split(',')
76
+ @cols = options[:cols].split(COL_SPLITTER)
46
77
  @number_format = options[:nf] || 'EN'
78
+ @precision = options[:pr] || 2
79
+ prepare_sum_row options[:sum]
47
80
  @rows = {}
48
81
  end
49
82
 
50
83
  # Retrieves the values from a row as the result of an arithmetic operation
51
- # with #eval
84
+ # with #eval. It recognizes
85
+ # c:: string value
86
+ # n:: number value
87
+ # d:: date value
52
88
  def method_missing(id, *args, &block)
53
89
  return @columns[$1.to_i] if id =~ /c(\d+)/
54
90
  return to_number(@columns[$1.to_i]) if id =~ /n(\d+)/
@@ -93,6 +129,7 @@ module Sycsvpro
93
129
  def write_to_file
94
130
  File.open(outfile, 'w') do |out|
95
131
  out.puts header.to_s
132
+ out.puts create_sum_row if @sum_row_pos == 'TOP'
96
133
  rows.each do |key, row|
97
134
  line = [] << row[:key]
98
135
  header.clear_header_cols.each_with_index do |col, index|
@@ -101,6 +138,7 @@ module Sycsvpro
101
138
  end
102
139
  out.puts line.flatten.join(';')
103
140
  end
141
+ out.puts create_sum_row if @sum_row_pos == 'EOF'
104
142
  end
105
143
  end
106
144
 
@@ -122,7 +160,11 @@ module Sycsvpro
122
160
  @cols.each do |col|
123
161
  column, formula = col.split(':')
124
162
  column = evaluate(column) if column =~ /^c\d+[=~+]/
125
- row[:cols][column] = eval("#{row[:cols][column]}#{formula}")
163
+ previous_value = row[:cols][column]
164
+ if value = eval("#{row[:cols][column]}#{formula}")
165
+ row[:cols][column] = value.round(@precision)
166
+ add_to_sum_row(row[:cols][column] - previous_value, column)
167
+ end
126
168
  end
127
169
  end
128
170
 
@@ -168,6 +210,42 @@ module Sycsvpro
168
210
  end
169
211
  end
170
212
 
213
# Initializes sum_row_pos, sum_row and sum_row_patterns based on the
# provided sum option. The option has the form "POSITION:col1,col2,...",
# e.g. "TOP:Value,c2+c3". Nothing is initialized if the option is nil or
# empty.
def prepare_sum_row(pattern)
  return if pattern.nil? || pattern.empty?

  # Split at the first colon only so that the column list is kept intact
  # even if it contains a colon itself
  position, column_patterns = pattern.split(':', 2)

  @sum_row_pos = position.upcase
  @sum_row = {}
  # An option without a column list (e.g. "TOP") results in a sum row
  # without any summed columns instead of passing nil to the splitter
  @sum_row_patterns = column_patterns ? split_by_comma_regex(column_patterns) : []
end
222
+
223
# Adds a value in the specified column to the sum_row. Does nothing if no
# sum row has been configured. The value is added once for every sum
# pattern that resolves to the column.
def add_to_sum_row(value, column)
  return unless @sum_row_patterns

  @sum_row_patterns.each do |pattern|
    # Patterns like "c2+c3" or "c0=~/.../" have to be evaluated to get the
    # column name, all other patterns are the column name
    header_column = pattern =~ /^c\d+[=~+]/ ? evaluate(pattern, "") : pattern

    next unless header_column == column

    @sum_row[header_column] = (@sum_row[header_column] || 0) + value
  end
end
239
+
240
# Creates the sum_row when the file has been completely processed. The
# result is a ';' separated line with one value per header column, columns
# without a sum are left empty.
def create_sum_row
  # The parentheses-free form `line << @sum_row[col] || ""` never applied
  # the fallback, so the empty default is made explicit here
  header.clear_header_cols.map { |col| @sum_row[col] || "" }.join(';')
end
248
+
171
249
  end
172
250
 
173
251
  end
@@ -1,5 +1,5 @@
1
1
  # Operating csv files
2
2
  module Sycsvpro
3
3
  # Version number of sycsvpro
4
- VERSION = '0.1.4'
4
+ VERSION = '0.1.7'
5
5
  end
data/lib/sycsvpro.rb CHANGED
@@ -14,3 +14,4 @@ require 'sycsvpro/inserter.rb'
14
14
  require 'sycsvpro/sorter.rb'
15
15
  require 'sycsvpro/aggregator.rb'
16
16
  require 'sycsvpro/table.rb'
17
+ require 'sycsvpro/join.rb'
@@ -8,6 +8,7 @@ module Sycsvpro
8
8
  @in_file = File.join(File.dirname(__FILE__), "files/machines.csv")
9
9
  @in_date_file = File.join(File.dirname(__FILE__), "files/machine-delivery.csv")
10
10
  @in_number_file = File.join(File.dirname(__FILE__), "files/machine-count.csv")
11
+ @in_customer_file = File.join(File.dirname(__FILE__), "files/customers.csv")
11
12
  @out_file = File.join(File.dirname(__FILE__), "files/machines_out.csv")
12
13
  end
13
14
 
@@ -87,7 +88,7 @@ module Sycsvpro
87
88
 
88
89
  it "should find minimum of specified date rows" do
89
90
  header = "*,Min_Date"
90
- cols = "3:Min_Date=[d1,d2].compact.min"
91
+ cols = "3:[d1,d2].compact.min"
91
92
  rows = "1-8"
92
93
  df = "%d.%m.%Y"
93
94
 
@@ -148,6 +149,35 @@ module Sycsvpro
148
149
  end
149
150
  end
150
151
 
152
+ it "should split column value into multiple column values" do
153
+ header = "ID,Customer,Country"
154
+ cols = [ "2:s0.scan(/([A-Z]+)/).flatten[0]",
155
+ "0:s0.scan(/(?<=\\/)(.*)$/).flatten[0]",
156
+ "1:s1" ].join(',')
157
+ rows = "1-8"
158
+
159
+ Calculator.new(infile: @in_customer_file,
160
+ outfile: @out_file,
161
+ header: header,
162
+ rows: rows,
163
+ cols: cols).execute
164
+
165
+ result = [ "ID;Customer;Country",
166
+ "123945;Hank;DE",
167
+ "339339;Frank;AT",
168
+ "449399;Jane;DE",
169
+ "33A398;Jean;US" ]
170
+
171
+ rows = 0
172
+
173
+ File.new(@out_file, 'r').each_with_index do |line, index|
174
+ expect(line.chomp).to eq result[index]
175
+ rows += 1
176
+ end
177
+
178
+ rows.should eq result.size
179
+ end
180
+
151
181
  end
152
182
 
153
183
  end
@@ -52,6 +52,12 @@ module Sycsvpro
52
52
  header.process("5.5.2012;d1;d2;d3;d4;d5").should eq "a4;A;2012;2013;a1;B"
53
53
  end
54
54
 
55
+ it "should create a header with positioned columns" do
56
+ header = Header.new("*", insert: "C,D", pos: [3,7])
57
+
58
+ header.process("A;B;E;F;G").should eq "A;B;E;C;F;G;;D"
59
+ end
60
+
55
61
  it "should return the header" do
56
62
  header = Header.new("c4,A,c0=~/\\.(\\d{4})/,c1,B")
57
63
 
@@ -72,7 +78,7 @@ module Sycsvpro
72
78
  header.column_of("a1").should eq 3
73
79
  header.process("3.4.2013;c1;c2;c3;c4;c5").should eq "a4;A;2012;2013;a1;B"
74
80
  header.column_of("B").should eq 5
75
- end
81
+ end
76
82
 
77
83
  end
78
84
 
@@ -0,0 +1,178 @@
1
+ require 'sycsvpro/join'
2
+
3
+ module Sycsvpro
4
+
5
+ describe Join do
6
+
7
+ before do
8
+ @in_file = File.join(File.dirname(__FILE__), "files/persons.csv")
9
+ @source_file = File.join(File.dirname(__FILE__), "files/countries.csv")
10
+ @out_file = File.join(File.dirname(__FILE__), "files/persons-countries.csv")
11
+ end
12
+
13
+ it "should join files based on person ID" do
14
+ cols = "1,2"
15
+ insert_col_pos = "2,1"
16
+ insert_header = "COUNTRY,STATE"
17
+ header = "*"
18
+ joins = "0=1"
19
+ rows = "1-4"
20
+
21
+ Sycsvpro::Join.new(infile: @in_file,
22
+ outfile: @out_file,
23
+ source: @source_file,
24
+ cols: cols,
25
+ joins: joins,
26
+ insert_header: insert_header,
27
+ pos: insert_col_pos,
28
+ header: header,
29
+ rows: rows).execute
30
+
31
+ result = [ "Name;STATE;COUNTRY;N_ID",
32
+ "Hank;A4;AT;123",
33
+ "Frank;C3;CA;234",
34
+ "Mia;D1;DE;345",
35
+ "Arwen;U2;US;456" ]
36
+
37
+ rows = 0
38
+
39
+ File.new(@out_file, 'r').each_with_index do |line, index|
40
+ expect(line.chomp).to eq result[index]
41
+ rows += 1
42
+ end
43
+
44
+ rows.should eq result.size
45
+
46
+ end
47
+
48
+ it "should join files without explicit insert header" do
49
+ cols = "1,2"
50
+ insert_col_pos = "2,1"
51
+ joins = "0=1"
52
+ header = "*"
53
+ rows = "1-4"
54
+
55
+ Sycsvpro::Join.new(infile: @in_file,
56
+ outfile: @out_file,
57
+ source: @source_file,
58
+ cols: cols,
59
+ joins: joins,
60
+ pos: insert_col_pos,
61
+ header: header,
62
+ rows: rows).execute
63
+
64
+ result = [ "Name;;;N_ID",
65
+ "Hank;A4;AT;123",
66
+ "Frank;C3;CA;234",
67
+ "Mia;D1;DE;345",
68
+ "Arwen;U2;US;456" ]
69
+
70
+ rows = 0
71
+
72
+ File.new(@out_file, 'r').each_with_index do |line, index|
73
+ expect(line.chomp).to eq result[index]
74
+ rows += 1
75
+ end
76
+
77
+ rows.should eq result.size
78
+
79
+ end
80
+
81
+ it "should join files without explicit insert cols pos and insert header" do
82
+ cols = "1,2"
83
+ joins = "0=1"
84
+ header = "*"
85
+ rows = "1-4"
86
+
87
+ Sycsvpro::Join.new(infile: @in_file,
88
+ outfile: @out_file,
89
+ source: @source_file,
90
+ cols: cols,
91
+ joins: joins,
92
+ header: header,
93
+ rows: rows).execute
94
+
95
+ result = [ ";;Name;N_ID",
96
+ "AT;A4;Hank;123",
97
+ "CA;C3;Frank;234",
98
+ "DE;D1;Mia;345",
99
+ "US;U2;Arwen;456" ]
100
+
101
+ rows = 0
102
+
103
+ File.new(@out_file, 'r').each_with_index do |line, index|
104
+ expect(line.chomp).to eq result[index]
105
+ rows += 1
106
+ end
107
+
108
+ rows.should eq result.size
109
+
110
+ end
111
+
112
+ it "should join files without explicit header adding default header '*'" do
113
+ cols = "1,2"
114
+ joins = "0=1"
115
+ rows = "1-4"
116
+
117
+ Sycsvpro::Join.new(infile: @in_file,
118
+ outfile: @out_file,
119
+ source: @source_file,
120
+ cols: cols,
121
+ joins: joins,
122
+ rows: rows).execute
123
+
124
+ result = [ ";;Name;N_ID",
125
+ "AT;A4;Hank;123",
126
+ "CA;C3;Frank;234",
127
+ "DE;D1;Mia;345",
128
+ "US;U2;Arwen;456" ]
129
+
130
+ rows = 0
131
+
132
+ File.new(@out_file, 'r').each_with_index do |line, index|
133
+ expect(line.chomp).to eq result[index]
134
+ rows += 1
135
+ end
136
+
137
+ rows.should eq result.size
138
+
139
+ end
140
+
141
+ it "should join files without header" do
142
+ cols = "1,2"
143
+ insert_col_pos = "2,1"
144
+ insert_header = "COUNTRY,STATE"
145
+ header = "*"
146
+ joins = "0=1"
147
+ rows = "1-4"
148
+
149
+ Sycsvpro::Join.new(infile: @in_file,
150
+ outfile: @out_file,
151
+ source: @source_file,
152
+ cols: cols,
153
+ joins: joins,
154
+ insert_header: insert_header,
155
+ pos: insert_col_pos,
156
+ header: header,
157
+ headerless: true,
158
+ rows: rows).execute
159
+
160
+ result = [ "Hank;A4;AT;123",
161
+ "Frank;C3;CA;234",
162
+ "Mia;D1;DE;345",
163
+ "Arwen;U2;US;456" ]
164
+
165
+ rows = 0
166
+
167
+ File.new(@out_file, 'r').each_with_index do |line, index|
168
+ expect(line.chomp).to eq result[index]
169
+ rows += 1
170
+ end
171
+
172
+ rows.should eq result.size
173
+
174
+ end
175
+
176
+ end
177
+
178
+ end