sycsvpro 0.1.4 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,159 @@
1
+ # Operating csv files
2
+ module Sycsvpro
3
+
4
+ # Join joins two files based on a join key value.
5
+ # Example
6
+ # File 1 (infile)
7
+ # |Name |ID |
8
+ # |Hank |123|
9
+ # |Frank|234|
10
+ # |Mia |345|
11
+ # |Moira|234|
12
+ #
13
+ # File 2 (source)
14
+ # |Company|Phone|ID|
15
+ # |Siem |4848 |123|
16
+ # |Helo |993 |345|
17
+ # |Wara |3333 |234|
18
+ #
19
+ # File 3 (outfile)
20
+ # |Name |ID |Company|Phone|
21
+ # |Hank |123|Siem |4848 |
22
+ # |Frank|234|Wara |3333 |
23
+ # |Mia |345|Helo |993 |
24
+ # |Moira|234|Wara |3333 |
25
class Join

  include Dsl

  # infile contains the data that is operated on
  attr_reader :infile
  # outfile is the file where the result is written to
  attr_reader :outfile
  # source file from where columns are inserted into infile
  attr_reader :source
  # filter that is used for rows
  attr_reader :row_filter
  # columns to insert
  attr_reader :columns
  # position where to insert the columns into the infile
  attr_reader :positions
  # header of the outfile
  attr_reader :header
  # indicates whether the infile is headerless
  attr_reader :headerless
  # lookup table where the assigned values are stored at
  attr_reader :lookup_table

  # Creates a Join which can be invoked as follows
  # :call-seq:
  #   Sycsvpro::Join.new(infile:        "in.csv",
  #                      outfile:       "out.csv",
  #                      source:        "source.csv",
  #                      rows:          "1-eof",
  #                      cols:          "0,1",
  #                      pos:           "2,3",
  #                      joins:         "2=1",
  #                      headerless:    true,
  #                      header:        "*",
  #                      insert_header: "Company,Phone").execute
  #
  # infile::        csv file to operate on
  # outfile::       csv file with the result
  # source::        csv file that contains the values to join to infile
  # rows::          rows to consider for operation. Rows that don't match the
  #                 pattern will be skipped for operation
  # cols::          columns to insert from the source to the infile
  # pos::           column positions where to insert the values and the
  #                 insert_header columns
  # joins::         columns that match in infile and source.
  #                 source_column=infile_column
  # headerless::    true if the infile has no header line (default false)
  # header::        Header of the csv file (default '*')
  # insert_header:: column names of the to be inserted values
  def initialize(options = {})
    @infile     = options[:infile]
    @outfile    = options[:outfile]
    @source     = options[:source]
    @row_filter = RowFilter.new(options[:rows], df: options[:df])
    @columns    = options[:cols].split(',').map { |c| c.to_i }
    @positions  = col_positions(options[:pos], @columns)
    # joins is "source_column=infile_column", so index 0 addresses the source
    # file and index 1 addresses the infile
    @joins      = options[:joins].split('=').map { |j| j.to_i }
    @headerless = options[:headerless].nil? ? false : options[:headerless]
    @header     = Header.new(options[:header] || '*',
                             pos:    @positions,
                             insert: options[:insert_header])
    create_lookup_table
  end

  # Executes the join. Reads the infile line by line, writes the header once
  # (unless the infile is headerless) and writes each row that passes the row
  # filter with the values looked up from the source file inserted.
  def execute
    processed_header = headerless ? true : false

    File.open(outfile, 'w') do |out|
      # The block form closes the infile even when processing raises
      File.open(infile) do |input|
        # The index counts every line of the infile, including skipped empty
        # lines, as this is the row number handed to the row filter
        input.each_with_index do |line, index|
          line = line.chomp

          next if line.empty?

          line = unstring(line).chomp

          unless processed_header
            header_line = header.process(line)
            out.puts header unless header_line.empty?
            processed_header = true
            next
          end

          next if row_filter.process(line, row: index).nil?

          out.puts join_values(line.split(';')).join(';')
        end
      end
    end
  end

  private

    # Inserts the source values that belong to the row's join key into values
    # at the configured positions and returns values. Rows without a match in
    # the lookup table get empty cells.
    def join_values(values)
      # The key has to be read before columns are inserted, because inserting
      # shifts the column indices of the infile
      key = values[@joins[1]]
      row = lookup_table[:rows][key] || []

      # Reserve the cells in ascending order first, then fill them in the
      # order the positions were configured
      lookup_table[:pos].sort.each { |p| values.insert(p, "") }
      lookup_table[:pos].each_with_index { |p, i| values[p] = row[i] }

      values
    end

    # Creates a lookup table from the source file values. The join column of
    # the source file is the key
    def create_lookup_table
      @lookup_table = { pos: positions, rows: {} }

      # File.foreach closes the source file when the block has finished
      File.foreach(source) do |line|
        next if line.chomp.empty?

        values = unstring(line).chomp.split(';')

        next if values.empty?

        key = values[@joins[0]]
        lookup_table[:rows][key] = columns.map { |i| values[i] }
      end
    end

    # Initializes the column positions where the source file columns have to
    # be inserted. If no column positions are provided the inserted columns
    # are put at the beginning of the row
    def col_positions(pos, cols)
      return Array.new(cols.size) { |c| c } if pos.nil? || pos.empty?

      pos.split(',').map { |p| p.to_i }
    end

end
158
+
159
+ end
@@ -3,12 +3,28 @@ require_relative 'header'
3
3
  require_relative 'dsl'
4
4
  require 'date'
5
5
 
6
+ # Operating csv files
6
7
  module Sycsvpro
7
8
 
9
+ # Extracts values from a csv file and allows associating them with key
10
+ # values. Columns can be created dynamically based on the content of columns.
11
+ # Example:
12
+ # File 1 (infile)
13
+ # Date | Order-Type | Revenue
14
+ # 01.01.2013 | AZ | 22.50
15
+ # 13.04.2014 | BZ | 33.40
16
+ # 16.12.2014 | CZ | 12.80
17
+ #
18
+ # File 2 (outfile)
19
+ # Year | AZ | BZ | CZ | Total
20
+ # 2013 | 22.50 | | | 22.50
21
+ # 2014 | | 33.40 | 12.80 | 46.20
8
22
  class Table
9
23
 
10
24
  include Dsl
11
25
 
26
+ # Regex to split parameters
27
+ COL_SPLITTER = /,(?=[\w +]*:)/
12
28
  # infile contains the data that is operated on
13
29
  attr_reader :infile
14
30
  # outfile is the file where the result is written to
@@ -34,21 +50,41 @@ module Sycsvpro
34
50
  # header: "Year,c6,c1",
35
51
  # key: "c0=~/\\.(\\d{4})/,c6",
36
52
  # cols: "Value:+n1,c2+c3:+n1",
37
- # nf: "DE").execute
53
+ # nf: "DE",
54
+ # pr: "2",
55
+ # sum: "TOP:Value,c2+c3").execute
56
+ #
57
+ # infile:: csv file to operate on
58
+ # outfile:: csv file with the result
59
+ # df:: date format
60
+ # nf:: number format of number values. "DE" e.g. is 1.000,00 whereas
61
+ # US is 1,000.00
62
+ # pr:: precision of number values. Default 2
63
+ # rows:: rows to consider for operation. Rows that don't match the pattern
64
+ # will be skipped for operation
65
+ # header:: Header of the csv file
66
+ # key:: Values located at value 0 and subsequent columns
67
+ # cols:: Values added to columns based on an operation or assignment
68
+ # sum:: sum row at specified position top or eof
38
69
def initialize(options = {})
  @infile        = options[:infile]
  @outfile       = options[:outfile]
  @date_format   = options[:df] || "%Y-%m-%d"
  @row_filter    = RowFilter.new(options[:rows], df: options[:df])
  @header        = Header.new(options[:header])
  @keys          = split_by_comma_regex(options[:key])
  # COL_SPLITTER splits only at commas that start a new "column:formula"
  # pair, so commas inside a formula are kept
  @cols          = options[:cols].split(COL_SPLITTER)
  @number_format = options[:nf] || 'EN'
  # The precision is documented to be passed as a string (pr: "2") but is
  # later handed to #round, which requires an Integer
  @precision     = (options[:pr] || 2).to_i
  prepare_sum_row options[:sum]
  @rows          = {}
end
49
82
 
50
83
  # Retrieves the values from a row as the result of a arithmetic operation
51
- # with #eval
84
+ # with #eval. It recognizes
85
+ # c:: string value
86
+ # n:: number value
87
+ # d:: date value
52
88
  def method_missing(id, *args, &block)
53
89
  return @columns[$1.to_i] if id =~ /c(\d+)/
54
90
  return to_number(@columns[$1.to_i]) if id =~ /n(\d+)/
@@ -93,6 +129,7 @@ module Sycsvpro
93
129
  def write_to_file
94
130
  File.open(outfile, 'w') do |out|
95
131
  out.puts header.to_s
132
+ out.puts create_sum_row if @sum_row_pos == 'TOP'
96
133
  rows.each do |key, row|
97
134
  line = [] << row[:key]
98
135
  header.clear_header_cols.each_with_index do |col, index|
@@ -101,6 +138,7 @@ module Sycsvpro
101
138
  end
102
139
  out.puts line.flatten.join(';')
103
140
  end
141
+ out.puts create_sum_row if @sum_row_pos == 'EOF'
104
142
  end
105
143
  end
106
144
 
@@ -122,7 +160,11 @@ module Sycsvpro
122
160
  @cols.each do |col|
123
161
  column, formula = col.split(':')
124
162
  column = evaluate(column) if column =~ /^c\d+[=~+]/
125
- row[:cols][column] = eval("#{row[:cols][column]}#{formula}")
163
+ previous_value = row[:cols][column]
164
+ if value = eval("#{row[:cols][column]}#{formula}")
165
+ row[:cols][column] = value.round(@precision)
166
+ add_to_sum_row(row[:cols][column] - previous_value, column)
167
+ end
126
168
  end
127
169
  end
128
170
 
@@ -168,6 +210,42 @@ module Sycsvpro
168
210
  end
169
211
  end
170
212
 
213
# Initializes sum_row_pos, sum_row and sum_row_patterns based on the
# provided sum option. The option has the form "POSITION:col1,col2", e.g.
# "TOP:Value,c2+c3". Nothing is initialized when pattern is nil or empty.
def prepare_sum_row(pattern)
  return if pattern.nil? || pattern.empty?

  # Split at the first colon only, so colons inside a column pattern (e.g.
  # in a regex) stay part of the column list
  position, sum_row_pattern = pattern.split(':', 2)

  @sum_row_pos      = position.upcase
  @sum_row          = {}
  @sum_row_patterns = split_by_comma_regex(sum_row_pattern)
end
222
+
223
# Adds value to the sum_row entry of column, provided column is one of the
# columns selected by the sum option. Does nothing when no sum row has been
# configured.
def add_to_sum_row(value, column)
  return unless @sum_row_patterns

  @sum_row_patterns.each do |pattern|
    # Patterns that start with a column reference followed by an operator
    # are resolved with evaluate; all others are taken literally as the
    # header column name
    header_column = pattern =~ /^c\d+[=~+]/ ? evaluate(pattern, "") : pattern

    next unless header_column == column

    @sum_row[header_column] = (@sum_row[header_column] || 0) + value
  end
end
239
+
240
# Creates the sum_row when the file has been completely processed. Returns
# a ';' separated line with one cell per header column; columns without a
# sum get an empty cell.
def create_sum_row
  # The fallback needs to be part of the block value. Written as
  # 'line << @sum_row[col] || ""' the '|| ""' is never applied, because
  # '<<' binds tighter than '||'
  header.clear_header_cols.map { |col| @sum_row[col] || "" }.flatten.join(';')
end
248
+
171
249
  end
172
250
 
173
251
  end
@@ -1,5 +1,5 @@
1
1
  # Operating csv files
2
2
  module Sycsvpro
3
3
  # Version number of sycsvpro
4
- VERSION = '0.1.4'
4
+ VERSION = '0.1.7'
5
5
  end
data/lib/sycsvpro.rb CHANGED
@@ -14,3 +14,4 @@ require 'sycsvpro/inserter.rb'
14
14
  require 'sycsvpro/sorter.rb'
15
15
  require 'sycsvpro/aggregator.rb'
16
16
  require 'sycsvpro/table.rb'
17
+ require 'sycsvpro/join.rb'
@@ -8,6 +8,7 @@ module Sycsvpro
8
8
  @in_file = File.join(File.dirname(__FILE__), "files/machines.csv")
9
9
  @in_date_file = File.join(File.dirname(__FILE__), "files/machine-delivery.csv")
10
10
  @in_number_file = File.join(File.dirname(__FILE__), "files/machine-count.csv")
11
+ @in_customer_file = File.join(File.dirname(__FILE__), "files/customers.csv")
11
12
  @out_file = File.join(File.dirname(__FILE__), "files/machines_out.csv")
12
13
  end
13
14
 
@@ -87,7 +88,7 @@ module Sycsvpro
87
88
 
88
89
  it "should find minimum of specified date rows" do
89
90
  header = "*,Min_Date"
90
- cols = "3:Min_Date=[d1,d2].compact.min"
91
+ cols = "3:[d1,d2].compact.min"
91
92
  rows = "1-8"
92
93
  df = "%d.%m.%Y"
93
94
 
@@ -148,6 +149,35 @@ module Sycsvpro
148
149
  end
149
150
  end
150
151
 
152
# Splits the first column into its parts and distributes them over the
# columns ID, Customer and Country
it "should split column value into multiple column values" do
  header = "ID,Customer,Country"
  cols   = [ "2:s0.scan(/([A-Z]+)/).flatten[0]",
             "0:s0.scan(/(?<=\\/)(.*)$/).flatten[0]",
             "1:s1" ].join(',')
  rows   = "1-8"

  Calculator.new(infile:  @in_customer_file,
                 outfile: @out_file,
                 header:  header,
                 rows:    rows,
                 cols:    cols).execute

  result = [ "ID;Customer;Country",
             "123945;Hank;DE",
             "339339;Frank;AT",
             "449399;Jane;DE",
             "33A398;Jean;US" ]

  # Comparing the complete content checks each line as well as the number
  # of lines, and File.readlines closes the file after reading
  expect(File.readlines(@out_file).map { |line| line.chomp }).to eq result
end
180
+
151
181
  end
152
182
 
153
183
  end
@@ -52,6 +52,12 @@ module Sycsvpro
52
52
  header.process("5.5.2012;d1;d2;d3;d4;d5").should eq "a4;A;2012;2013;a1;B"
53
53
  end
54
54
 
55
# The insert columns C and D are placed at the column positions 3 and 7.
# The expected value shows the gap before position 7 as an empty column.
it "should create a header with positioned columns" do
  positioned_header = Header.new("*", insert: "C,D", pos: [3,7])
  processed         = positioned_header.process("A;B;E;F;G")

  processed.should eq "A;B;E;C;F;G;;D"
end
60
+
55
61
  it "should return the header" do
56
62
  header = Header.new("c4,A,c0=~/\\.(\\d{4})/,c1,B")
57
63
 
@@ -72,7 +78,7 @@ module Sycsvpro
72
78
  header.column_of("a1").should eq 3
73
79
  header.process("3.4.2013;c1;c2;c3;c4;c5").should eq "a4;A;2012;2013;a1;B"
74
80
  header.column_of("B").should eq 5
75
- end
81
+ end
76
82
 
77
83
  end
78
84
 
@@ -0,0 +1,178 @@
1
+ require 'sycsvpro/join'
2
+
3
module Sycsvpro

  describe Join do

    before do
      @in_file     = File.join(File.dirname(__FILE__), "files/persons.csv")
      @source_file = File.join(File.dirname(__FILE__), "files/countries.csv")
      @out_file    = File.join(File.dirname(__FILE__), "files/persons-countries.csv")
    end

    # Checks that the outfile consists of exactly the expected lines.
    # File.readlines closes the file after reading.
    def expect_out_file_to_eq(expected)
      expect(File.readlines(@out_file).map { |line| line.chomp }).to eq expected
    end

    it "should join files based on person ID" do
      Sycsvpro::Join.new(infile:        @in_file,
                         outfile:       @out_file,
                         source:        @source_file,
                         cols:          "1,2",
                         joins:         "0=1",
                         insert_header: "COUNTRY,STATE",
                         pos:           "2,1",
                         header:        "*",
                         rows:          "1-4").execute

      expect_out_file_to_eq [ "Name;STATE;COUNTRY;N_ID",
                              "Hank;A4;AT;123",
                              "Frank;C3;CA;234",
                              "Mia;D1;DE;345",
                              "Arwen;U2;US;456" ]
    end

    it "should join files without explicit insert header" do
      Sycsvpro::Join.new(infile:  @in_file,
                         outfile: @out_file,
                         source:  @source_file,
                         cols:    "1,2",
                         joins:   "0=1",
                         pos:     "2,1",
                         header:  "*",
                         rows:    "1-4").execute

      expect_out_file_to_eq [ "Name;;;N_ID",
                              "Hank;A4;AT;123",
                              "Frank;C3;CA;234",
                              "Mia;D1;DE;345",
                              "Arwen;U2;US;456" ]
    end

    it "should join files without explicit insert cols pos and insert header" do
      Sycsvpro::Join.new(infile:  @in_file,
                         outfile: @out_file,
                         source:  @source_file,
                         cols:    "1,2",
                         joins:   "0=1",
                         header:  "*",
                         rows:    "1-4").execute

      expect_out_file_to_eq [ ";;Name;N_ID",
                              "AT;A4;Hank;123",
                              "CA;C3;Frank;234",
                              "DE;D1;Mia;345",
                              "US;U2;Arwen;456" ]
    end

    it "should join files without explicit header adding default header '*'" do
      Sycsvpro::Join.new(infile:  @in_file,
                         outfile: @out_file,
                         source:  @source_file,
                         cols:    "1,2",
                         joins:   "0=1",
                         rows:    "1-4").execute

      expect_out_file_to_eq [ ";;Name;N_ID",
                              "AT;A4;Hank;123",
                              "CA;C3;Frank;234",
                              "DE;D1;Mia;345",
                              "US;U2;Arwen;456" ]
    end

    it "should join files without header" do
      Sycsvpro::Join.new(infile:        @in_file,
                         outfile:       @out_file,
                         source:        @source_file,
                         cols:          "1,2",
                         joins:         "0=1",
                         insert_header: "COUNTRY,STATE",
                         pos:           "2,1",
                         header:        "*",
                         headerless:    true,
                         rows:          "1-4").execute

      expect_out_file_to_eq [ "Hank;A4;AT;123",
                              "Frank;C3;CA;234",
                              "Mia;D1;DE;345",
                              "Arwen;U2;US;456" ]
    end

  end

end