sycsvpro 0.1.12 → 0.1.13
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile.lock +1 -1
- data/README.md +76 -7
- data/bin/sycsvpro +39 -7
- data/lib/sycsvpro/calculator.rb +32 -19
- data/lib/sycsvpro/dsl.rb +3 -2
- data/lib/sycsvpro/filter.rb +1 -1
- data/lib/sycsvpro/mapper.rb +72 -9
- data/lib/sycsvpro/merger.rb +19 -6
- data/lib/sycsvpro/transposer.rb +77 -0
- data/lib/sycsvpro/version.rb +1 -1
- data/lib/sycsvpro.rb +1 -0
- data/spec/sycsvpro/calculator_spec.rb +90 -0
- data/spec/sycsvpro/mapper_spec.rb +60 -2
- data/spec/sycsvpro/merger_spec.rb +93 -0
- data/spec/sycsvpro/transposer_spec.rb +76 -0
- metadata +4 -2
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -7,6 +7,7 @@ Processing of csv files. *sycsvpro* offers following functions
|
|
7
7
|
* extract rows and columns from a file
|
8
8
|
* remove duplicate lines from a file where duplicates are identified by key
|
9
9
|
columns (since version 0.1.11)
|
10
|
+
add unique to command line interface (since version 0.1.12)
|
10
11
|
* collect values of rows and assign them to categories
|
11
12
|
* map column values to new values
|
12
13
|
* allocate column values to a key column (since version 0.0.4)
|
@@ -22,6 +23,7 @@ Processing of csv files. *sycsvpro* offers following functions
|
|
22
23
|
version 0.1.4)
|
23
24
|
* join two files based on a joint column value (since version 0.1.7)
|
24
25
|
* merge files based on common headline columns (since version 0.1.10)
|
26
|
+
* transpose (swapping) rows and columns (since version 0.1.13)
|
25
27
|
|
26
28
|
To get help type
|
27
29
|
|
@@ -108,7 +110,7 @@ Collect all product rows (2, 3 and 4) to the category product
|
|
108
110
|
|
109
111
|
Map
|
110
112
|
---
|
111
|
-
Map the product names to new names
|
113
|
+
Map the product names to new names. Consider columns 2-4 only for mapping
|
112
114
|
|
113
115
|
The mapping file (mapping) uses the result from the collect command above
|
114
116
|
|
@@ -127,6 +129,35 @@ The mapping file (mapping) uses the result from the collect command above
|
|
127
129
|
|
128
130
|
$ sycsvpro -f in.csv -o out.csv map mapping -c 2-4
|
129
131
|
|
132
|
+
Transpose
|
133
|
+
---------
|
134
|
+
Swap rows and columns of revenue.csv to out.csv
|
135
|
+
|
136
|
+
$ sycsvpro -f revenue.csv -o out.csv transpose
|
137
|
+
|
138
|
+
2010;50;100;2000
|
139
|
+
2011;100;50;250
|
140
|
+
2012;150;10;300
|
141
|
+
2013;100;1000;3000
|
142
|
+
2014;200;20;20
|
143
|
+
customer;hello;indix;chiro
|
144
|
+
|
145
|
+
To use only columns 2013 and 2014 you can specify the columns to transpose
|
146
|
+
|
147
|
+
$ sycsvpro -f revenue.csv -o out.csv transpose -c 3-5
|
148
|
+
|
149
|
+
2013;100;1000;3000
|
150
|
+
2014;200;20;20
|
151
|
+
customer;hello;indix;chiro
|
152
|
+
|
153
|
+
To filter for hello only
|
154
|
+
|
155
|
+
$ sycsvpro -f revenue.csv -o out.csv transpose -c 3-5 -r 0,1
|
156
|
+
|
157
|
+
2013;100
|
158
|
+
2014;200
|
159
|
+
customer;hello
|
160
|
+
|
130
161
|
Allocate
|
131
162
|
--------
|
132
163
|
Allocate all the machine types to the customer
|
@@ -196,7 +227,7 @@ Process arithmetic operations on the contract count and create a target column
|
|
196
227
|
and a sum which is added at the end of the result file
|
197
228
|
|
198
229
|
$ sycsvpro -f in.csv -o out.csv calc -r 2-20 -h *,target
|
199
|
-
-c 6:*2,7:
|
230
|
+
-c 6:*2,7:c6*10
|
200
231
|
|
201
232
|
$ cat out.csv
|
202
233
|
customer;machine;control;drive;motor;date;contract;target
|
@@ -210,6 +241,20 @@ and a sum which is added at the end of the result file
|
|
210
241
|
In the sum row non-numbers in the columns are converted to 0. Therefore column 0
|
211
242
|
is summed up to 0 as all strings are converted to 0.
|
212
243
|
|
244
|
+
Write only columns 0, 6 and 7 by specifying write columns
|
245
|
+
|
246
|
+
$ sycsvpro -f in.csv -o out.csv calc -r 2-20 -h "customer,contract,target"
|
247
|
+
-c 6:*2,7:c6*10
|
248
|
+
-w 0,6-7
|
249
|
+
$ cat out.csv
|
250
|
+
customer;contract;target
|
251
|
+
hello;2;20
|
252
|
+
hello;2;20
|
253
|
+
indix;2;20
|
254
|
+
chiro;2;20
|
255
|
+
chiro;2;20
|
256
|
+
0;10;100
|
257
|
+
|
213
258
|
Join
|
214
259
|
----
|
215
260
|
Join the machine and contract file with columns from the customer address file
|
@@ -250,6 +295,7 @@ Merge files machine_count.csv and revenue.csv based on the year columns.
|
|
250
295
|
This will create the out.csv
|
251
296
|
|
252
297
|
```
|
298
|
+
$ cat out.csv
|
253
299
|
;2010;2013;2014
|
254
300
|
hello;1;0;0
|
255
301
|
indix;1;0;0
|
@@ -266,6 +312,7 @@ Sort rows on specified columns as an example sort rows based on customer
|
|
266
312
|
|
267
313
|
$ sycsvpro -f in.csv -o out.csv sort -r 2-20 -c s:0,d:5
|
268
314
|
|
315
|
+
$ cat out.csv
|
269
316
|
customer;machine;control;drive;motor;date;contract;target
|
270
317
|
hello;h2;con123;dri130;mot110;1.02.3012;1
|
271
318
|
hello;h1;con123;dri120;mot100;1.01.3013;1
|
@@ -406,8 +453,8 @@ row are added on top of the sorted file
|
|
406
453
|
* `sycsvpro -f infile analyze` now lists the columns with sample data
|
407
454
|
* Add `params` method to *Dsl* that retrieves the params provided in the execute
|
408
455
|
command: `sycsvpro execute script.rb method infile param1 param2`
|
409
|
-
* Add `
|
410
|
-
run: `
|
456
|
+
* Add `clean\_up` to *Dsl* that takes files to be deleted after the script has
|
457
|
+
run: `clean\_up(%w{file1 file2})`
|
411
458
|
|
412
459
|
Version 0.1.4
|
413
460
|
-------------
|
@@ -465,7 +512,7 @@ Version 0.1.7
|
|
465
512
|
This will join infile.csv with source.csv based on the join columns (j "1=3").
|
466
513
|
From source.csv columns 2 and 4 (-c "2,4") will be inserted at column
|
467
514
|
positions 1 and 3 (-p "1,3"). The header will be used from the infile.csv
|
468
|
-
(-h "
|
515
|
+
(-h "\*") supplemented by the columns A and B (-i "A,B") that will also be
|
469
516
|
positioned at column 1 and 3 (-p "1,3").
|
470
517
|
|
471
518
|
Version 0.1.8
|
@@ -474,8 +521,9 @@ Version 0.1.8
|
|
474
521
|
|
475
522
|
Version 0.1.9
|
476
523
|
-------------
|
477
|
-
* When creating columns dynamically they are in arbitrary sequence.
|
478
|
-
provide a switch `sort: "2"` which will sort the header from
|
524
|
+
* When creating columns dynamically in count they are in arbitrary sequence.
|
525
|
+
You can now provide a switch `sort: "2"` which will sort the header from
|
526
|
+
column 2 on.
|
479
527
|
|
480
528
|
Version 0.1.10
|
481
529
|
--------------
|
@@ -488,6 +536,27 @@ Version 0.1.11
|
|
488
536
|
* Unique removes duplicate lines from the infile. Duplicate lines are identified
|
489
537
|
by key columns
|
490
538
|
|
539
|
+
Version 0.1.12
|
540
|
+
--------------
|
541
|
+
* Add unique to sycsvpro command line interface
|
542
|
+
|
543
|
+
Version 0.1.13
|
544
|
+
--------------
|
545
|
+
* Optimize Mapper by only considering columns provided for mapping which should
|
546
|
+
increase performance
|
547
|
+
* match\_boolean\_filter? in Filter now also processes strings with single
|
548
|
+
quotes inside
|
549
|
+
* Transposer transposes rows and columns, that is, makes columns rows and vice versa
|
550
|
+
* Calculator can now have colons inside the operation
|
551
|
+
sycsvpro -f in.csv -o out.csv calc -c "122:+[1,3,5].inject(:+)"
|
552
|
+
Previously the operation would have been cut after inject(
|
553
|
+
* A write flag in Calculator specifies which columns to add to the result.
|
554
|
+
* Calculator introduced a switch 'final\_header' which indicates the header
|
555
|
+
provided should not be filtered in regard to a provided 'write' flag but
|
556
|
+
written to the result file as is
|
557
|
+
* Merger now doesn't require a key column, that is, files can be merged without
|
558
|
+
key columns.
|
559
|
+
|
491
560
|
Installation
|
492
561
|
============
|
493
562
|
[![Gem Version](https://badge.fury.io/rb/sycsvpro.png)](http://badge.fury.io/rb/sycsvpro)
|
data/bin/sycsvpro
CHANGED
@@ -589,6 +589,27 @@ command :map do |c|
|
|
589
589
|
end
|
590
590
|
end
|
591
591
|
|
592
|
+
desc 'Transposes rows and columns'
|
593
|
+
command :transpose do |c|
|
594
|
+
c.desc 'Rows to consider'
|
595
|
+
c.arg_name 'ROW1,ROW2,ROW10-ROW30,45-EOF,REGEXP'
|
596
|
+
c.flag [:r, :row], :must_match => row_regex
|
597
|
+
|
598
|
+
c.desc 'Columns to consider for mapping'
|
599
|
+
c.arg_name 'COL1,COL2,COL10-COL30'
|
600
|
+
c.flag [:c, :col], :must_match => /\d+(?:,\d+|-\d+)*/
|
601
|
+
|
602
|
+
c.action do |global_options,options,args|
|
603
|
+
print "Transpose..."
|
604
|
+
transpose = Sycsvpro::Transposer.new(infile: global_options[:f],
|
605
|
+
outfile: global_options[:o],
|
606
|
+
rows: options[:r],
|
607
|
+
cols: options[:c])
|
608
|
+
transpose.execute
|
609
|
+
puts "done"
|
610
|
+
end
|
611
|
+
end
|
612
|
+
|
592
613
|
desc 'Process operations on columns. Optionally add a sum row for columns with'+
|
593
614
|
'number values'
|
594
615
|
command :calc do |c|
|
@@ -600,6 +621,11 @@ command :calc do |c|
|
|
600
621
|
default_value '*'
|
601
622
|
c.flag [:h, :header], :must_match => /^[*|\w ]+(?:,[\w ]+)*/
|
602
623
|
|
624
|
+
c.desc 'Indicates whether the provided header is final. That is if columns'+
|
625
|
+
' to be written to the outfile are selected by the write flag then '+
|
626
|
+
'the header should left untouched and written as is'
|
627
|
+
c.switch [:f, :final], :default_value => false
|
628
|
+
|
603
629
|
c.desc 'Rows to consider for calculations'
|
604
630
|
c.arg_name 'ROW1,ROW2-ROW10,45-EOF,REGEXP'
|
605
631
|
c.flag [:r, :row], :must_match => row_regex
|
@@ -610,6 +636,10 @@ command :calc do |c|
|
|
610
636
|
c.arg_name "COL1:*2,COL2:-C3,COL3:*2+(4+C5)"
|
611
637
|
c.flag [:c, :col], :must_match => /^\d+:.+/
|
612
638
|
|
639
|
+
c.desc 'Columns to be written to the result file'
|
640
|
+
c.arg_name "COL1,COL2-COL5"
|
641
|
+
c.flag [:w, :write], :must_match => /\d+(?:,\d+|-\d+)*/
|
642
|
+
|
613
643
|
c.desc 'Date format of date columns'
|
614
644
|
c.arg_name '%d.%m.%Y|%Y-%m-%d|...'
|
615
645
|
c.flag [:df]
|
@@ -622,13 +652,15 @@ command :calc do |c|
|
|
622
652
|
help_now! "You need to provide the column flag" if options[:c].nil?
|
623
653
|
|
624
654
|
print "Calculating..."
|
625
|
-
calculator = Sycsvpro::Calculator.new(infile:
|
626
|
-
outfile:
|
627
|
-
header:
|
628
|
-
|
629
|
-
|
630
|
-
|
631
|
-
|
655
|
+
calculator = Sycsvpro::Calculator.new(infile: global_options[:f],
|
656
|
+
outfile: global_options[:o],
|
657
|
+
header: options[:h],
|
658
|
+
final_header: options[:f],
|
659
|
+
rows: options[:r],
|
660
|
+
cols: options[:c],
|
661
|
+
write: options[:w],
|
662
|
+
sum: options[:s],
|
663
|
+
df: options[:df])
|
632
664
|
calculator.execute
|
633
665
|
puts "done"
|
634
666
|
end
|
data/lib/sycsvpro/calculator.rb
CHANGED
@@ -58,8 +58,13 @@ module Sycsvpro
|
|
58
58
|
attr_reader :formulae
|
59
59
|
# header of the outfile
|
60
60
|
attr_reader :header
|
61
|
+
# indicates whether this header is final and should not be filtered in
|
62
|
+
# respect to the columns defined by write
|
63
|
+
attr_reader :final_header
|
61
64
|
# filter that is used for columns
|
62
65
|
attr_reader :columns
|
66
|
+
# selected columns to be written to outfile
|
67
|
+
attr_reader :write
|
63
68
|
# if true add a sum row at the bottom of the out file
|
64
69
|
attr_reader :add_sum_row
|
65
70
|
|
@@ -67,29 +72,36 @@ module Sycsvpro
|
|
67
72
|
# can be supplemented with additional column names that are generated due
|
68
73
|
# to an arithmetic operation that creates new columns
|
69
74
|
# :call-seq:
|
70
|
-
# Sycsvpro::Calculator.new(infile:
|
71
|
-
# outfile:
|
72
|
-
# df:
|
73
|
-
# rows:
|
74
|
-
# header:
|
75
|
-
#
|
76
|
-
#
|
75
|
+
# Sycsvpro::Calculator.new(infile: "in.csv",
|
76
|
+
# outfile: "out.csv",
|
77
|
+
# df: "%d.%m.%Y",
|
78
|
+
# rows: "1,2,BEGINn3>20END",
|
79
|
+
# header: "*,Count",
|
80
|
+
# final_header: false,
|
81
|
+
# cols: "4:c1+c2*2",
|
82
|
+
# write: "1,3-5",
|
83
|
+
# sum: true).execute
|
77
84
|
# infile:: File that contains the rows to be operated on
|
78
85
|
# outfile:: Result of the operations
|
79
86
|
# df:: Date format
|
80
87
|
# rows:: Row filter that indicates which rows to consider
|
81
88
|
# header:: Header of the columns
|
89
|
+
# final_header:: Indicates that if write filters columns the header should
|
90
|
+
# not be filtered when written
|
82
91
|
# cols:: Operations on the column values
|
92
|
+
# write:: Columns that are written to the outfile
|
83
93
|
# sum:: Indicate whether to add a sum row
|
84
94
|
def initialize(options={})
|
85
|
-
@infile
|
86
|
-
@outfile
|
87
|
-
@date_format
|
88
|
-
@row_filter
|
89
|
-
@
|
90
|
-
@
|
91
|
-
@
|
92
|
-
@
|
95
|
+
@infile = options[:infile]
|
96
|
+
@outfile = options[:outfile]
|
97
|
+
@date_format = options[:df] || "%Y-%m-%d"
|
98
|
+
@row_filter = RowFilter.new(options[:rows], df: options[:df])
|
99
|
+
@write_filter = ColumnFilter.new(options[:write], df: options[:df])
|
100
|
+
@header = Header.new(options[:header])
|
101
|
+
@final_header = options[:final_header]
|
102
|
+
@sum_row = []
|
103
|
+
@add_sum_row = options[:sum]
|
104
|
+
@formulae = {}
|
93
105
|
create_calculator(options[:cols])
|
94
106
|
end
|
95
107
|
|
@@ -112,7 +124,8 @@ module Sycsvpro
|
|
112
124
|
|
113
125
|
unless processed_header
|
114
126
|
header_row = header.process(line.chomp)
|
115
|
-
|
127
|
+
header_row = @write_filter.process(header_row) unless @final_header
|
128
|
+
out.puts header_row unless header_row.nil? or header_row.empty?
|
116
129
|
processed_header = true
|
117
130
|
next
|
118
131
|
end
|
@@ -123,7 +136,7 @@ module Sycsvpro
|
|
123
136
|
formulae.each do |col, formula|
|
124
137
|
@columns[col.to_i] = eval(formula)
|
125
138
|
end
|
126
|
-
out.puts @columns.join(';')
|
139
|
+
out.puts @write_filter.process(@columns.join(';'))
|
127
140
|
|
128
141
|
@columns.each_with_index do |column, index|
|
129
142
|
column = 0 unless column.to_s =~ /^[\d\.,]*$/
|
@@ -137,7 +150,7 @@ module Sycsvpro
|
|
137
150
|
|
138
151
|
end
|
139
152
|
|
140
|
-
out.puts @sum_row.join(';') if add_sum_row
|
153
|
+
out.puts @write_filter.process(@sum_row.join(';')) if add_sum_row
|
141
154
|
|
142
155
|
end
|
143
156
|
end
|
@@ -154,7 +167,7 @@ module Sycsvpro
|
|
154
167
|
# column 1 + 1 c[4] = c[1] + 1
|
155
168
|
def create_calculator(code)
|
156
169
|
code.split(/,(?=\d+:)/).each do |operation|
|
157
|
-
col, term = operation.split(':')
|
170
|
+
col, term = operation.split(':', 2)
|
158
171
|
term = "c#{col}#{term}" if term =~ /^[+\-*\/%]/
|
159
172
|
formulae[col] = term
|
160
173
|
end
|
data/lib/sycsvpro/dsl.rb
CHANGED
@@ -76,8 +76,9 @@ module Dsl
|
|
76
76
|
end
|
77
77
|
end
|
78
78
|
|
79
|
-
# Remove leading and trailing " and spaces as well as reducing more than 2
|
80
|
-
# from csv values.
|
79
|
+
# Remove leading and trailing " and spaces as well as reducing more than 2
|
80
|
+
# spaces between words from csv values. Replace ; with , from values as ;
|
81
|
+
# is used as value separator
|
81
82
|
def unstring(line)
|
82
83
|
line = str2utf8(line)
|
83
84
|
line.scan(/(?<=^"|;")[^"]+(?=;)+[^"]*|;+[^"](?=";|"$)/).each do |value|
|
data/lib/sycsvpro/filter.rb
CHANGED
data/lib/sycsvpro/mapper.rb
CHANGED
@@ -2,8 +2,33 @@
|
|
2
2
|
module Sycsvpro
|
3
3
|
|
4
4
|
# Map values to new values described in a mapping file
|
5
|
+
#
|
6
|
+
# in.csv
|
7
|
+
#
|
8
|
+
# | ID | Name |
|
9
|
+
# | --- | ---- |
|
10
|
+
# | 1 | Hank |
|
11
|
+
# | 2 | Jane |
|
12
|
+
#
|
13
|
+
# mapping
|
14
|
+
#
|
15
|
+
# 1:01
|
16
|
+
# 2:02
|
17
|
+
#
|
18
|
+
# Sycsvpro::Mapper.new(infile: "in.csv",
|
19
|
+
# outfile: "out.csv",
|
20
|
+
# mapping: "mapping",
|
21
|
+
# cols: "0").execute
|
22
|
+
# out.csv
|
23
|
+
#
|
24
|
+
# | ID | Name |
|
25
|
+
# | --- | ---- |
|
26
|
+
# | 01 | Hank |
|
27
|
+
# | 02 | Jane |
|
5
28
|
class Mapper
|
6
29
|
|
30
|
+
include Dsl
|
31
|
+
|
7
32
|
# infile contains the data that is operated on
|
8
33
|
attr_reader :infile
|
9
34
|
# outfile is the file where the result is written to
|
@@ -12,15 +37,29 @@ module Sycsvpro
|
|
12
37
|
attr_reader :mapper
|
13
38
|
# filter that is used for rows
|
14
39
|
attr_reader :row_filter
|
15
|
-
# filter that
|
40
|
+
# filter that contains columns that are considered for mappings
|
16
41
|
attr_reader :col_filter
|
17
42
|
|
18
43
|
# Creates new mapper
|
44
|
+
# :call-seq:
|
45
|
+
# Sycsvpro::Mapper.new(infile: "in.csv",
|
46
|
+
# outfile: "out.csv",
|
47
|
+
# mapping: "mapping.csv",
|
48
|
+
# rows: "1,3-5",
|
49
|
+
# cols: "3,4-7",
|
50
|
+
# df: "%Y-%m-%d").execute
|
51
|
+
#
|
52
|
+
# infile:: File that contains columns to be mapped
|
53
|
+
# outfile:: File that contains the mapping result after execute
|
54
|
+
# mapping:: File that contains the mappings. Mappings are separated by ':'
|
55
|
+
# rows:: Rows to consider for mappings
|
56
|
+
# cols:: Columns that should be mapped
|
57
|
+
# df:: Date format for row filter if rows are filtered on date values
|
19
58
|
def initialize(options={})
|
20
59
|
@infile = options[:infile]
|
21
60
|
@outfile = options[:outfile]
|
22
|
-
@row_filter = RowFilter.new(options[:
|
23
|
-
@col_filter =
|
61
|
+
@row_filter = RowFilter.new(options[:rows], df: options[:df])
|
62
|
+
@col_filter = init_col_filter(options[:cols], @infile)
|
24
63
|
@mapper = {}
|
25
64
|
init_mapper(options[:mapping])
|
26
65
|
end
|
@@ -29,25 +68,49 @@ module Sycsvpro
|
|
29
68
|
def execute
|
30
69
|
File.open(outfile, 'w') do |out|
|
31
70
|
File.new(infile, 'r').each_with_index do |line, index|
|
32
|
-
result =
|
71
|
+
result = row_filter.process(line, row: index)
|
33
72
|
next if result.chomp.empty? or result.nil?
|
34
|
-
|
35
|
-
|
73
|
+
result += ' ' if result =~ /;$/
|
74
|
+
cols = result.split(';')
|
75
|
+
@col_filter.each do |key|
|
76
|
+
substitute = mapper[cols[key]]
|
77
|
+
cols[key] = substitute if substitute
|
36
78
|
end
|
37
|
-
out.puts
|
79
|
+
out.puts cols.join(';').strip
|
38
80
|
end
|
39
81
|
end
|
40
82
|
end
|
41
83
|
|
42
84
|
private
|
43
85
|
|
44
|
-
# Initializes the mappings
|
86
|
+
# Initializes the mappings. A mapping consists of the value to be mapped
|
87
|
+
# to another value. The values are separated by colons ':'
|
88
|
+
# Example:
|
89
|
+
# source_value:mapping_value
|
45
90
|
def init_mapper(file)
|
46
91
|
File.new(file, 'r').each_line do |line|
|
47
|
-
from, to = line.
|
92
|
+
from, to = unstring(line).split(':')
|
48
93
|
mapper[from] = to
|
49
94
|
end
|
50
95
|
end
|
96
|
+
|
97
|
+
# Initialize the col_filter that contains columns to be considered for
|
98
|
+
# mapping. If no columns are provided, that is columns is nil, a filter with
|
99
|
+
# all columns is returned
|
100
|
+
def init_col_filter(columns, source)
|
101
|
+
if columns.nil?
|
102
|
+
File.open(source, 'r').each do |line|
|
103
|
+
line = unstring(line)
|
104
|
+
next if line.empty?
|
105
|
+
line += ' ' if line =~ /;$/
|
106
|
+
size = line.split(';').size
|
107
|
+
columns = "0-#{size-1}"
|
108
|
+
break
|
109
|
+
end
|
110
|
+
end
|
111
|
+
ColumnFilter.new(columns).filter.flatten
|
112
|
+
end
|
113
|
+
|
51
114
|
end
|
52
115
|
|
53
116
|
end
|
data/lib/sycsvpro/merger.rb
CHANGED
@@ -69,21 +69,25 @@ module Sycsvpro
|
|
69
69
|
# source_header:: pattern for each header of the source file to determine
|
70
70
|
# the column. The pattern is a regex without the enclosing slashes '/'
|
71
71
|
# key:: first column value from the source file that is used as first
|
72
|
-
# column in the target file
|
72
|
+
# column in the target file. The key is optional.
|
73
73
|
def initialize(options = {})
|
74
74
|
@outfile = options[:outfile]
|
75
75
|
@header_cols = options[:header].split(',')
|
76
76
|
@source_header = options[:source_header].split(',')
|
77
|
-
@key = options[:key].split(',')
|
77
|
+
@key = options[:key] ? options[:key].split(',') : []
|
78
|
+
@has_key = !@key.empty?
|
78
79
|
@files = options[:files].split(',')
|
80
|
+
if @source_header.count != @files.count
|
81
|
+
raise "file count has to be equal to source_header count"
|
82
|
+
end
|
79
83
|
end
|
80
84
|
|
81
85
|
# Merges the files based on the provided parameters
|
82
86
|
def execute
|
83
87
|
File.open(outfile, 'w') do |out|
|
84
|
-
out.puts "
|
88
|
+
out.puts "#{';' unless @key.empty?}#{header_cols.join(';')}"
|
85
89
|
files.each do |file|
|
86
|
-
@current_key =
|
90
|
+
@current_key = create_current_key
|
87
91
|
@current_source_header = @source_header.shift
|
88
92
|
processed_header = false
|
89
93
|
File.open(file).each_with_index do |line, index|
|
@@ -110,16 +114,25 @@ module Sycsvpro
|
|
110
114
|
columns[i] = c.scan(Regexp.new(@current_source_header)).flatten[0]
|
111
115
|
end
|
112
116
|
|
113
|
-
@file_header = [@current_key.to_i]
|
117
|
+
@file_header = @current_key ? [@current_key.to_i] : []
|
118
|
+
|
114
119
|
header_cols.each do |h|
|
115
120
|
@file_header << columns.index(h)
|
116
121
|
end
|
122
|
+
|
117
123
|
@file_header.compact!
|
118
124
|
end
|
119
125
|
|
126
|
+
# create the current key from the next key value; returns the key or nil if it is missing or blank
|
127
|
+
def create_current_key
|
128
|
+
key = @key.shift
|
129
|
+
key.nil? || key.strip.empty? ? nil : key
|
130
|
+
end
|
131
|
+
|
120
132
|
# create a line filtered by the file_header
|
121
133
|
def create_line(columns)
|
122
|
-
|
134
|
+
empty_col = ';' if @has_key && @current_key.nil?
|
135
|
+
"#{empty_col}#{columns.values_at(*@file_header).join(';')}"
|
123
136
|
end
|
124
137
|
|
125
138
|
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
# Operating csv files
|
2
|
+
module Sycsvpro
|
3
|
+
|
4
|
+
# Transposes rows to columns and vice versa
|
5
|
+
#
|
6
|
+
# Example
|
7
|
+
#
|
8
|
+
# infile.csv
|
9
|
+
# | Year | SP | RP | Total | SP-O | RP-O | O |
|
10
|
+
# | ---- | -- | -- | ----- | ---- | ---- | --- |
|
11
|
+
# | | 10 | 20 | 30 | 100 | 40 | 140 |
|
12
|
+
# | 2008 | 5 | 10 | 15 | 10 | 20 | 10 |
|
13
|
+
# | 2009 | 2 | 5 | 5 | 20 | 10 | 30 |
|
14
|
+
# | 2010 | 3 | 5 | 10 | 70 | 10 | 100 |
|
15
|
+
#
|
16
|
+
# outfile.csv
|
17
|
+
# | Year | | 2008 | 2009 | 2010 |
|
18
|
+
# | ----- | --- | ---- | ---- | ---- |
|
19
|
+
# | SP | 10 | 5 | 2 | 3 |
|
20
|
+
# | RP | 20 | 10 | 5 | 5 |
|
21
|
+
# | Total | 30 | 15 | 5 | 10 |
|
22
|
+
# | SP-O | 100 | 10 | 20 | 70 |
|
23
|
+
# | RP-O | 40 | 20 | 10 | 10 |
|
24
|
+
# | O | 140 | 10 | 30 | 100 |
|
25
|
+
#
|
26
|
+
class Transposer
|
27
|
+
|
28
|
+
include Dsl
|
29
|
+
|
30
|
+
# infile contains the data that is operated on
|
31
|
+
attr_reader :infile
|
32
|
+
# outfile is the file where the result is written to
|
33
|
+
attr_reader :outfile
|
34
|
+
# filter that is used for rows
|
35
|
+
attr_reader :row_filter
|
36
|
+
# filter that is used for columns
|
37
|
+
attr_reader :col_filter
|
38
|
+
|
39
|
+
# Create a new Transposer
|
40
|
+
# :call-seq:
|
41
|
+
# Sycsvpro::Transposer.new(infile: "infile.csv",
|
42
|
+
# outfile: "outfile.csv",
|
43
|
+
# rows: "0,3-5",
|
44
|
+
# cols: "1,3").execute
|
45
|
+
def initialize(options = {})
|
46
|
+
@infile = options[:infile]
|
47
|
+
@outfile = options[:outfile]
|
48
|
+
@row_filter = RowFilter.new(options[:rows])
|
49
|
+
@col_filter = ColumnFilter.new(options[:cols])
|
50
|
+
end
|
51
|
+
|
52
|
+
# Executes the transpose by reading the infile and writing the result to
|
53
|
+
# the outfile
|
54
|
+
def execute
|
55
|
+
transpose = {}
|
56
|
+
|
57
|
+
File.open(@infile).each_with_index do |line, index|
|
58
|
+
line = unstring(line)
|
59
|
+
next if line.empty?
|
60
|
+
|
61
|
+
result = @col_filter.process(@row_filter.process(line, row: index))
|
62
|
+
next if result.nil?
|
63
|
+
|
64
|
+
result.split(';').each_with_index do |col, index|
|
65
|
+
transpose[index] ||= []
|
66
|
+
transpose[index] << col
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
File.open(@outfile, 'w') do |out|
|
71
|
+
transpose.values.each { |value| out.puts value.join(';') }
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
end
|
76
|
+
|
77
|
+
end
|
data/lib/sycsvpro/version.rb
CHANGED
data/lib/sycsvpro.rb
CHANGED
@@ -12,6 +12,96 @@ module Sycsvpro
|
|
12
12
|
@out_file = File.join(File.dirname(__FILE__), "files/machines_out.csv")
|
13
13
|
end
|
14
14
|
|
15
|
+
it "should ignore colons within calculation expression" do
|
16
|
+
cols = "3:+[c1,c2].inject(:+),4:c2*3"
|
17
|
+
header = "*,times"
|
18
|
+
|
19
|
+
calculator = Calculator.new(infile: @in_number_file,
|
20
|
+
outfile: @out_file,
|
21
|
+
header: header,
|
22
|
+
cols: cols)
|
23
|
+
|
24
|
+
calculator.execute
|
25
|
+
|
26
|
+
result = [ "customer;before;between;after;times",
|
27
|
+
"Fink;2;3;6;9",
|
28
|
+
"Haas;3;1;10;3",
|
29
|
+
"Gent;4;4;12;12",
|
30
|
+
"Rank;5;4;10;12" ]
|
31
|
+
|
32
|
+
rows = 0
|
33
|
+
|
34
|
+
File.open(@out_file).each_with_index do |line, index|
|
35
|
+
line.chomp.should eq result[index]
|
36
|
+
rows += 1
|
37
|
+
end
|
38
|
+
|
39
|
+
rows.should eq result.size
|
40
|
+
end
|
41
|
+
|
42
|
+
it "should save only specified columns" do
|
43
|
+
cols = "3:+[c1,c2].inject(:+),4:c3*3"
|
44
|
+
write = "0,3-4"
|
45
|
+
header = "customer;sum;times"
|
46
|
+
|
47
|
+
calculator = Calculator.new(infile: @in_number_file,
|
48
|
+
outfile: @out_file,
|
49
|
+
header: header,
|
50
|
+
final_header: true,
|
51
|
+
write: write,
|
52
|
+
cols: cols,
|
53
|
+
sum: true)
|
54
|
+
|
55
|
+
calculator.execute
|
56
|
+
|
57
|
+
result = [ "customer;sum;times",
|
58
|
+
"Fink;6;18",
|
59
|
+
"Haas;10;30",
|
60
|
+
"Gent;12;36",
|
61
|
+
"Rank;10;30",
|
62
|
+
"0;38;114" ]
|
63
|
+
|
64
|
+
rows = 0
|
65
|
+
|
66
|
+
File.open(@out_file).each_with_index do |line, index|
|
67
|
+
line.chomp.should eq result[index]
|
68
|
+
rows += 1
|
69
|
+
end
|
70
|
+
|
71
|
+
rows.should eq result.size
|
72
|
+
end
|
73
|
+
|
74
|
+
it "should save only specified columns" do
|
75
|
+
cols = "3:+[c1,c2].inject(:+),4:c3*3"
|
76
|
+
write = "0,3-4"
|
77
|
+
header = "*,times"
|
78
|
+
|
79
|
+
calculator = Calculator.new(infile: @in_number_file,
|
80
|
+
outfile: @out_file,
|
81
|
+
header: header,
|
82
|
+
write: write,
|
83
|
+
cols: cols,
|
84
|
+
sum: true)
|
85
|
+
|
86
|
+
calculator.execute
|
87
|
+
|
88
|
+
result = [ "customer;after;times",
|
89
|
+
"Fink;6;18",
|
90
|
+
"Haas;10;30",
|
91
|
+
"Gent;12;36",
|
92
|
+
"Rank;10;30",
|
93
|
+
"0;38;114" ]
|
94
|
+
|
95
|
+
rows = 0
|
96
|
+
|
97
|
+
File.open(@out_file).each_with_index do |line, index|
|
98
|
+
line.chomp.should eq result[index]
|
99
|
+
rows += 1
|
100
|
+
end
|
101
|
+
|
102
|
+
rows.should eq result.size
|
103
|
+
end
|
104
|
+
|
15
105
|
it "should operate on existing row" do
|
16
106
|
rows = "2-8"
|
17
107
|
cols = "3:*3,4:*4+1"
|
@@ -6,12 +6,16 @@ module Sycsvpro
|
|
6
6
|
|
7
7
|
before do
|
8
8
|
@in_file = File.join(File.dirname(__FILE__), "files/in.csv")
|
9
|
+
@in_file5 = File.join(File.dirname(__FILE__), "files/in5.csv")
|
9
10
|
@out_file = File.join(File.dirname(__FILE__), "files/out.csv")
|
10
11
|
@mappings = File.join(File.dirname(__FILE__), "files/mappings")
|
11
12
|
end
|
12
13
|
|
13
|
-
it "should map values to new values" do
|
14
|
-
mapper = Mapper.new(infile:
|
14
|
+
it "should map values to new values in all columns" do
|
15
|
+
mapper = Mapper.new(infile: @in_file,
|
16
|
+
outfile: @out_file,
|
17
|
+
rows: "0-7",
|
18
|
+
mapping: @mappings)
|
15
19
|
|
16
20
|
mapper.execute
|
17
21
|
|
@@ -30,6 +34,60 @@ module Sycsvpro
|
|
30
34
|
|
31
35
|
end
|
32
36
|
|
37
|
+
it "should map values to new values on specified columns only" do
|
38
|
+
mapper = Mapper.new(infile: @in_file,
|
39
|
+
outfile: @out_file,
|
40
|
+
rows: "0-7",
|
41
|
+
cols: "4",
|
42
|
+
mapping: @mappings).execute
|
43
|
+
|
44
|
+
result = [ "customer;contract-number;expires-on;machine;product1;product2",
|
45
|
+
"Fink;1234;20.12.2015;f1;control123;dri222",
|
46
|
+
"Haas;3322;1.10.2011;h1;control332;dri111",
|
47
|
+
"Gent;4323;1.3.2014;g1;control123;dri111",
|
48
|
+
"Fink;1234;30.12.2016;f2;control333;dri321",
|
49
|
+
"Rank;3232;1.5.2013;r1;control332;dri321",
|
50
|
+
"Klig;4432;;k1;control332;dri222",
|
51
|
+
"fink;1234;;f3;control332;dri321" ]
|
52
|
+
|
53
|
+
rows = 0
|
54
|
+
|
55
|
+
File.open(@out_file).each_with_index do |line, index|
|
56
|
+
line.chomp.should eq result[index]
|
57
|
+
rows += 1
|
58
|
+
end
|
59
|
+
|
60
|
+
rows.should eq result.size
|
61
|
+
|
62
|
+
end
|
63
|
+
|
64
|
+
it "should map values to new values where last column is empty" do
|
65
|
+
mapper = Mapper.new(infile: @in_file5,
|
66
|
+
outfile: @out_file,
|
67
|
+
cols: "5",
|
68
|
+
mapping: @mappings).execute
|
69
|
+
|
70
|
+
result = [ "customer;contract-number;expires-on;machine;product1;product2",
|
71
|
+
"Fink;1234;20.12.2015;f1;con123;drive222",
|
72
|
+
"Haas;3322;1.10.2011;h1;con332;drive111",
|
73
|
+
"Gent;4323;1.3.2014;g1;con123;drive111",
|
74
|
+
"Fink;1234;30.12.2016;f2;con333;drive321",
|
75
|
+
"Rank;3232;1.5.2013;r1;con332;drive321",
|
76
|
+
"Klig;4432;;k1;con332;drive222",
|
77
|
+
"fink;1234;;f3;con332;drive321",
|
78
|
+
"zink;8839;8.8.2018;z3;con332;" ]
|
79
|
+
|
80
|
+
rows = 0
|
81
|
+
|
82
|
+
File.open(@out_file).each_with_index do |line, index|
|
83
|
+
line.chomp.should eq result[index]
|
84
|
+
rows += 1
|
85
|
+
end
|
86
|
+
|
87
|
+
rows.should eq result.size
|
88
|
+
|
89
|
+
end
|
90
|
+
|
33
91
|
end
|
34
92
|
|
35
93
|
end
|
@@ -7,6 +7,8 @@ module Sycsvpro
|
|
7
7
|
before do
|
8
8
|
@file1 = File.join(File.dirname(__FILE__), "files/merge1.csv")
|
9
9
|
@file2 = File.join(File.dirname(__FILE__), "files/merge2.csv")
|
10
|
+
@file3 = File.join(File.dirname(__FILE__), "files/merge3.csv")
|
11
|
+
@file4 = File.join(File.dirname(__FILE__), "files/merge4.csv")
|
10
12
|
@outfile = File.join(File.dirname(__FILE__), "files/merged.csv")
|
11
13
|
end
|
12
14
|
|
@@ -100,6 +102,97 @@ module Sycsvpro
|
|
100
102
|
rows.should eq result.size
|
101
103
|
end
|
102
104
|
|
105
|
+
it "should merge two files without key columns" do
|
106
|
+
header = "2010,2011,2012,2014"
|
107
|
+
source_header = "(\\d{4}),(\\d{4})"
|
108
|
+
|
109
|
+
Sycsvpro::Merger.new(outfile: @outfile,
|
110
|
+
files: "#{@file4},#{@file3}",
|
111
|
+
header: header,
|
112
|
+
source_header: source_header).execute
|
113
|
+
|
114
|
+
result = [ "2010;2011;2012;2014",
|
115
|
+
"20;30;40;60",
|
116
|
+
"30;40;50;70",
|
117
|
+
"40;50;60;80",
|
118
|
+
"50;60;70;90",
|
119
|
+
"m1;m2;m3",
|
120
|
+
"n1;n2;n3",
|
121
|
+
"o1;;o3", ]
|
122
|
+
|
123
|
+
rows = 0
|
124
|
+
|
125
|
+
File.open(@outfile).each_with_index do |row, index|
|
126
|
+
row.chomp.should eq result[index]
|
127
|
+
rows += 1
|
128
|
+
end
|
129
|
+
|
130
|
+
rows.should eq result.size
|
131
|
+
end
|
132
|
+
|
133
|
+
it "should merge two files key columns in one file only" do
|
134
|
+
header = "2010,2011,2012,2014"
|
135
|
+
key = "0"
|
136
|
+
source_header = "(\\d{4}),(\\d{4})"
|
137
|
+
|
138
|
+
Sycsvpro::Merger.new(outfile: @outfile,
|
139
|
+
files: "#{@file1},#{@file3}",
|
140
|
+
header: header,
|
141
|
+
key: key,
|
142
|
+
source_header: source_header).execute
|
143
|
+
|
144
|
+
result = [ ";2010;2011;2012;2014",
|
145
|
+
"SP;20;30;40;60",
|
146
|
+
"RP;30;40;50;70",
|
147
|
+
"MP;40;50;60;80",
|
148
|
+
"NP;50;60;70;90",
|
149
|
+
";m1;m2;m3",
|
150
|
+
";n1;n2;n3",
|
151
|
+
";o1;;o3", ]
|
152
|
+
|
153
|
+
rows = 0
|
154
|
+
|
155
|
+
File.open(@outfile).each_with_index do |row, index|
|
156
|
+
row.chomp.should eq result[index]
|
157
|
+
rows += 1
|
158
|
+
end
|
159
|
+
|
160
|
+
rows.should eq result.size
|
161
|
+
end
|
162
|
+
|
163
|
+
it "should merge two files key columns in two files of three only" do
|
164
|
+
header = "2010,2011,2012,2014"
|
165
|
+
key = "0, ,0"
|
166
|
+
source_header = "(\\d{4}),(\\d{4}),(\\d{4})"
|
167
|
+
|
168
|
+
Sycsvpro::Merger.new(outfile: @outfile,
|
169
|
+
files: "#{@file1},#{@file3},#{@file2}",
|
170
|
+
header: header,
|
171
|
+
key: key,
|
172
|
+
source_header: source_header).execute
|
173
|
+
|
174
|
+
result = [ ";2010;2011;2012;2014",
|
175
|
+
"SP;20;30;40;60",
|
176
|
+
"RP;30;40;50;70",
|
177
|
+
"MP;40;50;60;80",
|
178
|
+
"NP;50;60;70;90",
|
179
|
+
";m1;m2;m3",
|
180
|
+
";n1;n2;n3",
|
181
|
+
";o1;;o3",
|
182
|
+
"M;m1;m2;m3",
|
183
|
+
"N;n1;n2;n3",
|
184
|
+
"O;o1;;o3" ]
|
185
|
+
|
186
|
+
rows = 0
|
187
|
+
|
188
|
+
File.open(@outfile).each_with_index do |row, index|
|
189
|
+
row.chomp.should eq result[index]
|
190
|
+
rows += 1
|
191
|
+
end
|
192
|
+
|
193
|
+
rows.should eq result.size
|
194
|
+
end
|
195
|
+
|
103
196
|
end
|
104
197
|
|
105
198
|
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
require 'sycsvpro/transposer'
|
2
|
+
|
3
|
+
module Sycsvpro
|
4
|
+
|
5
|
+
describe Transposer do
|
6
|
+
|
7
|
+
before do
|
8
|
+
@infile = File.join(File.dirname(__FILE__), 'files/in6.csv')
|
9
|
+
@outfile = File.join(File.dirname(__FILE__), 'files/out.csv')
|
10
|
+
end
|
11
|
+
|
12
|
+
it "should transpose (change rows to columns) complete file" do
|
13
|
+
Sycsvpro::Transposer.new(infile: @infile,
|
14
|
+
outfile: @outfile).execute
|
15
|
+
|
16
|
+
result = [ "Year;;2008;2009;2010",
|
17
|
+
"SP;10;5;2;3",
|
18
|
+
"RP;20;10;5;5",
|
19
|
+
"Total;30;15;5;10",
|
20
|
+
"SP-O;100;10;20;70",
|
21
|
+
"RP-O;40;20;10;10",
|
22
|
+
"O;140;10;30;100" ]
|
23
|
+
|
24
|
+
rows = 0
|
25
|
+
|
26
|
+
File.open(@outfile).each_with_index do |line, i|
|
27
|
+
line.chomp.should eq result[i]
|
28
|
+
rows += 1
|
29
|
+
end
|
30
|
+
|
31
|
+
rows.should eq result.size
|
32
|
+
end
|
33
|
+
|
34
|
+
it "should transpose selected columns" do
|
35
|
+
Sycsvpro::Transposer.new(infile: @infile,
|
36
|
+
outfile: @outfile,
|
37
|
+
cols: "0-2").execute
|
38
|
+
|
39
|
+
result = [ "Year;;2008;2009;2010",
|
40
|
+
"SP;10;5;2;3",
|
41
|
+
"RP;20;10;5;5" ]
|
42
|
+
|
43
|
+
rows = 0
|
44
|
+
|
45
|
+
File.open(@outfile).each_with_index do |line, i|
|
46
|
+
line.chomp.should eq result[i]
|
47
|
+
rows += 1
|
48
|
+
end
|
49
|
+
|
50
|
+
rows.should eq result.size
|
51
|
+
end
|
52
|
+
|
53
|
+
it "should transpose selected rows and columns" do
|
54
|
+
Sycsvpro::Transposer.new(infile: @infile,
|
55
|
+
outfile: @outfile,
|
56
|
+
rows: "0,2-4",
|
57
|
+
cols: "0-2").execute
|
58
|
+
|
59
|
+
result = [ "Year;2008;2009;2010",
|
60
|
+
"SP;5;2;3",
|
61
|
+
"RP;10;5;5" ]
|
62
|
+
|
63
|
+
rows = 0
|
64
|
+
|
65
|
+
File.open(@outfile).each_with_index do |line, i|
|
66
|
+
line.chomp.should eq result[i]
|
67
|
+
rows += 1
|
68
|
+
end
|
69
|
+
|
70
|
+
rows.should eq result.size
|
71
|
+
end
|
72
|
+
|
73
|
+
|
74
|
+
end
|
75
|
+
|
76
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sycsvpro
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.13
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-07-
|
12
|
+
date: 2014-07-14 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -151,6 +151,7 @@ files:
|
|
151
151
|
- lib/sycsvpro/script_list.rb
|
152
152
|
- lib/sycsvpro/sorter.rb
|
153
153
|
- lib/sycsvpro/table.rb
|
154
|
+
- lib/sycsvpro/transposer.rb
|
154
155
|
- lib/sycsvpro/unique.rb
|
155
156
|
- lib/sycsvpro/version.rb
|
156
157
|
- spec/sycsvpro/aggregator_spec.rb
|
@@ -175,6 +176,7 @@ files:
|
|
175
176
|
- spec/sycsvpro/script_list_spec.rb
|
176
177
|
- spec/sycsvpro/sorter_spec.rb
|
177
178
|
- spec/sycsvpro/table_spec.rb
|
179
|
+
- spec/sycsvpro/transposer_spec.rb
|
178
180
|
- spec/sycsvpro/unique_spec.rb
|
179
181
|
- sycsvpro.gemspec
|
180
182
|
- sycsvpro.rdoc
|