sycsvpro 0.1.12 → 0.1.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile.lock +1 -1
- data/README.md +76 -7
- data/bin/sycsvpro +39 -7
- data/lib/sycsvpro/calculator.rb +32 -19
- data/lib/sycsvpro/dsl.rb +3 -2
- data/lib/sycsvpro/filter.rb +1 -1
- data/lib/sycsvpro/mapper.rb +72 -9
- data/lib/sycsvpro/merger.rb +19 -6
- data/lib/sycsvpro/transposer.rb +77 -0
- data/lib/sycsvpro/version.rb +1 -1
- data/lib/sycsvpro.rb +1 -0
- data/spec/sycsvpro/calculator_spec.rb +90 -0
- data/spec/sycsvpro/mapper_spec.rb +60 -2
- data/spec/sycsvpro/merger_spec.rb +93 -0
- data/spec/sycsvpro/transposer_spec.rb +76 -0
- metadata +4 -2
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -7,6 +7,7 @@ Processing of csv files. *sycsvpro* offers following functions
|
|
7
7
|
* extract rows and columns from a file
|
8
8
|
* remove duplicate lines from a file where duplicates are identified by key
|
9
9
|
columns (since version 0.1.11)
|
10
|
+
add unique to command line interface (since version 0.1.12)
|
10
11
|
* collect values of rows and assign them to categories
|
11
12
|
* map column values to new values
|
12
13
|
* allocate column values to a key column (since version 0.0.4)
|
@@ -22,6 +23,7 @@ Processing of csv files. *sycsvpro* offers following functions
|
|
22
23
|
version 0.1.4)
|
23
24
|
* join two file based on a joint column value (since version 0.1.7)
|
24
25
|
* merge files based on common headline columns (since version 0.1.10)
|
26
|
+
* transpose (swapping) rows and columns (since version 0.1.13)
|
25
27
|
|
26
28
|
To get help type
|
27
29
|
|
@@ -108,7 +110,7 @@ Collect all product rows (2, 3 and 4) to the category product
|
|
108
110
|
|
109
111
|
Map
|
110
112
|
---
|
111
|
-
Map the product names to new names
|
113
|
+
Map the product names to new names. Consider columns 2-4 only for mapping
|
112
114
|
|
113
115
|
The mapping file (mapping) uses the result from the collect command above
|
114
116
|
|
@@ -127,6 +129,35 @@ The mapping file (mapping) uses the result from the collect command above
|
|
127
129
|
|
128
130
|
$ sycsvpro -f in.csv -o out.csv map mapping -c 2-4
|
129
131
|
|
132
|
+
Transpose
|
133
|
+
---------
|
134
|
+
Swap rows and columns of revenue.csv to out.csv
|
135
|
+
|
136
|
+
$ sycsvpro -f revenue.csv -o out.csv transpose
|
137
|
+
|
138
|
+
2010;50;100;2000
|
139
|
+
2011;100;50;250
|
140
|
+
2012;150;10;300
|
141
|
+
2013;100;1000;3000
|
142
|
+
2014;200;20;20
|
143
|
+
customer;hello;indix;chiro
|
144
|
+
|
145
|
+
To use only columns 2013 and 2014 you can specify the columns to transpose
|
146
|
+
|
147
|
+
$ sycsvpro -f revenue.csv -o out.csv transpose -c 3-5
|
148
|
+
|
149
|
+
2013;100;1000;3000
|
150
|
+
2014;200;20;20
|
151
|
+
customer;hello;indix;chiro
|
152
|
+
|
153
|
+
To filter for hello only
|
154
|
+
|
155
|
+
$ sycsvpro -f revenue.csv -o out.csv transpose -c 3-5 -r 0,1
|
156
|
+
|
157
|
+
2013;100
|
158
|
+
2014;200
|
159
|
+
customer;hello
|
160
|
+
|
130
161
|
Allocate
|
131
162
|
--------
|
132
163
|
Allocate all the machine types to the customer
|
@@ -196,7 +227,7 @@ Process arithmetic operations on the contract count and create a target column
|
|
196
227
|
and a sum which is added at the end of the result file
|
197
228
|
|
198
229
|
$ sycsvpro -f in.csv -o out.csv calc -r 2-20 -h *,target
|
199
|
-
-c 6:*2,7:
|
230
|
+
-c 6:*2,7:c6*10
|
200
231
|
|
201
232
|
$ cat out.csv
|
202
233
|
customer;machine;control;drive;motor;date;contract;target
|
@@ -210,6 +241,20 @@ and a sum which is added at the end of the result file
|
|
210
241
|
In the sum row non-numbers in the columns are converted to 0. Therefore column 0
|
211
242
|
is summed up to 0 as all strings are converted to 0.
|
212
243
|
|
244
|
+
Write only columns 0, 6 and 7 by specifying write columns
|
245
|
+
|
246
|
+
$ sycsvpro -f in.csv -o out.csv calc -r 2-20 -h "customer,contract,target"
|
247
|
+
-c 6:*2,7:c6*10
|
248
|
+
-w 0,6-7
|
249
|
+
$ cat out.csv
|
250
|
+
customer;contract;target
|
251
|
+
hello;2;20
|
252
|
+
hello;2;20
|
253
|
+
indix;2;20
|
254
|
+
chiro;2;20
|
255
|
+
chiro;2;20
|
256
|
+
0;10;100
|
257
|
+
|
213
258
|
Join
|
214
259
|
----
|
215
260
|
Join the machine and contract file with columns from the customer address file
|
@@ -250,6 +295,7 @@ Merge files machine_count.csv and revenue.csv based on the year columns.
|
|
250
295
|
This will create the out.csv
|
251
296
|
|
252
297
|
```
|
298
|
+
$ cat out.csv
|
253
299
|
;2010;2013;2014
|
254
300
|
hello;1;0;0
|
255
301
|
indix;1;0;0
|
@@ -266,6 +312,7 @@ Sort rows on specified columns as an example sort rows based on customer
|
|
266
312
|
|
267
313
|
$ sycsvpro -f in.csv -o out.csv sort -r 2-20 -c s:0,d:5
|
268
314
|
|
315
|
+
$cat out.csv
|
269
316
|
customer;machine;control;drive;motor;date;contract;target
|
270
317
|
hello;h2;con123;dri130;mot110;1.02.3012;1
|
271
318
|
hello;h1;con123;dri120;mot100;1.01.3013;1
|
@@ -406,8 +453,8 @@ row are added on top of the sorted file
|
|
406
453
|
* `sycsvpro -f infile analyze` now lists the columns with sample data
|
407
454
|
* Add `params` method to *Dsl* that retrieves the params provided in the execute
|
408
455
|
command: `sycsvpro execute script.rb method infile param1 param2`
|
409
|
-
* Add `
|
410
|
-
run: `
|
456
|
+
* Add `clean\_up` to *Dsl* that takes files to be deleted after the script has
|
457
|
+
run: `clean\_up(%w{file1 file2})`
|
411
458
|
|
412
459
|
Version 0.1.4
|
413
460
|
-------------
|
@@ -465,7 +512,7 @@ Version 0.1.7
|
|
465
512
|
This will join infile.csv with source.csv based on the join columns (j "1=3").
|
466
513
|
From source.csv columns 2 and 4 (-c "2,4") will be inserted at column
|
467
514
|
positions 1 and 3 (-p "1,3"). The header will be used from the infile.csv
|
468
|
-
(-h "
|
515
|
+
(-h "\*") supplemented by the columns A and B (-i "A,B") that will also be
|
469
516
|
positioned at column 1 and 3 (-p "1,3").
|
470
517
|
|
471
518
|
Version 0.1.8
|
@@ -474,8 +521,9 @@ Version 0.1.8
|
|
474
521
|
|
475
522
|
Version 0.1.9
|
476
523
|
-------------
|
477
|
-
* When creating columns dynamically they are in arbitrary sequence.
|
478
|
-
provide a switch `sort: "2"` which will sort the header from
|
524
|
+
* When creating columns dynamically in count they are in arbitrary sequence.
|
525
|
+
You can now provide a switch `sort: "2"` which will sort the header from
|
526
|
+
column 2 on.
|
479
527
|
|
480
528
|
Version 0.1.10
|
481
529
|
--------------
|
@@ -488,6 +536,27 @@ Version 0.1.11
|
|
488
536
|
* Unique removes duplicate lines from the infile. Duplicate lines are identified
|
489
537
|
by key columns
|
490
538
|
|
539
|
+
Version 0.1.12
|
540
|
+
--------------
|
541
|
+
* Add unique to sycsvpro command line interface
|
542
|
+
|
543
|
+
Version 0.1.13
|
544
|
+
--------------
|
545
|
+
* Optimize Mapper by only considering columns provided for mapping which should
|
546
|
+
increase performance
|
547
|
+
* match\_boolean\_filter? in Filter now also processes strings with single
|
548
|
+
quotes inside
|
549
|
+
* Transposer transposes rows and columns, that is, makes columns rows and vice versa
|
550
|
+
* Calculator can now have colons inside the operation
|
551
|
+
sycsvpro -f in.csv -o out.csv calc -c "122:+[1,3,5].inject(:+)"
|
552
|
+
Previously the operation would have been cut after inject(
|
553
|
+
* A write flag in Calculator specifies which columns to add to the result.
|
554
|
+
* Calculator introduced a switch 'final\_header' which indicates the header
|
555
|
+
provided should not be filtered in regard to a provided 'write' flag but
|
556
|
+
written to the result file as is
|
557
|
+
* Merger now doesn't require a key column, that is, files can be merged without
|
558
|
+
key columns.
|
559
|
+
|
491
560
|
Installation
|
492
561
|
============
|
493
562
|
[](http://badge.fury.io/rb/sycsvpro)
|
data/bin/sycsvpro
CHANGED
@@ -589,6 +589,27 @@ command :map do |c|
|
|
589
589
|
end
|
590
590
|
end
|
591
591
|
|
592
|
+
desc 'Transposes rows and columns'
|
593
|
+
command :transpose do |c|
|
594
|
+
c.desc 'Rows to consider'
|
595
|
+
c.arg_name 'ROW1,ROW2,ROW10-ROW30,45-EOF,REGEXP'
|
596
|
+
c.flag [:r, :row], :must_match => row_regex
|
597
|
+
|
598
|
+
c.desc 'Columns to consider for mapping'
|
599
|
+
c.arg_name 'COL1,COL2,COL10-COL30'
|
600
|
+
c.flag [:c, :col], :must_match => /\d+(?:,\d+|-\d+)*/
|
601
|
+
|
602
|
+
c.action do |global_options,options,args|
|
603
|
+
print "Transpose..."
|
604
|
+
transpose = Sycsvpro::Transposer.new(infile: global_options[:f],
|
605
|
+
outfile: global_options[:o],
|
606
|
+
rows: options[:r],
|
607
|
+
cols: options[:c])
|
608
|
+
transpose.execute
|
609
|
+
puts "done"
|
610
|
+
end
|
611
|
+
end
|
612
|
+
|
592
613
|
desc 'Process operations on columns. Optionally add a sum row for columns with'+
|
593
614
|
'number values'
|
594
615
|
command :calc do |c|
|
@@ -600,6 +621,11 @@ command :calc do |c|
|
|
600
621
|
default_value '*'
|
601
622
|
c.flag [:h, :header], :must_match => /^[*|\w ]+(?:,[\w ]+)*/
|
602
623
|
|
624
|
+
c.desc 'Indicates whether the provided header is final. That is if columns'+
|
625
|
+
' to be written to the outfile are selected by the write flag then '+
|
626
|
+
'the header should left untouched and written as is'
|
627
|
+
c.switch [:f, :final], :default_value => false
|
628
|
+
|
603
629
|
c.desc 'Rows to consider for calculations'
|
604
630
|
c.arg_name 'ROW1,ROW2-ROW10,45-EOF,REGEXP'
|
605
631
|
c.flag [:r, :row], :must_match => row_regex
|
@@ -610,6 +636,10 @@ command :calc do |c|
|
|
610
636
|
c.arg_name "COL1:*2,COL2:-C3,COL3:*2+(4+C5)"
|
611
637
|
c.flag [:c, :col], :must_match => /^\d+:.+/
|
612
638
|
|
639
|
+
c.desc 'Columns to be written to the result file'
|
640
|
+
c.arg_name "COL1,COL2-COL5"
|
641
|
+
c.flag [:w, :write], :must_match => /\d+(?:,\d+|-\d+)*/
|
642
|
+
|
613
643
|
c.desc 'Date format of date columns'
|
614
644
|
c.arg_name '%d.%m.%Y|%Y-%m-%d|...'
|
615
645
|
c.flag [:df]
|
@@ -622,13 +652,15 @@ command :calc do |c|
|
|
622
652
|
help_now! "You need to provide the column flag" if options[:c].nil?
|
623
653
|
|
624
654
|
print "Calculating..."
|
625
|
-
calculator = Sycsvpro::Calculator.new(infile:
|
626
|
-
outfile:
|
627
|
-
header:
|
628
|
-
|
629
|
-
|
630
|
-
|
631
|
-
|
655
|
+
calculator = Sycsvpro::Calculator.new(infile: global_options[:f],
|
656
|
+
outfile: global_options[:o],
|
657
|
+
header: options[:h],
|
658
|
+
final_header: options[:f],
|
659
|
+
rows: options[:r],
|
660
|
+
cols: options[:c],
|
661
|
+
write: options[:w],
|
662
|
+
sum: options[:s],
|
663
|
+
df: options[:df])
|
632
664
|
calculator.execute
|
633
665
|
puts "done"
|
634
666
|
end
|
data/lib/sycsvpro/calculator.rb
CHANGED
@@ -58,8 +58,13 @@ module Sycsvpro
|
|
58
58
|
attr_reader :formulae
|
59
59
|
# header of the outfile
|
60
60
|
attr_reader :header
|
61
|
+
# indicates whether this header is final and should not be filtered in
|
62
|
+
# respect to the columns defined by write
|
63
|
+
attr_reader :final_header
|
61
64
|
# filter that is used for columns
|
62
65
|
attr_reader :columns
|
66
|
+
# selected columns to be written to outfile
|
67
|
+
attr_reader :write
|
63
68
|
# if true add a sum row at the bottom of the out file
|
64
69
|
attr_reader :add_sum_row
|
65
70
|
|
@@ -67,29 +72,36 @@ module Sycsvpro
|
|
67
72
|
# can be supplemented with additional column names that are generated due
|
68
73
|
# to an arithmetic operation that creates new columns
|
69
74
|
# :call-seq:
|
70
|
-
# Sycsvpro::Calculator.new(infile:
|
71
|
-
# outfile:
|
72
|
-
# df:
|
73
|
-
# rows:
|
74
|
-
# header:
|
75
|
-
#
|
76
|
-
#
|
75
|
+
# Sycsvpro::Calculator.new(infile: "in.csv",
|
76
|
+
# outfile: "out.csv",
|
77
|
+
# df: "%d.%m.%Y",
|
78
|
+
# rows: "1,2,BEGINn3>20END",
|
79
|
+
# header: "*,Count",
|
80
|
+
# final_header: false,
|
81
|
+
# cols: "4:c1+c2*2",
|
82
|
+
# write: "1,3-5",
|
83
|
+
# sum: true).execute
|
77
84
|
# infile:: File that contains the rows to be operated on
|
78
85
|
# outfile:: Result of the operations
|
79
86
|
# df:: Date format
|
80
87
|
# rows:: Row filter that indicates which rows to consider
|
81
88
|
# header:: Header of the columns
|
89
|
+
# final_header:: Indicates that if write filters columns the header should
|
90
|
+
# not be filtered when written
|
82
91
|
# cols:: Operations on the column values
|
92
|
+
# write:: Columns that are written to the outfile
|
83
93
|
# sum:: Indicate whether to add a sum row
|
84
94
|
def initialize(options={})
|
85
|
-
@infile
|
86
|
-
@outfile
|
87
|
-
@date_format
|
88
|
-
@row_filter
|
89
|
-
@
|
90
|
-
@
|
91
|
-
@
|
92
|
-
@
|
95
|
+
@infile = options[:infile]
|
96
|
+
@outfile = options[:outfile]
|
97
|
+
@date_format = options[:df] || "%Y-%m-%d"
|
98
|
+
@row_filter = RowFilter.new(options[:rows], df: options[:df])
|
99
|
+
@write_filter = ColumnFilter.new(options[:write], df: options[:df])
|
100
|
+
@header = Header.new(options[:header])
|
101
|
+
@final_header = options[:final_header]
|
102
|
+
@sum_row = []
|
103
|
+
@add_sum_row = options[:sum]
|
104
|
+
@formulae = {}
|
93
105
|
create_calculator(options[:cols])
|
94
106
|
end
|
95
107
|
|
@@ -112,7 +124,8 @@ module Sycsvpro
|
|
112
124
|
|
113
125
|
unless processed_header
|
114
126
|
header_row = header.process(line.chomp)
|
115
|
-
|
127
|
+
header_row = @write_filter.process(header_row) unless @final_header
|
128
|
+
out.puts header_row unless header_row.nil? or header_row.empty?
|
116
129
|
processed_header = true
|
117
130
|
next
|
118
131
|
end
|
@@ -123,7 +136,7 @@ module Sycsvpro
|
|
123
136
|
formulae.each do |col, formula|
|
124
137
|
@columns[col.to_i] = eval(formula)
|
125
138
|
end
|
126
|
-
out.puts @columns.join(';')
|
139
|
+
out.puts @write_filter.process(@columns.join(';'))
|
127
140
|
|
128
141
|
@columns.each_with_index do |column, index|
|
129
142
|
column = 0 unless column.to_s =~ /^[\d\.,]*$/
|
@@ -137,7 +150,7 @@ module Sycsvpro
|
|
137
150
|
|
138
151
|
end
|
139
152
|
|
140
|
-
out.puts @sum_row.join(';') if add_sum_row
|
153
|
+
out.puts @write_filter.process(@sum_row.join(';')) if add_sum_row
|
141
154
|
|
142
155
|
end
|
143
156
|
end
|
@@ -154,7 +167,7 @@ module Sycsvpro
|
|
154
167
|
# column 1 + 1 c[4] = c[1] + 1
|
155
168
|
def create_calculator(code)
|
156
169
|
code.split(/,(?=\d+:)/).each do |operation|
|
157
|
-
col, term = operation.split(':')
|
170
|
+
col, term = operation.split(':', 2)
|
158
171
|
term = "c#{col}#{term}" if term =~ /^[+\-*\/%]/
|
159
172
|
formulae[col] = term
|
160
173
|
end
|
data/lib/sycsvpro/dsl.rb
CHANGED
@@ -76,8 +76,9 @@ module Dsl
|
|
76
76
|
end
|
77
77
|
end
|
78
78
|
|
79
|
-
# Remove leading and trailing " and spaces as well as reducing more than 2
|
80
|
-
# from csv values.
|
79
|
+
# Remove leading and trailing " and spaces as well as reducing more than 2
|
80
|
+
# spaces between words from csv values. Replace ; with , from values as ;
|
81
|
+
# is used as value separator
|
81
82
|
def unstring(line)
|
82
83
|
line = str2utf8(line)
|
83
84
|
line.scan(/(?<=^"|;")[^"]+(?=;)+[^"]*|;+[^"](?=";|"$)/).each do |value|
|
data/lib/sycsvpro/filter.rb
CHANGED
data/lib/sycsvpro/mapper.rb
CHANGED
@@ -2,8 +2,33 @@
|
|
2
2
|
module Sycsvpro
|
3
3
|
|
4
4
|
# Map values to new values described in a mapping file
|
5
|
+
#
|
6
|
+
# in.csv
|
7
|
+
#
|
8
|
+
# | ID | Name |
|
9
|
+
# | --- | ---- |
|
10
|
+
# | 1 | Hank |
|
11
|
+
# | 2 | Jane |
|
12
|
+
#
|
13
|
+
# mapping
|
14
|
+
#
|
15
|
+
# 1:01
|
16
|
+
# 2:02
|
17
|
+
#
|
18
|
+
# Sycsvpro::Mapper.new(infile: "in.csv",
|
19
|
+
# outfile: "out.csv",
|
20
|
+
# mapping: "mapping",
|
21
|
+
# cols: "0").execute
|
22
|
+
# out.csv
|
23
|
+
#
|
24
|
+
# | ID | Name |
|
25
|
+
# | --- | ---- |
|
26
|
+
# | 01 | Hank |
|
27
|
+
# | 02 | Jane |
|
5
28
|
class Mapper
|
6
29
|
|
30
|
+
include Dsl
|
31
|
+
|
7
32
|
# infile contains the data that is operated on
|
8
33
|
attr_reader :infile
|
9
34
|
# outfile is the file where the result is written to
|
@@ -12,15 +37,29 @@ module Sycsvpro
|
|
12
37
|
attr_reader :mapper
|
13
38
|
# filter that is used for rows
|
14
39
|
attr_reader :row_filter
|
15
|
-
# filter that
|
40
|
+
# filter that contains columns that are considered for mappings
|
16
41
|
attr_reader :col_filter
|
17
42
|
|
18
43
|
# Creates new mapper
|
44
|
+
# :call-seq:
|
45
|
+
# Sycsvpro::Mapper.new(infile: "in.csv",
|
46
|
+
# outfile: "out.csv",
|
47
|
+
# mapping: "mapping.csv",
|
48
|
+
# rows: "1,3-5",
|
49
|
+
# cols: "3,4-7"
|
50
|
+
# df: "%Y-%m-%d").execute
|
51
|
+
#
|
52
|
+
# infile:: File that contains columns to be mapped
|
53
|
+
# outfile:: File that contains the mapping result after execute
|
54
|
+
# mapping:: File that contains the mappings. Mappings are separated by ':'
|
55
|
+
# rows:: Rows to consider for mappings
|
56
|
+
# cols:: Columns that should be mapped
|
57
|
+
# df:: Date format for row filter if rows are filtered on date values
|
19
58
|
def initialize(options={})
|
20
59
|
@infile = options[:infile]
|
21
60
|
@outfile = options[:outfile]
|
22
|
-
@row_filter = RowFilter.new(options[:
|
23
|
-
@col_filter =
|
61
|
+
@row_filter = RowFilter.new(options[:rows], df: options[:df])
|
62
|
+
@col_filter = init_col_filter(options[:cols], @infile)
|
24
63
|
@mapper = {}
|
25
64
|
init_mapper(options[:mapping])
|
26
65
|
end
|
@@ -29,25 +68,49 @@ module Sycsvpro
|
|
29
68
|
def execute
|
30
69
|
File.open(outfile, 'w') do |out|
|
31
70
|
File.new(infile, 'r').each_with_index do |line, index|
|
32
|
-
result =
|
71
|
+
result = row_filter.process(line, row: index)
|
33
72
|
next if result.chomp.empty? or result.nil?
|
34
|
-
|
35
|
-
|
73
|
+
result += ' ' if result =~ /;$/
|
74
|
+
cols = result.split(';')
|
75
|
+
@col_filter.each do |key|
|
76
|
+
substitute = mapper[cols[key]]
|
77
|
+
cols[key] = substitute if substitute
|
36
78
|
end
|
37
|
-
out.puts
|
79
|
+
out.puts cols.join(';').strip
|
38
80
|
end
|
39
81
|
end
|
40
82
|
end
|
41
83
|
|
42
84
|
private
|
43
85
|
|
44
|
-
# Initializes the mappings
|
86
|
+
# Initializes the mappings. A mapping consists of the value to be mapped
|
87
|
+
# to another value. The values are separated by colons ':'
|
88
|
+
# Example:
|
89
|
+
# source_value:mapping_value
|
45
90
|
def init_mapper(file)
|
46
91
|
File.new(file, 'r').each_line do |line|
|
47
|
-
from, to = line.
|
92
|
+
from, to = unstring(line).split(':')
|
48
93
|
mapper[from] = to
|
49
94
|
end
|
50
95
|
end
|
96
|
+
|
97
|
+
# Initialize the col_filter that contains columns to be considered for
|
98
|
+
# mapping. If no columns are provided, that is being empty, a filter with
|
99
|
+
# all columns is returned
|
100
|
+
def init_col_filter(columns, source)
|
101
|
+
if columns.nil?
|
102
|
+
File.open(source, 'r').each do |line|
|
103
|
+
line = unstring(line)
|
104
|
+
next if line.empty?
|
105
|
+
line += ' ' if line =~ /;$/
|
106
|
+
size = line.split(';').size
|
107
|
+
columns = "0-#{size-1}"
|
108
|
+
break
|
109
|
+
end
|
110
|
+
end
|
111
|
+
ColumnFilter.new(columns).filter.flatten
|
112
|
+
end
|
113
|
+
|
51
114
|
end
|
52
115
|
|
53
116
|
end
|
data/lib/sycsvpro/merger.rb
CHANGED
@@ -69,21 +69,25 @@ module Sycsvpro
|
|
69
69
|
# source_header:: pattern for each header of the source file to determine
|
70
70
|
# the column. The pattern is a regex without the enclosing slashes '/'
|
71
71
|
# key:: first column value from the source file that is used as first
|
72
|
-
# column in the target file
|
72
|
+
# column in the target file. The key is optional.
|
73
73
|
def initialize(options = {})
|
74
74
|
@outfile = options[:outfile]
|
75
75
|
@header_cols = options[:header].split(',')
|
76
76
|
@source_header = options[:source_header].split(',')
|
77
|
-
@key = options[:key].split(',')
|
77
|
+
@key = options[:key] ? options[:key].split(',') : []
|
78
|
+
@has_key = !@key.empty?
|
78
79
|
@files = options[:files].split(',')
|
80
|
+
if @source_header.count != @files.count
|
81
|
+
raise "file count has to be equal to source_header count"
|
82
|
+
end
|
79
83
|
end
|
80
84
|
|
81
85
|
# Merges the files based on the provided parameters
|
82
86
|
def execute
|
83
87
|
File.open(outfile, 'w') do |out|
|
84
|
-
out.puts "
|
88
|
+
out.puts "#{';' unless @key.empty?}#{header_cols.join(';')}"
|
85
89
|
files.each do |file|
|
86
|
-
@current_key =
|
90
|
+
@current_key = create_current_key
|
87
91
|
@current_source_header = @source_header.shift
|
88
92
|
processed_header = false
|
89
93
|
File.open(file).each_with_index do |line, index|
|
@@ -110,16 +114,25 @@ module Sycsvpro
|
|
110
114
|
columns[i] = c.scan(Regexp.new(@current_source_header)).flatten[0]
|
111
115
|
end
|
112
116
|
|
113
|
-
@file_header = [@current_key.to_i]
|
117
|
+
@file_header = @current_key ? [@current_key.to_i] : []
|
118
|
+
|
114
119
|
header_cols.each do |h|
|
115
120
|
@file_header << columns.index(h)
|
116
121
|
end
|
122
|
+
|
117
123
|
@file_header.compact!
|
118
124
|
end
|
119
125
|
|
126
|
+
# create the current key: returns the next key value or nil if it is missing or blank
|
127
|
+
def create_current_key
|
128
|
+
key = @key.shift
|
129
|
+
key.nil? || key.strip.empty? ? nil : key
|
130
|
+
end
|
131
|
+
|
120
132
|
# create a line filtered by the file_header
|
121
133
|
def create_line(columns)
|
122
|
-
|
134
|
+
empty_col = ';' if @has_key && @current_key.nil?
|
135
|
+
"#{empty_col}#{columns.values_at(*@file_header).join(';')}"
|
123
136
|
end
|
124
137
|
|
125
138
|
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
# Operating csv files
|
2
|
+
module Sycsvpro
|
3
|
+
|
4
|
+
# Transposes rows to columns and vice versa
|
5
|
+
#
|
6
|
+
# Example
|
7
|
+
#
|
8
|
+
# infile.csv
|
9
|
+
# | Year | SP | RP | Total | SP-O | RP-O | O |
|
10
|
+
# | ---- | -- | -- | ----- | ---- | ---- | --- |
|
11
|
+
# | | 10 | 20 | 30 | 100 | 40 | 140 |
|
12
|
+
# | 2008 | 5 | 10 | 15 | 10 | 20 | 10 |
|
13
|
+
# | 2009 | 2 | 5 | 5 | 20 | 10 | 30 |
|
14
|
+
# | 2010 | 3 | 5 | 10 | 70 | 10 | 100 |
|
15
|
+
#
|
16
|
+
# outfile.csv
|
17
|
+
# | Year | | 2008 | 2009 | 2010 |
|
18
|
+
# | ----- | --- | ---- | ---- | ---- |
|
19
|
+
# | SP | 10 | 5 | 2 | 3 |
|
20
|
+
# | RP | 20 | 10 | 5 | 5 |
|
21
|
+
# | Total | 30 | 15 | 5 | 10 |
|
22
|
+
# | SP-O | 100 | 10 | 20 | 70 |
|
23
|
+
# | RP-O | 40 | 20 | 10 | 10 |
|
24
|
+
# | O | 140 | 10 | 30 | 100 |
|
25
|
+
#
|
26
|
+
class Transposer
|
27
|
+
|
28
|
+
include Dsl
|
29
|
+
|
30
|
+
# infile contains the data that is operated on
|
31
|
+
attr_reader :infile
|
32
|
+
# outfile is the file where the result is written to
|
33
|
+
attr_reader :outfile
|
34
|
+
# filter that is used for rows
|
35
|
+
attr_reader :row_filter
|
36
|
+
# filter that is used for columns
|
37
|
+
attr_reader :col_filter
|
38
|
+
|
39
|
+
# Create a new Transposer
|
40
|
+
# :call-seq:
|
41
|
+
# Sycsvpro::Transposer.new(infile: "infile.csv",
|
42
|
+
# outfile: "outfile.csv",
|
43
|
+
# rows: "0,3-5",
|
44
|
+
# cols: "1,3").execute
|
45
|
+
def initialize(options = {})
|
46
|
+
@infile = options[:infile]
|
47
|
+
@outfile = options[:outfile]
|
48
|
+
@row_filter = RowFilter.new(options[:rows])
|
49
|
+
@col_filter = ColumnFilter.new(options[:cols])
|
50
|
+
end
|
51
|
+
|
52
|
+
# Executes the transpose by reading the infile and writing the result to
|
53
|
+
# the outfile
|
54
|
+
def execute
|
55
|
+
transpose = {}
|
56
|
+
|
57
|
+
File.open(@infile).each_with_index do |line, index|
|
58
|
+
line = unstring(line)
|
59
|
+
next if line.empty?
|
60
|
+
|
61
|
+
result = @col_filter.process(@row_filter.process(line, row: index))
|
62
|
+
next if result.nil?
|
63
|
+
|
64
|
+
result.split(';').each_with_index do |col, index|
|
65
|
+
transpose[index] ||= []
|
66
|
+
transpose[index] << col
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
File.open(@outfile, 'w') do |out|
|
71
|
+
transpose.values.each { |value| out.puts value.join(';') }
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
end
|
76
|
+
|
77
|
+
end
|
data/lib/sycsvpro/version.rb
CHANGED
data/lib/sycsvpro.rb
CHANGED
@@ -12,6 +12,96 @@ module Sycsvpro
|
|
12
12
|
@out_file = File.join(File.dirname(__FILE__), "files/machines_out.csv")
|
13
13
|
end
|
14
14
|
|
15
|
+
it "should ignore colons within calculation expression" do
|
16
|
+
cols = "3:+[c1,c2].inject(:+),4:c2*3"
|
17
|
+
header = "*,times"
|
18
|
+
|
19
|
+
calculator = Calculator.new(infile: @in_number_file,
|
20
|
+
outfile: @out_file,
|
21
|
+
header: header,
|
22
|
+
cols: cols)
|
23
|
+
|
24
|
+
calculator.execute
|
25
|
+
|
26
|
+
result = [ "customer;before;between;after;times",
|
27
|
+
"Fink;2;3;6;9",
|
28
|
+
"Haas;3;1;10;3",
|
29
|
+
"Gent;4;4;12;12",
|
30
|
+
"Rank;5;4;10;12" ]
|
31
|
+
|
32
|
+
rows = 0
|
33
|
+
|
34
|
+
File.open(@out_file).each_with_index do |line, index|
|
35
|
+
line.chomp.should eq result[index]
|
36
|
+
rows += 1
|
37
|
+
end
|
38
|
+
|
39
|
+
rows.should eq result.size
|
40
|
+
end
|
41
|
+
|
42
|
+
it "should save only specified columns" do
|
43
|
+
cols = "3:+[c1,c2].inject(:+),4:c3*3"
|
44
|
+
write = "0,3-4"
|
45
|
+
header = "customer;sum;times"
|
46
|
+
|
47
|
+
calculator = Calculator.new(infile: @in_number_file,
|
48
|
+
outfile: @out_file,
|
49
|
+
header: header,
|
50
|
+
final_header: true,
|
51
|
+
write: write,
|
52
|
+
cols: cols,
|
53
|
+
sum: true)
|
54
|
+
|
55
|
+
calculator.execute
|
56
|
+
|
57
|
+
result = [ "customer;sum;times",
|
58
|
+
"Fink;6;18",
|
59
|
+
"Haas;10;30",
|
60
|
+
"Gent;12;36",
|
61
|
+
"Rank;10;30",
|
62
|
+
"0;38;114" ]
|
63
|
+
|
64
|
+
rows = 0
|
65
|
+
|
66
|
+
File.open(@out_file).each_with_index do |line, index|
|
67
|
+
line.chomp.should eq result[index]
|
68
|
+
rows += 1
|
69
|
+
end
|
70
|
+
|
71
|
+
rows.should eq result.size
|
72
|
+
end
|
73
|
+
|
74
|
+
it "should save only specified columns" do
|
75
|
+
cols = "3:+[c1,c2].inject(:+),4:c3*3"
|
76
|
+
write = "0,3-4"
|
77
|
+
header = "*,times"
|
78
|
+
|
79
|
+
calculator = Calculator.new(infile: @in_number_file,
|
80
|
+
outfile: @out_file,
|
81
|
+
header: header,
|
82
|
+
write: write,
|
83
|
+
cols: cols,
|
84
|
+
sum: true)
|
85
|
+
|
86
|
+
calculator.execute
|
87
|
+
|
88
|
+
result = [ "customer;after;times",
|
89
|
+
"Fink;6;18",
|
90
|
+
"Haas;10;30",
|
91
|
+
"Gent;12;36",
|
92
|
+
"Rank;10;30",
|
93
|
+
"0;38;114" ]
|
94
|
+
|
95
|
+
rows = 0
|
96
|
+
|
97
|
+
File.open(@out_file).each_with_index do |line, index|
|
98
|
+
line.chomp.should eq result[index]
|
99
|
+
rows += 1
|
100
|
+
end
|
101
|
+
|
102
|
+
rows.should eq result.size
|
103
|
+
end
|
104
|
+
|
15
105
|
it "should operate on existing row" do
|
16
106
|
rows = "2-8"
|
17
107
|
cols = "3:*3,4:*4+1"
|
@@ -6,12 +6,16 @@ module Sycsvpro
|
|
6
6
|
|
7
7
|
before do
|
8
8
|
@in_file = File.join(File.dirname(__FILE__), "files/in.csv")
|
9
|
+
@in_file5 = File.join(File.dirname(__FILE__), "files/in5.csv")
|
9
10
|
@out_file = File.join(File.dirname(__FILE__), "files/out.csv")
|
10
11
|
@mappings = File.join(File.dirname(__FILE__), "files/mappings")
|
11
12
|
end
|
12
13
|
|
13
|
-
it "should map values to new values" do
|
14
|
-
mapper = Mapper.new(infile:
|
14
|
+
it "should map values to new values in all columns" do
|
15
|
+
mapper = Mapper.new(infile: @in_file,
|
16
|
+
outfile: @out_file,
|
17
|
+
rows: "0-7",
|
18
|
+
mapping: @mappings)
|
15
19
|
|
16
20
|
mapper.execute
|
17
21
|
|
@@ -30,6 +34,60 @@ module Sycsvpro
|
|
30
34
|
|
31
35
|
end
|
32
36
|
|
37
|
+
it "should map values to new values on specified columns only" do
|
38
|
+
mapper = Mapper.new(infile: @in_file,
|
39
|
+
outfile: @out_file,
|
40
|
+
rows: "0-7",
|
41
|
+
cols: "4",
|
42
|
+
mapping: @mappings).execute
|
43
|
+
|
44
|
+
result = [ "customer;contract-number;expires-on;machine;product1;product2",
|
45
|
+
"Fink;1234;20.12.2015;f1;control123;dri222",
|
46
|
+
"Haas;3322;1.10.2011;h1;control332;dri111",
|
47
|
+
"Gent;4323;1.3.2014;g1;control123;dri111",
|
48
|
+
"Fink;1234;30.12.2016;f2;control333;dri321",
|
49
|
+
"Rank;3232;1.5.2013;r1;control332;dri321",
|
50
|
+
"Klig;4432;;k1;control332;dri222",
|
51
|
+
"fink;1234;;f3;control332;dri321" ]
|
52
|
+
|
53
|
+
rows = 0
|
54
|
+
|
55
|
+
File.open(@out_file).each_with_index do |line, index|
|
56
|
+
line.chomp.should eq result[index]
|
57
|
+
rows += 1
|
58
|
+
end
|
59
|
+
|
60
|
+
rows.should eq result.size
|
61
|
+
|
62
|
+
end
|
63
|
+
|
64
|
+
it "should map values to new values where last column is empty" do
|
65
|
+
mapper = Mapper.new(infile: @in_file5,
|
66
|
+
outfile: @out_file,
|
67
|
+
cols: "5",
|
68
|
+
mapping: @mappings).execute
|
69
|
+
|
70
|
+
result = [ "customer;contract-number;expires-on;machine;product1;product2",
|
71
|
+
"Fink;1234;20.12.2015;f1;con123;drive222",
|
72
|
+
"Haas;3322;1.10.2011;h1;con332;drive111",
|
73
|
+
"Gent;4323;1.3.2014;g1;con123;drive111",
|
74
|
+
"Fink;1234;30.12.2016;f2;con333;drive321",
|
75
|
+
"Rank;3232;1.5.2013;r1;con332;drive321",
|
76
|
+
"Klig;4432;;k1;con332;drive222",
|
77
|
+
"fink;1234;;f3;con332;drive321",
|
78
|
+
"zink;8839;8.8.2018;z3;con332;" ]
|
79
|
+
|
80
|
+
rows = 0
|
81
|
+
|
82
|
+
File.open(@out_file).each_with_index do |line, index|
|
83
|
+
line.chomp.should eq result[index]
|
84
|
+
rows += 1
|
85
|
+
end
|
86
|
+
|
87
|
+
rows.should eq result.size
|
88
|
+
|
89
|
+
end
|
90
|
+
|
33
91
|
end
|
34
92
|
|
35
93
|
end
|
@@ -7,6 +7,8 @@ module Sycsvpro
|
|
7
7
|
before do
|
8
8
|
@file1 = File.join(File.dirname(__FILE__), "files/merge1.csv")
|
9
9
|
@file2 = File.join(File.dirname(__FILE__), "files/merge2.csv")
|
10
|
+
@file3 = File.join(File.dirname(__FILE__), "files/merge3.csv")
|
11
|
+
@file4 = File.join(File.dirname(__FILE__), "files/merge4.csv")
|
10
12
|
@outfile = File.join(File.dirname(__FILE__), "files/merged.csv")
|
11
13
|
end
|
12
14
|
|
@@ -100,6 +102,97 @@ module Sycsvpro
|
|
100
102
|
rows.should eq result.size
|
101
103
|
end
|
102
104
|
|
105
|
+
it "should merge two files without key columns" do
|
106
|
+
header = "2010,2011,2012,2014"
|
107
|
+
source_header = "(\\d{4}),(\\d{4})"
|
108
|
+
|
109
|
+
Sycsvpro::Merger.new(outfile: @outfile,
|
110
|
+
files: "#{@file4},#{@file3}",
|
111
|
+
header: header,
|
112
|
+
source_header: source_header).execute
|
113
|
+
|
114
|
+
result = [ "2010;2011;2012;2014",
|
115
|
+
"20;30;40;60",
|
116
|
+
"30;40;50;70",
|
117
|
+
"40;50;60;80",
|
118
|
+
"50;60;70;90",
|
119
|
+
"m1;m2;m3",
|
120
|
+
"n1;n2;n3",
|
121
|
+
"o1;;o3", ]
|
122
|
+
|
123
|
+
rows = 0
|
124
|
+
|
125
|
+
File.open(@outfile).each_with_index do |row, index|
|
126
|
+
row.chomp.should eq result[index]
|
127
|
+
rows += 1
|
128
|
+
end
|
129
|
+
|
130
|
+
rows.should eq result.size
|
131
|
+
end
|
132
|
+
|
133
|
+
it "should merge two files key columns in one file only" do
|
134
|
+
header = "2010,2011,2012,2014"
|
135
|
+
key = "0"
|
136
|
+
source_header = "(\\d{4}),(\\d{4})"
|
137
|
+
|
138
|
+
Sycsvpro::Merger.new(outfile: @outfile,
|
139
|
+
files: "#{@file1},#{@file3}",
|
140
|
+
header: header,
|
141
|
+
key: key,
|
142
|
+
source_header: source_header).execute
|
143
|
+
|
144
|
+
result = [ ";2010;2011;2012;2014",
|
145
|
+
"SP;20;30;40;60",
|
146
|
+
"RP;30;40;50;70",
|
147
|
+
"MP;40;50;60;80",
|
148
|
+
"NP;50;60;70;90",
|
149
|
+
";m1;m2;m3",
|
150
|
+
";n1;n2;n3",
|
151
|
+
";o1;;o3", ]
|
152
|
+
|
153
|
+
rows = 0
|
154
|
+
|
155
|
+
File.open(@outfile).each_with_index do |row, index|
|
156
|
+
row.chomp.should eq result[index]
|
157
|
+
rows += 1
|
158
|
+
end
|
159
|
+
|
160
|
+
rows.should eq result.size
|
161
|
+
end
|
162
|
+
|
163
|
+
it "should merge two files key columns in two files of three only" do
|
164
|
+
header = "2010,2011,2012,2014"
|
165
|
+
key = "0, ,0"
|
166
|
+
source_header = "(\\d{4}),(\\d{4}),(\\d{4})"
|
167
|
+
|
168
|
+
Sycsvpro::Merger.new(outfile: @outfile,
|
169
|
+
files: "#{@file1},#{@file3},#{@file2}",
|
170
|
+
header: header,
|
171
|
+
key: key,
|
172
|
+
source_header: source_header).execute
|
173
|
+
|
174
|
+
result = [ ";2010;2011;2012;2014",
|
175
|
+
"SP;20;30;40;60",
|
176
|
+
"RP;30;40;50;70",
|
177
|
+
"MP;40;50;60;80",
|
178
|
+
"NP;50;60;70;90",
|
179
|
+
";m1;m2;m3",
|
180
|
+
";n1;n2;n3",
|
181
|
+
";o1;;o3",
|
182
|
+
"M;m1;m2;m3",
|
183
|
+
"N;n1;n2;n3",
|
184
|
+
"O;o1;;o3" ]
|
185
|
+
|
186
|
+
rows = 0
|
187
|
+
|
188
|
+
File.open(@outfile).each_with_index do |row, index|
|
189
|
+
row.chomp.should eq result[index]
|
190
|
+
rows += 1
|
191
|
+
end
|
192
|
+
|
193
|
+
rows.should eq result.size
|
194
|
+
end
|
195
|
+
|
103
196
|
end
|
104
197
|
|
105
198
|
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
require 'sycsvpro/transposer'
|
2
|
+
|
3
|
+
module Sycsvpro
|
4
|
+
|
5
|
+
describe Transposer do
|
6
|
+
|
7
|
+
before do
|
8
|
+
@infile = File.join(File.dirname(__FILE__), 'files/in6.csv')
|
9
|
+
@outfile = File.join(File.dirname(__FILE__), 'files/out.csv')
|
10
|
+
end
|
11
|
+
|
12
|
+
it "should transpose (change rows to columns) complete file" do
|
13
|
+
Sycsvpro::Transposer.new(infile: @infile,
|
14
|
+
outfile: @outfile).execute
|
15
|
+
|
16
|
+
result = [ "Year;;2008;2009;2010",
|
17
|
+
"SP;10;5;2;3",
|
18
|
+
"RP;20;10;5;5",
|
19
|
+
"Total;30;15;5;10",
|
20
|
+
"SP-O;100;10;20;70",
|
21
|
+
"RP-O;40;20;10;10",
|
22
|
+
"O;140;10;30;100" ]
|
23
|
+
|
24
|
+
rows = 0
|
25
|
+
|
26
|
+
File.open(@outfile).each_with_index do |line, i|
|
27
|
+
line.chomp.should eq result[i]
|
28
|
+
rows += 1
|
29
|
+
end
|
30
|
+
|
31
|
+
rows.should eq result.size
|
32
|
+
end
|
33
|
+
|
34
|
+
it "should transpose selected columns" do
|
35
|
+
Sycsvpro::Transposer.new(infile: @infile,
|
36
|
+
outfile: @outfile,
|
37
|
+
cols: "0-2").execute
|
38
|
+
|
39
|
+
result = [ "Year;;2008;2009;2010",
|
40
|
+
"SP;10;5;2;3",
|
41
|
+
"RP;20;10;5;5" ]
|
42
|
+
|
43
|
+
rows = 0
|
44
|
+
|
45
|
+
File.open(@outfile).each_with_index do |line, i|
|
46
|
+
line.chomp.should eq result[i]
|
47
|
+
rows += 1
|
48
|
+
end
|
49
|
+
|
50
|
+
rows.should eq result.size
|
51
|
+
end
|
52
|
+
|
53
|
+
it "should transpose selected rows and columns" do
|
54
|
+
Sycsvpro::Transposer.new(infile: @infile,
|
55
|
+
outfile: @outfile,
|
56
|
+
rows: "0,2-4",
|
57
|
+
cols: "0-2").execute
|
58
|
+
|
59
|
+
result = [ "Year;2008;2009;2010",
|
60
|
+
"SP;5;2;3",
|
61
|
+
"RP;10;5;5" ]
|
62
|
+
|
63
|
+
rows = 0
|
64
|
+
|
65
|
+
File.open(@outfile).each_with_index do |line, i|
|
66
|
+
line.chomp.should eq result[i]
|
67
|
+
rows += 1
|
68
|
+
end
|
69
|
+
|
70
|
+
rows.should eq result.size
|
71
|
+
end
|
72
|
+
|
73
|
+
|
74
|
+
end
|
75
|
+
|
76
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sycsvpro
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.13
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-07-
|
12
|
+
date: 2014-07-14 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -151,6 +151,7 @@ files:
|
|
151
151
|
- lib/sycsvpro/script_list.rb
|
152
152
|
- lib/sycsvpro/sorter.rb
|
153
153
|
- lib/sycsvpro/table.rb
|
154
|
+
- lib/sycsvpro/transposer.rb
|
154
155
|
- lib/sycsvpro/unique.rb
|
155
156
|
- lib/sycsvpro/version.rb
|
156
157
|
- spec/sycsvpro/aggregator_spec.rb
|
@@ -175,6 +176,7 @@ files:
|
|
175
176
|
- spec/sycsvpro/script_list_spec.rb
|
176
177
|
- spec/sycsvpro/sorter_spec.rb
|
177
178
|
- spec/sycsvpro/table_spec.rb
|
179
|
+
- spec/sycsvpro/transposer_spec.rb
|
178
180
|
- spec/sycsvpro/unique_spec.rb
|
179
181
|
- sycsvpro.gemspec
|
180
182
|
- sycsvpro.rdoc
|