sycsvpro 0.1.9 → 0.1.10
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile.lock +1 -1
- data/README.md +37 -3
- data/bin/sycsvpro +38 -2
- data/lib/sycsvpro/aggregator.rb +13 -0
- data/lib/sycsvpro/allocator.rb +12 -0
- data/lib/sycsvpro/analyzer.rb +14 -0
- data/lib/sycsvpro/join.rb +1 -1
- data/lib/sycsvpro/merger.rb +127 -0
- data/lib/sycsvpro/version.rb +1 -1
- data/lib/sycsvpro.rb +1 -0
- data/spec/sycsvpro/merger_spec.rb +105 -0
- metadata +4 -2
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -19,13 +19,14 @@ Processing of csv files. *sycsvpro* offers following functions
|
|
19
19
|
* create a table from a source file with dynamically created columns (since
|
20
20
|
version 0.1.4)
|
21
21
|
* join two files based on a joint column value (since version 0.1.7)
|
22
|
+
* merge files based on common headline columns (since version 0.1.10)
|
22
23
|
|
23
24
|
To get help type
|
24
25
|
|
25
26
|
$ sycsvpro -h
|
26
27
|
|
27
|
-
In the following examples we assume the following files 'machines.csv'
|
28
|
-
'region.csv'
|
28
|
+
In the following examples we assume the following files 'machines.csv',
|
29
|
+
'region.csv' and 'revenue.csv'
|
29
30
|
|
30
31
|
```
|
31
32
|
customer;machine;control;drive;motor;date;contract;price;c-id
|
@@ -44,6 +45,13 @@ R3;US;345
|
|
44
45
|
R4;CA;456
|
45
46
|
```
|
46
47
|
|
48
|
+
```
|
49
|
+
2010;2011;2012;2013;2014;customer
|
50
|
+
50;100;150;100;200;hello
|
51
|
+
100;50;10;1000;20;indix
|
52
|
+
2000;250;300;3000;chiro
|
53
|
+
```
|
54
|
+
|
47
55
|
Analyze
|
48
56
|
-------
|
49
57
|
Analyze the content of the provided file *in.csv*
|
@@ -220,7 +228,27 @@ on streak.
|
|
220
228
|
-i "COUNTRY,REGION"
|
221
229
|
-j "3=8;3=10"
|
222
230
|
|
223
|
-
|
231
|
+
Merge
|
232
|
+
-----
|
233
|
+
Merge files machine_count.csv and revenue.csv based on the year columns.
|
234
|
+
|
235
|
+
$ sycsvpro -o out.csv merge machine_count.csv,revenue.csv
|
236
|
+
-h "2010,2013,2014"
|
237
|
+
-k "0,5"
|
238
|
+
-s "(\\d{4}),(\\d{4})"
|
239
|
+
|
240
|
+
This will create the out.csv
|
241
|
+
|
242
|
+
```
|
243
|
+
;2010;2013;2014
|
244
|
+
hello;1;0;0
|
245
|
+
indix;1;0;0
|
246
|
+
chiro;0;1;0
|
247
|
+
hello;50;100;200
|
248
|
+
indix;100;1000;20
|
249
|
+
chiro;2000;300;3000
|
250
|
+
```
|
251
|
+
|
224
252
|
Sort
|
225
253
|
----
|
226
254
|
Sort rows on specified columns as an example sort rows based on customer
|
@@ -439,6 +467,12 @@ Version 0.1.9
|
|
439
467
|
* When creating columns dynamically they are in arbitrary sequence. You can now
|
440
468
|
provide a switch `sort: "2"` which will sort the header from column 2 on.
|
441
469
|
|
470
|
+
Version 0.1.10
|
471
|
+
--------------
|
472
|
+
* It is now possible to merge multiple files based on common headline columns
|
473
|
+
* Fix ~/.syc/sycsvpro system directory creation when no .syc directory is
|
474
|
+
available
|
475
|
+
|
442
476
|
Installation
|
443
477
|
============
|
444
478
|
[![Gem Version](https://badge.fury.io/rb/sycsvpro.png)](http://badge.fury.io/rb/sycsvpro)
|
data/bin/sycsvpro
CHANGED
@@ -21,7 +21,7 @@ row_regex = %r{
|
|
21
21
|
sycsvpro_directory = File.expand_path("~/.syc/sycsvpro")
|
22
22
|
|
23
23
|
unless File.exists? sycsvpro_directory
|
24
|
-
|
24
|
+
FileUtils.mkdir_p sycsvpro_directory
|
25
25
|
end
|
26
26
|
|
27
27
|
# Script directory
|
@@ -427,6 +427,37 @@ command :join do |c|
|
|
427
427
|
end
|
428
428
|
end
|
429
429
|
|
430
|
+
desc 'Merge multiple files based on a common column value with a key value at '+
|
431
|
+
'the first column of a row'
|
432
|
+
arg_name 'FILE1 FILE2 ...'
|
433
|
+
command :merge do |c|
|
434
|
+
c.desc 'The key columns in the source files, which contains the columns to '+
|
435
|
+
'be inserted into the outfile as first row column'
|
436
|
+
c.arg_name '0,3'
|
437
|
+
c.flag [:k, :key], :must_match => /^\d+(?:,\d+)*/
|
438
|
+
|
439
|
+
c.desc 'Header columns to be used as identifires for the columns of the '+
|
440
|
+
'merging files'
|
441
|
+
c.arg_name 'COL1,COL2,COL3'
|
442
|
+
c.flag [:h, :header]
|
443
|
+
|
444
|
+
c.desc 'Header column patterns to be used as the identifier of the columns '+
|
445
|
+
'of the files to be merged into the outfile'
|
446
|
+
c.arg_name 'PATTERN1,PATTERN2'
|
447
|
+
c.flag [:s, :source_header]
|
448
|
+
|
449
|
+
c.action do |global_options,options,args|
|
450
|
+
merge = Sycsvpro::Merger.new(outfile: global_options[:o],
|
451
|
+
files: args[0],
|
452
|
+
header: options[:h],
|
453
|
+
source_header: options[:s],
|
454
|
+
key: options[:k])
|
455
|
+
print 'Merging...'
|
456
|
+
merge.execute
|
457
|
+
print 'done'
|
458
|
+
end
|
459
|
+
end
|
460
|
+
|
430
461
|
desc 'Sort rows based on column values. It is possible to sort on multiple '+
|
431
462
|
'columns'
|
432
463
|
command :sort do |c|
|
@@ -584,11 +615,16 @@ pre do |global,command,options,args|
|
|
584
615
|
when :aggregate, :allocate, :calc, :collect, :count, :extract, :map, :sort
|
585
616
|
help_now! "You need to provide an input file '-f FILE'" if global[:f].nil?
|
586
617
|
help_now! "You need to provide a result file '-o OUT_FILE'" if global[:o].nil?
|
618
|
+
when :merge
|
619
|
+
help_now! "You need to provide a result file '-o OUT_FILE'" if global[:o].nil?
|
587
620
|
end
|
588
621
|
|
589
622
|
count = 0
|
590
623
|
|
591
|
-
unless command.name == :edit or
|
624
|
+
unless command.name == :edit or
|
625
|
+
command.name == :execute or
|
626
|
+
command.name == :list or
|
627
|
+
command.name == :merge
|
592
628
|
analyzer = Sycsvpro::Analyzer.new(global[:f])
|
593
629
|
result = analyzer.result
|
594
630
|
count = result.row_count
|
data/lib/sycsvpro/aggregator.rb
CHANGED
@@ -7,6 +7,19 @@ module Sycsvpro
|
|
7
7
|
|
8
8
|
# An Aggregator counts specified row values and adds a sum to the end of
|
9
9
|
# the row
|
10
|
+
#
|
11
|
+
# in.csv
|
12
|
+
#
|
13
|
+
# | Customer | 2013 | 2014 |
|
14
|
+
# | A | A1 | |
|
15
|
+
# | B | B1 | B16 |
|
16
|
+
# | A | A3 | A7 |
|
17
|
+
#
|
18
|
+
# out.csv
|
19
|
+
#
|
20
|
+
# | Customer | 2013 | 2014 | Sum |
|
21
|
+
# | A | 2 | 1 | 3 |
|
22
|
+
# | B | 1 | 1 | 2 |
|
10
23
|
class Aggregator
|
11
24
|
|
12
25
|
include Dsl
|
data/lib/sycsvpro/allocator.rb
CHANGED
@@ -2,6 +2,18 @@
|
|
2
2
|
module Sycsvpro
|
3
3
|
|
4
4
|
# Allocates columns to a key column
|
5
|
+
#
|
6
|
+
# infile.csv
|
7
|
+
#
|
8
|
+
# | Name | Product |
|
9
|
+
# | A | X1 |
|
10
|
+
# | B | Y2 |
|
11
|
+
# | A | W10 |
|
12
|
+
#
|
13
|
+
# outfile.csv
|
14
|
+
#
|
15
|
+
# | A | X1 | W10 |
|
16
|
+
# | B | Y2 | |
|
5
17
|
class Allocator
|
6
18
|
|
7
19
|
# File from that values are read
|
data/lib/sycsvpro/analyzer.rb
CHANGED
@@ -5,6 +5,20 @@ module Sycsvpro
|
|
5
5
|
Result = Struct.new(:cols, :col_count, :row_count, :sample_row)
|
6
6
|
|
7
7
|
# Analyzes the file structure
|
8
|
+
#
|
9
|
+
# | Name | C1 | C2 |
|
10
|
+
# | A | a | b |
|
11
|
+
#
|
12
|
+
# 3 columns: ["Name", "C1", "C2"]
|
13
|
+
# 2 rows
|
14
|
+
#
|
15
|
+
# Row sample data:
|
16
|
+
# A;a;b
|
17
|
+
#
|
18
|
+
# Column index: Column name | Column sample value
|
19
|
+
# 0: Name | A
|
20
|
+
# 1: C1 | a
|
21
|
+
# 2: C2 | b
|
8
22
|
class Analyzer
|
9
23
|
|
10
24
|
# File that is analyzed
|
data/lib/sycsvpro/join.rb
CHANGED
@@ -0,0 +1,127 @@
|
|
1
|
+
# Operating csv files
|
2
|
+
module Sycsvpro
|
3
|
+
|
4
|
+
# Merge files based on common header columns
|
5
|
+
#
|
6
|
+
# file1.csv
|
7
|
+
#
|
8
|
+
# | | 2010 | 2011 | 2012 | 2013 |
|
9
|
+
# | --- | ---- | ---- | ---- | ---- |
|
10
|
+
# | SP | 20 | 30 | 40 | 50 |
|
11
|
+
# | RP | 30 | 40 | 50 | 60 |
|
12
|
+
#
|
13
|
+
# file2.csv
|
14
|
+
#
|
15
|
+
# | | 2010 | 2011 | 2012 |
|
16
|
+
# | --- | ---- | ---- | ---- |
|
17
|
+
# | M | m1 | m2 | m3 |
|
18
|
+
# | N | n1 | n2 | n3 |
|
19
|
+
#
|
20
|
+
# merging results in
|
21
|
+
#
|
22
|
+
# merge.csv
|
23
|
+
#
|
24
|
+
# | | 2010 | 2011 | 2012 | 2013 |
|
25
|
+
# | --- | ---- | ---- | ---- | ---- |
|
26
|
+
# | SP | 20 | 30 | 40 | 50 |
|
27
|
+
# | RP | 30 | 40 | 50 | 60 |
|
28
|
+
# | M | m1 | m2 | m3 | |
|
29
|
+
# | N | n1 | n2 | n3 | |
|
30
|
+
#
|
31
|
+
class Merger
|
32
|
+
|
33
|
+
include Dsl
|
34
|
+
|
35
|
+
# file to that the result is written
|
36
|
+
attr_reader :outfile
|
37
|
+
# header patterns to be used to identify merge columns
|
38
|
+
attr_reader :source_header
|
39
|
+
# header columns
|
40
|
+
attr_reader :header_cols
|
41
|
+
# column index whose value is used as the first column of a row
|
42
|
+
attr_reader :key
|
43
|
+
# files to be merged based on header columns
|
44
|
+
attr_reader :files
|
45
|
+
# file to that the result is written to
|
46
|
+
attr_reader :outfile
|
47
|
+
|
48
|
+
# Merge files based on common header columns
|
49
|
+
#
|
50
|
+
# :call-seq:
|
51
|
+
# Sycsvpro::Merger.new(outfile: "out.csv",
|
52
|
+
# files: "file1.csv,file2.csv,filen.csv",
|
53
|
+
# header: "2010,2011,2012,2013,2014",
|
54
|
+
# source_header: "(\\d{4}),(\\d{4})",
|
55
|
+
# key: "0,0").execute
|
56
|
+
#
|
57
|
+
# Semantics
|
58
|
+
# =========
|
59
|
+
# Merges the files file1.csv, file2.csv ... based on the header columns
|
60
|
+
# 2010, 2011, 2012, 2013 and 2014 where columns are identified by the
|
61
|
+
# regex /(\d{4})/. The first column in a row is column 0 of the file1.csv
|
62
|
+
# and so on.
|
63
|
+
#
|
64
|
+
# outfile:: result is written to the outfile
|
65
|
+
# files:: list of files that get merged. In the result file the files are
|
66
|
+
# inserted in the sequence they are provided
|
67
|
+
# header:: header of the result file and key for assigning column values
|
68
|
+
# from source files to result file
|
69
|
+
# source_header:: pattern for each header of the source file to determine
|
70
|
+
# the column. The pattern is a regex without the enclosing slashes '/'
|
71
|
+
# key:: first column value from the source file that is used as first
|
72
|
+
# column in the target file
|
73
|
+
def initialize(options = {})
|
74
|
+
@outfile = options[:outfile]
|
75
|
+
@header_cols = options[:header].split(',')
|
76
|
+
@source_header = options[:source_header].split(',')
|
77
|
+
@key = options[:key].split(',')
|
78
|
+
@files = options[:files].split(',')
|
79
|
+
end
|
80
|
+
|
81
|
+
# Merges the files based on the provided parameters
|
82
|
+
def execute
|
83
|
+
File.open(outfile, 'w') do |out|
|
84
|
+
out.puts ";#{header_cols.join(';')}"
|
85
|
+
files.each do |file|
|
86
|
+
@current_key = @key.shift
|
87
|
+
@current_source_header = @source_header.shift
|
88
|
+
processed_header = false
|
89
|
+
File.open(file).each_with_index do |line, index|
|
90
|
+
next if line.chomp.empty?
|
91
|
+
|
92
|
+
unless processed_header
|
93
|
+
create_file_header unstring(line).split(';')
|
94
|
+
processed_header = true
|
95
|
+
next
|
96
|
+
end
|
97
|
+
|
98
|
+
out.puts create_line unstring(line).split(';')
|
99
|
+
end
|
100
|
+
end
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
private
|
105
|
+
|
106
|
+
# create a filter for the columns that match the header filter
|
107
|
+
def create_file_header(columns)
|
108
|
+
columns.each_with_index do |c,i|
|
109
|
+
next if i == @current_key
|
110
|
+
columns[i] = c.scan(Regexp.new(@current_source_header)).flatten[0]
|
111
|
+
end
|
112
|
+
|
113
|
+
@file_header = [@current_key.to_i]
|
114
|
+
header_cols.each do |h|
|
115
|
+
@file_header << columns.index(h)
|
116
|
+
end
|
117
|
+
@file_header.compact!
|
118
|
+
end
|
119
|
+
|
120
|
+
# create a line filtered by the file_header
|
121
|
+
def create_line(columns)
|
122
|
+
columns.values_at(*@file_header).join(';')
|
123
|
+
end
|
124
|
+
|
125
|
+
end
|
126
|
+
|
127
|
+
end
|
data/lib/sycsvpro/version.rb
CHANGED
data/lib/sycsvpro.rb
CHANGED
@@ -0,0 +1,105 @@
|
|
1
|
+
require 'sycsvpro/merger.rb'
|
2
|
+
|
3
|
+
module Sycsvpro
|
4
|
+
|
5
|
+
describe Merger do
|
6
|
+
|
7
|
+
before do
|
8
|
+
@file1 = File.join(File.dirname(__FILE__), "files/merge1.csv")
|
9
|
+
@file2 = File.join(File.dirname(__FILE__), "files/merge2.csv")
|
10
|
+
@outfile = File.join(File.dirname(__FILE__), "files/merged.csv")
|
11
|
+
end
|
12
|
+
|
13
|
+
it "should merge two files" do
|
14
|
+
header = "2010,2011,2012,2014"
|
15
|
+
key = "0,0"
|
16
|
+
source_header = "(\\d{4}),(\\d{4})"
|
17
|
+
|
18
|
+
Sycsvpro::Merger.new(outfile: @outfile,
|
19
|
+
files: "#{@file1},#{@file2}",
|
20
|
+
header: header,
|
21
|
+
key: key,
|
22
|
+
source_header: source_header).execute
|
23
|
+
|
24
|
+
result = [ ";2010;2011;2012;2014",
|
25
|
+
"SP;20;30;40;60",
|
26
|
+
"RP;30;40;50;70",
|
27
|
+
"MP;40;50;60;80",
|
28
|
+
"NP;50;60;70;90",
|
29
|
+
"M;m1;m2;m3",
|
30
|
+
"N;n1;n2;n3",
|
31
|
+
"O;o1;;o3", ]
|
32
|
+
|
33
|
+
rows = 0
|
34
|
+
|
35
|
+
File.open(@outfile).each_with_index do |row, index|
|
36
|
+
row.chomp.should eq result[index]
|
37
|
+
rows += 1
|
38
|
+
end
|
39
|
+
|
40
|
+
rows.should eq result.size
|
41
|
+
end
|
42
|
+
|
43
|
+
it "should merge two files with differnt key columns in the middle" do
|
44
|
+
header = "2010,2011,2012,2014"
|
45
|
+
key = "0,3"
|
46
|
+
source_header = "(\\d{4}),(\\d{4})"
|
47
|
+
|
48
|
+
Sycsvpro::Merger.new(outfile: @outfile,
|
49
|
+
files: "#{@file1},#{@file2}",
|
50
|
+
header: header,
|
51
|
+
key: key,
|
52
|
+
source_header: source_header).execute
|
53
|
+
|
54
|
+
result = [ ";2010;2011;2012;2014",
|
55
|
+
"SP;20;30;40;60",
|
56
|
+
"RP;30;40;50;70",
|
57
|
+
"MP;40;50;60;80",
|
58
|
+
"NP;50;60;70;90",
|
59
|
+
"MO;m1;m2;m3",
|
60
|
+
"NO;n1;n2;n3",
|
61
|
+
"OO;o1;;o3", ]
|
62
|
+
|
63
|
+
rows = 0
|
64
|
+
|
65
|
+
File.open(@outfile).each_with_index do |row, index|
|
66
|
+
row.chomp.should eq result[index]
|
67
|
+
rows += 1
|
68
|
+
end
|
69
|
+
|
70
|
+
rows.should eq result.size
|
71
|
+
end
|
72
|
+
|
73
|
+
it "should merge two files with differnt key columns at the end" do
|
74
|
+
header = "2010,2011,2012,2014"
|
75
|
+
key = "0,6"
|
76
|
+
source_header = "(\\d{4}),(\\d{4})"
|
77
|
+
|
78
|
+
Sycsvpro::Merger.new(outfile: @outfile,
|
79
|
+
files: "#{@file1},#{@file2}",
|
80
|
+
header: header,
|
81
|
+
key: key,
|
82
|
+
source_header: source_header).execute
|
83
|
+
|
84
|
+
result = [ ";2010;2011;2012;2014",
|
85
|
+
"SP;20;30;40;60",
|
86
|
+
"RP;30;40;50;70",
|
87
|
+
"MP;40;50;60;80",
|
88
|
+
"NP;50;60;70;90",
|
89
|
+
"MI;m1;m2;m3",
|
90
|
+
"NI;n1;n2;n3",
|
91
|
+
"OI;o1;;o3", ]
|
92
|
+
|
93
|
+
rows = 0
|
94
|
+
|
95
|
+
File.open(@outfile).each_with_index do |row, index|
|
96
|
+
row.chomp.should eq result[index]
|
97
|
+
rows += 1
|
98
|
+
end
|
99
|
+
|
100
|
+
rows.should eq result.size
|
101
|
+
end
|
102
|
+
|
103
|
+
end
|
104
|
+
|
105
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sycsvpro
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.10
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-
|
12
|
+
date: 2014-07-05 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -144,6 +144,7 @@ files:
|
|
144
144
|
- lib/sycsvpro/inserter.rb
|
145
145
|
- lib/sycsvpro/join.rb
|
146
146
|
- lib/sycsvpro/mapper.rb
|
147
|
+
- lib/sycsvpro/merger.rb
|
147
148
|
- lib/sycsvpro/profiler.rb
|
148
149
|
- lib/sycsvpro/row_filter.rb
|
149
150
|
- lib/sycsvpro/script_creator.rb
|
@@ -167,6 +168,7 @@ files:
|
|
167
168
|
- spec/sycsvpro/inserter_spec.rb
|
168
169
|
- spec/sycsvpro/join_spec.rb
|
169
170
|
- spec/sycsvpro/mapper_spec.rb
|
171
|
+
- spec/sycsvpro/merger_spec.rb
|
170
172
|
- spec/sycsvpro/profiler_spec.rb
|
171
173
|
- spec/sycsvpro/row_filter_spec.rb
|
172
174
|
- spec/sycsvpro/script_list_spec.rb
|