sycsvpro 0.1.9 → 0.1.10

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- sycsvpro (0.1.9)
4
+ sycsvpro (0.1.10)
5
5
  gli (= 2.9.0)
6
6
  timeleap (~> 0.0.1)
7
7
 
data/README.md CHANGED
@@ -19,13 +19,14 @@ Processing of csv files. *sycsvpro* offers following functions
19
19
  * create a table from a source file with dynamically created columns (since
20
20
  version 0.1.4)
21
21
  * join two files based on a joint column value (since version 0.1.7)
22
+ * merge files based on common headline columns (since version 0.1.10)
22
23
 
23
24
  To get help type
24
25
 
25
26
  $ sycsvpro -h
26
27
 
27
- In the following examples we assume the following files 'machines.csv' and
28
- 'region.csv'
28
+ In the following examples we assume the following files 'machines.csv',
29
+ 'region.csv' and 'revenue.csv'
29
30
 
30
31
  ```
31
32
  customer;machine;control;drive;motor;date;contract;price;c-id
@@ -44,6 +45,13 @@ R3;US;345
44
45
  R4;CA;456
45
46
  ```
46
47
 
48
+ ```
49
+ 2010;2011;2012;2013;2014;customer
50
+ 50;100;150;100;200;hello
51
+ 100;50;10;1000;20;indix
52
+ 2000;250;300;3000;chiro
53
+ ```
54
+
47
55
  Analyze
48
56
  -------
49
57
  Analyze the content of the provided file *in.csv*
@@ -220,7 +228,27 @@ on streak.
220
228
  -i "COUNTRY,REGION"
221
229
  -j "3=8;3=10"
222
230
 
223
-
231
+ Merge
232
+ -----
233
+ Merge files machine_count.csv and revenue.csv based on the year columns.
234
+
235
+ $ sycsvpro -o out.csv merge machine_count.csv,revenue.csv
236
+ -h "2010,2013,2014"
237
+ -k "0,5"
238
+ -s "(\\d{4}),(\\d{4})"
239
+
240
+ This will create the out.csv
241
+
242
+ ```
243
+ ;2010;2013;2014
244
+ hello;1;0;0
245
+ indix;1;0;0
246
+ chiro;0;1;0
247
+ hello;50;100;200
248
+ indix;100;1000;20
249
+ chiro;2000;300;3000
250
+ ```
251
+
224
252
  Sort
225
253
  ----
226
254
  Sort rows on specified columns as an example sort rows based on customer
@@ -439,6 +467,12 @@ Version 0.1.9
439
467
  * When creating columns dynamically they are in arbitrary sequence. You can now
440
468
  provide a switch `sort: "2"` which will sort the header from column 2 on.
441
469
 
470
+ Version 0.1.10
471
+ --------------
472
+ * It is now possible to merge multiple files based on common headline columns
473
+ * Fix ~/.syc/sycsvpro system directory creation when no .syc directory is
474
+ available
475
+
442
476
  Installation
443
477
  ============
444
478
  [![Gem Version](https://badge.fury.io/rb/sycsvpro.png)](http://badge.fury.io/rb/sycsvpro)
data/bin/sycsvpro CHANGED
@@ -21,7 +21,7 @@ row_regex = %r{
21
21
  sycsvpro_directory = File.expand_path("~/.syc/sycsvpro")
22
22
 
23
23
  unless File.exists? sycsvpro_directory
24
- Dir.mkdir sycsvpro_directory
24
+ FileUtils.mkdir_p sycsvpro_directory
25
25
  end
26
26
 
27
27
  # Script directory
@@ -427,6 +427,37 @@ command :join do |c|
427
427
  end
428
428
  end
429
429
 
430
+ desc 'Merge multiple files based on a common column value with a key value at '+
431
+ 'the first column of a row'
432
+ arg_name 'FILE1 FILE2 ...'
433
+ command :merge do |c|
434
+ c.desc 'The key columns in the source files, which contains the columns to '+
435
+ 'be inserted into the outfile as first row column'
436
+ c.arg_name '0,3'
437
+ c.flag [:k, :key], :must_match => /^\d+(?:,\d+)*/
438
+
439
+ c.desc 'Header columns to be used as identifires for the columns of the '+
440
+ 'merging files'
441
+ c.arg_name 'COL1,COL2,COL3'
442
+ c.flag [:h, :header]
443
+
444
+ c.desc 'Header column patterns to be used as the identifier of the columns '+
445
+ 'of the files to be merged into the outfile'
446
+ c.arg_name 'PATTERN1,PATTERN2'
447
+ c.flag [:s, :source_header]
448
+
449
+ c.action do |global_options,options,args|
450
+ merge = Sycsvpro::Merger.new(outfile: global_options[:o],
451
+ files: args[0],
452
+ header: options[:h],
453
+ source_header: options[:s],
454
+ key: options[:k])
455
+ print 'Merging...'
456
+ merge.execute
457
+ print 'done'
458
+ end
459
+ end
460
+
430
461
  desc 'Sort rows based on column values. It is possible to sort on multiple '+
431
462
  'columns'
432
463
  command :sort do |c|
@@ -584,11 +615,16 @@ pre do |global,command,options,args|
584
615
  when :aggregate, :allocate, :calc, :collect, :count, :extract, :map, :sort
585
616
  help_now! "You need to provide an input file '-f FILE'" if global[:f].nil?
586
617
  help_now! "You need to provide a result file '-o OUT_FILE'" if global[:o].nil?
618
+ when :merge
619
+ help_now! "You need to provide a result file '-o OUT_FILE'" if global[:o].nil?
587
620
  end
588
621
 
589
622
  count = 0
590
623
 
591
- unless command.name == :edit or command.name == :execute or command.name == :list
624
+ unless command.name == :edit or
625
+ command.name == :execute or
626
+ command.name == :list or
627
+ command.name == :merge
592
628
  analyzer = Sycsvpro::Analyzer.new(global[:f])
593
629
  result = analyzer.result
594
630
  count = result.row_count
@@ -7,6 +7,19 @@ module Sycsvpro
7
7
 
8
8
  # An Aggregator counts specified row values and adds a sum to the end of
9
9
  # the row
10
+ #
11
+ # in.csv
12
+ #
13
+ # | Customer | 2013 | 2014 |
14
+ # | A | A1 | |
15
+ # | B | B1 | B16 |
16
+ # | A | A3 | A7 |
17
+ #
18
+ # out.csv
19
+ #
20
+ # | Customer | 2013 | 2014 | Sum |
21
+ # | A | 2 | 1 | 3 |
22
+ # | B | 1 | 1 | 2 |
10
23
  class Aggregator
11
24
 
12
25
  include Dsl
@@ -2,6 +2,18 @@
2
2
  module Sycsvpro
3
3
 
4
4
  # Allocates columns to a key column
5
+ #
6
+ # infile.csv
7
+ #
8
+ # | Name | Product |
9
+ # | A | X1 |
10
+ # | B | Y2 |
11
+ # | A | W10 |
12
+ #
13
+ # outfile.csv
14
+ #
15
+ # | A | X1 | W10 |
16
+ # | B | Y2 | |
5
17
  class Allocator
6
18
 
7
19
  # File from that values are read
@@ -5,6 +5,20 @@ module Sycsvpro
5
5
  Result = Struct.new(:cols, :col_count, :row_count, :sample_row)
6
6
 
7
7
  # Analyzes the file structure
8
+ #
9
+ # | Name | C1 | C2 |
10
+ # | A | a | b |
11
+ #
12
+ # 3 columns: ["Name", "C1", "C2"]
13
+ # 2 rows
14
+ #
15
+ # Row sample data:
16
+ # A;a;b
17
+ #
18
+ # Column index: Column name | Column sample value
19
+ # 0: Name | A
20
+ # 1: C1 | a
21
+ # 2: C2 | b
8
22
  class Analyzer
9
23
 
10
24
  # File that is analyzed
data/lib/sycsvpro/join.rb CHANGED
@@ -152,7 +152,7 @@ module Sycsvpro
152
152
 
153
153
  end
154
154
  end
155
-
155
+
156
156
  # Initializes the column positions where the source file columns have to
157
157
  # be inserted. If no column positions are provided the inserted columns
158
158
  # are put at the beginning of the row
@@ -0,0 +1,127 @@
1
+ # Operating csv files
2
+ module Sycsvpro
3
+
4
+ # Merge files based on common header columns
5
+ #
6
+ # file1.csv
7
+ #
8
+ # | | 2010 | 2011 | 2012 | 2013 |
9
+ # | --- | ---- | ---- | ---- | ---- |
10
+ # | SP | 20 | 30 | 40 | 50 |
11
+ # | RP | 30 | 40 | 50 | 60 |
12
+ #
13
+ # file2.csv
14
+ #
15
+ # | | 2010 | 2011 | 2012 |
16
+ # | --- | ---- | ---- | ---- |
17
+ # | M | m1 | m2 | m3 |
18
+ # | N | n1 | n2 | n3 |
19
+ #
20
+ # merging results in
21
+ #
22
+ # merge.csv
23
+ #
24
+ # | | 2010 | 2011 | 2012 | 2013 |
25
+ # | --- | ---- | ---- | ---- | ---- |
26
+ # | SP | 20 | 30 | 40 | 50 |
27
+ # | RP | 30 | 40 | 50 | 60 |
28
+ # | M | m1 | m2 | m3 | |
29
+ # | N | n1 | n2 | n3 | |
30
+ #
31
+ class Merger
32
+
33
+ include Dsl
34
+
35
+ # file to which the result is written
36
+ attr_reader :outfile
37
+ # header patterns to be used to identify merge columns
38
+ attr_reader :source_header
39
+ # header columns
40
+ attr_reader :header_cols
41
+ # value that is used as the first column of a row
42
+ attr_reader :key
43
+ # files to be merged based on header columns
44
+ attr_reader :files
45
+ # file to which the result is written
46
+ attr_reader :outfile
47
+
48
+ # Merge files based on common header columns
49
+ #
50
+ # :call-seq:
51
+ # Sycsvpro::Merger.new(outfile: "out.csv",
52
+ # files: "file1.csv,file2.csv,filen.csv",
53
+ # header: "2010,2011,2012,2013,2014",
54
+ # source_header: "(\\d{4}),(\\d{4})",
55
+ # key: "0,0").execute
56
+ #
57
+ # Semantics
58
+ # =========
59
+ # Merges the files file1.csv, file2.csv ... based on the header columns
60
+ # 2010, 2011, 2012, 2013 and 2014 where columns are identified by the
61
+ # regex /(\d{4})/. The first column in a row is column 0 of the file1.csv
62
+ # and so on.
63
+ #
64
+ # outfile:: result is written to the outfile
65
+ # files:: list of files that get merged. In the result file the files are
66
+ # inserted in the sequence they are provided
67
+ # header:: header of the result file and key for assigning column values
68
+ # from source files to result file
69
+ # source_header:: pattern for each header of the source file to determine
70
+ # the column. The pattern is a regex without the enclosing slashes '/'
71
+ # key:: first column value from the source file that is used as first
72
+ # column in the target file
73
+ def initialize(options = {})
74
+ @outfile = options[:outfile]
75
+ @header_cols = options[:header].split(',')
76
+ @source_header = options[:source_header].split(',')
77
+ @key = options[:key].split(',')
78
+ @files = options[:files].split(',')
79
+ end
80
+
81
+ # Merges the files based on the provided parameters
82
+ def execute
83
+ File.open(outfile, 'w') do |out|
84
+ out.puts ";#{header_cols.join(';')}"
85
+ files.each do |file|
86
+ @current_key = @key.shift
87
+ @current_source_header = @source_header.shift
88
+ processed_header = false
89
+ File.open(file).each_with_index do |line, index|
90
+ next if line.chomp.empty?
91
+
92
+ unless processed_header
93
+ create_file_header unstring(line).split(';')
94
+ processed_header = true
95
+ next
96
+ end
97
+
98
+ out.puts create_line unstring(line).split(';')
99
+ end
100
+ end
101
+ end
102
+ end
103
+
104
+ private
105
+
106
+ # create a filter for the columns that match the header filter
107
+ def create_file_header(columns)
108
+ columns.each_with_index do |c,i|
109
+ next if i == @current_key
110
+ columns[i] = c.scan(Regexp.new(@current_source_header)).flatten[0]
111
+ end
112
+
113
+ @file_header = [@current_key.to_i]
114
+ header_cols.each do |h|
115
+ @file_header << columns.index(h)
116
+ end
117
+ @file_header.compact!
118
+ end
119
+
120
+ # create a line filtered by the file_header
121
+ def create_line(columns)
122
+ columns.values_at(*@file_header).join(';')
123
+ end
124
+
125
+ end
126
+
127
+ end
@@ -1,5 +1,5 @@
1
1
  # Operating csv files
2
2
  module Sycsvpro
3
3
  # Version number of sycsvpro
4
- VERSION = '0.1.9'
4
+ VERSION = '0.1.10'
5
5
  end
data/lib/sycsvpro.rb CHANGED
@@ -15,3 +15,4 @@ require 'sycsvpro/sorter.rb'
15
15
  require 'sycsvpro/aggregator.rb'
16
16
  require 'sycsvpro/table.rb'
17
17
  require 'sycsvpro/join.rb'
18
+ require 'sycsvpro/merger.rb'
@@ -0,0 +1,105 @@
1
+ require 'sycsvpro/merger.rb'
2
+
3
+ module Sycsvpro
4
+
5
+ describe Merger do
6
+
7
+ before do
8
+ @file1 = File.join(File.dirname(__FILE__), "files/merge1.csv")
9
+ @file2 = File.join(File.dirname(__FILE__), "files/merge2.csv")
10
+ @outfile = File.join(File.dirname(__FILE__), "files/merged.csv")
11
+ end
12
+
13
+ it "should merge two files" do
14
+ header = "2010,2011,2012,2014"
15
+ key = "0,0"
16
+ source_header = "(\\d{4}),(\\d{4})"
17
+
18
+ Sycsvpro::Merger.new(outfile: @outfile,
19
+ files: "#{@file1},#{@file2}",
20
+ header: header,
21
+ key: key,
22
+ source_header: source_header).execute
23
+
24
+ result = [ ";2010;2011;2012;2014",
25
+ "SP;20;30;40;60",
26
+ "RP;30;40;50;70",
27
+ "MP;40;50;60;80",
28
+ "NP;50;60;70;90",
29
+ "M;m1;m2;m3",
30
+ "N;n1;n2;n3",
31
+ "O;o1;;o3", ]
32
+
33
+ rows = 0
34
+
35
+ File.open(@outfile).each_with_index do |row, index|
36
+ row.chomp.should eq result[index]
37
+ rows += 1
38
+ end
39
+
40
+ rows.should eq result.size
41
+ end
42
+
43
+ it "should merge two files with differnt key columns in the middle" do
44
+ header = "2010,2011,2012,2014"
45
+ key = "0,3"
46
+ source_header = "(\\d{4}),(\\d{4})"
47
+
48
+ Sycsvpro::Merger.new(outfile: @outfile,
49
+ files: "#{@file1},#{@file2}",
50
+ header: header,
51
+ key: key,
52
+ source_header: source_header).execute
53
+
54
+ result = [ ";2010;2011;2012;2014",
55
+ "SP;20;30;40;60",
56
+ "RP;30;40;50;70",
57
+ "MP;40;50;60;80",
58
+ "NP;50;60;70;90",
59
+ "MO;m1;m2;m3",
60
+ "NO;n1;n2;n3",
61
+ "OO;o1;;o3", ]
62
+
63
+ rows = 0
64
+
65
+ File.open(@outfile).each_with_index do |row, index|
66
+ row.chomp.should eq result[index]
67
+ rows += 1
68
+ end
69
+
70
+ rows.should eq result.size
71
+ end
72
+
73
+ it "should merge two files with differnt key columns at the end" do
74
+ header = "2010,2011,2012,2014"
75
+ key = "0,6"
76
+ source_header = "(\\d{4}),(\\d{4})"
77
+
78
+ Sycsvpro::Merger.new(outfile: @outfile,
79
+ files: "#{@file1},#{@file2}",
80
+ header: header,
81
+ key: key,
82
+ source_header: source_header).execute
83
+
84
+ result = [ ";2010;2011;2012;2014",
85
+ "SP;20;30;40;60",
86
+ "RP;30;40;50;70",
87
+ "MP;40;50;60;80",
88
+ "NP;50;60;70;90",
89
+ "MI;m1;m2;m3",
90
+ "NI;n1;n2;n3",
91
+ "OI;o1;;o3", ]
92
+
93
+ rows = 0
94
+
95
+ File.open(@outfile).each_with_index do |row, index|
96
+ row.chomp.should eq result[index]
97
+ rows += 1
98
+ end
99
+
100
+ rows.should eq result.size
101
+ end
102
+
103
+ end
104
+
105
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sycsvpro
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.9
4
+ version: 0.1.10
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-06-29 00:00:00.000000000 Z
12
+ date: 2014-07-05 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
@@ -144,6 +144,7 @@ files:
144
144
  - lib/sycsvpro/inserter.rb
145
145
  - lib/sycsvpro/join.rb
146
146
  - lib/sycsvpro/mapper.rb
147
+ - lib/sycsvpro/merger.rb
147
148
  - lib/sycsvpro/profiler.rb
148
149
  - lib/sycsvpro/row_filter.rb
149
150
  - lib/sycsvpro/script_creator.rb
@@ -167,6 +168,7 @@ files:
167
168
  - spec/sycsvpro/inserter_spec.rb
168
169
  - spec/sycsvpro/join_spec.rb
169
170
  - spec/sycsvpro/mapper_spec.rb
171
+ - spec/sycsvpro/merger_spec.rb
170
172
  - spec/sycsvpro/profiler_spec.rb
171
173
  - spec/sycsvpro/row_filter_spec.rb
172
174
  - spec/sycsvpro/script_list_spec.rb