sycsvpro 0.1.9 → 0.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- sycsvpro (0.1.9)
4
+ sycsvpro (0.1.10)
5
5
  gli (= 2.9.0)
6
6
  timeleap (~> 0.0.1)
7
7
 
data/README.md CHANGED
@@ -19,13 +19,14 @@ Processing of csv files. *sycsvpro* offers following functions
19
19
  * create a table from a source file with dynamically created columns (since
20
20
  version 0.1.4)
21
21
  * join two files based on a joint column value (since version 0.1.7)
22
+ * merge files based on common headline columns (since version 0.1.10)
22
23
 
23
24
  To get help type
24
25
 
25
26
  $ sycsvpro -h
26
27
 
27
- In the following examples we assume the following files 'machines.csv' and
28
- 'region.csv'
28
+ In the following examples we assume the following files 'machines.csv',
29
+ 'region.csv' and 'revenue.csv'
29
30
 
30
31
  ```
31
32
  customer;machine;control;drive;motor;date;contract;price;c-id
@@ -44,6 +45,13 @@ R3;US;345
44
45
  R4;CA;456
45
46
  ```
46
47
 
48
+ ```
49
+ 2010;2011;2012;2013;2014;customer
50
+ 50;100;150;100;200;hello
51
+ 100;50;10;1000;20;indix
52
+ 2000;250;300;3000;chiro
53
+ ```
54
+
47
55
  Analyze
48
56
  -------
49
57
  Analyze the content of the provided file *in.csv*
@@ -220,7 +228,27 @@ on streak.
220
228
  -i "COUNTRY,REGION"
221
229
  -j "3=8;3=10"
222
230
 
223
-
231
+ Merge
232
+ -----
233
+ Merge files machine_count.csv and revenue.csv based on the year columns.
234
+
235
+ $ sycsvpro -o out.csv merge machines.csv,revenue.csv
236
+ -h "2010,2013,2014"
237
+ -k "0,5"
238
+ -s "(\\d{4}),(\\d{4})"
239
+
240
+ This will create the out.csv
241
+
242
+ ```
243
+ ;2010;2013;2014
244
+ hello;1;0;0
245
+ indix;1;0;0
246
+ chiro;0;1;0
247
+ hello;50;100;200
248
+ indix;100;1000;20
249
+ chiro;2000;300;3000
250
+ ```
251
+
224
252
  Sort
225
253
  ----
226
254
  Sort rows on specified columns as an example sort rows based on customer
@@ -439,6 +467,12 @@ Version 0.1.9
439
467
  * When creating columns dynamically they are in arbitrary sequence. You can now
440
468
  provide a switch `sort: "2"` which will sort the header from column 2 on.
441
469
 
470
+ Version 0.1.10
471
+ --------------
472
+ * It is now possible to merge multiple files based on common headline columns
473
+ * Fix ~/.syc/sycsvpro system directory creation when no .syc directory is
474
+ available
475
+
442
476
  Installation
443
477
  ============
444
478
  [![Gem Version](https://badge.fury.io/rb/sycsvpro.png)](http://badge.fury.io/rb/sycsvpro)
data/bin/sycsvpro CHANGED
@@ -21,7 +21,7 @@ row_regex = %r{
21
21
  sycsvpro_directory = File.expand_path("~/.syc/sycsvpro")
22
22
 
23
23
  unless File.exists? sycsvpro_directory
24
- Dir.mkdir sycsvpro_directory
24
+ FileUtils.mkdir_p sycsvpro_directory
25
25
  end
26
26
 
27
27
  # Script directory
@@ -427,6 +427,37 @@ command :join do |c|
427
427
  end
428
428
  end
429
429
 
430
+ desc 'Merge multiple files based on a common column value with a key value at '+
431
+ 'the first column of a row'
432
+ arg_name 'FILE1 FILE2 ...'
433
+ command :merge do |c|
434
+ c.desc 'The key columns in the source files, which contains the columns to '+
435
+ 'be inserted into the outfile as first row column'
436
+ c.arg_name '0,3'
437
+ c.flag [:k, :key], :must_match => /^\d+(?:,\d+)*/
438
+
439
+ c.desc 'Header columns to be used as identifires for the columns of the '+
440
+ 'merging files'
441
+ c.arg_name 'COL1,COL2,COL3'
442
+ c.flag [:h, :header]
443
+
444
+ c.desc 'Header column patterns to be used as the identifier of the columns '+
445
+ 'of the files to be merged into the outfile'
446
+ c.arg_name 'PATTERN1,PATTERN2'
447
+ c.flag [:s, :source_header]
448
+
449
+ c.action do |global_options,options,args|
450
+ merge = Sycsvpro::Merger.new(outfile: global_options[:o],
451
+ files: args[0],
452
+ header: options[:h],
453
+ source_header: options[:s],
454
+ key: options[:k])
455
+ print 'Merging...'
456
+ merge.execute
457
+ print 'done'
458
+ end
459
+ end
460
+
430
461
  desc 'Sort rows based on column values. It is possible to sort on multiple '+
431
462
  'columns'
432
463
  command :sort do |c|
@@ -584,11 +615,16 @@ pre do |global,command,options,args|
584
615
  when :aggregate, :allocate, :calc, :collect, :count, :extract, :map, :sort
585
616
  help_now! "You need to provide an input file '-f FILE'" if global[:f].nil?
586
617
  help_now! "You need to provide a result file '-o OUT_FILE'" if global[:o].nil?
618
+ when :merge
619
+ help_now! "You need to provide a result file '-o OUT_FILE'" if global[:o].nil?
587
620
  end
588
621
 
589
622
  count = 0
590
623
 
591
- unless command.name == :edit or command.name == :execute or command.name == :list
624
+ unless command.name == :edit or
625
+ command.name == :execute or
626
+ command.name == :list or
627
+ command.name == :merge
592
628
  analyzer = Sycsvpro::Analyzer.new(global[:f])
593
629
  result = analyzer.result
594
630
  count = result.row_count
@@ -7,6 +7,19 @@ module Sycsvpro
7
7
 
8
8
  # An Aggregator counts specified row values and adds a sum to the end of
9
9
  # the row
10
+ #
11
+ # in.csv
12
+ #
13
+ # | Customer | 2013 | 2014 |
14
+ # | A | A1 | |
15
+ # | B | B1 | B16 |
16
+ # | A | A3 | A7 |
17
+ #
18
+ # out.csv
19
+ #
20
+ # | Customer | 2013 | 2014 | Sum |
21
+ # | A | 2 | 1 | 3 |
22
+ # | B | 1 | 1 | 2 |
10
23
  class Aggregator
11
24
 
12
25
  include Dsl
@@ -2,6 +2,18 @@
2
2
  module Sycsvpro
3
3
 
4
4
  # Allocates columns to a key column
5
+ #
6
+ # infile.csv
7
+ #
8
+ # | Name | Product |
9
+ # | A | X1 |
10
+ # | B | Y2 |
11
+ # | A | W10 |
12
+ #
13
+ # outfile.csv
14
+ #
15
+ # | A | X1 | W10 |
16
+ # | B | Y2 | |
5
17
  class Allocator
6
18
 
7
19
  # File from that values are read
@@ -5,6 +5,20 @@ module Sycsvpro
5
5
  Result = Struct.new(:cols, :col_count, :row_count, :sample_row)
6
6
 
7
7
  # Analyzes the file structure
8
+ #
9
+ # | Name | C1 | C2 |
10
+ # | A | a | b |
11
+ #
12
+ # 3 columns: ["Name", "C1", "C2"]
13
+ # 2 rows
14
+ #
15
+ # Row sample data:
16
+ # A;a;b
17
+ #
18
+ # Column index: Column name | Column sample value
19
+ # 0: Name | A
20
+ # 1: C1 | a
21
+ # 2: C2 | b
8
22
  class Analyzer
9
23
 
10
24
  # File that is analyzed
data/lib/sycsvpro/join.rb CHANGED
@@ -152,7 +152,7 @@ module Sycsvpro
152
152
 
153
153
  end
154
154
  end
155
-
155
+
156
156
  # Initializes the column positions where the source file columns have to
157
157
  # be inserted. If no column positions are provided the inserted columns
158
158
  # are put at the beginning of the row
@@ -0,0 +1,127 @@
1
+ # Operating csv files
2
+ module Sycsvpro
3
+
4
+ # Merge files based on common header columns
5
+ #
6
+ # file1.csv
7
+ #
8
+ # | | 2010 | 2011 | 2012 | 2013 |
9
+ # | --- | ---- | ---- | ---- | ---- |
10
+ # | SP | 20 | 30 | 40 | 50 |
11
+ # | RP | 30 | 40 | 50 | 60 |
12
+ #
13
+ # file2.csv
14
+ #
15
+ # | | 2010 | 2011 | 2012 |
16
+ # | --- | ---- | ---- | ---- |
17
+ # | M | m1 | m2 | m3 |
18
+ # | N | n1 | n2 | n3 |
19
+ #
20
+ # merging results in
21
+ #
22
+ # merge.csv
23
+ #
24
+ # | | 2010 | 2011 | 2012 | 2013 |
25
+ # | --- | ---- | ---- | ---- | ---- |
26
+ # | SP | 20 | 30 | 40 | 50 |
27
+ # | RP | 30 | 40 | 50 | 60 |
28
+ # | M | m1 | m2 | m3 | |
29
+ # | N | n1 | n2 | n3 | |
30
+ #
31
+ class Merger
32
+
33
+ include Dsl
34
+
35
+ # file to that the result is written
36
+ attr_reader :outfile
37
+ # header patterns to be used to identify merge columns
38
+ attr_reader :source_header
39
+ # header columns
40
+ attr_reader :header_cols
41
+ # value that is used as first column of a row
42
+ attr_reader :key
43
+ # files to be merged based on header columns
44
+ attr_reader :files
45
+ # file to that the result is written to
46
+ attr_reader :outfile
47
+
48
+ # Merge files based on common header columns
49
+ #
50
+ # :call-seq:
51
+ # Sycsvpro::Merger.new(outfile: "out.csv",
52
+ # files: "file1.csv,file2.csv,filen.csv",
53
+ # header: "2010,2011,2012,2013,2014",
54
+ # source_header: "(\\d{4}),(\\d{4})",
55
+ # key: "0,0").execute
56
+ #
57
+ # Semantics
58
+ # =========
59
+ # Merges the files file1.csv, file2.csv ... based on the header columns
60
+ # 2010, 2011, 2012, 2013 and 2014 where columns are identified by the
61
+ # regex /(\d{4})/. The first column in a row is column 0 of the file1.csv
62
+ # and so on.
63
+ #
64
+ # outfile:: result is written to the outfile
65
+ # files:: list of files that get merged. In the result file the files are
66
+ # inserted in the sequence they are provided
67
+ # header:: header of the result file and key for assigning column values
68
+ # from source files to result file
69
+ # source_header:: pattern for each header of the source file to determine
70
+ # the column. The pattern is a regex without the enclosing slashes '/'
71
+ # key:: first column value from the source file that is used as first
72
+ # column in the target file
73
+ def initialize(options = {})
74
+ @outfile = options[:outfile]
75
+ @header_cols = options[:header].split(',')
76
+ @source_header = options[:source_header].split(',')
77
+ @key = options[:key].split(',')
78
+ @files = options[:files].split(',')
79
+ end
80
+
81
+ # Merges the files based on the provided parameters
82
+ def execute
83
+ File.open(outfile, 'w') do |out|
84
+ out.puts ";#{header_cols.join(';')}"
85
+ files.each do |file|
86
+ @current_key = @key.shift
87
+ @current_source_header = @source_header.shift
88
+ processed_header = false
89
+ File.open(file).each_with_index do |line, index|
90
+ next if line.chomp.empty?
91
+
92
+ unless processed_header
93
+ create_file_header unstring(line).split(';')
94
+ processed_header = true
95
+ next
96
+ end
97
+
98
+ out.puts create_line unstring(line).split(';')
99
+ end
100
+ end
101
+ end
102
+ end
103
+
104
+ private
105
+
106
+ # create a filter for the columns that match the header filter
107
+ def create_file_header(columns)
108
+ columns.each_with_index do |c,i|
109
+ next if i == @current_key
110
+ columns[i] = c.scan(Regexp.new(@current_source_header)).flatten[0]
111
+ end
112
+
113
+ @file_header = [@current_key.to_i]
114
+ header_cols.each do |h|
115
+ @file_header << columns.index(h)
116
+ end
117
+ @file_header.compact!
118
+ end
119
+
120
+ # create a line filtered by the file_header
121
+ def create_line(columns)
122
+ columns.values_at(*@file_header).join(';')
123
+ end
124
+
125
+ end
126
+
127
+ end
@@ -1,5 +1,5 @@
1
1
  # Operating csv files
2
2
  module Sycsvpro
3
3
  # Version number of sycsvpro
4
- VERSION = '0.1.9'
4
+ VERSION = '0.1.10'
5
5
  end
data/lib/sycsvpro.rb CHANGED
@@ -15,3 +15,4 @@ require 'sycsvpro/sorter.rb'
15
15
  require 'sycsvpro/aggregator.rb'
16
16
  require 'sycsvpro/table.rb'
17
17
  require 'sycsvpro/join.rb'
18
+ require 'sycsvpro/merger.rb'
@@ -0,0 +1,105 @@
1
+ require 'sycsvpro/merger.rb'
2
+
3
+ module Sycsvpro
4
+
5
+ describe Merger do
6
+
7
+ before do
8
+ @file1 = File.join(File.dirname(__FILE__), "files/merge1.csv")
9
+ @file2 = File.join(File.dirname(__FILE__), "files/merge2.csv")
10
+ @outfile = File.join(File.dirname(__FILE__), "files/merged.csv")
11
+ end
12
+
13
+ it "should merge two files" do
14
+ header = "2010,2011,2012,2014"
15
+ key = "0,0"
16
+ source_header = "(\\d{4}),(\\d{4})"
17
+
18
+ Sycsvpro::Merger.new(outfile: @outfile,
19
+ files: "#{@file1},#{@file2}",
20
+ header: header,
21
+ key: key,
22
+ source_header: source_header).execute
23
+
24
+ result = [ ";2010;2011;2012;2014",
25
+ "SP;20;30;40;60",
26
+ "RP;30;40;50;70",
27
+ "MP;40;50;60;80",
28
+ "NP;50;60;70;90",
29
+ "M;m1;m2;m3",
30
+ "N;n1;n2;n3",
31
+ "O;o1;;o3", ]
32
+
33
+ rows = 0
34
+
35
+ File.open(@outfile).each_with_index do |row, index|
36
+ row.chomp.should eq result[index]
37
+ rows += 1
38
+ end
39
+
40
+ rows.should eq result.size
41
+ end
42
+
43
+ it "should merge two files with differnt key columns in the middle" do
44
+ header = "2010,2011,2012,2014"
45
+ key = "0,3"
46
+ source_header = "(\\d{4}),(\\d{4})"
47
+
48
+ Sycsvpro::Merger.new(outfile: @outfile,
49
+ files: "#{@file1},#{@file2}",
50
+ header: header,
51
+ key: key,
52
+ source_header: source_header).execute
53
+
54
+ result = [ ";2010;2011;2012;2014",
55
+ "SP;20;30;40;60",
56
+ "RP;30;40;50;70",
57
+ "MP;40;50;60;80",
58
+ "NP;50;60;70;90",
59
+ "MO;m1;m2;m3",
60
+ "NO;n1;n2;n3",
61
+ "OO;o1;;o3", ]
62
+
63
+ rows = 0
64
+
65
+ File.open(@outfile).each_with_index do |row, index|
66
+ row.chomp.should eq result[index]
67
+ rows += 1
68
+ end
69
+
70
+ rows.should eq result.size
71
+ end
72
+
73
+ it "should merge two files with differnt key columns at the end" do
74
+ header = "2010,2011,2012,2014"
75
+ key = "0,6"
76
+ source_header = "(\\d{4}),(\\d{4})"
77
+
78
+ Sycsvpro::Merger.new(outfile: @outfile,
79
+ files: "#{@file1},#{@file2}",
80
+ header: header,
81
+ key: key,
82
+ source_header: source_header).execute
83
+
84
+ result = [ ";2010;2011;2012;2014",
85
+ "SP;20;30;40;60",
86
+ "RP;30;40;50;70",
87
+ "MP;40;50;60;80",
88
+ "NP;50;60;70;90",
89
+ "MI;m1;m2;m3",
90
+ "NI;n1;n2;n3",
91
+ "OI;o1;;o3", ]
92
+
93
+ rows = 0
94
+
95
+ File.open(@outfile).each_with_index do |row, index|
96
+ row.chomp.should eq result[index]
97
+ rows += 1
98
+ end
99
+
100
+ rows.should eq result.size
101
+ end
102
+
103
+ end
104
+
105
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sycsvpro
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.9
4
+ version: 0.1.10
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-06-29 00:00:00.000000000 Z
12
+ date: 2014-07-05 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
@@ -144,6 +144,7 @@ files:
144
144
  - lib/sycsvpro/inserter.rb
145
145
  - lib/sycsvpro/join.rb
146
146
  - lib/sycsvpro/mapper.rb
147
+ - lib/sycsvpro/merger.rb
147
148
  - lib/sycsvpro/profiler.rb
148
149
  - lib/sycsvpro/row_filter.rb
149
150
  - lib/sycsvpro/script_creator.rb
@@ -167,6 +168,7 @@ files:
167
168
  - spec/sycsvpro/inserter_spec.rb
168
169
  - spec/sycsvpro/join_spec.rb
169
170
  - spec/sycsvpro/mapper_spec.rb
171
+ - spec/sycsvpro/merger_spec.rb
170
172
  - spec/sycsvpro/profiler_spec.rb
171
173
  - spec/sycsvpro/row_filter_spec.rb
172
174
  - spec/sycsvpro/script_list_spec.rb