RubyGems - sycsvpro - Versions diffs - 0.1.9 → 0.1.10 - Mend

sycsvpro 0.1.9 → 0.1.10

Files changed (12) hide show

data/Gemfile.lock +1 -1
data/README.md +37 -3
data/bin/sycsvpro +38 -2
data/lib/sycsvpro/aggregator.rb +13 -0
data/lib/sycsvpro/allocator.rb +12 -0
data/lib/sycsvpro/analyzer.rb +14 -0
data/lib/sycsvpro/join.rb +1 -1
data/lib/sycsvpro/merger.rb +127 -0
data/lib/sycsvpro/version.rb +1 -1
data/lib/sycsvpro.rb +1 -0
data/spec/sycsvpro/merger_spec.rb +105 -0
metadata +4 -2

data/Gemfile.lock CHANGED Viewed

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    sycsvpro (0.1.9)
+    sycsvpro (0.1.10)
       gli (= 2.9.0)
       timeleap (~> 0.0.1)

data/README.md CHANGED Viewed

@@ -19,13 +19,14 @@ Processing of csv files. *sycsvpro* offers following functions
 * create a table from a source file with dynamically created columns (since
   version 0.1.4)
 * join two files based on a joint column value (since version 0.1.7)
+* merge files based on common headline columns (since version 0.1.10)
 To get help type
     $ sycsvpro -h
-In the following examples we assume the following files 'machines.csv' and
-'region.csv'
+In the following examples we assume the following files 'machines.csv',
+'region.csv' and 'revenue.csv'
 ```
 customer;machine;control;drive;motor;date;contract;price;c-id
@@ -44,6 +45,13 @@ R3;US;345
 R4;CA;456
 ```
+```
+2010;2011;2012;2013;2014;customer
+50;100;150;100;200;hello
+100;50;10;1000;20;indix
+2000;250;300;3000;chiro
+```
 Analyze
 -------
 Analyze the content of the provided file *in.csv*
@@ -220,7 +228,27 @@ on streak.
                                                      -i "COUNTRY,REGION"
                                                      -j "3=8;3=10"
+Merge
+-----
+Merge files machine_count.csv and revenue.csv based on the year columns.
+    $ sycsvpro -o out.csv merge machine_count.csv,revenue.csv
+                                -h "2010,2013,2014"
+                                -k "0,5"
+                                -s "(\\d{4}),(\\d{4})"
+This will create the out.csv
+```
+;2010;2013;2014
+hello;1;0;0
+indix;1;0;0
+chiro;0;1;0
+hello;50;100;200
+indix;100;1000;20
+chiro;2000;300;3000
+```
 Sort
 ----
 Sort rows on specified columns as an example sort rows based on customer
@@ -439,6 +467,12 @@ Version 0.1.9
 * When creating columns dynamically they are in arbitrary sequence. You can now
   provide a switch `sort: "2"` which will sort the header from column 2 on.
+Version 0.1.10
+--------------
+* It is now possible to merge multiple files based on common headline columns
+* Fix ~/.syc/sycsvpro system directory creation when no .syc directory is
+ available
 Installation
 ============
 [![Gem Version](https://badge.fury.io/rb/sycsvpro.png)](http://badge.fury.io/rb/sycsvpro)

data/bin/sycsvpro CHANGED Viewed

@@ -21,7 +21,7 @@ row_regex = %r{
 sycsvpro_directory = File.expand_path("~/.syc/sycsvpro")
 unless File.exists? sycsvpro_directory
-  Dir.mkdir sycsvpro_directory
+  FileUtils.mkdir_p sycsvpro_directory
 end
 # Script directory
@@ -427,6 +427,37 @@ command :join do |c|
   end
 end
+desc 'Merge multiple files based on a common column value with a key value at '+
+     'the first column of a row'
+arg_name 'FILE1 FILE2 ...'
+command :merge do |c|
+  c.desc 'The key columns in the source files, which contains the columns to '+
+         'be inserted into the outfile as first row column'
+  c.arg_name '0,3'
+  c.flag [:k, :key], :must_match => /^\d+(?:,\d+)*/
+  c.desc 'Header columns to be used as identifires for the columns of the '+
+         'merging files'
+  c.arg_name 'COL1,COL2,COL3'
+  c.flag [:h, :header]
+  c.desc 'Header column patterns to be used as the identifier of the columns '+
+         'of the files to be merged into the outfile'
+  c.arg_name 'PATTERN1,PATTERN2'
+  c.flag [:s, :source_header]
+  c.action do |global_options,options,args|
+    merge = Sycsvpro::Merger.new(outfile:       global_options[:o],
+                                 files:         args[0],
+                                 header:        options[:h],
+                                 source_header: options[:s],
+                                 key:           options[:k])
+    print 'Merging...'
+    merge.execute
+    print 'done'
+  end
+end
 desc 'Sort rows based on column values. It is possible to sort on multiple '+
      'columns'
 command :sort do |c|
@@ -584,11 +615,16 @@ pre do |global,command,options,args|
   when :aggregate, :allocate, :calc, :collect, :count, :extract, :map, :sort
     help_now! "You need to provide an input file '-f FILE'"     if global[:f].nil?
     help_now! "You need to provide a result file '-o OUT_FILE'" if global[:o].nil?
+  when :merge
+    help_now! "You need to provide a result file '-o OUT_FILE'" if global[:o].nil?
   end
   count = 0
-  unless command.name == :edit or command.name == :execute or command.name == :list
+  unless command.name == :edit or
+         command.name == :execute or
+         command.name == :list or
+         command.name == :merge
     analyzer = Sycsvpro::Analyzer.new(global[:f])
     result = analyzer.result
     count = result.row_count

data/lib/sycsvpro/aggregator.rb CHANGED Viewed

@@ -7,6 +7,19 @@ module Sycsvpro
   # An Aggregator counts specified row values and adds a sum to the end of
   # the row
+  #
+  # in.csv
+  #
+  # | Customer | 2013 | 2014 |
+  # | A        | A1   |      |
+  # | B        | B1   | B16  |
+  # | A        | A3   | A7   |
+  #
+  # out.csv
+  #
+  # | Customer | 2013 | 2014 | Sum |
+  # | A        | 2    | 1    | 3   |
+  # | B        | 1    | 1    | 2   |
   class Aggregator
     include Dsl

data/lib/sycsvpro/allocator.rb CHANGED Viewed

@@ -2,6 +2,18 @@
 module Sycsvpro
   # Allocates columns to a key column
+  #
+  # infile.csv
+  #
+  # | Name | Product |
+  # | A    | X1      |
+  # | B    | Y2      |
+  # | A    | W10     |
+  #
+  # outfile.csv
+  #
+  # | A    | X1 | W10 |
+  # | B    | Y2 |     |
   class Allocator
     # File from that values are read

data/lib/sycsvpro/analyzer.rb CHANGED Viewed

@@ -5,6 +5,20 @@ module Sycsvpro
   Result = Struct.new(:cols, :col_count, :row_count, :sample_row)
   # Analyzes the file structure
+  #
+  # | Name | C1 | C2 |
+  # | A    | a  | b  |
+  #
+  # 3 columns: ["Name", "C1", "C2"]
+  # 2 rows
+  #
+  # Row sample data:
+  # A;a;b
+  #
+  # Column index: Column name | Column sample value
+  # 0: Name | A
+  # 1: C1 | a
+  # 2: C2 | b
   class Analyzer
     # File that is analyzed

data/lib/sycsvpro/join.rb CHANGED Viewed

@@ -152,7 +152,7 @@ module Sycsvpro
         end
       end
       # Initializes the column positions where the source file columns have to
       # be inserted. If no column positions are provided the inserted columns
       # are put at the beginning of the row

data/lib/sycsvpro/merger.rb ADDED Viewed

@@ -0,0 +1,127 @@
+# Operating csv files
+module Sycsvpro
+  # Merge files based on common header columns
+  #
+  # file1.csv
+  #
+  # |     | 2010 | 2011 | 2012 | 2013 |
+  # | --- | ---- | ---- | ---- | ---- |
+  # | SP  | 20   | 30   | 40   | 50   |
+  # | RP  | 30   | 40   | 50   | 60   |
+  #
+  # file2.csv
+  #
+  # |     | 2010 | 2011 | 2012 |
+  # | --- | ---- | ---- | ---- |
+  # | M   | m1   | m2   | m3   |
+  # | N   | n1   | n2   | n3   |
+  #
+  # merging results in
+  #
+  # merge.csv
+  #
+  # |     | 2010 | 2011 | 2012 | 2013 |
+  # | --- | ---- | ---- | ---- | ---- |
+  # | SP  | 20   | 30   | 40   | 50   |
+  # | RP  | 30   | 40   | 50   | 60   |
+  # | M   | m1   | m2   | m3   |      |
+  # | N   | n1   | n2   | n3   |      |
+  #
+  class Merger
+    include Dsl
+    # file to that the result is written
+    attr_reader :outfile
+    # header patterns to be used to identify merge columns
+    attr_reader :source_header
+    # header columns
+    attr_reader :header_cols
+    # value that is used as first of column of a row
+    attr_reader :key
+    # files to be merged based on header columns
+    attr_reader :files
+    # file to that the result is written to
+    attr_reader :outfile
+    # Merge files based on common header columns
+    #
+    # :call-seq:
+    #   Sycsvpro::Merger.new(outfile:       "out.csv",
+    #                        files:         "file1.csv,file2.csv,filen.csv",
+    #                        header:        "2010,2011,2012,2013,2014",
+    #                        source_header: "(\\d{4}/),(/\\d{4}/)",
+    #                        key:           "0,0").execute
+    #
+    # Semantics
+    # =========
+    # Merges the files file1.csv, file2.csv ... based on the header columns
+    # 2010, 2011, 2012, 2013 and 2014 where columns are identified by the
+    # regex /(\d{4})/. The first column in a row is column 0 of the file1.csv
+    # and so on.
+    #
+    # outfile:: result is written to the outfile
+    # files:: list of files that get merged. In the result file the files are
+    # inserted in the sequence they are provided
+    # header:: header of the result file and key for assigning column values
+    # from source files to result file
+    # source_header:: pattern for each header of the source file to determine
+    # the column. The pattern is a regex without the enclosing slashes '/'
+    # key:: first column value from the source file that is used as first
+    # column in the target file
+    def initialize(options = {})
+      @outfile       = options[:outfile]
+      @header_cols   = options[:header].split(',')
+      @source_header = options[:source_header].split(',')
+      @key           = options[:key].split(',')
+      @files         = options[:files].split(',')
+    end
+    # Merges the files based on the provided parameters
+    def execute
+      File.open(outfile, 'w') do |out|
+        out.puts ";#{header_cols.join(';')}"
+        files.each do |file|
+          @current_key = @key.shift
+          @current_source_header = @source_header.shift
+          processed_header = false
+          File.open(file).each_with_index do |line, index|
+            next if line.chomp.empty?
+            unless processed_header
+              create_file_header unstring(line).split(';')
+              processed_header = true
+              next
+            end
+            out.puts create_line unstring(line).split(';')
+          end
+        end
+      end
+    end
+    private
+      # create a filter for the columns that match the header filter
+      def create_file_header(columns)
+        columns.each_with_index do |c,i|
+          next if i == @current_key
+          columns[i] = c.scan(Regexp.new(@current_source_header)).flatten[0]
+        end
+        @file_header = [@current_key.to_i]
+        header_cols.each do |h|
+          @file_header << columns.index(h)
+        end
+        @file_header.compact!
+      end
+      # create a line filtered by the file_header
+      def create_line(columns)
+        columns.values_at(*@file_header).join(';')
+      end
+  end
+end

data/lib/sycsvpro/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # Operating csv files
 module Sycsvpro
   # Version number of sycsvpro
-  VERSION = '0.1.9'
+  VERSION = '0.1.10'
 end

data/lib/sycsvpro.rb CHANGED Viewed

@@ -15,3 +15,4 @@ require 'sycsvpro/sorter.rb'
 require 'sycsvpro/aggregator.rb'
 require 'sycsvpro/table.rb'
 require 'sycsvpro/join.rb'
+require 'sycsvpro/merger.rb'

data/spec/sycsvpro/merger_spec.rb ADDED Viewed

@@ -0,0 +1,105 @@
+require 'sycsvpro/merger.rb'
+module Sycsvpro
+  describe Merger do
+    before do
+      @file1   = File.join(File.dirname(__FILE__), "files/merge1.csv")
+      @file2   = File.join(File.dirname(__FILE__), "files/merge2.csv")
+      @outfile = File.join(File.dirname(__FILE__), "files/merged.csv")
+    end
+    it "should merge two files" do
+      header = "2010,2011,2012,2014"
+      key = "0,0"
+      source_header = "(\\d{4}),(\\d{4})"
+      Sycsvpro::Merger.new(outfile:       @outfile,
+                           files:         "#{@file1},#{@file2}",
+                           header:        header,
+                           key:           key,
+                           source_header: source_header).execute
+      result = [ ";2010;2011;2012;2014",
+                 "SP;20;30;40;60",
+                 "RP;30;40;50;70",
+                 "MP;40;50;60;80",
+                 "NP;50;60;70;90",
+                 "M;m1;m2;m3",
+                 "N;n1;n2;n3",
+                 "O;o1;;o3", ]
+      rows = 0
+      File.open(@outfile).each_with_index do |row, index|
+        row.chomp.should eq result[index]
+        rows += 1
+      end
+      rows.should eq result.size
+    end
+    it "should merge two files with differnt key columns in the middle" do
+      header = "2010,2011,2012,2014"
+      key = "0,3"
+      source_header = "(\\d{4}),(\\d{4})"
+      Sycsvpro::Merger.new(outfile:       @outfile,
+                           files:         "#{@file1},#{@file2}",
+                           header:        header,
+                           key:           key,
+                           source_header: source_header).execute
+      result = [ ";2010;2011;2012;2014",
+                 "SP;20;30;40;60",
+                 "RP;30;40;50;70",
+                 "MP;40;50;60;80",
+                 "NP;50;60;70;90",
+                 "MO;m1;m2;m3",
+                 "NO;n1;n2;n3",
+                 "OO;o1;;o3", ]
+      rows = 0
+      File.open(@outfile).each_with_index do |row, index|
+        row.chomp.should eq result[index]
+        rows += 1
+      end
+      rows.should eq result.size
+    end
+    it "should merge two files with differnt key columns at the end" do
+      header = "2010,2011,2012,2014"
+      key = "0,6"
+      source_header = "(\\d{4}),(\\d{4})"
+      Sycsvpro::Merger.new(outfile:       @outfile,
+                           files:         "#{@file1},#{@file2}",
+                           header:        header,
+                           key:           key,
+                           source_header: source_header).execute
+      result = [ ";2010;2011;2012;2014",
+                 "SP;20;30;40;60",
+                 "RP;30;40;50;70",
+                 "MP;40;50;60;80",
+                 "NP;50;60;70;90",
+                 "MI;m1;m2;m3",
+                 "NI;n1;n2;n3",
+                 "OI;o1;;o3", ]
+      rows = 0
+      File.open(@outfile).each_with_index do |row, index|
+        row.chomp.should eq result[index]
+        rows += 1
+      end
+      rows.should eq result.size
+    end
+  end
+end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: sycsvpro
 version: !ruby/object:Gem::Version
-  version: 0.1.9
+  version: 0.1.10
   prerelease:
 platform: ruby
 authors:
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-06-29 00:00:00.000000000 Z
+date: 2014-07-05 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
@@ -144,6 +144,7 @@ files:
 - lib/sycsvpro/inserter.rb
 - lib/sycsvpro/join.rb
 - lib/sycsvpro/mapper.rb
+- lib/sycsvpro/merger.rb
 - lib/sycsvpro/profiler.rb
 - lib/sycsvpro/row_filter.rb
 - lib/sycsvpro/script_creator.rb
@@ -167,6 +168,7 @@ files:
 - spec/sycsvpro/inserter_spec.rb
 - spec/sycsvpro/join_spec.rb
 - spec/sycsvpro/mapper_spec.rb
+- spec/sycsvpro/merger_spec.rb
 - spec/sycsvpro/profiler_spec.rb
 - spec/sycsvpro/row_filter_spec.rb
 - spec/sycsvpro/script_list_spec.rb