RubyGems - sycsvpro - Versions diffs - 0.1.9 → 0.1.10 - Mend

sycsvpro 0.1.9 → 0.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

data/Gemfile.lock +1 -1
data/README.md +37 -3
data/bin/sycsvpro +38 -2
data/lib/sycsvpro/aggregator.rb +13 -0
data/lib/sycsvpro/allocator.rb +12 -0
data/lib/sycsvpro/analyzer.rb +14 -0
data/lib/sycsvpro/join.rb +1 -1
data/lib/sycsvpro/merger.rb +127 -0
data/lib/sycsvpro/version.rb +1 -1
data/lib/sycsvpro.rb +1 -0
data/spec/sycsvpro/merger_spec.rb +105 -0
metadata +4 -2

data/Gemfile.lock CHANGED Viewed

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    sycsvpro (0.1.9)
+    sycsvpro (0.1.10)
       gli (= 2.9.0)
       timeleap (~> 0.0.1)

data/README.md CHANGED Viewed

@@ -19,13 +19,14 @@ Processing of csv files. *sycsvpro* offers following functions
 * create a table from a source file with dynamically create columns (since
   version 0.1.4)
 * join two file based on a joint column value (since version 0.1.7)
+* merge files based on common headline columns (since version 0.1.10)
 To get help type
     $ sycsvpro -h
-In the following examples we assume the following files 'machines.csv' and
-'region.csv'
+In the following examples we assume the following files 'machines.csv',
+'region.csv' and 'revenue.csv'
 ```
 customer;machine;control;drive;motor;date;contract;price;c-id
@@ -44,6 +45,13 @@ R3;US;345
 R4;CA;456
 ```
+```
+2010;2011;2012;2013;2014;customer
+50;100;150;100;200;hello
+100;50;10;1000;20;indix
+2000;250;300;3000;chiro
+```
 Analyze
 -------
 Analyze the content of the provided file *in.csv*
@@ -220,7 +228,27 @@ on streak.
                                                      -i "COUNTRY,REGION"
                                                      -j "3=8;3=10"
+Merge
+-----
+Merge files machine_count.csv and revenue.csv based on the year columns.
+    $ sycsvpro -o out.csv merge machines.csv,revenue.csv
+                                -h "2010,2013,2014"
+                                -k "0,5"
+                                -s "(\\d{4}),(\\d{4})"
+This will create the out.csv
+```
+;2010;2013;2014
+hello;1;0;0
+indix;1;0;0
+chiro;0;1;0
+hello;50;100;200
+indix;100;1000;20
+chiro;2000;300;3000
+```
 Sort
 ----
 Sort rows on specified columns as an example sort rows based on customer
@@ -439,6 +467,12 @@ Version 0.1.9
 * When creating columns dynamically they are in arbitrary sequence. You can now
   provide a switch `sort: "2"` which will sort the header from column 2 on.
+Version 0.1.10
+--------------
+* It is now possible to merge multiple files based on common headline columns
+* Fix ~/.syc/sycsvpro system directory creation when no .syc directory is
+ available
 Installation
 ============
 [![Gem Version](https://badge.fury.io/rb/sycsvpro.png)](http://badge.fury.io/rb/sycsvpro)

data/bin/sycsvpro CHANGED Viewed

@@ -21,7 +21,7 @@ row_regex = %r{
 sycsvpro_directory = File.expand_path("~/.syc/sycsvpro")
 unless File.exists? sycsvpro_directory
-  Dir.mkdir sycsvpro_directory
+  FileUtils.mkdir_p sycsvpro_directory
 end
 # Script directory
@@ -427,6 +427,37 @@ command :join do |c|
   end
 end
+desc 'Merge multiple files based on a common column value with a key value at '+
+     'the first column of a row'
+arg_name 'FILE1 FILE2 ...'
+command :merge do |c|
+  c.desc 'The key columns in the source files, which contains the columns to '+
+         'be inserted into the outfile as first row column'
+  c.arg_name '0,3'
+  c.flag [:k, :key], :must_match => /^\d+(?:,\d+)*/
+  c.desc 'Header columns to be used as identifires for the columns of the '+
+         'merging files'
+  c.arg_name 'COL1,COL2,COL3'
+  c.flag [:h, :header]
+  c.desc 'Header column patterns to be used as the identifier of the columns '+
+         'of the files to be merged into the outfile'
+  c.arg_name 'PATTERN1,PATTERN2'
+  c.flag [:s, :source_header]
+  c.action do |global_options,options,args|
+    merge = Sycsvpro::Merger.new(outfile:       global_options[:o],
+                                 files:         args[0],
+                                 header:        options[:h],
+                                 source_header: options[:s],
+                                 key:           options[:k])
+    print 'Merging...'
+    merge.execute
+    print 'done'
+  end
+end
 desc 'Sort rows based on column values. It is possible to sort on multiple '+
      'columns'
 command :sort do |c|
@@ -584,11 +615,16 @@ pre do |global,command,options,args|
   when :aggregate, :allocate, :calc, :collect, :count, :extract, :map, :sort
     help_now! "You need to provide an input file '-f FILE'"     if global[:f].nil?
     help_now! "You need to provide a result file '-o OUT_FILE'" if global[:o].nil?
+  when :merge
+    help_now! "You need to provide a result file '-o OUT_FILE'" if global[:o].nil?
   end
   count = 0
-  unless command.name == :edit or command.name == :execute or command.name == :list
+  unless command.name == :edit or
+         command.name == :execute or
+         command.name == :list or
+         command.name == :merge
     analyzer = Sycsvpro::Analyzer.new(global[:f])
     result = analyzer.result
     count = result.row_count

data/lib/sycsvpro/aggregator.rb CHANGED Viewed

@@ -7,6 +7,19 @@ module Sycsvpro
   # An Aggregator counts specified row values and adds a sum to the end of
   # the row
+  #
+  # in.csv
+  #
+  # | Customer | 2013 | 2014 |
+  # | A        | A1   |      |
+  # | B        | B1   | B16  |
+  # | A        | A3   | A7   |
+  #
+  # out.csv
+  #
+  # | Customer | 2013 | 2014 | Sum |
+  # | A        | 2    | 1    | 3   |
+  # | B        | 1    | 1    | 2   |
   class Aggregator
     include Dsl

data/lib/sycsvpro/allocator.rb CHANGED Viewed

@@ -2,6 +2,18 @@
 module Sycsvpro
   # Allocates columns to a key column
+  #
+  # infile.csv
+  #
+  # | Name | Product |
+  # | A    | X1      |
+  # | B    | Y2      |
+  # | A    | W10     |
+  #
+  # outfile.csv
+  #
+  # | A    | X1 | W10 |
+  # | B    | Y2 |     |
   class Allocator
     # File from that values are read

data/lib/sycsvpro/analyzer.rb CHANGED Viewed

@@ -5,6 +5,20 @@ module Sycsvpro
   Result = Struct.new(:cols, :col_count, :row_count, :sample_row)
   # Analyzes the file structure
+  #
+  # | Name | C1 | C2 |
+  # | A    | a  | b  |
+  #
+  # 3 columns: ["Name", "C1", "C2"]
+  # 2 rows
+  #
+  # Row sample data:
+  # A;a;b
+  #
+  # Column index: Column name | Column sample value
+  # 0: Name | A
+  # 1: C1 | a
+  # 2: C2 | b
   class Analyzer
     # File that is analyzed

data/lib/sycsvpro/join.rb CHANGED Viewed

@@ -152,7 +152,7 @@ module Sycsvpro
         end
       end
       # Initializes the column positions where the source file columns have to
       # be inserted. If no column positions are provided the inserted columns
       # are put at the beginning of the row

data/lib/sycsvpro/merger.rb ADDED Viewed

@@ -0,0 +1,127 @@
+# Operating csv files
+module Sycsvpro
+  # Merge files based on common header columns
+  #
+  # file1.csv
+  #
+  # |     | 2010 | 2011 | 2012 | 2013 |
+  # | --- | ---- | ---- | ---- | ---- |
+  # | SP  | 20   | 30   | 40   | 50   |
+  # | RP  | 30   | 40   | 50   | 60   |
+  #
+  # file2.csv
+  #
+  # |     | 2010 | 2011 | 2012 |
+  # | --- | ---- | ---- | ---- |
+  # | M   | m1   | m2   | m3   |
+  # | N   | n1   | n2   | n3   |
+  #
+  # merging results in
+  #
+  # merge.csv
+  #
+  # |     | 2010 | 2011 | 2012 | 2013 |
+  # | --- | ---- | ---- | ---- | ---- |
+  # | SP  | 20   | 30   | 40   | 50   |
+  # | RP  | 30   | 40   | 50   | 60   |
+  # | M   | m1   | m2   | m3   |      |
+  # | N   | n1   | n2   | n3   |      |
+  #
+  class Merger
+    include Dsl
+    # file to that the result is written
+    attr_reader :outfile
+    # header patterns to be used to identify merge columns
+    attr_reader :source_header
+    # header columns
+    attr_reader :header_cols
+    # value that is used as the first column of a row
+    attr_reader :key
+    # files to be merged based on header columns
+    attr_reader :files
+    # file to that the result is written to
+    attr_reader :outfile
+    # Merge files based on common header columns
+    #
+    # :call-seq:
+    #   Sycsvpro::Merger.new(outfile:       "out.csv",
+    #                        files:         "file1.csv,file2.csv,filen.csv",
+    #                        header:        "2010,2011,2012,2013,2014",
+    #                        source_header: "(\\d{4}),(\\d{4})",
+    #                        key:           "0,0").execute
+    #
+    # Semantics
+    # =========
+    # Merges the files file1.csv, file2.csv ... based on the header columns
+    # 2010, 2011, 2012, 2013 and 2014 where columns are identified by the
+    # regex /(\d{4})/. The first column in a row is column 0 of the file1.csv
+    # and so on.
+    #
+    # outfile:: result is written to the outfile
+    # files:: list of files that get merged. In the result file the files are
+    # inserted in the sequence they are provided
+    # header:: header of the result file and key for assigning column values
+    # from source files to result file
+    # source_header:: pattern for each header of the source file to determine
+    # the column. The pattern is a regex without the enclosing slashes '/'
+    # key:: first column value from the source file that is used as first
+    # column in the target file
+    def initialize(options = {})
+      @outfile       = options[:outfile]
+      @header_cols   = options[:header].split(',')
+      @source_header = options[:source_header].split(',')
+      @key           = options[:key].split(',')
+      @files         = options[:files].split(',')
+    end
+    # Merges the files based on the provided parameters
+    def execute
+      File.open(outfile, 'w') do |out|
+        out.puts ";#{header_cols.join(';')}"
+        files.each do |file|
+          @current_key = @key.shift
+          @current_source_header = @source_header.shift
+          processed_header = false
+          File.open(file).each_with_index do |line, index|
+            next if line.chomp.empty?
+            unless processed_header
+              create_file_header unstring(line).split(';')
+              processed_header = true
+              next
+            end
+            out.puts create_line unstring(line).split(';')
+          end
+        end
+      end
+    end
+    private
+      # create a filter for the columns that match the header filter
+      def create_file_header(columns)
+        columns.each_with_index do |c,i|
+          next if i == @current_key
+          columns[i] = c.scan(Regexp.new(@current_source_header)).flatten[0]
+        end
+        @file_header = [@current_key.to_i]
+        header_cols.each do |h|
+          @file_header << columns.index(h)
+        end
+        @file_header.compact!
+      end
+      # create a line filtered by the file_header
+      def create_line(columns)
+        columns.values_at(*@file_header).join(';')
+      end
+  end
+end

data/lib/sycsvpro/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # Operating csv files
 module Sycsvpro
   # Version number of sycsvpro
-  VERSION = '0.1.9'
+  VERSION = '0.1.10'
 end

data/lib/sycsvpro.rb CHANGED Viewed

@@ -15,3 +15,4 @@ require 'sycsvpro/sorter.rb'
 require 'sycsvpro/aggregator.rb'
 require 'sycsvpro/table.rb'
 require 'sycsvpro/join.rb'
+require 'sycsvpro/merger.rb'

data/spec/sycsvpro/merger_spec.rb ADDED Viewed

@@ -0,0 +1,105 @@
+require 'sycsvpro/merger.rb'
+module Sycsvpro
+  describe Merger do
+    before do
+      @file1   = File.join(File.dirname(__FILE__), "files/merge1.csv")
+      @file2   = File.join(File.dirname(__FILE__), "files/merge2.csv")
+      @outfile = File.join(File.dirname(__FILE__), "files/merged.csv")
+    end
+    it "should merge two files" do
+      header = "2010,2011,2012,2014"
+      key = "0,0"
+      source_header = "(\\d{4}),(\\d{4})"
+      Sycsvpro::Merger.new(outfile:       @outfile,
+                           files:         "#{@file1},#{@file2}",
+                           header:        header,
+                           key:           key,
+                           source_header: source_header).execute
+      result = [ ";2010;2011;2012;2014",
+                 "SP;20;30;40;60",
+                 "RP;30;40;50;70",
+                 "MP;40;50;60;80",
+                 "NP;50;60;70;90",
+                 "M;m1;m2;m3",
+                 "N;n1;n2;n3",
+                 "O;o1;;o3", ]
+      rows = 0
+      File.open(@outfile).each_with_index do |row, index|
+        row.chomp.should eq result[index]
+        rows += 1
+      end
+      rows.should eq result.size
+    end
+    it "should merge two files with differnt key columns in the middle" do
+      header = "2010,2011,2012,2014"
+      key = "0,3"
+      source_header = "(\\d{4}),(\\d{4})"
+      Sycsvpro::Merger.new(outfile:       @outfile,
+                           files:         "#{@file1},#{@file2}",
+                           header:        header,
+                           key:           key,
+                           source_header: source_header).execute
+      result = [ ";2010;2011;2012;2014",
+                 "SP;20;30;40;60",
+                 "RP;30;40;50;70",
+                 "MP;40;50;60;80",
+                 "NP;50;60;70;90",
+                 "MO;m1;m2;m3",
+                 "NO;n1;n2;n3",
+                 "OO;o1;;o3", ]
+      rows = 0
+      File.open(@outfile).each_with_index do |row, index|
+        row.chomp.should eq result[index]
+        rows += 1
+      end
+      rows.should eq result.size
+    end
+    it "should merge two files with differnt key columns at the end" do
+      header = "2010,2011,2012,2014"
+      key = "0,6"
+      source_header = "(\\d{4}),(\\d{4})"
+      Sycsvpro::Merger.new(outfile:       @outfile,
+                           files:         "#{@file1},#{@file2}",
+                           header:        header,
+                           key:           key,
+                           source_header: source_header).execute
+      result = [ ";2010;2011;2012;2014",
+                 "SP;20;30;40;60",
+                 "RP;30;40;50;70",
+                 "MP;40;50;60;80",
+                 "NP;50;60;70;90",
+                 "MI;m1;m2;m3",
+                 "NI;n1;n2;n3",
+                 "OI;o1;;o3", ]
+      rows = 0
+      File.open(@outfile).each_with_index do |row, index|
+        row.chomp.should eq result[index]
+        rows += 1
+      end
+      rows.should eq result.size
+    end
+  end
+end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: sycsvpro
 version: !ruby/object:Gem::Version
-  version: 0.1.9
+  version: 0.1.10
   prerelease:
 platform: ruby
 authors:
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-06-29 00:00:00.000000000 Z
+date: 2014-07-05 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
@@ -144,6 +144,7 @@ files:
 - lib/sycsvpro/inserter.rb
 - lib/sycsvpro/join.rb
 - lib/sycsvpro/mapper.rb
+- lib/sycsvpro/merger.rb
 - lib/sycsvpro/profiler.rb
 - lib/sycsvpro/row_filter.rb
 - lib/sycsvpro/script_creator.rb
@@ -167,6 +168,7 @@ files:
 - spec/sycsvpro/inserter_spec.rb
 - spec/sycsvpro/join_spec.rb
 - spec/sycsvpro/mapper_spec.rb
+- spec/sycsvpro/merger_spec.rb
 - spec/sycsvpro/profiler_spec.rb
 - spec/sycsvpro/row_filter_spec.rb
 - spec/sycsvpro/script_list_spec.rb