RubyGems - sycsvpro - Versions diffs - 0.1.4 → 0.1.7 - Mend

sycsvpro 0.1.4 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

data/Gemfile.lock +1 -1
data/README.md +113 -21
data/bin/sycsvpro +98 -25
data/lib/sycsvpro/calculator.rb +50 -10
data/lib/sycsvpro/dsl.rb +12 -0
data/lib/sycsvpro/header.rb +24 -8
data/lib/sycsvpro/join.rb +159 -0
data/lib/sycsvpro/table.rb +83 -5
data/lib/sycsvpro/version.rb +1 -1
data/lib/sycsvpro.rb +1 -0
data/spec/sycsvpro/calculator_spec.rb +31 -1
data/spec/sycsvpro/header_spec.rb +7 -1
data/spec/sycsvpro/join_spec.rb +178 -0
data/spec/sycsvpro/table_spec.rb +153 -2
data/sycsvpro.rdoc +9 -4
metadata +4 -2

data/lib/sycsvpro/join.rb ADDED Viewed

@@ -0,0 +1,159 @@
+# Operating csv files
+module Sycsvpro
+  # Join joins two files based on a join key value.
+  # Example
+  # File 1 (infile)
+  #     |Name |ID |
+  #     |Hank |123|
+  #     |Frank|234|
+  #     |Mia  |345|
+  #     |Moira|234|
+  #
+  # File 2 (source)
+  #     |Company|Phone|ID|
+  #     |Siem   |4848 |123|
+  #     |Helo   |993  |345|
+  #     |Wara   |3333 |234|
+  #
+  # File 3 (outfile)
+  #     |Name |ID |Company|Phone|
+  #     |Hank |123|Siem   |4848 |
+  #     |Frank|234|Wara   |3333 |
+  #     |Mia  |345|Helo   |993  |
+  #     |Moira|234|Wara   |3333 |
+  class Join
+    include Dsl
+    # infile contains the data that is operated on
+    attr_reader :infile
+    # outfile is the file where the result is written to
+    attr_reader :outfile
+    # source file from where columns are inserted into infile
+    attr_reader :source
+    # filter that is used for rows
+    attr_reader :row_filter
+    # columns to insert
+    attr_reader :columns
+    # posititon where to insert the columns into the infile
+    attr_reader :positions
+    # header of the outfile
+    attr_reader :header
+    # indicates whether the infile is headerless
+    attr_reader :headerless
+    # lookup table where the assigned values are stored at
+    attr_reader :lookup_table
+    # Creates a Join which can be invoked as follows
+    # :call-seq:
+    #   Sycsvpro::Join.new(infile:  "in.csv",
+    #                      outfile: "out.csv",
+    #                      source:  "source.csv",
+    #                      rows:    "1-eof",
+    #                      cols:    "0,1",
+    #                      pos:     "2,3",
+    #                      joins:   "2=1",
+    #                      headerless: true,
+    #                      header:  "*",
+    #                      insert_header: "Company,Phone").execute
+    #
+    # infile:: csv file to operate on
+    # outfile:: csv file with the result
+    # source:: csv file that contains the values to join to infile
+    # rows: rows to consider for operation. Rows that don't match the pattern
+    #       will be skipped for operation
+    # cols:: columns to insert from the source to the infile
+    # pos:: column positions where to insert the values and the insert_header
+    #       columns
+    # joins:: columns that match in infile and source.
+    #         source_column=infile_column
+    # headerless:: indicates whether the infile has a header (default true)
+    # header:: Header of the csv file
+    # insert_header:: column names of the to be inserted values
+    def initialize(options = {})
+      @infile     = options[:infile]
+      @outfile    = options[:outfile]
+      @source     = options[:source]
+      @row_filter = RowFilter.new(options[:rows], df: options[:df])
+      @columns    = options[:cols].split(',').collect { |c| c.to_i }
+      @positions  = col_positions(options[:pos], @columns)
+      @joins      = options[:joins].split('=').collect { |j| j.to_i }
+      @headerless = options[:headerless].nil? ? false : options[:headerless]
+      @header     = Header.new(options[:header] || '*',
+                               pos:    @positions,
+                               insert: options[:insert_header])
+      create_lookup_table
+    end
+    # Executes the join
+    def execute
+      processed_header = headerless ? true : false
+      File.open(outfile, 'w') do |out|
+        File.open(infile).each_with_index do |line, index|
+          line = line.chomp
+          next if line.empty?
+          line = unstring(line).chomp
+          unless processed_header
+            header_line = header.process(line)
+            out.puts header unless header_line.empty?
+            processed_header = true
+            next
+          end
+          next if row_filter.process(line, row: index).nil?
+          values = line.split(';')
+          key = values[@joins[1]]
+          row = lookup_table[:rows][key] || []
+          lookup_table[:pos].sort.each { |p| values.insert(p, "") }
+          lookup_table[:pos].each_with_index { |p,i| values[p] = row[i] }
+          out.puts values.join(';')
+        end
+      end
+    end
+    private
+      # Creates a lookup table from the source file values. The join column of
+      # the source file is the key
+      def create_lookup_table
+        @lookup_table = { pos: positions, rows: {} }
+        File.open(source).each_with_index do |line|
+          next if line.chomp.empty?
+          values = unstring(line).chomp.split(';')
+          next if values.empty?
+          key = values[@joins[0]]
+          lookup_table[:rows][key] = []
+          columns.each do |i|
+            lookup_table[:rows][key] << values[i]
+          end
+        end
+      end
+      # Initializes the column positions where the source file columns have to
+      # be inserted. If no column positions are provided the inserted columns
+      # are put at the beginning of the row
+      def col_positions(pos, cols)
+        if pos.nil? || pos.empty?
+          Array.new(cols.size) { |c| c }
+        else
+          pos.split(',').collect { |p| p.to_i }
+        end
+      end
+  end
+end

data/lib/sycsvpro/table.rb CHANGED Viewed

@@ -3,12 +3,28 @@ require_relative 'header'
 require_relative 'dsl'
 require 'date'
+# Operating csv files
 module Sycsvpro
+  # Extracts values from a csv file and allows associating values with key
+  # values. Columns can be created dynamically based on the content of columns.
+  # Example:
+  # File 1 (infile)
+  #     Date       | Order-Type | Revenue
+  #     01.01.2013 | AZ         | 22.50
+  #     13.04.2014 | BZ         | 33.40
+  #     16.12.2014 | CZ         | 12.80
+  #
+  # File 2 (outfile)
+  #     Year | AZ    | BZ    | CZ    | Total
+  #     2013 | 22.50 |       |       | 22.50
+  #     2014 |       | 33.40 | 12.80 | 46.20
   class Table
     include Dsl
+    # Regex to split parameters
+    COL_SPLITTER = /,(?=[\w +]*:)/
     # infile contains the data that is operated on
     attr_reader :infile
     # outfile is the file where the result is written to
@@ -34,21 +50,41 @@ module Sycsvpro
     #                       header:  "Year,c6,c1",
     #                       key:     "c0=~/\\.(\\d{4})/,c6",
     #                       cols:    "Value:+n1,c2+c3:+n1",
-    #                       nf:      "DE").execute
+    #                       nf:      "DE",
+    #                       pr:      "2",
+    #                       sum:     "TOP:Value,c2+c3").execute
+    #
+    # infile:: csv file to operate on
+    # outfile:: csv file with the result
+    # df:: date format
+    # nf:: number format of number values. "DE" e.g. is 1.000,00 whereas
+    #      US is 1,000.00
+    # pr:: precision of number values. Default 2
+    # rows:: rows to consider for operation. Rows that don't match the pattern
+    #        will be skipped for operation
+    # header:: Header of the csv file
+    # key:: Values located at value 0 and subsequent columns
+    # cols:: Values added to columns based on an operation or assignment
+    # sum:: sum row at specified position top or eof
     def initialize(options = {})
       @infile      = options[:infile]
       @outfile     = options[:outfile]
       @date_format = options[:df] || "%Y-%m-%d"
       @row_filter  = RowFilter.new(options[:rows], df: options[:df])
       @header      = Header.new(options[:header])
-      @keys        = options[:key].split(',')
-      @cols        = options[:cols].split(',')
+      @keys        = split_by_comma_regex(options[:key])
+      @cols        = options[:cols].split(COL_SPLITTER)
       @number_format = options[:nf] || 'EN'
+      # pr may be given as a String (e.g. "2") but is used as the argument of
+      # #round, which requires an Integer
+      @precision     = (options[:pr] || 2).to_i
+      prepare_sum_row options[:sum]
       @rows        = {}
     end
     # Retrieves the values from a row as the result of a arithmetic operation
-    # with #eval
+    # with #eval. It recognizes
+    # c:: string value
+    # n:: number value
+    # d:: date value
     def method_missing(id, *args, &block)
       return @columns[$1.to_i]            if id =~ /c(\d+)/
       return to_number(@columns[$1.to_i]) if id =~ /n(\d+)/
@@ -93,6 +129,7 @@ module Sycsvpro
     def write_to_file
       File.open(outfile, 'w') do |out|
         out.puts header.to_s
+        out.puts create_sum_row if @sum_row_pos == 'TOP'
         rows.each do |key, row|
           line = [] << row[:key]
           header.clear_header_cols.each_with_index do |col, index|
@@ -101,6 +138,7 @@ module Sycsvpro
           end
           out.puts line.flatten.join(';')
         end
+        out.puts create_sum_row if @sum_row_pos == 'EOF'
       end
     end
@@ -122,7 +160,11 @@ module Sycsvpro
       @cols.each do |col|
         column, formula = col.split(':')
         column = evaluate(column) if column =~ /^c\d+[=~+]/
-        row[:cols][column] = eval("#{row[:cols][column]}#{formula}")
+        previous_value = row[:cols][column]
+        if value = eval("#{row[:cols][column]}#{formula}")
+          row[:cols][column] = value.round(@precision)
+          add_to_sum_row(row[:cols][column] - previous_value, column)
+        end
       end
     end
@@ -168,6 +210,42 @@ module Sycsvpro
         end
       end
+      # Initializes sum_row_pos, sum_row and sum_row_patterns based on the
+      # provided sum option
+      def prepare_sum_row(pattern)
+        return if pattern.nil? || pattern.empty?
+        @sum_row_pos, sum_row_pattern = pattern.split(':')
+        @sum_row_pos.upcase!
+        @sum_row = Hash.new
+        @sum_row_patterns = split_by_comma_regex(sum_row_pattern)
+      end
+      # Adds a value in the specified column to the sum_row
+      def add_to_sum_row(value, column)
+        return unless @sum_row_patterns
+        @sum_row_patterns.each do |pattern|
+          if pattern =~ /^c\d+[=~+]/
+            header_column = evaluate(pattern, "")
+          else
+            header_column = pattern
+          end
+          if header_column == column
+            @sum_row[header_column] ||= 0
+            @sum_row[header_column] += value
+          end
+        end
+      end
+      # Creates the sum_row when the file has been completely processed
+      def create_sum_row
+        line = []
+        header.clear_header_cols.each_with_index do |col, index|
+          line << @sum_row[col] || ""
+        end
+        line.flatten.join(';')
+      end
   end
 end

data/lib/sycsvpro/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # Operating csv files
 module Sycsvpro
   # Version number of sycsvpro
-  VERSION = '0.1.4'
+  VERSION = '0.1.7'
 end

data/lib/sycsvpro.rb CHANGED Viewed

@@ -14,3 +14,4 @@ require 'sycsvpro/inserter.rb'
 require 'sycsvpro/sorter.rb'
 require 'sycsvpro/aggregator.rb'
 require 'sycsvpro/table.rb'
+require 'sycsvpro/join.rb'

data/spec/sycsvpro/calculator_spec.rb CHANGED Viewed

@@ -8,6 +8,7 @@ module Sycsvpro
       @in_file = File.join(File.dirname(__FILE__), "files/machines.csv")
       @in_date_file = File.join(File.dirname(__FILE__), "files/machine-delivery.csv")
       @in_number_file = File.join(File.dirname(__FILE__), "files/machine-count.csv")
+      @in_customer_file = File.join(File.dirname(__FILE__), "files/customers.csv")
       @out_file = File.join(File.dirname(__FILE__), "files/machines_out.csv")
     end
@@ -87,7 +88,7 @@ module Sycsvpro
     it "should find minimum of specified date rows" do
       header = "*,Min_Date"
-      cols   = "3:Min_Date=[d1,d2].compact.min"
+      cols   = "3:[d1,d2].compact.min"
       rows   = "1-8"
       df     = "%d.%m.%Y"
@@ -148,6 +149,35 @@ module Sycsvpro
       end
     end
+    it "should split column value into multiple column values" do
+      header = "ID,Customer,Country"
+      cols   = [ "2:s0.scan(/([A-Z]+)/).flatten[0]",
+                 "0:s0.scan(/(?<=\\/)(.*)$/).flatten[0]",
+                 "1:s1" ].join(',')
+      rows   = "1-8"
+      Calculator.new(infile:  @in_customer_file,
+                     outfile: @out_file,
+                     header:  header,
+                     rows:    rows,
+                     cols:    cols).execute
+      result = [ "ID;Customer;Country",
+                 "123945;Hank;DE",
+                 "339339;Frank;AT",
+                 "449399;Jane;DE",
+                 "33A398;Jean;US" ]
+      rows = 0
+      File.new(@out_file, 'r').each_with_index do |line, index|
+        expect(line.chomp).to eq result[index]
+        rows += 1
+      end
+      rows.should eq result.size
+    end
   end
 end

data/spec/sycsvpro/header_spec.rb CHANGED Viewed

@@ -52,6 +52,12 @@ module Sycsvpro
       header.process("5.5.2012;d1;d2;d3;d4;d5").should eq "a4;A;2012;2013;a1;B"
     end
+    # The columns of the processed line are taken over ('*'). C is expected at
+    # index 3 and D at index 7, which leaves an empty column at index 6
+    it "should create a header with positioned columns" do
+      header = Header.new("*", insert: "C,D", pos: [3,7])
+      header.process("A;B;E;F;G").should eq "A;B;E;C;F;G;;D"
+    end
     it "should return the header" do
       header = Header.new("c4,A,c0=~/\\.(\\d{4})/,c1,B")
@@ -72,7 +78,7 @@ module Sycsvpro
       header.column_of("a1").should eq 3
       header.process("3.4.2013;c1;c2;c3;c4;c5").should eq "a4;A;2012;2013;a1;B"
       header.column_of("B").should eq 5
-     end
+    end
   end

data/spec/sycsvpro/join_spec.rb ADDED Viewed

@@ -0,0 +1,178 @@
+require 'sycsvpro/join'
+module Sycsvpro
+  describe Join do
+    before do
+      @in_file = File.join(File.dirname(__FILE__), "files/persons.csv")
+      @source_file = File.join(File.dirname(__FILE__), "files/countries.csv")
+      @out_file = File.join(File.dirname(__FILE__), "files/persons-countries.csv")
+    end
+    it "should join files based on person ID" do
+      cols           = "1,2"
+      insert_col_pos = "2,1"
+      insert_header  = "COUNTRY,STATE"
+      header         = "*"
+      joins          = "0=1"
+      rows           = "1-4"
+      Sycsvpro::Join.new(infile:         @in_file,
+                         outfile:        @out_file,
+                         source:         @source_file,
+                         cols:           cols,
+                         joins:          joins,
+                         insert_header:  insert_header,
+                         pos:            insert_col_pos,
+                         header:         header,
+                         rows:           rows).execute
+      result = [ "Name;STATE;COUNTRY;N_ID",
+                 "Hank;A4;AT;123",
+                 "Frank;C3;CA;234",
+                 "Mia;D1;DE;345",
+                 "Arwen;U2;US;456" ]
+      rows = 0
+      File.new(@out_file, 'r').each_with_index do |line, index|
+        expect(line.chomp).to eq result[index]
+        rows += 1
+      end
+      rows.should eq result.size
+    end
+    it "should join files without explicit insert header" do
+      cols           = "1,2"
+      insert_col_pos = "2,1"
+      joins          = "0=1"
+      header         = "*"
+      rows           = "1-4"
+      Sycsvpro::Join.new(infile:         @in_file,
+                         outfile:        @out_file,
+                         source:         @source_file,
+                         cols:           cols,
+                         joins:          joins,
+                         pos:            insert_col_pos,
+                         header:         header,
+                         rows:           rows).execute
+      result = [ "Name;;;N_ID",
+                 "Hank;A4;AT;123",
+                 "Frank;C3;CA;234",
+                 "Mia;D1;DE;345",
+                 "Arwen;U2;US;456" ]
+      rows = 0
+      File.new(@out_file, 'r').each_with_index do |line, index|
+        expect(line.chomp).to eq result[index]
+        rows += 1
+      end
+      rows.should eq result.size
+    end
+    it "should join files without explicit insert cols pos and insert header" do
+      cols           = "1,2"
+      joins          = "0=1"
+      header         = "*"
+      rows           = "1-4"
+      Sycsvpro::Join.new(infile:         @in_file,
+                         outfile:        @out_file,
+                         source:         @source_file,
+                         cols:           cols,
+                         joins:          joins,
+                         header:         header,
+                         rows:           rows).execute
+      result = [ ";;Name;N_ID",
+                 "AT;A4;Hank;123",
+                 "CA;C3;Frank;234",
+                 "DE;D1;Mia;345",
+                 "US;U2;Arwen;456" ]
+      rows = 0
+      File.new(@out_file, 'r').each_with_index do |line, index|
+        expect(line.chomp).to eq result[index]
+        rows += 1
+      end
+      rows.should eq result.size
+    end
+    it "should join files without explicit header adding default header '*'" do
+      cols           = "1,2"
+      joins          = "0=1"
+      rows           = "1-4"
+      Sycsvpro::Join.new(infile:         @in_file,
+                         outfile:        @out_file,
+                         source:         @source_file,
+                         cols:           cols,
+                         joins:          joins,
+                         rows:           rows).execute
+      result = [ ";;Name;N_ID",
+                 "AT;A4;Hank;123",
+                 "CA;C3;Frank;234",
+                 "DE;D1;Mia;345",
+                 "US;U2;Arwen;456" ]
+      rows = 0
+      File.new(@out_file, 'r').each_with_index do |line, index|
+        expect(line.chomp).to eq result[index]
+        rows += 1
+      end
+      rows.should eq result.size
+    end
+    it "should join files without header" do
+      cols           = "1,2"
+      insert_col_pos = "2,1"
+      insert_header  = "COUNTRY,STATE"
+      header         = "*"
+      joins          = "0=1"
+      rows           = "1-4"
+      Sycsvpro::Join.new(infile:         @in_file,
+                         outfile:        @out_file,
+                         source:         @source_file,
+                         cols:           cols,
+                         joins:          joins,
+                         insert_header:  insert_header,
+                         pos:            insert_col_pos,
+                         header:         header,
+                         headerless:     true,
+                         rows:           rows).execute
+      result = [ "Hank;A4;AT;123",
+                 "Frank;C3;CA;234",
+                 "Mia;D1;DE;345",
+                 "Arwen;U2;US;456" ]
+      rows = 0
+      File.new(@out_file, 'r').each_with_index do |line, index|
+        expect(line.chomp).to eq result[index]
+        rows += 1
+      end
+      rows.should eq result.size
+    end
+  end
+end