RubyGems - sycsvpro - Versions diffs - 0.1.7 → 0.1.8 - Mend

sycsvpro 0.1.7 → 0.1.8

Files changed (11) hide show

data/Gemfile.lock +1 -1
data/README.md +13 -0
data/bin/sycsvpro +22 -13
data/lib/sycsvpro/header.rb +1 -1
data/lib/sycsvpro/join.rb +62 -25
data/lib/sycsvpro/table.rb +6 -6
data/lib/sycsvpro/version.rb +1 -1
data/spec/sycsvpro/extractor_spec.rb +3 -1
data/spec/sycsvpro/join_spec.rb +31 -0
data/spec/sycsvpro/table_spec.rb +25 -0
metadata +2 -2

data/Gemfile.lock CHANGED Viewed

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    sycsvpro (0.1.7)
+    sycsvpro (0.1.8)
       gli (= 2.9.0)
       timeleap (~> 0.0.1)

data/README.md CHANGED Viewed

@@ -212,6 +212,15 @@ chiro;CA;R4;c1;con333;dri110;mot100;1.10.3011;1;122.15;456
 chiro;CA;R4;c2;con331;dri100;mot130;3.05.3010;1;25.3;456
 ```
+If you have multiple IDs in a row then you can also conduct multiple joins in
+one streak.
+    $ sycsvpro -f in.csv -o out.csv join address.csv -c "0,1;0,3"
+                                                     -p "2,1;4,5"
+                                                     -i "COUNTRY,REGION"
+                                                     -j "3=8;3=10"
 Sort
 ----
 Sort rows on specified columns as an example sort rows based on customer
@@ -421,6 +430,10 @@ Version 0.1.7
   (-h "*") supplemented by the columns A and B (-i "A,B") that will also be
   positioned at column 1 and 3 (-p "1,3").
+Version 0.1.8
+-------------
+* Join now can join multiple key values in 1 streak
 Installation
 ============
 [![Gem Version](https://badge.fury.io/rb/sycsvpro.png)](http://badge.fury.io/rb/sycsvpro)

data/bin/sycsvpro CHANGED Viewed

@@ -227,9 +227,10 @@ command :execute do |c|
   end
 end
-desc 'Counts the occurences of column values. Uses column values as headings with count as ' +
-     'values. Columns with a condition will be added as new columns and the condition will ' +
-     'be set as column name. Optionally adds a sum row'
+desc 'Counts the occurences of column values. Uses column values as headings '+
+     'with count as values. Columns with a condition will be added as new '+
+     'columns and the condition will be set as column name. Optionally adds a '+
+     'sum row'
 command :count do |c|
@@ -284,8 +285,9 @@ command :aggregate do |c|
   c.arg_name '1,2-4'
   c.flag [:c, :col], :must_match => /^\d+(?:,\d+|-\d+)*/
-  c.desc 'Adds a sum row and a sum column with TITLE for the counted columns. The sum row is ' +
-         'specified by the row position. The sum column is the last column in the row'
+  c.desc 'Adds a sum row and a sum column with TITLE for the counted columns. '+
+         'The sum row is specified by the row position. The sum column is the '+
+         'last column in the row'
   c.arg_name 'SUM_ROW_TITLE:ROW,SUM_COL_TITLE'
   c.flag [:s, :sum], :must_match => /^\w+:\d+(?:,\w+)?|^\w+/
@@ -307,7 +309,10 @@ command :aggregate do |c|
 end
-desc 'Creates a table from a source file'
+desc 'Associates columns to a key value. A key value can be a combination of '+
+     'multiple column values. Values associated can be generated from an '+
+     'arithmetic or string operation. Header columns can be generated '+
+     'dynamically based on column values'
 command :table do |c|
@@ -371,18 +376,18 @@ command :join do |c|
   c.desc 'Columns to merge into the infile'
   c.arg_name '1,5,7'
-  c.flag [:c, :cols], :must_match => /^\d+(?:,\d+)*/
+  c.flag [:c, :cols], :must_match => /^\d+(?:[,;]\d+)*/
   c.desc 'The position at which column position to insert the columns within '+
          'the infile. The sequence of the position is assigned to the columns '+
          'to be inserted'
   c.arg_name '5,1'
-  c.flag [:p, :pos], :must_match => /^\d+(?:,\d+)*/
+  c.flag [:p, :pos], :must_match => /^\d+(?:[,;]\d+)*/
   c.desc 'The join columns in the source file, which contains the columns to '+
          'be inserted into the infile'
   c.arg_name '2=1'
-  c.flag [:j, :join], :must_match => /^\d+=\d+$/
+  c.flag [:j, :join], :must_match => /^\d+(?:[=;]\d+)*/
   c.desc 'Indicates whether the infile headerless'
   c.default_value false
@@ -416,7 +421,8 @@ command :join do |c|
   end
 end
-desc 'Sort rows based on column values'
+desc 'Sort rows based on column values. It is possible to sort on multiple '+
+     'columns'
 command :sort do |c|
   c.desc 'Rows to consider'
   c.arg_name '1,2,10-30,45-EOF,REGEXP'
@@ -457,7 +463,9 @@ command :sort do |c|
   end
 end
-desc 'Inserts rows from a file to a csv-file'
+desc 'Inserts rows from a file to a csv-file. You can for instance add sum '+
+     'operations in Excel or LibreOffice style if you want to process the '+
+     'resulting file in Excel or LibreOffice'
 arg_name 'INSERT_FILE'
 command :insert do |c|
@@ -486,7 +494,7 @@ arg_name 'MAPPINGS-FILE'
 command :map do |c|
   c.desc 'Rows to consider'
   c.arg_name 'ROW1,ROW2,ROW10-ROW30,45-EOF,REGEXP'
-  c.flag [:r, :row], :must_match => row_regex #/\d+(?:,\d+|-\d+|-eof|,\/.*\/)*|\/.*\/(?:,\/.*\/|\d+)*/i
+  c.flag [:r, :row], :must_match => row_regex
   c.desc 'Columns to consider for mapping'
   c.arg_name 'COL1,COL2,COL10-COL30'
@@ -648,7 +656,8 @@ on_error do |exception|
   true
 end
-# the -r flag can take a EOF value which is replaced by the actual row value of the input file
+# the -r flag can take a EOF value which is replaced by the actual row value
+# of the input file
 def set_max_row(options, max_row)
   options.each do |option, value|
     case option

data/lib/sycsvpro/header.rb CHANGED Viewed

@@ -19,7 +19,7 @@ module Sycsvpro
     # Create a new header
     def initialize(header, options = {})
       @header_cols = split_by_comma_regex(header || "")
-      @insert_cols = (options[:insert] || "").split(',')
+      @insert_cols = (options[:insert] || "").split(/,|;/)
       @positions   = options[:pos] || []
     end

data/lib/sycsvpro/join.rb CHANGED Viewed

@@ -1,23 +1,33 @@
 # Operating csv files
 module Sycsvpro
+  # Joiner holds all join data: the join columns, the positions where to
+  # insert the columns from the source file, the cols which are inserted from
+  # the source file and the lookup table with keys and associated column values.
+  # :call-seq:
+  #   Sycsvpro::Joiner.new([1,2], [3,4], [4,5,6], { rows: {} })
+  Joiner = Struct.new(:join, :pos, :cols, :lookup)
   # Join joins two files based on a join key value.
   # Example
   # File 1 (infile)
   #     |Name |ID |
+  #     |-----|---|
   #     |Hank |123|
   #     |Frank|234|
   #     |Mia  |345|
   #     |Moira|234|
   #
   # File 2 (source)
-  #     |Company|Phone|ID|
+  #     |Company|Phone|ID |
+  #     |-------|-----|---|
   #     |Siem   |4848 |123|
   #     |Helo   |993  |345|
   #     |Wara   |3333 |234|
   #
   # File 3 (outfile)
   #     |Name |ID |Company|Phone|
+  #     |-----|---|-------|-----|
   #     |Hank |123|Siem   |4848 |
   #     |Frank|234|Wara   |3333 |
   #     |Mia  |345|Helo   |993  |
@@ -34,16 +44,12 @@ module Sycsvpro
     attr_reader :source
     # filter that is used for rows
     attr_reader :row_filter
-    # columns to insert
-    attr_reader :columns
     # position where to insert the columns into the infile
     attr_reader :positions
     # header of the outfile
     attr_reader :header
     # indicates whether the infile is headerless
     attr_reader :headerless
-    # lookup table where the assigned values are stored at
-    attr_reader :lookup_table
     # Creates a Join which can be invoked as follows
     # :call-seq:
@@ -61,8 +67,8 @@ module Sycsvpro
     # infile:: csv file to operate on
     # outfile:: csv file with the result
     # source:: csv file that contains the values to join to infile
-    # rows: rows to consider for operation. Rows that don't match the pattern
-    #       will be skipped for operation
+    # rows:: rows to consider for operation. Rows that don't match the pattern
+    #        will be skipped for operation
     # cols:: columns to insert from the source to the infile
     # pos:: column positions where to insert the values and the insert_header
     #       columns
@@ -76,9 +82,9 @@ module Sycsvpro
       @outfile    = options[:outfile]
       @source     = options[:source]
       @row_filter = RowFilter.new(options[:rows], df: options[:df])
-      @columns    = options[:cols].split(',').collect { |c| c.to_i }
-      @positions  = col_positions(options[:pos], @columns)
-      @joins      = options[:joins].split('=').collect { |j| j.to_i }
+      @positions  = create_joiners(options[:joins],
+                                   options[:cols],
+                                   options[:pos])
       @headerless = options[:headerless].nil? ? false : options[:headerless]
       @header     = Header.new(options[:header] || '*',
                                pos:    @positions,
@@ -108,14 +114,17 @@ module Sycsvpro
           next if row_filter.process(line, row: index).nil?
           values = line.split(';')
+          target = values.dup
+          @positions.sort.each { |p| target.insert(p, "") }
+          @joiners.each do |joiner|
+            key = values[joiner.join[1]]
+            row = joiner.lookup[:rows][key] || []
+            joiner.pos.each_with_index { |p,i| target[p] = row[i] }
+          end
-          key = values[@joins[1]]
-          row = lookup_table[:rows][key] || []
-          lookup_table[:pos].sort.each { |p| values.insert(p, "") }
-          lookup_table[:pos].each_with_index { |p,i| values[p] = row[i] }
-          out.puts values.join(';')
+          out.puts target.join(';')
         end
       end
     end
@@ -125,8 +134,6 @@ module Sycsvpro
       # Creates a lookup table from the source file values. The join column of
       # the source file is the key
       def create_lookup_table
-        @lookup_table = { pos: positions, rows: {} }
         File.open(source).each_with_index do |line|
           next if line.chomp.empty?
@@ -134,12 +141,15 @@ module Sycsvpro
           next if values.empty?
-          key = values[@joins[0]]
-          lookup_table[:rows][key] = []
+          @joiners.each do |joiner|
+            key = values[joiner.join[0]]
+            joiner.lookup[:rows][key] = []
-          columns.each do |i|
-            lookup_table[:rows][key] << values[i]
+            joiner.cols.each do |i|
+              joiner.lookup[:rows][key] << values[i]
+            end
           end
         end
       end
@@ -148,12 +158,39 @@ module Sycsvpro
       # are put at the beginning of the row
       def col_positions(pos, cols)
         if pos.nil? || pos.empty?
-          Array.new(cols.size) { |c| c }
+          pos = []
+          cols.each { |c| pos << Array.new(c.size) { |c| c } }
+          pos
         else
-          pos.split(',').collect { |p| p.to_i }
+          pos.split(';').collect { |p| p.split(',').collect { |p| p.to_i } }
         end
       end
+      # Initializes joiners based on joins, positions and columns
+      #
+      # Possible input forms are:
+      # joins:: "4=0;4=1" or "4=1"
+      # positions:: "1,2;4,5" or "1,2"
+      # columns:: "1,2;3,4"
+      #
+      # This has the semantic of 'insert columns 1 and 2 at positions 1 and 2
+      # for key 0 and columns 3 and 4 at positions 4 and 5 for key 1'. Column 4
+      # is the corresponding key column in the source file
+      #
+      # Return value:: positions where to insert values from source file
+      def create_joiners(j, c, p)
+        js = j.split(';').collect { |j| j.split('=').collect { |j| j.to_i } }
+        cs = c.split(';').collect { |c| c.split(',').collect { |c| c.to_i } }
+        ps = col_positions(p, cs)
+        @joiners = []
+        (0...js.size).each do |i|
+          @joiners << Joiner.new(js[i], ps[i], cs[i], { rows: { } })
+        end
+        ps.flatten
+      end
   end
 end

data/lib/sycsvpro/table.rb CHANGED Viewed

@@ -24,7 +24,7 @@ module Sycsvpro
     include Dsl
     # Regex to split parameters
-    COL_SPLITTER = /,(?=[\w +]*:)/
+    COL_SPLITTER = /,(?=['\w +-]*:)/
     # infile contains the data that is operated on
     attr_reader :infile
     # outfile is the file where the result is written to
@@ -59,9 +59,9 @@ module Sycsvpro
     # df:: date format
     # nf:: number format of number values. "DE" e.g. is 1.000,00 where as
     #      US is 1,000.00
-    # pr:: precision of number values. Default 2
-    # rows: rows to consider for operation. Rows that don't match the pattern
-    #       will be skipped for operation
+    # pr:: precision of number values.
+    # rows:: rows to consider for operation. Rows that don't match the pattern
+    #        will be skipped for operation
     # header:: Header of the csv file
     # key:: Values located at value 0 and subsequent columns
     # cols:: Values added to columns based on an operation or assignment
@@ -75,7 +75,7 @@ module Sycsvpro
       @keys        = split_by_comma_regex(options[:key]) #options[:key].split(',')
       @cols        = options[:cols].split(COL_SPLITTER)
       @number_format = options[:nf] || 'EN'
-      @precision     = options[:pr] || 2
+      @precision     = options[:pr].to_i if options[:pr]
       prepare_sum_row options[:sum]
       @rows        = {}
     end
@@ -162,7 +162,7 @@ module Sycsvpro
         column = evaluate(column) if column =~ /^c\d+[=~+]/
         previous_value = row[:cols][column]
         if value = eval("#{row[:cols][column]}#{formula}")
-          row[:cols][column] = value.round(@precision)
+          row[:cols][column] = @precision ? value.round(@precision) : value
           add_to_sum_row(row[:cols][column] - previous_value, column)
         end
       end

data/lib/sycsvpro/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # Operating csv files
 module Sycsvpro
   # Version number of sycsvpro
-  VERSION = '0.1.7'
+  VERSION = '0.1.8'
 end

data/spec/sycsvpro/extractor_spec.rb CHANGED Viewed

@@ -39,7 +39,9 @@ module Sycsvpro
     end
     it "should extract rows base on regex including commas" do
-      extractor = Extractor.new(infile: @in_file2, outfile: @out_file, rows: "/[56789]\\d+|\\d{3,}/")
+      extractor = Extractor.new(infile: @in_file2,
+                                outfile: @out_file,
+                                rows: "/[56789]\\d+|\\d{3,}/")
       extractor.execute

data/spec/sycsvpro/join_spec.rb CHANGED Viewed

@@ -6,6 +6,7 @@ module Sycsvpro
     before do
       @in_file = File.join(File.dirname(__FILE__), "files/persons.csv")
+      @in_file_2 = File.join(File.dirname(__FILE__), "files/multiple-persons.csv")
       @source_file = File.join(File.dirname(__FILE__), "files/countries.csv")
       @out_file = File.join(File.dirname(__FILE__), "files/persons-countries.csv")
     end
@@ -45,6 +46,36 @@ module Sycsvpro
     end
+    it "should join files inserting values on multiple positions" do
+      cols           = "1,2;1,2"
+      insert_col_pos = "3,2;6,5"
+      insert_header  = "A-COUNTRY,A-STATE;B-COUNTRY,B-STATE"
+      joins          = "0=1;0=2"
+      Sycsvpro::Join.new(infile:         @in_file_2,
+                         outfile:        @out_file,
+                         source:         @source_file,
+                         cols:           cols,
+                         joins:          joins,
+                         insert_header:  insert_header,
+                         pos:            insert_col_pos).execute
+      result = [ "Name;A_ID;A-STATE;A-COUNTRY;B_ID;B-STATE;B-COUNTRY",
+                 "Hank;123;A4;AT;234;C3;CA",
+                 "Frank;234;C3;CA;345;D1;DE",
+                 "Mia;345;D1;DE;456;U2;US",
+                 "Arwen;456;U2;US;123;A4;AT" ]
+      rows = 0
+      File.new(@out_file, 'r').each_with_index do |line, index|
+        expect(line.chomp).to eq result[index]
+        rows += 1
+      end
+      rows.should eq result.size
+    end
     it "should join files without explicit insert header" do
       cols           = "1,2"
       insert_col_pos = "2,1"

data/spec/sycsvpro/table_spec.rb CHANGED Viewed

@@ -142,6 +142,7 @@ module Sycsvpro
                                    "RP:+n2 if #{rp_order_type}.index(c1),"+
                                    "Total:+n2",
                           nf:      "DE",
+                          pr:      "2",
                           sum:     "top:SP,RP,Total").execute
       result = [ "Year;SP;RP;Total",
@@ -167,6 +168,7 @@ module Sycsvpro
                        key:     "c0=~/\\.(\\d{4})/",
                        cols:    "c1=~/^([A-Z]{1,2})/:+n2,Total:+n2",
                        nf:      "DE",
+                       pr:      2,
                        sum:     "top:BEGINc1=~/^([A-Z]{1,2})/END,Total").execute
       result = [ "Year;ZE;ZR;Total",
@@ -210,6 +212,29 @@ module Sycsvpro
     end
+    it "should add a count column for the occurance of column values" do
+      Sycsvpro::Table.new(infile: @in_file,
+                          outfile: @out_file,
+                          header:  "Year,c6,c1,c2+c3,c2+c3+'-Count'",
+                          key:     "c0=~/\\.(\\d{4})/,c6",
+                          cols:    "Value:+n1,c2+c3:+n1,c2+c3+'-Count':+1",
+                          sum:     "top:Value,c2+c3").execute
+      result = [ "Year;Country;Value;A1;B2;B4;B4-Count;B2-Count;A1-Count",
+                 ";;95.2;41.0;21.0;33.2;;;",
+                 "2013;AT;53.7;20.5;0;33.2;1;0;1",
+                 "2014;DE;21.0;0;21.0;0;0;1;0",
+                 "2014;AT;20.5;20.5;0;0;0;0;1" ]
+      rows = 0
+      File.open(@out_file).each_with_index do |line, index|
+        line.chomp.should eq result[index]
+        rows += 1
+      end
+      rows.should eq result.size
+    end
   end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: sycsvpro
 version: !ruby/object:Gem::Version
-  version: 0.1.7
+  version: 0.1.8
   prerelease:
 platform: ruby
 authors:
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-06-27 00:00:00.000000000 Z
+date: 2014-06-28 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake