RubyGems - sycsvpro - Versions diffs - 0.1.7 → 0.1.8 - Mend

sycsvpro 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

data/Gemfile.lock +1 -1
data/README.md +13 -0
data/bin/sycsvpro +22 -13
data/lib/sycsvpro/header.rb +1 -1
data/lib/sycsvpro/join.rb +62 -25
data/lib/sycsvpro/table.rb +6 -6
data/lib/sycsvpro/version.rb +1 -1
data/spec/sycsvpro/extractor_spec.rb +3 -1
data/spec/sycsvpro/join_spec.rb +31 -0
data/spec/sycsvpro/table_spec.rb +25 -0
metadata +2 -2

data/Gemfile.lock CHANGED Viewed

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    sycsvpro (0.1.7)
+    sycsvpro (0.1.8)
       gli (= 2.9.0)
       timeleap (~> 0.0.1)

data/README.md CHANGED Viewed

@@ -212,6 +212,15 @@ chiro;CA;R4;c1;con333;dri110;mot100;1.10.3011;1;122.15;456
 chiro;CA;R4;c2;con331;dri100;mot130;3.05.3010;1;25.3;456
 ```
+If you have multiple IDs in a row then you can also conduct multiple joins in
+one streak.
+    $ sycsvpro -f in.csv -o out.csv join address.csv -c 0,1;0,3
+                                                     -p 2,1;4,5
+                                                     -i "COUNTRY,REGION"
+                                                     -j "3=8;3=10"
 Sort
 ----
 Sort rows on specified columns as an example sort rows based on customer
@@ -421,6 +430,10 @@ Version 0.1.7
   (-h "*") supplemented by the columns A and B (-i "A,B") that will also be
   positioned at column 1 and 3 (-p "1,3").
+Version 0.1.8
+-------------
+* Join now can join multiple key values in 1 streak
 Installation
 ============
 [![Gem Version](https://badge.fury.io/rb/sycsvpro.png)](http://badge.fury.io/rb/sycsvpro)

data/bin/sycsvpro CHANGED Viewed

@@ -227,9 +227,10 @@ command :execute do |c|
   end
 end
-desc 'Counts the occurences of column values. Uses column values as headings with count as ' +
-     'values. Columns with a condition will be added as new columns and the condition will ' +
-     'be set as column name. Optionally adds a sum row'
+desc 'Counts the occurences of column values. Uses column values as headings '+
+     'with count as values. Columns with a condition will be added as new '+
+     'columns and the condition will be set as column name. Optionally adds a '+
+     'sum row'
 command :count do |c|
@@ -284,8 +285,9 @@ command :aggregate do |c|
   c.arg_name '1,2-4'
   c.flag [:c, :col], :must_match => /^\d+(?:,\d+|-\d+)*/
-  c.desc 'Adds a sum row and a sum column with TITLE for the counted columns. The sum row is ' +
-         'specified by the row position. The sum column is the last column in the row'
+  c.desc 'Adds a sum row and a sum column with TITLE for the counted columns. '+
+         'The sum row is specified by the row position. The sum column is the '+
+         'last column in the row'
   c.arg_name 'SUM_ROW_TITLE:ROW,SUM_COL_TITLE'
   c.flag [:s, :sum], :must_match => /^\w+:\d+(?:,\w+)?|^\w+/
@@ -307,7 +309,10 @@ command :aggregate do |c|
 end
-desc 'Creates a table from a source file'
+desc 'Associates columns to a key value. A key value can be a combination of '+
+     'multiple column values. Values associated can be generated from an '+
+     'arithmetic or string operation. Header columns can be generated '+
+     'dynamically based on column values'
 command :table do |c|
@@ -371,18 +376,18 @@ command :join do |c|
   c.desc 'Columns to merge into the infile'
   c.arg_name '1,5,7'
-  c.flag [:c, :cols], :must_match => /^\d+(?:,\d+)*/
+  c.flag [:c, :cols], :must_match => /^\d+(?:[,;]\d+)*/
   c.desc 'The position at which column position to insert the columns within '+
          'the infile. The sequence of the position is assigned to the columns '+
          'to be inserted'
   c.arg_name '5,1'
-  c.flag [:p, :pos], :must_match => /^\d+(?:,\d+)*/
+  c.flag [:p, :pos], :must_match => /^\d+(?:[,;]\d+)*/
   c.desc 'The join columns in the source file, which contains the columns to '+
          'be inserted into the infile'
   c.arg_name '2=1'
-  c.flag [:j, :join], :must_match => /^\d+=\d+$/
+  c.flag [:j, :join], :must_match => /^\d+(?:[=;]\d+)*/
   c.desc 'Indicates whether the infile headerless'
   c.default_value false
@@ -416,7 +421,8 @@ command :join do |c|
   end
 end
-desc 'Sort rows based on column values'
+desc 'Sort rows based on column values. It is possible to sort on multiple '+
+     'columns'
 command :sort do |c|
   c.desc 'Rows to consider'
   c.arg_name '1,2,10-30,45-EOF,REGEXP'
@@ -457,7 +463,9 @@ command :sort do |c|
   end
 end
-desc 'Inserts rows from a file to a csv-file'
+desc 'Inserts rows from a file to a csv-file. You can for instance add sum '+
+     'operations in Excel or LibreOffice style if you want to process the '+
+     'resulting file in Excel or LibreOffice'
 arg_name 'INSERT_FILE'
 command :insert do |c|
@@ -486,7 +494,7 @@ arg_name 'MAPPINGS-FILE'
 command :map do |c|
   c.desc 'Rows to consider'
   c.arg_name 'ROW1,ROW2,ROW10-ROW30,45-EOF,REGEXP'
-  c.flag [:r, :row], :must_match => row_regex #/\d+(?:,\d+|-\d+|-eof|,\/.*\/)*|\/.*\/(?:,\/.*\/|\d+)*/i
+  c.flag [:r, :row], :must_match => row_regex
   c.desc 'Columns to consider for mapping'
   c.arg_name 'COL1,COL2,COL10-COL30'
@@ -648,7 +656,8 @@ on_error do |exception|
   true
 end
-# the -r flag can take a EOF value which is replaced by the actual row value of the input file
+# the -r flag can take a EOF value which is replaced by the actual row value
+# of the input file
 def set_max_row(options, max_row)
   options.each do |option, value|
     case option

data/lib/sycsvpro/header.rb CHANGED Viewed

@@ -19,7 +19,7 @@ module Sycsvpro
     # Create a new header
     def initialize(header, options = {})
       @header_cols = split_by_comma_regex(header || "")
-      @insert_cols = (options[:insert] || "").split(',')
+      @insert_cols = (options[:insert] || "").split(/,|;/)
       @positions   = options[:pos] || []
     end

data/lib/sycsvpro/join.rb CHANGED Viewed

@@ -1,23 +1,33 @@
 # Operating csv files
 module Sycsvpro
+  # Joiner holds all join data: the join columns, the positions where to
+  # insert the columns from the source file, the cols which are inserted from
+  # the source file, and the lookup table with keys and associated column values.
+  # :call-seq:
+  #   Sycsvpro::Joiner.new([1,2], [3,4], [4,5,6], { rows: {} })
+  Joiner = Struct.new(:join, :pos, :cols, :lookup)
   # Join joins two files based on a join key value.
   # Example
   # File 1 (infile)
   #     |Name |ID |
+  #     |-----|---|
   #     |Hank |123|
   #     |Frank|234|
   #     |Mia  |345|
   #     |Moira|234|
   #
   # File 2 (source)
-  #     |Company|Phone|ID|
+  #     |Company|Phone|ID |
+  #     |-------|-----|---|
   #     |Siem   |4848 |123|
   #     |Helo   |993  |345|
   #     |Wara   |3333 |234|
   #
   # File 3 (outfile)
   #     |Name |ID |Company|Phone|
+  #     |-----|---|-------|-----|
   #     |Hank |123|Siem   |4848 |
   #     |Frank|234|Wara   |3333 |
   #     |Mia  |345|Helo   |993  |
@@ -34,16 +44,12 @@ module Sycsvpro
     attr_reader :source
     # filter that is used for rows
     attr_reader :row_filter
-    # columns to insert
-    attr_reader :columns
     # position where to insert the columns into the infile
     attr_reader :positions
     # header of the outfile
     attr_reader :header
     # indicates whether the infile is headerless
     attr_reader :headerless
-    # lookup table where the assigned values are stored at
-    attr_reader :lookup_table
     # Creates a Join which can be invoked as follows
     # :call-seq:
@@ -61,8 +67,8 @@ module Sycsvpro
     # infile:: csv file to operate on
     # outfile:: csv file with the result
     # source:: csv file that contains the values to join to infile
-    # rows: rows to consider for operation. Rows that don't match the pattern
-    #       will be skipped for operation
+    # rows:: rows to consider for operation. Rows that don't match the pattern
+    #        will be skipped for operation
     # cols:: columns to insert from the source to the infile
     # pos:: column positions where to insert the values and the insert_header
     #       columns
@@ -76,9 +82,9 @@ module Sycsvpro
       @outfile    = options[:outfile]
       @source     = options[:source]
       @row_filter = RowFilter.new(options[:rows], df: options[:df])
-      @columns    = options[:cols].split(',').collect { |c| c.to_i }
-      @positions  = col_positions(options[:pos], @columns)
-      @joins      = options[:joins].split('=').collect { |j| j.to_i }
+      @positions  = create_joiners(options[:joins],
+                                   options[:cols],
+                                   options[:pos])
       @headerless = options[:headerless].nil? ? false : options[:headerless]
       @header     = Header.new(options[:header] || '*',
                                pos:    @positions,
@@ -108,14 +114,17 @@ module Sycsvpro
           next if row_filter.process(line, row: index).nil?
           values = line.split(';')
+          target = values.dup
+          @positions.sort.each { |p| target.insert(p, "") }
+          @joiners.each do |joiner|
+            key = values[joiner.join[1]]
+            row = joiner.lookup[:rows][key] || []
+            joiner.pos.each_with_index { |p,i| target[p] = row[i] }
+          end
-          key = values[@joins[1]]
-          row = lookup_table[:rows][key] || []
-          lookup_table[:pos].sort.each { |p| values.insert(p, "") }
-          lookup_table[:pos].each_with_index { |p,i| values[p] = row[i] }
-          out.puts values.join(';')
+          out.puts target.join(';')
         end
       end
     end
@@ -125,8 +134,6 @@ module Sycsvpro
       # Creates a lookup table from the source file values. The join column of
       # the source file is the key
       def create_lookup_table
-        @lookup_table = { pos: positions, rows: {} }
         File.open(source).each_with_index do |line|
           next if line.chomp.empty?
@@ -134,12 +141,15 @@ module Sycsvpro
           next if values.empty?
-          key = values[@joins[0]]
-          lookup_table[:rows][key] = []
+          @joiners.each do |joiner|
+            key = values[joiner.join[0]]
+            joiner.lookup[:rows][key] = []
-          columns.each do |i|
-            lookup_table[:rows][key] << values[i]
+            joiner.cols.each do |i|
+              joiner.lookup[:rows][key] << values[i]
+            end
           end
         end
       end
@@ -148,12 +158,39 @@ module Sycsvpro
       # are put at the beginning of the row
       def col_positions(pos, cols)
         if pos.nil? || pos.empty?
-          Array.new(cols.size) { |c| c }
+          pos = []
+          cols.each { |c| pos << Array.new(c.size) { |c| c } }
+          pos
         else
-          pos.split(',').collect { |p| p.to_i }
+          pos.split(';').collect { |p| p.split(',').collect { |p| p.to_i } }
         end
       end
+      # Initializes joiners based on joins, positions and columns
+      #
+      # Possible input forms are:
+      # joins:: "4=0;4=1" or "4=1"
+      # positions:: "1,2;4,5" or "1,2"
+      # columns:: "1,2;3,4"
+      #
+      # This has the semantics of 'insert columns 1 and 2 at positions 1 and 2
+      # for key 0 and columns 3 and 4 at positions 4 and 5 for key 1'. Key 4 is
+      # the corresponding key column in the source file.
+      #
+      # Return value:: positions where to insert values from source file
+      def create_joiners(j, c, p)
+        js = j.split(';').collect { |j| j.split('=').collect { |j| j.to_i } }
+        cs = c.split(';').collect { |c| c.split(',').collect { |c| c.to_i } }
+        ps = col_positions(p, cs)
+        @joiners = []
+        (0...js.size).each do |i|
+          @joiners << Joiner.new(js[i], ps[i], cs[i], { rows: { } })
+        end
+        ps.flatten
+      end
   end
 end

data/lib/sycsvpro/table.rb CHANGED Viewed

@@ -24,7 +24,7 @@ module Sycsvpro
     include Dsl
     # Regex to split parameters
-    COL_SPLITTER = /,(?=[\w +]*:)/
+    COL_SPLITTER = /,(?=['\w +-]*:)/
     # infile contains the data that is operated on
     attr_reader :infile
     # outfile is the file where the result is written to
@@ -59,9 +59,9 @@ module Sycsvpro
     # df:: date format
     # nf:: number format of number values. "DE" e.g. is 1.000,00 where as
     #      US is 1,000.00
-    # pr:: precision of number values. Default 2
-    # rows: rows to consider for operation. Rows that don't match the pattern
-    #       will be skipped for operation
+    # pr:: precision of number values.
+    # rows:: rows to consider for operation. Rows that don't match the pattern
+    #        will be skipped for operation
     # header:: Header of the csv file
     # key:: Values located at value 0 and subsequent columns
     # cols:: Values added to columns based on an operation or assignment
@@ -75,7 +75,7 @@ module Sycsvpro
       @keys        = split_by_comma_regex(options[:key]) #options[:key].split(',')
       @cols        = options[:cols].split(COL_SPLITTER)
       @number_format = options[:nf] || 'EN'
-      @precision     = options[:pr] || 2
+      @precision     = options[:pr].to_i if options[:pr]
       prepare_sum_row options[:sum]
       @rows        = {}
     end
@@ -162,7 +162,7 @@ module Sycsvpro
         column = evaluate(column) if column =~ /^c\d+[=~+]/
         previous_value = row[:cols][column]
         if value = eval("#{row[:cols][column]}#{formula}")
-          row[:cols][column] = value.round(@precision)
+          row[:cols][column] = @precision ? value.round(@precision) : value
           add_to_sum_row(row[:cols][column] - previous_value, column)
         end
       end

data/lib/sycsvpro/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # Operating csv files
 module Sycsvpro
   # Version number of sycsvpro
-  VERSION = '0.1.7'
+  VERSION = '0.1.8'
 end

data/spec/sycsvpro/extractor_spec.rb CHANGED Viewed

@@ -39,7 +39,9 @@ module Sycsvpro
     end
     it "should extract rows base on regex including commas" do
-      extractor = Extractor.new(infile: @in_file2, outfile: @out_file, rows: "/[56789]\\d+|\\d{3,}/")
+      extractor = Extractor.new(infile: @in_file2,
+                                outfile: @out_file,
+                                rows: "/[56789]\\d+|\\d{3,}/")
       extractor.execute

data/spec/sycsvpro/join_spec.rb CHANGED Viewed

@@ -6,6 +6,7 @@ module Sycsvpro
     before do
       @in_file = File.join(File.dirname(__FILE__), "files/persons.csv")
+      @in_file_2 = File.join(File.dirname(__FILE__), "files/multiple-persons.csv")
       @source_file = File.join(File.dirname(__FILE__), "files/countries.csv")
       @out_file = File.join(File.dirname(__FILE__), "files/persons-countries.csv")
     end
@@ -45,6 +46,36 @@ module Sycsvpro
     end
+    it "should join files inserting values on multiple positions" do
+      cols           = "1,2;1,2"
+      insert_col_pos = "3,2;6,5"
+      insert_header  = "A-COUNTRY,A-STATE;B-COUNTRY,B-STATE"
+      joins          = "0=1;0=2"
+      Sycsvpro::Join.new(infile:         @in_file_2,
+                         outfile:        @out_file,
+                         source:         @source_file,
+                         cols:           cols,
+                         joins:          joins,
+                         insert_header:  insert_header,
+                         pos:            insert_col_pos).execute
+      result = [ "Name;A_ID;A-STATE;A-COUNTRY;B_ID;B-STATE;B-COUNTRY",
+                 "Hank;123;A4;AT;234;C3;CA",
+                 "Frank;234;C3;CA;345;D1;DE",
+                 "Mia;345;D1;DE;456;U2;US",
+                 "Arwen;456;U2;US;123;A4;AT" ]
+      rows = 0
+      File.new(@out_file, 'r').each_with_index do |line, index|
+        expect(line.chomp).to eq result[index]
+        rows += 1
+      end
+      rows.should eq result.size
+    end
     it "should join files without explicit insert header" do
       cols           = "1,2"
       insert_col_pos = "2,1"

data/spec/sycsvpro/table_spec.rb CHANGED Viewed

@@ -142,6 +142,7 @@ module Sycsvpro
                                    "RP:+n2 if #{rp_order_type}.index(c1),"+
                                    "Total:+n2",
                           nf:      "DE",
+                          pr:      "2",
                           sum:     "top:SP,RP,Total").execute
       result = [ "Year;SP;RP;Total",
@@ -167,6 +168,7 @@ module Sycsvpro
                        key:     "c0=~/\\.(\\d{4})/",
                        cols:    "c1=~/^([A-Z]{1,2})/:+n2,Total:+n2",
                        nf:      "DE",
+                       pr:      2,
                        sum:     "top:BEGINc1=~/^([A-Z]{1,2})/END,Total").execute
       result = [ "Year;ZE;ZR;Total",
@@ -210,6 +212,29 @@ module Sycsvpro
     end
+    it "should add a count column for the occurance of column values" do
+      Sycsvpro::Table.new(infile: @in_file,
+                          outfile: @out_file,
+                          header:  "Year,c6,c1,c2+c3,c2+c3+'-Count'",
+                          key:     "c0=~/\\.(\\d{4})/,c6",
+                          cols:    "Value:+n1,c2+c3:+n1,c2+c3+'-Count':+1",
+                          sum:     "top:Value,c2+c3").execute
+      result = [ "Year;Country;Value;A1;B2;B4;B4-Count;B2-Count;A1-Count",
+                 ";;95.2;41.0;21.0;33.2;;;",
+                 "2013;AT;53.7;20.5;0;33.2;1;0;1",
+                 "2014;DE;21.0;0;21.0;0;0;1;0",
+                 "2014;AT;20.5;20.5;0;0;0;0;1" ]
+      rows = 0
+      File.open(@out_file).each_with_index do |line, index|
+        line.chomp.should eq result[index]
+        rows += 1
+      end
+      rows.should eq result.size
+    end
   end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: sycsvpro
 version: !ruby/object:Gem::Version
-  version: 0.1.7
+  version: 0.1.8
   prerelease:
 platform: ruby
 authors:
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-06-27 00:00:00.000000000 Z
+date: 2014-06-28 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake