RubyGems - sycsvpro - Versions diffs - 0.2.0 → 0.2.1 - Mend

sycsvpro 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

data/Gemfile.lock +1 -1
data/README.md +16 -4
data/bin/sycsvpro +5 -0
data/lib/sycsvpro/dsl.rb +53 -0
data/lib/sycsvpro/spread_sheet.rb +34 -6
data/lib/sycsvpro/spread_sheet_builder.rb +7 -5
data/lib/sycsvpro/version.rb +1 -1
data/spec/sycsvpro/spread_sheet_spec.rb +49 -1
metadata +2 -2

data/Gemfile.lock CHANGED Viewed

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    sycsvpro (0.2.0)
+    sycsvpro (0.2.1)
       gli (= 2.9.0)
       timeleap (~> 0.0.1)

data/README.md CHANGED Viewed

@@ -714,10 +714,22 @@ Version 0.1.13
 Version 0.2.0
 -------------
-* SpreadSheet is used to conduct operations like multiplication, division,
-  addition and subtraction between multiple files that have a table like
-  structure. SpreadSheet can also be used to retrieve information about csv
-  files
+* SpreadSheet has been introduced. A spread sheet is used to conduct
+  operations like multiplication, division, addition and subtraction between
+  multiple files that have a table like structure. SpreadSheet can also be used
+  to retrieve information about csv files
+Version 0.2.1
+-------------
+* When creating spread sheets from a file, empty rows are skipped
+* To equalize column sizes of rows in spread sheets `equalize: true` flag was
+  introduced
+* To distinguish between different number locales like _1.234.567,89_,
+  _1,234,567.89_, _1 234 567.89_ and the like a `ds` flag was introduced to
+  spread sheet to indicate the number formatting
+* Optimize performance when creating spread sheets from files
+* Dsl module has got 3 new methods #is\_integer?, #is\_float? and #str2num to
+  convert strings that represent numbers to numeric values
 Documentation
 =============

data/bin/sycsvpro CHANGED Viewed

@@ -426,6 +426,10 @@ command :spreadsheet do |c|
   c.arg_name 'ALIAS_1,ALIAS_2,...,ALIAS_N'
   c.flag [:a, :alias]
+  c.desc 'Decimal separator for number values'
+  c.arg_name '.|,'
+  c.flag [:ds], default: '.'
   c.desc 'The arithmetic operation with the table data'
   c.arg_name 'ARITHMETIC_OPERATION'
   c.flag [:o, :operation]
@@ -441,6 +445,7 @@ command :spreadsheet do |c|
                                      clabels:   options[:c],
                                      aliases:   options[:a],
                                      operation: options[:o],
+                                     ds:        options[:ds],
                                      print:     options[:p]).execute
     print 'done'
   end

data/lib/sycsvpro/dsl.rb CHANGED Viewed

@@ -8,6 +8,25 @@ module Dsl
   # Example:
   #     Year,c1+c2,c1=~/[A-Z]{1,2}/,Month
   COMMA_SPLITTER_REGEX = /(?<=,|^)(BEGIN.*?END|\/.*?\/|.*?)(?=,|$)/i
+  # Recognizes a string that represents an integer value
+  INTEGER_REGEX = /^\d{1,3}(?:[,\. ]\d{3}|\d)*$/
+  COMMA_POINT_SPACE_REGEX = /[,\. ]/
+  # Recognizes a string that represents a float value in the form of 1,333.45
+  DECIMAL_POINT_REGEX = /^\d{1,3}(?:[, ]\d{3}|\d)*(?:\.\d*)$/
+  # Recognizes a string that represents a float value in the form of 1.333,45
+  DECIMAL_COMMA_REGEX = /^\d{1,3}(?:[\. ]\d{3}|\d)*(?:,\d*)$/
+  # A regex that recognizes '.' and ' ' to be used e.g. in #gsub to optimize performance
+  POINT_SPACE_REGEX    = /[\. ]/
+  # A regex that recognizes ',' and ' ' to be used e.g. in #gsub to optimize performance
+  COMMA_SPACE_REGEX    = /[, ]/
+  # A point '.' to be used e.g. in #gsub to optimize performance
+  POINT = '.'
+  # A comma ',' to be used e.g. in #gsub to optimize performance
+  COMMA = ','
+  # A semicolon ';' to be used e.g. in #gsub to optimize performance
+  SEMICOLON = ';'
+  # An empty string '' to be used e.g. in #gsub to optimize performance
+  EMPTY = ''
   # read arguments provided at invocation
   # :call-seq:
@@ -98,6 +117,40 @@ module Dsl
       collect { |h| h.gsub(/BEGIN|END/, "") }
   end
+  # Checks if the string represents an integer. If so returns the digits as a
+  # String with the separators ',', '.' and ' ' removed, otherwise nil
+  def is_integer?(value)
+    return value.
+      gsub(COMMA_POINT_SPACE_REGEX, EMPTY) if !(value =~ INTEGER_REGEX).nil?
+  end
+  # Checks if the string represents a float for the given decimal separator. If
+  # so returns it as a String without grouping and with '.' as decimal point, else nil
+  #   "1,234.5"      -> "1234.5"
+  #   "1.234,5", ',' -> "1234.5"
+  def is_float?(value, decimal_separator = POINT)
+    if decimal_separator == POINT
+      return value.
+        gsub(COMMA_SPACE_REGEX, EMPTY) if !(value =~ DECIMAL_POINT_REGEX).nil?
+    else # any separator other than '.' (including nil) is handled as ','
+      return value.
+        gsub(POINT_SPACE_REGEX, EMPTY).
+        gsub(COMMA, POINT) if !(value =~ DECIMAL_COMMA_REGEX).nil?
+    end
+  end
+  # Converts a string to an Integer or Float if it represents a number, otherwise returns it unchanged
+  def str2num(value, decimal_separator = POINT)
+    case
+    when v = is_integer?(value) # checked first: "1.234" yields 1234 for any decimal_separator
+      v.to_i
+    when v = is_float?(value, decimal_separator)
+      v.to_f
+    else
+      value
+    end
+  end
   private
     # Assigns values to keys that are used in rows and yielded to the block

data/lib/sycsvpro/spread_sheet.rb CHANGED Viewed

@@ -1,4 +1,5 @@
 require_relative 'not_available'
+require_relative 'dsl'
 # Operating csv files
 module Sycsvpro
@@ -34,6 +35,8 @@ module Sycsvpro
   #           [1*0]    24     32
   class SpreadSheet
+    include Dsl
     # rows of the spread sheet
     attr_accessor :rows
     # options of the spread sheet
@@ -80,10 +83,14 @@ module Sycsvpro
     # rows::       indicates the row count in combination with values param
     # cols::       indicates the col count in combination with values param
     # file::       file that contains values to create spread sheet with
+    # ds::         decimal separator '.' or ',' where '.' is default. The
+    #              decimal separator is used when spread sheet is created from
+    #              file
     def initialize(*rows)
       opts = rows.pop if rows.last.is_a?(::Hash)
       @opts = opts || {}
       rows = rows_from_params(@opts) if rows.empty?
+      rows = equalize_rows(rows) if @opts[:equalize]
       check_validity_of(rows)
       @row_labels, @col_labels = create_labels(rows)
       @rows = rows
@@ -392,12 +399,32 @@ module Sycsvpro
           end
           values.each_slice(col_count) { |row| rows << row }
         elsif opts[:file]
+          start_read = Time.now
           File.readlines(opts[:file]).each do |line|
-            row = line.split(';')
-            rows << row.collect { |v|
-              v.strip.empty? ? NotAvailable : Float(v.chomp) rescue v.chomp
+            next if line.chomp.empty?
+            rows << line.split(SEMICOLON).collect { |v|
+              v.strip.empty? ? NotAvailable : str2num(v.chomp, opts[:ds])
             }
           end
+          STDERR.puts "Reading file in #{Time.now - start_read} seconds"
+        end
+        rows
+      end
+      # If rows are of different column size the rows are equalized in column
+      # size by filling missing columns with NA
+      def equalize_rows(rows)
+        column_sizes = rows.collect { |r| r.size }
+        return rows if column_sizes.uniq.size == 1
+        max_size = column_sizes.max
+        small_rows = []
+        column_sizes.each_with_index { |c,i| small_rows << i if c < max_size }
+        small_rows.each do |i|
+          rows[i] += [NotAvailable] * (max_size - rows[i].size)
         end
         rows
@@ -408,9 +435,10 @@ module Sycsvpro
       #   * not nil
       #   * at least one row
       def check_validity_of(rows)
-        raise "rows need to be arrays"           if !rows_are_arrays?(rows)
-        raise "needs at least one row"           if rows.empty?
-        raise "rows must be of same column size" if !same_column_size?(rows)
+        raise "rows need to be arrays"              if !rows_are_arrays?(rows)
+        raise "needs at least one row"              if rows.empty?
+        raise "rows must be of same column size. "+
+              "Use equalize: true flag to fix."     if !same_column_size?(rows)
       end
       # Checks whether all rows have the same column size. Returns true if

data/lib/sycsvpro/spread_sheet_builder.rb CHANGED Viewed

@@ -20,20 +20,22 @@ module Sycsvpro
     #
     #     SpreadSheetBuilder.new(outfile:   "out.csv",
     #                            files:     "f1.csv,f2.csv",
-    #                            rlabels:   "true,false",
-    #                            clabels:   "false,true",
+    #                            r:         "true,false",
+    #                            c:         "false,true",
     #                            aliases:   "a,b",
     #                            operation: "(a*b).transpose",
+    #                            ds:        ",",
     #                            print:     "true").execute
     #
     # outfile:   file where the result of the operation is written to
     # files:     files that hold the spread sheet data
-    # rlabels:   indication whether the corresponding file has row labels
-    # clabels:   indication whether the corresponding file has column labels
+    # r:         indication whether the corresponding file has row labels
+    # c:         indication whether the corresponding file has column labels
     # aliases:   symbols that correspond to the spread sheet created from the
     #            files. The symbols are used in the operation. The symbols have
     #            to be chosen carefully not to conflict with existing methods
     #            and variables
+    # ds:        decimal separator '.' or ',' where '.' is default
     # operation: arithmetic operation on spread sheets using the aliases as
     #            place holders for the spread sheets. The last evaluated
     #            operation is returned as result and saved to outfile in case
@@ -93,7 +95,7 @@ module Sycsvpro
         operands = {}
         opts[:aliases].split(',').each_with_index do |a,i|
-          operands[a] = SpreadSheet.new(file: files[i],
+          operands[a] = SpreadSheet.new(file: files[i], ds: opts[:ds],
                                         r: rlabels[i], c: clabels[i])
         end

data/lib/sycsvpro/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # Operating csv files
 module Sycsvpro
   # Version number of sycsvpro
-  VERSION = '0.2.0'
+  VERSION = '0.2.1'
 end

data/spec/sycsvpro/spread_sheet_spec.rb CHANGED Viewed

@@ -7,7 +7,8 @@ module Sycsvpro
     # Creation of spread sheets
     it "should ensure all rows have the same column size" do
       expect { SpreadSheet.new([1,2], [3,4,5]) }.to raise_error(RuntimeError,
-                                             "rows must be of same column size")
+                    "rows must be of same column size. Use equalize: true "+
+                    "flag to fix.")
     end
     it "should not accept non arrays as rows" do
@@ -30,6 +31,10 @@ module Sycsvpro
       expect { s1 == s2 }
     end
+    it "should be created from first n rows of file"
+    it "should be created from last n rows of file"
     it "should be created from file with missing values" do
       file = File.join(File.dirname(__FILE__), "files/spread_sheet_na.csv")
@@ -42,6 +47,45 @@ module Sycsvpro
       expect { s1 == s2 }
     end
+    it "should skip empty rows in file" do
+      file = File.join(File.dirname(__FILE__),
+                       "files/spread_sheet_with_empty_rows.csv")
+      s1 = SpreadSheet.new(file: file, r: true, c: true)
+      s2 = SpreadSheet.new(['Alpha', 'Beta', 'Gamma'],
+                           ['A',NotAvailable,2,3],
+                           ['C',7,NotAvailable,9],
+                           r: true, c: true)
+      expect { s1 == s2 }.to be_true
+    end
+    it "should equalize column size through NA" do
+      s1 = SpreadSheet.new([1,2,3],[4,5],[6,7,8,9],[10], equalize: true)
+      s2 = SpreadSheet.new([1,2,3,NotAvailable],
+                           [4,5,NotAvailable,NotAvailable],
+                           [6,7,8,9],
+                           [10,NotAvailable,NotAvailable,NotAvailable])
+      s1.should eq s2
+    end
+    it "should equalize column size through NA with row and column labels" do
+      s1 = SpreadSheet.new(['A','B'],
+                           ['W',1,2,3],
+                           ['X',4,5],
+                           ['Y',6,7,8,9],
+                           ['Z',10],
+                           r: true, c: true,
+                           equalize: true)
+      s2 = SpreadSheet.new(['A','B',2,3],['W',1,2,3,NotAvailable],
+                           ['X',4,5,NotAvailable,NotAvailable],
+                           ['Y',6,7,8,9],
+                           ['Z',10,NotAvailable,NotAvailable,NotAvailable],
+                           r: true, c: true)
+      s1.should eq s2
+    end
     it "should be created from flat array" do
       s1 = SpreadSheet.new(values: [1,2,3,4,5,6], cols: 2)
       s2 = SpreadSheet.new([1,2],[3,4],[5,6])
@@ -79,6 +123,10 @@ module Sycsvpro
       expect { s1.tranpose == s2 }
     end
+    it "should sort on columns"
+    it "should filter rows on column values"
     it "should assign new values to rows and columns"
     it "should delete columns"

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: sycsvpro
 version: !ruby/object:Gem::Version
-  version: 0.2.0
+  version: 0.2.1
   prerelease:
 platform: ruby
 authors:
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-10-09 00:00:00.000000000 Z
+date: 2014-10-11 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake