sycsvpro 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- sycsvpro (0.2.0)
4
+ sycsvpro (0.2.1)
5
5
  gli (= 2.9.0)
6
6
  timeleap (~> 0.0.1)
7
7
 
data/README.md CHANGED
@@ -714,10 +714,22 @@ Version 0.1.13
714
714
 
715
715
  Version 0.2.0
716
716
  -------------
717
- * SpreadSheet is used to conduct operations like multiplication, division,
718
- addition and subtraction between multiple files that have a table like
719
- structure. SpreadSheet can also be used to retrieve information about csv
720
- files
717
+ * SpreadSheet has been introduced. A spread sheet is used to conduct
718
+ operations like multiplication, division, addition and subtraction between
719
+ multiple files that have a table like structure. SpreadSheet can also be used
720
+ to retrieve information about csv files
721
+
722
+ Version 0.2.1
723
+ -------------
724
+ * When creating spread sheets from a file, empty rows are skipped
725
+ * To equalize column sizes of rows in spread sheets, the `equalize: true` flag was
726
+ introduced
727
+ * To distinguish between different number locales like _1.234.567,89_,
728
+ _1,234,567.89_, _1 234 567.89_ and the like, a `ds` flag was introduced to
729
+ spread sheet to indicate the number formatting
730
+ * Optimize performance when creating spread sheets from files
731
+ * Dsl module has got 3 new methods #is\_integer?, #is\_float? and #str2num to
732
+ convert strings that represent numbers to numeric values
721
733
 
722
734
  Documentation
723
735
  =============
data/bin/sycsvpro CHANGED
@@ -426,6 +426,10 @@ command :spreadsheet do |c|
426
426
  c.arg_name 'ALIAS_1,ALIAS_2,...,ALIAS_N'
427
427
  c.flag [:a, :alias]
428
428
 
429
+ c.desc 'Decimal separator for number values'
430
+ c.arg_name '.|,'
431
+ c.flag [:ds], default: '.'
432
+
429
433
  c.desc 'The arithmetic operation with the table data'
430
434
  c.arg_name 'ARITHMETIC_OPERATION'
431
435
  c.flag [:o, :operation]
@@ -441,6 +445,7 @@ command :spreadsheet do |c|
441
445
  clabels: options[:c],
442
446
  aliases: options[:a],
443
447
  operation: options[:o],
448
+ ds: options[:ds],
444
449
  print: options[:p]).execute
445
450
  print 'done'
446
451
  end
data/lib/sycsvpro/dsl.rb CHANGED
@@ -8,6 +8,25 @@ module Dsl
8
8
  # Example:
9
9
  # Year,c1+c2,c1=~/[A-Z]{1,2}/,Month
10
10
  COMMA_SPLITTER_REGEX = /(?<=,|^)(BEGIN.*?END|\/.*?\/|.*?)(?=,|$)/i
11
+ # Recognizes a string that represents an integer value
12
+ INTEGER_REGEX = /^\d{1,3}(?:[,\. ]\d{3}|\d)*$/
13
+ COMMA_POINT_SPACE_REGEX = /[,\. ]/
14
+ # Recognizes a string that represents a float value in the form of 1,333.45
15
+ DECIMAL_POINT_REGEX = /^\d{1,3}(?:[, ]\d{3}|\d)*(?:\.\d*)$/
16
+ # Recognizes a string that represents a float value in the form of 1.333,45
17
+ DECIMAL_COMMA_REGEX = /^\d{1,3}(?:[\. ]\d{3}|\d)*(?:,\d*)$/
18
+ # A regex that recognizes '.' and ' ' to be used e.g. in #gsub to optimize performance
19
+ POINT_SPACE_REGEX = /[\. ]/
20
+ # A regex that recognizes ',' and ' ' to be used e.g. in #gsub to optimize performance
21
+ COMMA_SPACE_REGEX = /[, ]/
22
+ # A point '.' to be used e.g. in #gsub to optimize performance
23
+ POINT = '.'
24
+ # A comma ',' to be used e.g. in #gsub to optimize performance
25
+ COMMA = ','
26
+ # A semicolon ';' to be used e.g. in #gsub to optimize performance
27
+ SEMICOLON = ';'
28
+ # An empty string '' to be used e.g. in #gsub to optimize performance
29
+ EMPTY = ''
11
30
 
12
31
  # read arguments provided at invocation
13
32
  # :call-seq:
@@ -98,6 +117,40 @@ module Dsl
98
117
  collect { |h| h.gsub(/BEGIN|END/, "") }
99
118
  end
100
119
 
120
+ # Checks if the string represents an integer if so returns the integer
121
+ # otherwise nil
122
+ def is_integer?(value)
123
+ return value.
124
+ gsub(COMMA_POINT_SPACE_REGEX, EMPTY) if !(value =~ INTEGER_REGEX).nil?
125
+ end
126
+
127
+ # Checks if the string represents a float and in case it is a float returns
128
+ # the float value otherwise nil
129
+ # "1.5" -> 1.5
130
+ # "1." -> 1.0
131
+ def is_float?(value, decimal_separator = POINT)
132
+ if decimal_separator == POINT
133
+ return value.
134
+ gsub(COMMA_SPACE_REGEX, EMPTY) if !(value =~ DECIMAL_POINT_REGEX).nil?
135
+ else
136
+ return value.
137
+ gsub(POINT_SPACE_REGEX, EMPTY).
138
+ gsub(COMMA, POINT) if !(value =~ DECIMAL_COMMA_REGEX).nil?
139
+ end
140
+ end
141
+
142
+ # Converts a string to a numeric if the string represents a numerical value
143
+ def str2num(value, decimal_separator = POINT)
144
+ case
145
+ when v = is_integer?(value)
146
+ v.to_i
147
+ when v = is_float?(value, decimal_separator)
148
+ v.to_f
149
+ else
150
+ value
151
+ end
152
+ end
153
+
101
154
  private
102
155
 
103
156
  # Assigns values to keys that are used in rows and yielded to the block
@@ -1,4 +1,5 @@
1
1
  require_relative 'not_available'
2
+ require_relative 'dsl'
2
3
 
3
4
  # Operating csv files
4
5
  module Sycsvpro
@@ -34,6 +35,8 @@ module Sycsvpro
34
35
  # [1*0] 24 32
35
36
  class SpreadSheet
36
37
 
38
+ include Dsl
39
+
37
40
  # rows of the spread sheet
38
41
  attr_accessor :rows
39
42
  # options of the spread sheet
@@ -80,10 +83,14 @@ module Sycsvpro
80
83
  # rows:: indicates the row count in combination with values param
81
84
  # cols:: indicates the col count in combination with values param
82
85
  # file:: file that contains values to create spread sheet with
86
+ # ds:: decimal separator '.' or ',' where '.' is default. The
87
+ # decimal separator is used when spread sheet is created from
88
+ # file
83
89
  def initialize(*rows)
84
90
  opts = rows.pop if rows.last.is_a?(::Hash)
85
91
  @opts = opts || {}
86
92
  rows = rows_from_params(@opts) if rows.empty?
93
+ rows = equalize_rows(rows) if @opts[:equalize]
87
94
  check_validity_of(rows)
88
95
  @row_labels, @col_labels = create_labels(rows)
89
96
  @rows = rows
@@ -392,12 +399,32 @@ module Sycsvpro
392
399
  end
393
400
  values.each_slice(col_count) { |row| rows << row }
394
401
  elsif opts[:file]
402
+ start_read = Time.now
395
403
  File.readlines(opts[:file]).each do |line|
396
- row = line.split(';')
397
- rows << row.collect { |v|
398
- v.strip.empty? ? NotAvailable : Float(v.chomp) rescue v.chomp
404
+ next if line.chomp.empty?
405
+ rows << line.split(SEMICOLON).collect { |v|
406
+ v.strip.empty? ? NotAvailable : str2num(v.chomp, opts[:ds])
399
407
  }
400
408
  end
409
+ STDERR.puts "Reading file in #{Time.now - start_read} seconds"
410
+ end
411
+
412
+ rows
413
+ end
414
+
415
+ # If rows are of different column size the rows are equalized in column
416
+ # size by filling missing columns with NA
417
+ def equalize_rows(rows)
418
+ column_sizes = rows.collect { |r| r.size }
419
+
420
+ return rows if column_sizes.uniq.size == 1
421
+
422
+ max_size = column_sizes.max
423
+ small_rows = []
424
+ column_sizes.each_with_index { |c,i| small_rows << i if c < max_size }
425
+
426
+ small_rows.each do |i|
427
+ rows[i] += [NotAvailable] * (max_size - rows[i].size)
401
428
  end
402
429
 
403
430
  rows
@@ -408,9 +435,10 @@ module Sycsvpro
408
435
  # * not nil
409
436
  # * at least one row
410
437
  def check_validity_of(rows)
411
- raise "rows need to be arrays" if !rows_are_arrays?(rows)
412
- raise "needs at least one row" if rows.empty?
413
- raise "rows must be of same column size" if !same_column_size?(rows)
438
+ raise "rows need to be arrays" if !rows_are_arrays?(rows)
439
+ raise "needs at least one row" if rows.empty?
440
+ raise "rows must be of same column size. "+
441
+ "Use equalize: true flag to fix." if !same_column_size?(rows)
414
442
  end
415
443
 
416
444
  # Checks whether all rows have the same column size. Returns true if
@@ -20,20 +20,22 @@ module Sycsvpro
20
20
  #
21
21
  # SpreadSheetBuilder.new(outfile: "out.csv",
22
22
  # files: "f1.csv,f2.csv",
23
- # rlabels: "true,false",
24
- # clabels: "false,true",
23
+ # r: "true,false",
24
+ # c: "false,true",
25
25
  # aliases: "a,b",
26
26
  # operation: "(a*b).transpose",
27
+ # ds: ",",
27
28
  # print: "true").execute
28
29
  #
29
30
  # outfile: file where the result of the operation is written to
30
31
  # files: files that hold the spread sheet data
31
- # rlabels: indication whether the corresponding file has row labels
32
- # clabels: indication whether the corresponding file has column labels
32
+ # r: indication whether the corresponding file has row labels
33
+ # c: indication whether the corresponding file has column labels
33
34
  # aliases: symbols that correspond to the spread sheet created from the
34
35
  # files. The symbols are used in the operation. The symbols have
35
36
  # to be chosen carefully not to conflict with existing methods
36
37
  # and variables
38
+ # ds: decimal separator '.' or ',' where '.' is default
37
39
  # operation: arithmetic operation on spread sheets using the aliases as
38
40
  # place holders for the spread sheets. The last evaluated
39
41
  # operation is returned as result and saved to outfile in case
@@ -93,7 +95,7 @@ module Sycsvpro
93
95
 
94
96
  operands = {}
95
97
  opts[:aliases].split(',').each_with_index do |a,i|
96
- operands[a] = SpreadSheet.new(file: files[i],
98
+ operands[a] = SpreadSheet.new(file: files[i], ds: opts[:ds],
97
99
  r: rlabels[i], c: clabels[i])
98
100
  end
99
101
 
@@ -1,5 +1,5 @@
1
1
  # Operating csv files
2
2
  module Sycsvpro
3
3
  # Version number of sycsvpro
4
- VERSION = '0.2.0'
4
+ VERSION = '0.2.1'
5
5
  end
@@ -7,7 +7,8 @@ module Sycsvpro
7
7
  # Creation of spread sheets
8
8
  it "should ensure all rows have the same column size" do
9
9
  expect { SpreadSheet.new([1,2], [3,4,5]) }.to raise_error(RuntimeError,
10
- "rows must be of same column size")
10
+ "rows must be of same column size. Use equalize: true "+
11
+ "flag to fix.")
11
12
  end
12
13
 
13
14
  it "should not accept non arrays as rows" do
@@ -30,6 +31,10 @@ module Sycsvpro
30
31
  expect { s1 == s2 }
31
32
  end
32
33
 
34
+ it "should be created from first n rows of file"
35
+
36
+ it "should be created from last n rows of file"
37
+
33
38
  it "should be created from file with missing values" do
34
39
  file = File.join(File.dirname(__FILE__), "files/spread_sheet_na.csv")
35
40
 
@@ -42,6 +47,45 @@ module Sycsvpro
42
47
  expect { s1 == s2 }
43
48
  end
44
49
 
50
+ it "should skip empty rows in file" do
51
+ file = File.join(File.dirname(__FILE__),
52
+ "files/spread_sheet_with_empty_rows.csv")
53
+
54
+ s1 = SpreadSheet.new(file: file, r: true, c: true)
55
+ s2 = SpreadSheet.new(['Alpha', 'Beta', 'Gamma'],
56
+ ['A',NotAvailable,2,3],
57
+ ['C',7,NotAvailable,9],
58
+ r: true, c: true)
59
+
60
+ expect { s1 == s2 }.to be_true
61
+ end
62
+
63
+ it "should equalize column size through NA" do
64
+ s1 = SpreadSheet.new([1,2,3],[4,5],[6,7,8,9],[10], equalize: true)
65
+ s2 = SpreadSheet.new([1,2,3,NotAvailable],
66
+ [4,5,NotAvailable,NotAvailable],
67
+ [6,7,8,9],
68
+ [10,NotAvailable,NotAvailable,NotAvailable])
69
+ s1.should eq s2
70
+ end
71
+
72
+ it "should equalize column size through NA with row and column labels" do
73
+ s1 = SpreadSheet.new(['A','B'],
74
+ ['W',1,2,3],
75
+ ['X',4,5],
76
+ ['Y',6,7,8,9],
77
+ ['Z',10],
78
+ r: true, c: true,
79
+ equalize: true)
80
+
81
+ s2 = SpreadSheet.new(['A','B',2,3],['W',1,2,3,NotAvailable],
82
+ ['X',4,5,NotAvailable,NotAvailable],
83
+ ['Y',6,7,8,9],
84
+ ['Z',10,NotAvailable,NotAvailable,NotAvailable],
85
+ r: true, c: true)
86
+ s1.should eq s2
87
+ end
88
+
45
89
  it "should be created from flat array" do
46
90
  s1 = SpreadSheet.new(values: [1,2,3,4,5,6], cols: 2)
47
91
  s2 = SpreadSheet.new([1,2],[3,4],[5,6])
@@ -79,6 +123,10 @@ module Sycsvpro
79
123
  expect { s1.tranpose == s2 }
80
124
  end
81
125
 
126
+ it "should sort on columns"
127
+
128
+ it "should filter rows on column values"
129
+
82
130
  it "should assign new values to rows and columns"
83
131
 
84
132
  it "should delete columns"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sycsvpro
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-10-09 00:00:00.000000000 Z
12
+ date: 2014-10-11 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake