sycsvpro 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- sycsvpro (0.2.0)
4
+ sycsvpro (0.2.1)
5
5
  gli (= 2.9.0)
6
6
  timeleap (~> 0.0.1)
7
7
 
data/README.md CHANGED
@@ -714,10 +714,22 @@ Version 0.1.13
714
714
 
715
715
  Version 0.2.0
716
716
  -------------
717
- * SpreadSheet is used to conduct operations like multiplication, division,
718
- addition and subtraction between multiple files that have a table like
719
- structure. SpreadSheet can also be used to retrieve information about csv
720
- files
717
+ * SpreadSheet has been introduced. A spread sheet is used to conduct
718
+ operations like multiplication, division, addition and subtraction between
719
+ multiple files that have a table like structure. SpreadSheet can also be used
720
+ to retrieve information about csv files
721
+
722
+ Version 0.2.1
723
+ -------------
724
+ * When creating spread sheets from a file, empty rows are skipped
725
+ * To equalize column sizes of rows in spread sheets, an `equalize: true` flag was
726
+ introduced
727
+ * To distinguish between different number locales like _1.234.567,89_,
728
+ _1,234,567.89_, _1 234 567.89_ and the like a `ds` flag was introduced to
729
+ spread sheet to indicate the number formatting
730
+ * Optimize performance when creating spread sheets from files
731
+ * The Dsl module has 3 new methods, #is\_integer?, #is\_float? and #str2num, to
732
+ convert strings that represent numbers to numeric values
721
733
 
722
734
  Documentation
723
735
  =============
data/bin/sycsvpro CHANGED
@@ -426,6 +426,10 @@ command :spreadsheet do |c|
426
426
  c.arg_name 'ALIAS_1,ALIAS_2,...,ALIAS_N'
427
427
  c.flag [:a, :alias]
428
428
 
429
+ c.desc 'Decimal separator for number values'
430
+ c.arg_name '.|,'
431
+ c.flag [:ds], default: '.'
432
+
429
433
  c.desc 'The arithmetic operation with the table data'
430
434
  c.arg_name 'ARITHMETIC_OPERATION'
431
435
  c.flag [:o, :operation]
@@ -441,6 +445,7 @@ command :spreadsheet do |c|
441
445
  clabels: options[:c],
442
446
  aliases: options[:a],
443
447
  operation: options[:o],
448
+ ds: options[:ds],
444
449
  print: options[:p]).execute
445
450
  print 'done'
446
451
  end
data/lib/sycsvpro/dsl.rb CHANGED
@@ -8,6 +8,25 @@ module Dsl
8
8
  # Example:
9
9
  # Year,c1+c2,c1=~/[A-Z]{1,2}/,Month
10
10
  COMMA_SPLITTER_REGEX = /(?<=,|^)(BEGIN.*?END|\/.*?\/|.*?)(?=,|$)/i
11
+ # Recognizes a string that represents an integer value
12
+ INTEGER_REGEX = /^\d{1,3}(?:[,\. ]\d{3}|\d)*$/
13
+ COMMA_POINT_SPACE_REGEX = /[,\. ]/
14
+ # Recognizes a string that represents a float value in the form of 1,333.45
15
+ DECIMAL_POINT_REGEX = /^\d{1,3}(?:[, ]\d{3}|\d)*(?:\.\d*)$/
16
+ # Recognizes a string that represents a float value in the form of 1.333,45
17
+ DECIMAL_COMMA_REGEX = /^\d{1,3}(?:[\. ]\d{3}|\d)*(?:,\d*)$/
18
+ # A regex that recognizes '.' and ' ' to be used e.g. in #gsub to optimize performance
19
+ POINT_SPACE_REGEX = /[\. ]/
20
+ # A regex that recognizes ',' and ' ' to be used e.g. in #gsub to optimize performance
21
+ COMMA_SPACE_REGEX = /[, ]/
22
+ # A point '.' to be used e.g. in #gsub to optimize performance
23
+ POINT = '.'
24
+ # A comma ',' to be used e.g. in #gsub to optimize performance
25
+ COMMA = ','
26
+ # A semicolon ';' to be used e.g. in #gsub to optimize performance
27
+ SEMICOLON = ';'
28
+ # An empty string '' to be used e.g. in #gsub to optimize performance
29
+ EMPTY = ''
11
30
 
12
31
  # read arguments provided at invocation
13
32
  # :call-seq:
@@ -98,6 +117,40 @@ module Dsl
98
117
  collect { |h| h.gsub(/BEGIN|END/, "") }
99
118
  end
100
119
 
120
+ # Checks if the string represents an integer. If so, returns the string with
121
+ # its ',', '.' and ' ' separators removed (ready for #to_i), otherwise nil
122
+ def is_integer?(value)
123
+ return value.
124
+ gsub(COMMA_POINT_SPACE_REGEX, EMPTY) if !(value =~ INTEGER_REGEX).nil?
125
+ end
126
+
127
+ # Checks if the string represents a float. If so, returns the string with the
128
+ # grouping separators removed and '.' as decimal separator, otherwise nil
129
+ # "1.5" -> 1.5
130
+ # "1." -> 1.0
131
+ def is_float?(value, decimal_separator = POINT)
132
+ if decimal_separator == POINT
133
+ return value.
134
+ gsub(COMMA_SPACE_REGEX, EMPTY) if !(value =~ DECIMAL_POINT_REGEX).nil?
135
+ else
136
+ return value.
137
+ gsub(POINT_SPACE_REGEX, EMPTY).
138
+ gsub(COMMA, POINT) if !(value =~ DECIMAL_COMMA_REGEX).nil?
139
+ end
140
+ end
141
+
142
+ # Converts a string to a numeric if the string represents a numerical value
143
+ def str2num(value, decimal_separator = POINT)
144
+ case
145
+ when v = is_integer?(value)
146
+ v.to_i
147
+ when v = is_float?(value, decimal_separator)
148
+ v.to_f
149
+ else
150
+ value
151
+ end
152
+ end
153
+
101
154
  private
102
155
 
103
156
  # Assigns values to keys that are used in rows and yielded to the block
@@ -1,4 +1,5 @@
1
1
  require_relative 'not_available'
2
+ require_relative 'dsl'
2
3
 
3
4
  # Operating csv files
4
5
  module Sycsvpro
@@ -34,6 +35,8 @@ module Sycsvpro
34
35
  # [1*0] 24 32
35
36
  class SpreadSheet
36
37
 
38
+ include Dsl
39
+
37
40
  # rows of the spread sheet
38
41
  attr_accessor :rows
39
42
  # options of the spread sheet
@@ -80,10 +83,14 @@ module Sycsvpro
80
83
  # rows:: indicates the row count in combination with values param
81
84
  # cols:: indicates the col count in combination with values param
82
85
  # file:: file that contains values to create spread sheet with
86
+ # ds:: decimal separator '.' or ',' where '.' is default. The
87
+ # decimal separator is used when spread sheet is created from
88
+ # file
83
89
  def initialize(*rows)
84
90
  opts = rows.pop if rows.last.is_a?(::Hash)
85
91
  @opts = opts || {}
86
92
  rows = rows_from_params(@opts) if rows.empty?
93
+ rows = equalize_rows(rows) if @opts[:equalize]
87
94
  check_validity_of(rows)
88
95
  @row_labels, @col_labels = create_labels(rows)
89
96
  @rows = rows
@@ -392,12 +399,32 @@ module Sycsvpro
392
399
  end
393
400
  values.each_slice(col_count) { |row| rows << row }
394
401
  elsif opts[:file]
402
+ start_read = Time.now
395
403
  File.readlines(opts[:file]).each do |line|
396
- row = line.split(';')
397
- rows << row.collect { |v|
398
- v.strip.empty? ? NotAvailable : Float(v.chomp) rescue v.chomp
404
+ next if line.chomp.empty?
405
+ rows << line.split(SEMICOLON).collect { |v|
406
+ v.strip.empty? ? NotAvailable : str2num(v.chomp, opts[:ds])
399
407
  }
400
408
  end
409
+ STDERR.puts "Reading file in #{Time.now - start_read} seconds"
410
+ end
411
+
412
+ rows
413
+ end
414
+
415
+ # If rows are of different column size the rows are equalized in column
416
+ # size by filling missing columns with NA
417
+ def equalize_rows(rows)
418
+ column_sizes = rows.collect { |r| r.size }
419
+
420
+ return rows if column_sizes.uniq.size == 1
421
+
422
+ max_size = column_sizes.max
423
+ small_rows = []
424
+ column_sizes.each_with_index { |c,i| small_rows << i if c < max_size }
425
+
426
+ small_rows.each do |i|
427
+ rows[i] += [NotAvailable] * (max_size - rows[i].size)
401
428
  end
402
429
 
403
430
  rows
@@ -408,9 +435,10 @@ module Sycsvpro
408
435
  # * not nil
409
436
  # * at least one row
410
437
  def check_validity_of(rows)
411
- raise "rows need to be arrays" if !rows_are_arrays?(rows)
412
- raise "needs at least one row" if rows.empty?
413
- raise "rows must be of same column size" if !same_column_size?(rows)
438
+ raise "rows need to be arrays" if !rows_are_arrays?(rows)
439
+ raise "needs at least one row" if rows.empty?
440
+ raise "rows must be of same column size. "+
441
+ "Use equalize: true flag to fix." if !same_column_size?(rows)
414
442
  end
415
443
 
416
444
  # Checks whether all rows have the same column size. Returns true if
@@ -20,20 +20,22 @@ module Sycsvpro
20
20
  #
21
21
  # SpreadSheetBuilder.new(outfile: "out.csv",
22
22
  # files: "f1.csv,f2.csv",
23
- # rlabels: "true,false",
24
- # clabels: "false,true",
23
+ # r: "true,false",
24
+ # c: "false,true",
25
25
  # aliases: "a,b",
26
26
  # operation: "(a*b).transpose",
27
+ # ds: ",",
27
28
  # print: "true").execute
28
29
  #
29
30
  # outfile: file where the result of the operation is written to
30
31
  # files: files that hold the spread sheet data
31
- # rlabels: indication whether the corresponding file has row labels
32
- # clabels: indication whether the corresponding file has column labels
32
+ # r: indication whether the corresponding file has row labels
33
+ # c: indication whether the corresponding file has column labels
33
34
  # aliases: symbols that correspond to the spread sheet created from the
34
35
  # files. The symbols are used in the operation. The symbols have
35
36
  # to be chosen carefully not to conflict with existing methods
36
37
  # and variables
38
+ # ds: decimal separator '.' or ',' where '.' is default
37
39
  # operation: arithmetic operation on spread sheets using the aliases as
38
40
  # place holders for the spread sheets. The last evaluated
39
41
  # operation is returned as result and saved to outfile in case
@@ -93,7 +95,7 @@ module Sycsvpro
93
95
 
94
96
  operands = {}
95
97
  opts[:aliases].split(',').each_with_index do |a,i|
96
- operands[a] = SpreadSheet.new(file: files[i],
98
+ operands[a] = SpreadSheet.new(file: files[i], ds: opts[:ds],
97
99
  r: rlabels[i], c: clabels[i])
98
100
  end
99
101
 
@@ -1,5 +1,5 @@
1
1
  # Operating csv files
2
2
  module Sycsvpro
3
3
  # Version number of sycsvpro
4
- VERSION = '0.2.0'
4
+ VERSION = '0.2.1'
5
5
  end
@@ -7,7 +7,8 @@ module Sycsvpro
7
7
  # Creation of spread sheets
8
8
  it "should ensure all rows have the same column size" do
9
9
  expect { SpreadSheet.new([1,2], [3,4,5]) }.to raise_error(RuntimeError,
10
- "rows must be of same column size")
10
+ "rows must be of same column size. Use equalize: true "+
11
+ "flag to fix.")
11
12
  end
12
13
 
13
14
  it "should not accept non arrays as rows" do
@@ -30,6 +31,10 @@ module Sycsvpro
30
31
  expect { s1 == s2 }
31
32
  end
32
33
 
34
+ it "should be created from first n rows of file"
35
+
36
+ it "should be created from last n rows of file"
37
+
33
38
  it "should be created from file with missing values" do
34
39
  file = File.join(File.dirname(__FILE__), "files/spread_sheet_na.csv")
35
40
 
@@ -42,6 +47,45 @@ module Sycsvpro
42
47
  expect { s1 == s2 }
43
48
  end
44
49
 
50
+ it "should skip empty rows in file" do
51
+ file = File.join(File.dirname(__FILE__),
52
+ "files/spread_sheet_with_empty_rows.csv")
53
+
54
+ s1 = SpreadSheet.new(file: file, r: true, c: true)
55
+ s2 = SpreadSheet.new(['Alpha', 'Beta', 'Gamma'],
56
+ ['A',NotAvailable,2,3],
57
+ ['C',7,NotAvailable,9],
58
+ r: true, c: true)
59
+
60
+ expect { s1 == s2 }.to be_true
61
+ end
62
+
63
+ it "should equalize column size through NA" do
64
+ s1 = SpreadSheet.new([1,2,3],[4,5],[6,7,8,9],[10], equalize: true)
65
+ s2 = SpreadSheet.new([1,2,3,NotAvailable],
66
+ [4,5,NotAvailable,NotAvailable],
67
+ [6,7,8,9],
68
+ [10,NotAvailable,NotAvailable,NotAvailable])
69
+ s1.should eq s2
70
+ end
71
+
72
+ it "should equalize column size through NA with row and column labels" do
73
+ s1 = SpreadSheet.new(['A','B'],
74
+ ['W',1,2,3],
75
+ ['X',4,5],
76
+ ['Y',6,7,8,9],
77
+ ['Z',10],
78
+ r: true, c: true,
79
+ equalize: true)
80
+
81
+ s2 = SpreadSheet.new(['A','B',2,3],['W',1,2,3,NotAvailable],
82
+ ['X',4,5,NotAvailable,NotAvailable],
83
+ ['Y',6,7,8,9],
84
+ ['Z',10,NotAvailable,NotAvailable,NotAvailable],
85
+ r: true, c: true)
86
+ s1.should eq s2
87
+ end
88
+
45
89
  it "should be created from flat array" do
46
90
  s1 = SpreadSheet.new(values: [1,2,3,4,5,6], cols: 2)
47
91
  s2 = SpreadSheet.new([1,2],[3,4],[5,6])
@@ -79,6 +123,10 @@ module Sycsvpro
79
123
  expect { s1.tranpose == s2 }
80
124
  end
81
125
 
126
+ it "should sort on columns"
127
+
128
+ it "should filter rows on column values"
129
+
82
130
  it "should assign new values to rows and columns"
83
131
 
84
132
  it "should delete columns"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sycsvpro
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-10-09 00:00:00.000000000 Z
12
+ date: 2014-10-11 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake