sycsvpro 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile.lock +1 -1
- data/README.md +16 -4
- data/bin/sycsvpro +5 -0
- data/lib/sycsvpro/dsl.rb +53 -0
- data/lib/sycsvpro/spread_sheet.rb +34 -6
- data/lib/sycsvpro/spread_sheet_builder.rb +7 -5
- data/lib/sycsvpro/version.rb +1 -1
- data/spec/sycsvpro/spread_sheet_spec.rb +49 -1
- metadata +2 -2
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -714,10 +714,22 @@ Version 0.1.13
|
|
714
714
|
|
715
715
|
Version 0.2.0
|
716
716
|
-------------
|
717
|
-
* SpreadSheet is used to conduct
|
718
|
-
addition and subtraction between
|
719
|
-
structure. SpreadSheet can also be used
|
720
|
-
files
|
717
|
+
* SpreadSheet has been introduced. A spread sheet is used to conduct
|
718
|
+
operations like multiplication, division, addition and subtraction between
|
719
|
+
multiple files that have a table like structure. SpreadSheet can also be used
|
720
|
+
to retrieve information about csv files
|
721
|
+
|
722
|
+
Version 0.2.1
|
723
|
+
-------------
|
724
|
+
* When creating spread sheets from file empty rows are skipped
|
725
|
+
* To equalize column sizes of rows in spread sheets `equalize: true` flag was
|
726
|
+
introduced
|
727
|
+
* To distinguish between different number locales like _1.234.567,89_,
|
728
|
+
_1,234,567.89_, _1 234 567.89_ and the like a `ds` flag was introduced to
|
729
|
+
spread sheet to indicate the number formatting
|
730
|
+
* Optimize performance when creating spread sheets from files
|
731
|
+
* Dsl module has got 3 new methods #is\_integer?, #is\_float? and #str2num to
|
732
|
+
convert strings that represent numbers to numeric values
|
721
733
|
|
722
734
|
Documentation
|
723
735
|
=============
|
data/bin/sycsvpro
CHANGED
@@ -426,6 +426,10 @@ command :spreadsheet do |c|
|
|
426
426
|
c.arg_name 'ALIAS_1,ALIAS_2,...,ALIAS_N'
|
427
427
|
c.flag [:a, :alias]
|
428
428
|
|
429
|
+
c.desc 'Decimal separator for number values'
|
430
|
+
c.arg_name '.|,'
|
431
|
+
c.flag [:ds], default: '.'
|
432
|
+
|
429
433
|
c.desc 'The arithmetic operation with the table data'
|
430
434
|
c.arg_name 'ARITHMETIC_OPERATION'
|
431
435
|
c.flag [:o, :operation]
|
@@ -441,6 +445,7 @@ command :spreadsheet do |c|
|
|
441
445
|
clabels: options[:c],
|
442
446
|
aliases: options[:a],
|
443
447
|
operation: options[:o],
|
448
|
+
ds: options[:ds],
|
444
449
|
print: options[:p]).execute
|
445
450
|
print 'done'
|
446
451
|
end
|
data/lib/sycsvpro/dsl.rb
CHANGED
@@ -8,6 +8,25 @@ module Dsl
|
|
8
8
|
# Example:
|
9
9
|
# Year,c1+c2,c1=~/[A-Z]{1,2}/,Month
|
10
10
|
COMMA_SPLITTER_REGEX = /(?<=,|^)(BEGIN.*?END|\/.*?\/|.*?)(?=,|$)/i
|
11
|
+
# Recognizes a string that represents an integer value
|
12
|
+
INTEGER_REGEX = /^\d{1,3}(?:[,\. ]\d{3}|\d)*$/
|
13
|
+
COMMA_POINT_SPACE_REGEX = /[,\. ]/
|
14
|
+
# Recognizes a string that represents a float value in the form of 1,333.45
|
15
|
+
DECIMAL_POINT_REGEX = /^\d{1,3}(?:[, ]\d{3}|\d)*(?:\.\d*)$/
|
16
|
+
# Recognizes a string that represents a float value in the form of 1.333,45
|
17
|
+
DECIMAL_COMMA_REGEX = /^\d{1,3}(?:[\. ]\d{3}|\d)*(?:,\d*)$/
|
18
|
+
# A regex that recognizes '.' and ' ' to be used e.g. in #gsub to optimize performance
|
19
|
+
POINT_SPACE_REGEX = /[\. ]/
|
20
|
+
# A regex that recognizes ',' and ' ' to be used e.g. in #gsub to optimize performance
|
21
|
+
COMMA_SPACE_REGEX = /[, ]/
|
22
|
+
# A point '.' to be used e.g. in #gsub to optimize performance
|
23
|
+
POINT = '.'
|
24
|
+
# A comma ',' to be used e.g. in #gsub to optimize performance
|
25
|
+
COMMA = ','
|
26
|
+
# A semicolon ';' to be used e.g. in #gsub to optimize performance
|
27
|
+
SEMICOLON = ';'
|
28
|
+
# An empty string '' to be used e.g. in #gsub to optimize performance
|
29
|
+
EMPTY = ''
|
11
30
|
|
12
31
|
# read arguments provided at invocation
|
13
32
|
# :call-seq:
|
@@ -98,6 +117,40 @@ module Dsl
|
|
98
117
|
collect { |h| h.gsub(/BEGIN|END/, "") }
|
99
118
|
end
|
100
119
|
|
120
|
+
# Checks if the string represents an integer if so returns the integer
|
121
|
+
# otherwise nil
|
122
|
+
def is_integer?(value)
|
123
|
+
return value.
|
124
|
+
gsub(COMMA_POINT_SPACE_REGEX, EMPTY) if !(value =~ INTEGER_REGEX).nil?
|
125
|
+
end
|
126
|
+
|
127
|
+
# Checks if the string represents a float and in case it is a float returns
|
128
|
+
# the float value otherwise nil
|
129
|
+
# "1.5" -> 1.5
|
130
|
+
# "1." -> 1.0
|
131
|
+
def is_float?(value, decimal_separator = POINT)
|
132
|
+
if decimal_separator == POINT
|
133
|
+
return value.
|
134
|
+
gsub(COMMA_SPACE_REGEX, EMPTY) if !(value =~ DECIMAL_POINT_REGEX).nil?
|
135
|
+
else
|
136
|
+
return value.
|
137
|
+
gsub(POINT_SPACE_REGEX, EMPTY).
|
138
|
+
gsub(COMMA, POINT) if !(value =~ DECIMAL_COMMA_REGEX).nil?
|
139
|
+
end
|
140
|
+
end
|
141
|
+
|
142
|
+
# Converts a string to a numeric if the string represents a numerical value
|
143
|
+
def str2num(value, decimal_separator = POINT)
|
144
|
+
case
|
145
|
+
when v = is_integer?(value)
|
146
|
+
v.to_i
|
147
|
+
when v = is_float?(value, decimal_separator)
|
148
|
+
v.to_f
|
149
|
+
else
|
150
|
+
value
|
151
|
+
end
|
152
|
+
end
|
153
|
+
|
101
154
|
private
|
102
155
|
|
103
156
|
# Assigns values to keys that are used in rows and yielded to the block
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require_relative 'not_available'
|
2
|
+
require_relative 'dsl'
|
2
3
|
|
3
4
|
# Operating csv files
|
4
5
|
module Sycsvpro
|
@@ -34,6 +35,8 @@ module Sycsvpro
|
|
34
35
|
# [1*0] 24 32
|
35
36
|
class SpreadSheet
|
36
37
|
|
38
|
+
include Dsl
|
39
|
+
|
37
40
|
# rows of the spread sheet
|
38
41
|
attr_accessor :rows
|
39
42
|
# options of the spread sheet
|
@@ -80,10 +83,14 @@ module Sycsvpro
|
|
80
83
|
# rows:: indicates the row count in combination with values param
|
81
84
|
# cols:: indicates the col count in combination with values param
|
82
85
|
# file:: file that contains values to create spread sheet with
|
86
|
+
# ds:: decimal separator '.' or ',' where '.' is default. The
|
87
|
+
# decimal separator is used when spread sheet is created from
|
88
|
+
# file
|
83
89
|
def initialize(*rows)
|
84
90
|
opts = rows.pop if rows.last.is_a?(::Hash)
|
85
91
|
@opts = opts || {}
|
86
92
|
rows = rows_from_params(@opts) if rows.empty?
|
93
|
+
rows = equalize_rows(rows) if @opts[:equalize]
|
87
94
|
check_validity_of(rows)
|
88
95
|
@row_labels, @col_labels = create_labels(rows)
|
89
96
|
@rows = rows
|
@@ -392,12 +399,32 @@ module Sycsvpro
|
|
392
399
|
end
|
393
400
|
values.each_slice(col_count) { |row| rows << row }
|
394
401
|
elsif opts[:file]
|
402
|
+
start_read = Time.now
|
395
403
|
File.readlines(opts[:file]).each do |line|
|
396
|
-
|
397
|
-
rows <<
|
398
|
-
v.strip.empty? ? NotAvailable :
|
404
|
+
next if line.chomp.empty?
|
405
|
+
rows << line.split(SEMICOLON).collect { |v|
|
406
|
+
v.strip.empty? ? NotAvailable : str2num(v.chomp, opts[:ds])
|
399
407
|
}
|
400
408
|
end
|
409
|
+
STDERR.puts "Reading file in #{Time.now - start_read} seconds"
|
410
|
+
end
|
411
|
+
|
412
|
+
rows
|
413
|
+
end
|
414
|
+
|
415
|
+
# If rows are of different column size the rows are equalized in column
|
416
|
+
# size by filling missing columns with NA
|
417
|
+
def equalize_rows(rows)
|
418
|
+
column_sizes = rows.collect { |r| r.size }
|
419
|
+
|
420
|
+
return rows if column_sizes.uniq.size == 1
|
421
|
+
|
422
|
+
max_size = column_sizes.max
|
423
|
+
small_rows = []
|
424
|
+
column_sizes.each_with_index { |c,i| small_rows << i if c < max_size }
|
425
|
+
|
426
|
+
small_rows.each do |i|
|
427
|
+
rows[i] += [NotAvailable] * (max_size - rows[i].size)
|
401
428
|
end
|
402
429
|
|
403
430
|
rows
|
@@ -408,9 +435,10 @@ module Sycsvpro
|
|
408
435
|
# * not nil
|
409
436
|
# * at least one row
|
410
437
|
def check_validity_of(rows)
|
411
|
-
raise "rows need to be arrays"
|
412
|
-
raise "needs at least one row"
|
413
|
-
raise "rows must be of same column size"
|
438
|
+
raise "rows need to be arrays" if !rows_are_arrays?(rows)
|
439
|
+
raise "needs at least one row" if rows.empty?
|
440
|
+
raise "rows must be of same column size. "+
|
441
|
+
"Use equalize: true flag to fix." if !same_column_size?(rows)
|
414
442
|
end
|
415
443
|
|
416
444
|
# Checks whether all rows have the same column size. Returns true if
|
@@ -20,20 +20,22 @@ module Sycsvpro
|
|
20
20
|
#
|
21
21
|
# SpreadSheetBuilder.new(outfile: "out.csv",
|
22
22
|
# files: "f1.csv,f2.csv",
|
23
|
-
#
|
24
|
-
#
|
23
|
+
# r: "true,false",
|
24
|
+
# c: "false,true",
|
25
25
|
# aliases: "a,b",
|
26
26
|
# operation: "(a*b).transpose",
|
27
|
+
# ds: ",",
|
27
28
|
# print: "true").execute
|
28
29
|
#
|
29
30
|
# outfile: file where the result of the operation is written to
|
30
31
|
# files: files that hold the spread sheet data
|
31
|
-
#
|
32
|
-
#
|
32
|
+
# r: indication whether the corresponding file has row labels
|
33
|
+
# c: indication whether the corresponding file has column labels
|
33
34
|
# aliases: symbols that correspond to the spread sheet created from the
|
34
35
|
# files. The symbols are used in the operation. The symbols have
|
35
36
|
# to be chosen carefully not to conflict with existing methods
|
36
37
|
# and variables
|
38
|
+
# ds: decimal separator '.' or ',' where '.' is default
|
37
39
|
# operation: arithmetic operation on spread sheets using the aliases as
|
38
40
|
# place holders for the spread sheets. The last evaluated
|
39
41
|
# operation is returned as result and saved to outfile in case
|
@@ -93,7 +95,7 @@ module Sycsvpro
|
|
93
95
|
|
94
96
|
operands = {}
|
95
97
|
opts[:aliases].split(',').each_with_index do |a,i|
|
96
|
-
operands[a] = SpreadSheet.new(file: files[i],
|
98
|
+
operands[a] = SpreadSheet.new(file: files[i], ds: opts[:ds],
|
97
99
|
r: rlabels[i], c: clabels[i])
|
98
100
|
end
|
99
101
|
|
data/lib/sycsvpro/version.rb
CHANGED
@@ -7,7 +7,8 @@ module Sycsvpro
|
|
7
7
|
# Creation of spread sheets
|
8
8
|
it "should ensure all rows have the same column size" do
|
9
9
|
expect { SpreadSheet.new([1,2], [3,4,5]) }.to raise_error(RuntimeError,
|
10
|
-
|
10
|
+
"rows must be of same column size. Use equalize: true "+
|
11
|
+
"flag to fix.")
|
11
12
|
end
|
12
13
|
|
13
14
|
it "should not accept non arrays as rows" do
|
@@ -30,6 +31,10 @@ module Sycsvpro
|
|
30
31
|
expect { s1 == s2 }
|
31
32
|
end
|
32
33
|
|
34
|
+
it "should be created from first n rows of file"
|
35
|
+
|
36
|
+
it "should be created from last n rows of file"
|
37
|
+
|
33
38
|
it "should be created from file with missing values" do
|
34
39
|
file = File.join(File.dirname(__FILE__), "files/spread_sheet_na.csv")
|
35
40
|
|
@@ -42,6 +47,45 @@ module Sycsvpro
|
|
42
47
|
expect { s1 == s2 }
|
43
48
|
end
|
44
49
|
|
50
|
+
it "should skip empty rows in file" do
|
51
|
+
file = File.join(File.dirname(__FILE__),
|
52
|
+
"files/spread_sheet_with_empty_rows.csv")
|
53
|
+
|
54
|
+
s1 = SpreadSheet.new(file: file, r: true, c: true)
|
55
|
+
s2 = SpreadSheet.new(['Alpha', 'Beta', 'Gamma'],
|
56
|
+
['A',NotAvailable,2,3],
|
57
|
+
['C',7,NotAvailable,9],
|
58
|
+
r: true, c: true)
|
59
|
+
|
60
|
+
expect { s1 == s2 }.to be_true
|
61
|
+
end
|
62
|
+
|
63
|
+
it "should equalize column size through NA" do
|
64
|
+
s1 = SpreadSheet.new([1,2,3],[4,5],[6,7,8,9],[10], equalize: true)
|
65
|
+
s2 = SpreadSheet.new([1,2,3,NotAvailable],
|
66
|
+
[4,5,NotAvailable,NotAvailable],
|
67
|
+
[6,7,8,9],
|
68
|
+
[10,NotAvailable,NotAvailable,NotAvailable])
|
69
|
+
s1.should eq s2
|
70
|
+
end
|
71
|
+
|
72
|
+
it "should equalize column size through NA with row and column labels" do
|
73
|
+
s1 = SpreadSheet.new(['A','B'],
|
74
|
+
['W',1,2,3],
|
75
|
+
['X',4,5],
|
76
|
+
['Y',6,7,8,9],
|
77
|
+
['Z',10],
|
78
|
+
r: true, c: true,
|
79
|
+
equalize: true)
|
80
|
+
|
81
|
+
s2 = SpreadSheet.new(['A','B',2,3],['W',1,2,3,NotAvailable],
|
82
|
+
['X',4,5,NotAvailable,NotAvailable],
|
83
|
+
['Y',6,7,8,9],
|
84
|
+
['Z',10,NotAvailable,NotAvailable,NotAvailable],
|
85
|
+
r: true, c: true)
|
86
|
+
s1.should eq s2
|
87
|
+
end
|
88
|
+
|
45
89
|
it "should be created from flat array" do
|
46
90
|
s1 = SpreadSheet.new(values: [1,2,3,4,5,6], cols: 2)
|
47
91
|
s2 = SpreadSheet.new([1,2],[3,4],[5,6])
|
@@ -79,6 +123,10 @@ module Sycsvpro
|
|
79
123
|
expect { s1.tranpose == s2 }
|
80
124
|
end
|
81
125
|
|
126
|
+
it "should sort on columns"
|
127
|
+
|
128
|
+
it "should filter rows on column values"
|
129
|
+
|
82
130
|
it "should assign new values to rows and columns"
|
83
131
|
|
84
132
|
it "should delete columns"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sycsvpro
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-10-
|
12
|
+
date: 2014-10-11 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|