sycsvpro 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile.lock +1 -1
- data/README.md +16 -4
- data/bin/sycsvpro +5 -0
- data/lib/sycsvpro/dsl.rb +53 -0
- data/lib/sycsvpro/spread_sheet.rb +34 -6
- data/lib/sycsvpro/spread_sheet_builder.rb +7 -5
- data/lib/sycsvpro/version.rb +1 -1
- data/spec/sycsvpro/spread_sheet_spec.rb +49 -1
- metadata +2 -2
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -714,10 +714,22 @@ Version 0.1.13
|
|
714
714
|
|
715
715
|
Version 0.2.0
|
716
716
|
-------------
|
717
|
-
* SpreadSheet is used to conduct
|
718
|
-
addition and subtraction between
|
719
|
-
structure. SpreadSheet can also be used
|
720
|
-
files
|
717
|
+
* SpreadSheet has been introduced. A spread sheet is used to conduct
|
718
|
+
operations like multiplication, division, addition and subtraction between
|
719
|
+
multiple files that have a table like structure. SpreadSheet can also be used
|
720
|
+
to retrieve information about csv files
|
721
|
+
|
722
|
+
Version 0.2.1
|
723
|
+
-------------
|
724
|
+
* When creating spread sheets from a file, empty rows are skipped
|
725
|
+
* To equalize column sizes of rows in spread sheets, an `equalize: true` flag was
|
726
|
+
introduced
|
727
|
+
* To distinguish between different number locales like _1.234.567,89_,
|
728
|
+
_1,234,567.89_, _1 234 567.89_ and the like a `ds` flag was introduced to
|
729
|
+
spread sheet to indicate the number formatting
|
730
|
+
* Optimize performance when creating spread sheets from files
|
731
|
+
* The Dsl module has 3 new methods, #is\_integer?, #is\_float? and #str2num, to
|
732
|
+
convert strings that represent numbers to numeric values
|
721
733
|
|
722
734
|
Documentation
|
723
735
|
=============
|
data/bin/sycsvpro
CHANGED
@@ -426,6 +426,10 @@ command :spreadsheet do |c|
|
|
426
426
|
c.arg_name 'ALIAS_1,ALIAS_2,...,ALIAS_N'
|
427
427
|
c.flag [:a, :alias]
|
428
428
|
|
429
|
+
c.desc 'Decimal separator for number values'
|
430
|
+
c.arg_name '.|,'
|
431
|
+
c.flag [:ds], default: '.'
|
432
|
+
|
429
433
|
c.desc 'The arithmetic operation with the table data'
|
430
434
|
c.arg_name 'ARITHMETIC_OPERATION'
|
431
435
|
c.flag [:o, :operation]
|
@@ -441,6 +445,7 @@ command :spreadsheet do |c|
|
|
441
445
|
clabels: options[:c],
|
442
446
|
aliases: options[:a],
|
443
447
|
operation: options[:o],
|
448
|
+
ds: options[:ds],
|
444
449
|
print: options[:p]).execute
|
445
450
|
print 'done'
|
446
451
|
end
|
data/lib/sycsvpro/dsl.rb
CHANGED
@@ -8,6 +8,25 @@ module Dsl
|
|
8
8
|
# Example:
|
9
9
|
# Year,c1+c2,c1=~/[A-Z]{1,2}/,Month
|
10
10
|
COMMA_SPLITTER_REGEX = /(?<=,|^)(BEGIN.*?END|\/.*?\/|.*?)(?=,|$)/i
|
11
|
+
# Recognizes a string that represents an integer value
|
12
|
+
INTEGER_REGEX = /^\d{1,3}(?:[,\. ]\d{3}|\d)*$/
|
13
|
+
COMMA_POINT_SPACE_REGEX = /[,\. ]/
|
14
|
+
# Recognizes a string that represents a float value in the form of 1,333.45
|
15
|
+
DECIMAL_POINT_REGEX = /^\d{1,3}(?:[, ]\d{3}|\d)*(?:\.\d*)$/
|
16
|
+
# Recognizes a string that represents a float value in the form of 1.333,45
|
17
|
+
DECIMAL_COMMA_REGEX = /^\d{1,3}(?:[\. ]\d{3}|\d)*(?:,\d*)$/
|
18
|
+
# A regex that recognizes '.' and ' ' to be used e.g. in #gsub to optimize performance
|
19
|
+
POINT_SPACE_REGEX = /[\. ]/
|
20
|
+
# A regex that recognizes ',' and ' ' to be used e.g. in #gsub to optimize performance
|
21
|
+
COMMA_SPACE_REGEX = /[, ]/
|
22
|
+
# A point '.' to be used e.g. in #gsub to optimize performance
|
23
|
+
POINT = '.'
|
24
|
+
# A comma ',' to be used e.g. in #gsub to optimize performance
|
25
|
+
COMMA = ','
|
26
|
+
# A semicolon ';' to be used e.g. in #gsub to optimize performance
|
27
|
+
SEMICOLON = ';'
|
28
|
+
# An empty string '' to be used e.g. in #gsub to optimize performance
|
29
|
+
EMPTY = ''
|
11
30
|
|
12
31
|
# read arguments provided at invocation
|
13
32
|
# :call-seq:
|
@@ -98,6 +117,40 @@ module Dsl
|
|
98
117
|
collect { |h| h.gsub(/BEGIN|END/, "") }
|
99
118
|
end
|
100
119
|
|
120
|
+
# Checks if the string represents an integer. If so, returns the string with all ',', '.' and ' ' separators removed,
|
121
|
+
# otherwise nil
|
122
|
+
def is_integer?(value)
|
123
|
+
return value.
|
124
|
+
gsub(COMMA_POINT_SPACE_REGEX, EMPTY) if !(value =~ INTEGER_REGEX).nil?
|
125
|
+
end
|
126
|
+
|
127
|
+
# Checks if the string represents a float and in case it is a float returns
|
128
|
+
# the normalized number string (group separators removed, '.' as decimal point), otherwise nil
|
129
|
+
# "1.5" -> 1.5
|
130
|
+
# "1." -> 1.0
|
131
|
+
def is_float?(value, decimal_separator = POINT)
|
132
|
+
if decimal_separator == POINT
|
133
|
+
return value.
|
134
|
+
gsub(COMMA_SPACE_REGEX, EMPTY) if !(value =~ DECIMAL_POINT_REGEX).nil?
|
135
|
+
else
|
136
|
+
return value.
|
137
|
+
gsub(POINT_SPACE_REGEX, EMPTY).
|
138
|
+
gsub(COMMA, POINT) if !(value =~ DECIMAL_COMMA_REGEX).nil?
|
139
|
+
end
|
140
|
+
end
|
141
|
+
|
142
|
+
# Converts a string to a numeric if the string represents a numerical value
|
143
|
+
def str2num(value, decimal_separator = POINT)
|
144
|
+
case
|
145
|
+
when v = is_integer?(value)
|
146
|
+
v.to_i
|
147
|
+
when v = is_float?(value, decimal_separator)
|
148
|
+
v.to_f
|
149
|
+
else
|
150
|
+
value
|
151
|
+
end
|
152
|
+
end
|
153
|
+
|
101
154
|
private
|
102
155
|
|
103
156
|
# Assigns values to keys that are used in rows and yielded to the block
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require_relative 'not_available'
|
2
|
+
require_relative 'dsl'
|
2
3
|
|
3
4
|
# Operating csv files
|
4
5
|
module Sycsvpro
|
@@ -34,6 +35,8 @@ module Sycsvpro
|
|
34
35
|
# [1*0] 24 32
|
35
36
|
class SpreadSheet
|
36
37
|
|
38
|
+
include Dsl
|
39
|
+
|
37
40
|
# rows of the spread sheet
|
38
41
|
attr_accessor :rows
|
39
42
|
# options of the spread sheet
|
@@ -80,10 +83,14 @@ module Sycsvpro
|
|
80
83
|
# rows:: indicates the row count in combination with values param
|
81
84
|
# cols:: indicates the col count in combination with values param
|
82
85
|
# file:: file that contains values to create spread sheet with
|
86
|
+
# ds:: decimal separator '.' or ',' where '.' is default. The
|
87
|
+
# decimal separator is used when spread sheet is created from
|
88
|
+
# file
|
83
89
|
def initialize(*rows)
|
84
90
|
opts = rows.pop if rows.last.is_a?(::Hash)
|
85
91
|
@opts = opts || {}
|
86
92
|
rows = rows_from_params(@opts) if rows.empty?
|
93
|
+
rows = equalize_rows(rows) if @opts[:equalize]
|
87
94
|
check_validity_of(rows)
|
88
95
|
@row_labels, @col_labels = create_labels(rows)
|
89
96
|
@rows = rows
|
@@ -392,12 +399,32 @@ module Sycsvpro
|
|
392
399
|
end
|
393
400
|
values.each_slice(col_count) { |row| rows << row }
|
394
401
|
elsif opts[:file]
|
402
|
+
start_read = Time.now
|
395
403
|
File.readlines(opts[:file]).each do |line|
|
396
|
-
|
397
|
-
rows <<
|
398
|
-
v.strip.empty? ? NotAvailable :
|
404
|
+
next if line.chomp.empty?
|
405
|
+
rows << line.split(SEMICOLON).collect { |v|
|
406
|
+
v.strip.empty? ? NotAvailable : str2num(v.chomp, opts[:ds])
|
399
407
|
}
|
400
408
|
end
|
409
|
+
STDERR.puts "Reading file in #{Time.now - start_read} seconds"
|
410
|
+
end
|
411
|
+
|
412
|
+
rows
|
413
|
+
end
|
414
|
+
|
415
|
+
# If rows are of different column size the rows are equalized in column
|
416
|
+
# size by filling missing columns with NA
|
417
|
+
def equalize_rows(rows)
|
418
|
+
column_sizes = rows.collect { |r| r.size }
|
419
|
+
|
420
|
+
return rows if column_sizes.uniq.size == 1
|
421
|
+
|
422
|
+
max_size = column_sizes.max
|
423
|
+
small_rows = []
|
424
|
+
column_sizes.each_with_index { |c,i| small_rows << i if c < max_size }
|
425
|
+
|
426
|
+
small_rows.each do |i|
|
427
|
+
rows[i] += [NotAvailable] * (max_size - rows[i].size)
|
401
428
|
end
|
402
429
|
|
403
430
|
rows
|
@@ -408,9 +435,10 @@ module Sycsvpro
|
|
408
435
|
# * not nil
|
409
436
|
# * at least one row
|
410
437
|
def check_validity_of(rows)
|
411
|
-
raise "rows need to be arrays"
|
412
|
-
raise "needs at least one row"
|
413
|
-
raise "rows must be of same column size"
|
438
|
+
raise "rows need to be arrays" if !rows_are_arrays?(rows)
|
439
|
+
raise "needs at least one row" if rows.empty?
|
440
|
+
raise "rows must be of same column size. "+
|
441
|
+
"Use equalize: true flag to fix." if !same_column_size?(rows)
|
414
442
|
end
|
415
443
|
|
416
444
|
# Checks whether all rows have the same column size. Returns true if
|
@@ -20,20 +20,22 @@ module Sycsvpro
|
|
20
20
|
#
|
21
21
|
# SpreadSheetBuilder.new(outfile: "out.csv",
|
22
22
|
# files: "f1.csv,f2.csv",
|
23
|
-
#
|
24
|
-
#
|
23
|
+
# r: "true,false",
|
24
|
+
# c: "false,true",
|
25
25
|
# aliases: "a,b",
|
26
26
|
# operation: "(a*b).transpose",
|
27
|
+
# ds: ",",
|
27
28
|
# print: "true").execute
|
28
29
|
#
|
29
30
|
# outfile: file where the result of the operation is written to
|
30
31
|
# files: files that hold the spread sheet data
|
31
|
-
#
|
32
|
-
#
|
32
|
+
# r: indication whether the corresponding file has row labels
|
33
|
+
# c: indication whether the corresponding file has column labels
|
33
34
|
# aliases: symbols that correspond to the spread sheet created from the
|
34
35
|
# files. The symbols are used in the operation. The symbols have
|
35
36
|
# to be chosen carefully not to conflict with existing methods
|
36
37
|
# and variables
|
38
|
+
# ds: decimal separator '.' or ',' where '.' is default
|
37
39
|
# operation: arithmetic operation on spread sheets using the aliases as
|
38
40
|
# place holders for the spread sheets. The last evaluated
|
39
41
|
# operation is returned as result and saved to outfile in case
|
@@ -93,7 +95,7 @@ module Sycsvpro
|
|
93
95
|
|
94
96
|
operands = {}
|
95
97
|
opts[:aliases].split(',').each_with_index do |a,i|
|
96
|
-
operands[a] = SpreadSheet.new(file: files[i],
|
98
|
+
operands[a] = SpreadSheet.new(file: files[i], ds: opts[:ds],
|
97
99
|
r: rlabels[i], c: clabels[i])
|
98
100
|
end
|
99
101
|
|
data/lib/sycsvpro/version.rb
CHANGED
@@ -7,7 +7,8 @@ module Sycsvpro
|
|
7
7
|
# Creation of spread sheets
|
8
8
|
it "should ensure all rows have the same column size" do
|
9
9
|
expect { SpreadSheet.new([1,2], [3,4,5]) }.to raise_error(RuntimeError,
|
10
|
-
|
10
|
+
"rows must be of same column size. Use equalize: true "+
|
11
|
+
"flag to fix.")
|
11
12
|
end
|
12
13
|
|
13
14
|
it "should not accept non arrays as rows" do
|
@@ -30,6 +31,10 @@ module Sycsvpro
|
|
30
31
|
expect { s1 == s2 }
|
31
32
|
end
|
32
33
|
|
34
|
+
it "should be created from first n rows of file"
|
35
|
+
|
36
|
+
it "should be created from last n rows of file"
|
37
|
+
|
33
38
|
it "should be created from file with missing values" do
|
34
39
|
file = File.join(File.dirname(__FILE__), "files/spread_sheet_na.csv")
|
35
40
|
|
@@ -42,6 +47,45 @@ module Sycsvpro
|
|
42
47
|
expect { s1 == s2 }
|
43
48
|
end
|
44
49
|
|
50
|
+
it "should skip empty rows in file" do
|
51
|
+
file = File.join(File.dirname(__FILE__),
|
52
|
+
"files/spread_sheet_with_empty_rows.csv")
|
53
|
+
|
54
|
+
s1 = SpreadSheet.new(file: file, r: true, c: true)
|
55
|
+
s2 = SpreadSheet.new(['Alpha', 'Beta', 'Gamma'],
|
56
|
+
['A',NotAvailable,2,3],
|
57
|
+
['C',7,NotAvailable,9],
|
58
|
+
r: true, c: true)
|
59
|
+
|
60
|
+
expect { s1 == s2 }.to be_true
|
61
|
+
end
|
62
|
+
|
63
|
+
it "should equalize column size through NA" do
|
64
|
+
s1 = SpreadSheet.new([1,2,3],[4,5],[6,7,8,9],[10], equalize: true)
|
65
|
+
s2 = SpreadSheet.new([1,2,3,NotAvailable],
|
66
|
+
[4,5,NotAvailable,NotAvailable],
|
67
|
+
[6,7,8,9],
|
68
|
+
[10,NotAvailable,NotAvailable,NotAvailable])
|
69
|
+
s1.should eq s2
|
70
|
+
end
|
71
|
+
|
72
|
+
it "should equalize column size through NA with row and column labels" do
|
73
|
+
s1 = SpreadSheet.new(['A','B'],
|
74
|
+
['W',1,2,3],
|
75
|
+
['X',4,5],
|
76
|
+
['Y',6,7,8,9],
|
77
|
+
['Z',10],
|
78
|
+
r: true, c: true,
|
79
|
+
equalize: true)
|
80
|
+
|
81
|
+
s2 = SpreadSheet.new(['A','B',2,3],['W',1,2,3,NotAvailable],
|
82
|
+
['X',4,5,NotAvailable,NotAvailable],
|
83
|
+
['Y',6,7,8,9],
|
84
|
+
['Z',10,NotAvailable,NotAvailable,NotAvailable],
|
85
|
+
r: true, c: true)
|
86
|
+
s1.should eq s2
|
87
|
+
end
|
88
|
+
|
45
89
|
it "should be created from flat array" do
|
46
90
|
s1 = SpreadSheet.new(values: [1,2,3,4,5,6], cols: 2)
|
47
91
|
s2 = SpreadSheet.new([1,2],[3,4],[5,6])
|
@@ -79,6 +123,10 @@ module Sycsvpro
|
|
79
123
|
expect { s1.tranpose == s2 }
|
80
124
|
end
|
81
125
|
|
126
|
+
it "should sort on columns"
|
127
|
+
|
128
|
+
it "should filter rows on column values"
|
129
|
+
|
82
130
|
it "should assign new values to rows and columns"
|
83
131
|
|
84
132
|
it "should delete columns"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sycsvpro
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-10-
|
12
|
+
date: 2014-10-11 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|