sycsvpro 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile.lock +1 -1
- data/README.md +9 -0
- data/bin/sycsvpro +5 -0
- data/lib/sycsvpro/dsl.rb +1 -1
- data/lib/sycsvpro/not_available.rb +12 -3
- data/lib/sycsvpro/spread_sheet.rb +33 -13
- data/lib/sycsvpro/spread_sheet_builder.rb +18 -15
- data/lib/sycsvpro/version.rb +1 -1
- data/spec/sycsvpro/spread_sheet_spec.rb +44 -2
- data/sycsvpro.rdoc +16 -11
- metadata +2 -2
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -731,6 +731,15 @@ Version 0.2.1
|
|
731
731
|
* Dsl module has got 3 new methods #is\_integer?, #is\_float? and #str2num to
|
732
732
|
convert strings that represent numbers to numericals
|
733
733
|
|
734
|
+
Version 0.2.2
|
735
|
+
-------------
|
736
|
+
* Add the equalize switch to the spread sheet command line
|
737
|
+
* Optimize performance of SpreadSheet#write
|
738
|
+
* Introduce _r_ and _c_ arguments to SpreadSheet#write to indicate whether the
|
739
|
+
row and column labels should be written to the file. Row and column labels are
|
740
|
+
written per default for compatibility reasons
|
741
|
+
* Catch encoding errors when creating spread sheet from file
|
742
|
+
|
734
743
|
Documentation
|
735
744
|
=============
|
736
745
|
The class documentation can be found at
|
data/bin/sycsvpro
CHANGED
@@ -430,6 +430,10 @@ command :spreadsheet do |c|
|
|
430
430
|
c.arg_name '.|,'
|
431
431
|
c.flag [:ds], default: '.'
|
432
432
|
|
433
|
+
c.desc 'In order to build spread sheets rows have to be of equal size. The '+
|
434
|
+
'e switch equalizes row sizes over all rows'
|
435
|
+
c.switch [:e, :equalize], :default_value => false
|
436
|
+
|
433
437
|
c.desc 'The arithmetic operation with the table data'
|
434
438
|
c.arg_name 'ARITHMETIC_OPERATION'
|
435
439
|
c.flag [:o, :operation]
|
@@ -446,6 +450,7 @@ command :spreadsheet do |c|
|
|
446
450
|
aliases: options[:a],
|
447
451
|
operation: options[:o],
|
448
452
|
ds: options[:ds],
|
453
|
+
equalize: options[:e],
|
449
454
|
print: options[:p]).execute
|
450
455
|
print 'done'
|
451
456
|
end
|
data/lib/sycsvpro/dsl.rb
CHANGED
@@ -108,7 +108,7 @@ module Dsl
|
|
108
108
|
|
109
109
|
# Remove non-UTF chars from string
|
110
110
|
def str2utf8(str)
|
111
|
-
str.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace:
|
111
|
+
str.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: EMPTY)
|
112
112
|
end
|
113
113
|
|
114
114
|
# Retrieves the values scanned by a COMMA_SPLITTER_REGEX
|
@@ -10,12 +10,13 @@ module Sycsvpro
|
|
10
10
|
# 1 + na -> na
|
11
11
|
class NotAvailable
|
12
12
|
|
13
|
+
# The string representation of NotAvailable
|
14
|
+
NA = "NA"
|
15
|
+
|
13
16
|
class << self
|
14
17
|
|
15
18
|
# Catches all expressions where na is the first argument
|
16
19
|
def method_missing(name, *args, &block)
|
17
|
-
super if name == :to_ary
|
18
|
-
super if name == :to_str
|
19
20
|
self
|
20
21
|
end
|
21
22
|
|
@@ -25,9 +26,17 @@ module Sycsvpro
|
|
25
26
|
[self,value]
|
26
27
|
end
|
27
28
|
|
29
|
+
# Checks whether NotAvailable responds to 'name'. The methods :to_ary and
|
30
|
+
# :to_str are excluded
|
31
|
+
def respond_to?(name)
|
32
|
+
return false if name == :to_ary
|
33
|
+
return false if name == :to_str
|
34
|
+
super
|
35
|
+
end
|
36
|
+
|
28
37
|
# Returns NA as the string representation
|
29
38
|
def to_s
|
30
|
-
|
39
|
+
NA
|
31
40
|
end
|
32
41
|
|
33
42
|
end
|
@@ -82,6 +82,7 @@ module Sycsvpro
|
|
82
82
|
# values:: flat array with values
|
83
83
|
# rows:: indicates the row count in combination with values param
|
84
84
|
# cols:: indicates the col count in combination with values param
|
85
|
+
# equalize:: If columns are of different size equalizes the column size
|
85
86
|
# file:: file that contains values to create spread sheet with
|
86
87
|
# ds:: decimal separator '.' or ',' where '.' is default. The
|
87
88
|
# decimal separator is used when spread sheet is created from
|
@@ -325,12 +326,31 @@ module Sycsvpro
|
|
325
326
|
@col_labels = opts[:cols] if opts[:cols]
|
326
327
|
end
|
327
328
|
|
328
|
-
# Writes spread sheet to a file separated with ';'
|
329
|
-
|
329
|
+
# Writes spread sheet to a file separated with ';'. Accepts two boolean
|
330
|
+
# arguments to indicate whether the row and column labels should be saved
|
331
|
+
# along with the spread sheet's values
|
332
|
+
# r:: when true row labels will be saved, default is true
|
333
|
+
# c:: when true column labels will be saved, default is true
|
334
|
+
def write(file, opts = {})
|
335
|
+
opts = {r: true, c: true}.merge(opts)
|
336
|
+
|
337
|
+
if opts[:r]
|
338
|
+
construct_row = -> row,i { row.insert(0,row_labels[i]).join(SEMICOLON) }
|
339
|
+
else
|
340
|
+
construct_row = -> row,i { row.join(SEMICOLON) }
|
341
|
+
end
|
342
|
+
|
330
343
|
File.open(file, 'w') do |out|
|
331
|
-
|
344
|
+
if opts[:c]
|
345
|
+
if opts[:r]
|
346
|
+
out.puts "#{SEMICOLON}#{col_labels.join(SEMICOLON)}"
|
347
|
+
else
|
348
|
+
out.puts col_labels.join(SEMICOLON)
|
349
|
+
end
|
350
|
+
end
|
351
|
+
|
332
352
|
rows.each_with_index do |row, i|
|
333
|
-
out.puts
|
353
|
+
out.puts construct_row.call row, i
|
334
354
|
end
|
335
355
|
end
|
336
356
|
end
|
@@ -399,14 +419,13 @@ module Sycsvpro
|
|
399
419
|
end
|
400
420
|
values.each_slice(col_count) { |row| rows << row }
|
401
421
|
elsif opts[:file]
|
402
|
-
|
403
|
-
File.readlines(opts[:file]).each do |line|
|
422
|
+
File.foreach(opts[:file]) do |line|
|
404
423
|
next if line.chomp.empty?
|
405
|
-
|
424
|
+
values = line.split(SEMICOLON) rescue str2utf8(line).split(SEMICOLON)
|
425
|
+
rows << values.collect { |v|
|
406
426
|
v.strip.empty? ? NotAvailable : str2num(v.chomp, opts[:ds])
|
407
427
|
}
|
408
428
|
end
|
409
|
-
STDERR.puts "Reading file in #{Time.now - start_read} seconds"
|
410
429
|
end
|
411
430
|
|
412
431
|
rows
|
@@ -435,10 +454,10 @@ module Sycsvpro
|
|
435
454
|
# * not nil
|
436
455
|
# * at least one row
|
437
456
|
def check_validity_of(rows)
|
438
|
-
raise "rows need to be arrays"
|
439
|
-
raise "needs at least one row"
|
440
|
-
raise "rows must be of same column size. "+
|
441
|
-
"
|
457
|
+
raise "rows need to be arrays" if !rows_are_arrays?(rows)
|
458
|
+
raise "needs at least one row" if rows.empty?
|
459
|
+
raise "rows must be of same column size. Use "+
|
460
|
+
"'equalize: true' flag to fix this." if !same_column_size?(rows)
|
442
461
|
end
|
443
462
|
|
444
463
|
# Checks whether all rows have the same column size. Returns true if
|
@@ -522,7 +541,8 @@ module Sycsvpro
|
|
522
541
|
# a spread sheet
|
523
542
|
def process(operator, s)
|
524
543
|
s = coerce(s) || s
|
525
|
-
raise "operand needs to be a SpreadSheet,
|
544
|
+
raise "operand needs to be a SpreadSheet, "+
|
545
|
+
"Numeric or Array" unless s.is_a?(SpreadSheet)
|
526
546
|
result = []
|
527
547
|
rlabel = []
|
528
548
|
clabel = []
|
@@ -25,23 +25,25 @@ module Sycsvpro
|
|
25
25
|
# aliases: "a,b",
|
26
26
|
# operation: "(a*b).transpose",
|
27
27
|
# ds: ",",
|
28
|
+
# equalize: "true",
|
28
29
|
# print: "true").execute
|
29
30
|
#
|
30
|
-
# outfile
|
31
|
-
# files
|
32
|
-
# r
|
33
|
-
# c
|
34
|
-
# aliases
|
35
|
-
#
|
36
|
-
#
|
37
|
-
#
|
38
|
-
# ds
|
39
|
-
#
|
40
|
-
#
|
41
|
-
#
|
42
|
-
#
|
43
|
-
#
|
44
|
-
#
|
31
|
+
# outfile:: file where the result of the operation is written to
|
32
|
+
# files:: files that hold the spread sheet data
|
33
|
+
# r:: indication whether the corresponding file has row labels
|
34
|
+
# c:: indication whether the corresponding file has column labels
|
35
|
+
# aliases:: symbols that correspond to the spread sheet created from the
|
36
|
+
# files. The symbols are used in the operation. The symbols have
|
37
|
+
# to be chosen carefully not to conflict with existing methods
|
38
|
+
# and variables
|
39
|
+
# ds:: decimal separator '.' or ',' where '.' is default
|
40
|
+
# equalize:: indicates whether different column sizes should be equalized
|
41
|
+
# operation:: arithmetic operation on spread sheets using the aliases as
|
42
|
+
# place holders for the spread sheets. The last evaluated
|
43
|
+
# operation is returned as result and saved to outfile in case
|
44
|
+
# the result is a spread sheet. In all other cases the result
|
45
|
+
# can be printed with the print flag.
|
46
|
+
# print:: print the result
|
45
47
|
def initialize(opts = {})
|
46
48
|
@print = opts[:print]
|
47
49
|
@operands = create_operands(opts)
|
@@ -96,6 +98,7 @@ module Sycsvpro
|
|
96
98
|
operands = {}
|
97
99
|
opts[:aliases].split(',').each_with_index do |a,i|
|
98
100
|
operands[a] = SpreadSheet.new(file: files[i], ds: opts[:ds],
|
101
|
+
equalize: opts[:equalize],
|
99
102
|
r: rlabels[i], c: clabels[i])
|
100
103
|
end
|
101
104
|
|
data/lib/sycsvpro/version.rb
CHANGED
@@ -7,8 +7,8 @@ module Sycsvpro
|
|
7
7
|
# Creation of spread sheets
|
8
8
|
it "should ensure all rows have the same column size" do
|
9
9
|
expect { SpreadSheet.new([1,2], [3,4,5]) }.to raise_error(RuntimeError,
|
10
|
-
"rows must be of same column size. Use equalize: true "+
|
11
|
-
"flag to fix.")
|
10
|
+
"rows must be of same column size. Use 'equalize: true' "+
|
11
|
+
"flag to fix this.")
|
12
12
|
end
|
13
13
|
|
14
14
|
it "should not accept non arrays as rows" do
|
@@ -115,6 +115,48 @@ module Sycsvpro
|
|
115
115
|
Dir.glob(file).size.should eq 1
|
116
116
|
end
|
117
117
|
|
118
|
+
it "should write to file without row labels" do
|
119
|
+
file = File.join(File.dirname(__FILE__), "files/spread_sheet_out.csv")
|
120
|
+
|
121
|
+
s1 = SpreadSheet.new(['A', 'B', 'C'],['I',1,2,3],['II',4,5,6],
|
122
|
+
r: true, c: true)
|
123
|
+
s1.write(file, r: false)
|
124
|
+
|
125
|
+
lines = File.readlines(file)
|
126
|
+
|
127
|
+
content = ["A;B;C\n", "1;2;3\n", "4;5;6\n"]
|
128
|
+
|
129
|
+
lines.should eq content
|
130
|
+
end
|
131
|
+
|
132
|
+
it "should write to file without column labels" do
|
133
|
+
file = File.join(File.dirname(__FILE__), "files/spread_sheet_out.csv")
|
134
|
+
|
135
|
+
s1 = SpreadSheet.new(['A', 'B', 'C'],['I',1,2,3],['II',4,5,6],
|
136
|
+
r: true, c: true)
|
137
|
+
s1.write(file, c: false)
|
138
|
+
|
139
|
+
lines = File.readlines(file)
|
140
|
+
|
141
|
+
content = ["I;1;2;3\n", "II;4;5;6\n"]
|
142
|
+
|
143
|
+
lines.should eq content
|
144
|
+
end
|
145
|
+
|
146
|
+
it "should only write data to file" do
|
147
|
+
file = File.join(File.dirname(__FILE__), "files/spread_sheet_out.csv")
|
148
|
+
|
149
|
+
s1 = SpreadSheet.new(['A', 'B', 'C'],['I',1,2,3],['II',4,5,6],
|
150
|
+
r: true, c: true)
|
151
|
+
s1.write(file, r: false, c: false)
|
152
|
+
|
153
|
+
lines = File.readlines(file)
|
154
|
+
|
155
|
+
content = ["1;2;3\n", "4;5;6\n"]
|
156
|
+
|
157
|
+
lines.should eq content
|
158
|
+
end
|
159
|
+
|
118
160
|
# Manipulating spread sheets
|
119
161
|
|
120
162
|
it "should transpose rows and columns" do
|
data/sycsvpro.rdoc
CHANGED
@@ -5,7 +5,7 @@ SYNOPSIS
|
|
5
5
|
sycsvpro [global options] command [command options] [arguments...]
|
6
6
|
|
7
7
|
VERSION
|
8
|
-
0.2.
|
8
|
+
0.2.2
|
9
9
|
|
10
10
|
GLOBAL OPTIONS
|
11
11
|
-f, --file=FILE - CSV file to operate on (default: none)
|
@@ -20,22 +20,27 @@ COMMANDS
|
|
20
20
|
analyze - Analyze the CSV file regarding columns, rows and content
|
21
21
|
calc - Process operations on columns. Optionally add a sum row for columns with number values
|
22
22
|
collect - Collect values of specified rows and columns from the file and group them in categories
|
23
|
-
count - Counts the occurences of column values. Uses column values as headings with count as values.
|
24
|
-
|
23
|
+
count - Counts the occurrences of column values. Uses column values as headings with count as values.
|
24
|
+
Columns with a condition will be added as new columns and the condition will be set as
|
25
|
+
column name. Optionally adds a sum row
|
25
26
|
edit - Creates a script/insert file or opens a script/insert file for editing if it exists
|
26
27
|
execute - Executes the code provided in a file
|
27
28
|
extract - Extract specified rows and columns from the file
|
28
29
|
help - Shows a list of commands or help for one command
|
29
|
-
insert - Inserts rows from a file to a csv-file. You can for instance add sum operations in Excel or
|
30
|
-
file in Excel or LibreOffice
|
30
|
+
insert - Inserts rows from a file to a csv-file. You can for instance add sum operations in Excel or
|
31
|
+
LibreOffice style if you want to process the resulting file in Excel or LibreOffice
|
31
32
|
join - Join two files based on a joint column value
|
32
|
-
list - Lists script or insert files in the scripts directory with optionally listing methods of
|
33
|
+
list - Lists script or insert files in the scripts directory with optionally listing methods of
|
34
|
+
script files
|
33
35
|
map - Map values in columns to new values
|
34
|
-
merge - Merge multiple files based on a common column value with a key value at the first column of
|
36
|
+
merge - Merge multiple files based on a common column value with a key value at the first column of
|
37
|
+
a row
|
35
38
|
sort - Sort rows based on column values. It is possible to sort on multiple columns
|
36
|
-
spreadsheet - Do arithmetic operation with table like data. The table has to have rows with same size.
|
37
|
-
|
38
|
-
|
39
|
-
|
39
|
+
spreadsheet - Do arithmetic operation with table like data. The table has to have rows with same size.
|
40
|
+
Arithmetic operations are *, /, + and - where the results can be concatenated. Complete
|
41
|
+
functions can be looked up at https://rubygems.org/gems/sycsvpro
|
42
|
+
table - Associates columns to a key value. A key value can be a combination of multiple column
|
43
|
+
values. Values associated can be generated from an arithmetic or string operation. Header
|
44
|
+
columns can be generated dynamically based on column values
|
40
45
|
transpose - Transposes rows and columns
|
41
46
|
unique - Remove duplicate rows from a file. Duplicates are identified by key columns
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sycsvpro
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-10-
|
12
|
+
date: 2014-10-13 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|