sycsvpro 0.1.13 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +1 -0
- data/Gemfile.lock +1 -1
- data/README.md +173 -4
- data/README.rdoc +2 -1
- data/bin/sycsvpro +43 -1
- data/lib/sycsvpro/aggregator.rb +7 -7
- data/lib/sycsvpro/allocator.rb +6 -6
- data/lib/sycsvpro/analyzer.rb +10 -10
- data/lib/sycsvpro/mapper.rb +14 -14
- data/lib/sycsvpro/merger.rb +14 -14
- data/lib/sycsvpro/not_available.rb +36 -0
- data/lib/sycsvpro/spread_sheet.rb +523 -0
- data/lib/sycsvpro/spread_sheet_builder.rb +104 -0
- data/lib/sycsvpro/transposer.rb +14 -15
- data/lib/sycsvpro/unique.rb +11 -12
- data/lib/sycsvpro/version.rb +1 -1
- data/lib/sycsvpro.rb +2 -0
- data/spec/sycsvpro/not_available_spec.rb +34 -0
- data/spec/sycsvpro/spread_sheet_builder_spec.rb +35 -0
- data/spec/sycsvpro/spread_sheet_spec.rb +415 -0
- data/sycsvpro.rdoc +25 -24
- metadata +8 -2
@@ -0,0 +1,523 @@
|
|
1
|
+
require_relative 'not_available'
|
2
|
+
|
3
|
+
# Operating csv files
|
4
|
+
module Sycsvpro
|
5
|
+
|
6
|
+
# A spread sheet is used to do column and row wise calculations between
|
7
|
+
# spread sheets. The calculations can be *, /, + and - where the operations
|
8
|
+
# are conducted between corresponding columns and rows. It is not as with
|
9
|
+
# matrix operations.
|
10
|
+
#
|
11
|
+
# Example:
|
12
|
+
# [0] [1] [0] [1]
|
13
|
+
# A = [0] 1 2 B = [0] 5 6
|
14
|
+
# [1] 3 4 [1] 7 8
|
15
|
+
#
|
16
|
+
# [0*0] [1*1]
|
17
|
+
# A * B = [0*0] 5 12
|
18
|
+
# [1*1] 21 32
|
19
|
+
#
|
20
|
+
# If spread sheets are not the same size then the operation is looping through
|
21
|
+
# the smaller spread sheets values
|
22
|
+
#
|
23
|
+
# Example:
|
24
|
+
# [0] [1] [0] [0]
|
25
|
+
# A = [0] 1 2 B = [0] 5 C = [0] 8
|
26
|
+
# [1] 3 4 [1] 7
|
27
|
+
#
|
28
|
+
# [0*0] [1*0]
|
29
|
+
# A * B = [0*0] 5 10
|
30
|
+
# [1*1] 21 28
|
31
|
+
#
|
32
|
+
# [0*0] [1*0]
|
33
|
+
# A * C = [0*0] 8 16
|
34
|
+
# [1*0] 24 32
|
35
|
+
class SpreadSheet
|
36
|
+
|
37
|
+
# rows of the spread sheet
|
38
|
+
attr_accessor :rows
|
39
|
+
# options of the spread sheet
|
40
|
+
attr_accessor :opts
|
41
|
+
# row labels
|
42
|
+
attr_accessor :row_labels
|
43
|
+
# column labels
|
44
|
+
attr_accessor :col_labels
|
45
|
+
|
46
|
+
# Creates a new spread sheet with rows and optional options.
|
47
|
+
#
|
48
|
+
# SpreadSheet.new(['A',1,2], ['B',3,4], r: true, c: false)
|
49
|
+
#
|
50
|
+
# r: first column of each row contains the row label if true
|
51
|
+
# c: first row contains the column labels if true
|
52
|
+
#
|
53
|
+
# Creates a spread sheet with row labels 'A', 'B' and no column labels
|
54
|
+
# [0] [1]
|
55
|
+
# [A] 1 2
|
56
|
+
# [B] 3 4
|
57
|
+
#
|
58
|
+
# SpreadSheet.new(['One','Two'],['A',1,2],['B',3,4],
|
59
|
+
# r: true,
|
60
|
+
# c: true)
|
61
|
+
#
|
62
|
+
# Creates a spread sheet with row and column labels
|
63
|
+
#
|
64
|
+
# [One] [Two]
|
65
|
+
# [A] 1 2
|
66
|
+
# [B] 3 4
|
67
|
+
#
|
68
|
+
# It is also possible to specify row and column labels explicitly
|
69
|
+
#
|
70
|
+
# SpreadSheet.new([1,2],[3,4], row_labels: ['A','B'],
|
71
|
+
# col_labels: ['One','Two'])
|
72
|
+
#
|
73
|
+
# Params
|
74
|
+
# ======
|
75
|
+
# r:: has row labels if true
|
76
|
+
# c:: has column labels if true
|
77
|
+
# row_labels:: explicitly provides row labels
|
78
|
+
# col_labels:: explicitly provides column labels
|
79
|
+
# values:: flat array with values
|
80
|
+
# rows:: indicates the row count in combination with values param
|
81
|
+
# cols:: indicates the col count in combination with values param
|
82
|
+
# file:: file that contains values to create spread sheet with
|
83
|
+
def initialize(*rows)
|
84
|
+
opts = rows.pop if rows.last.is_a?(::Hash)
|
85
|
+
@opts = opts || {}
|
86
|
+
rows = rows_from_params(@opts) if rows.empty?
|
87
|
+
check_validity_of(rows)
|
88
|
+
@row_labels, @col_labels = create_labels(rows)
|
89
|
+
@rows = rows
|
90
|
+
end
|
91
|
+
|
92
|
+
# Returns the dimension [rows, columns] of the spread sheet
|
93
|
+
# SpreadSheet.new([1,2,3], [4,5,6]).dim -> [2,3]
|
94
|
+
def dim
|
95
|
+
[nrows, ncols]
|
96
|
+
end
|
97
|
+
|
98
|
+
# Returns the size of the spread sheet, that is the count of elements
|
99
|
+
def size
|
100
|
+
nrows * ncols
|
101
|
+
end
|
102
|
+
|
103
|
+
# Returns the number of rows
|
104
|
+
def nrows
|
105
|
+
rows.size
|
106
|
+
end
|
107
|
+
|
108
|
+
# Returns the number of columns
|
109
|
+
def ncols
|
110
|
+
rows[0].size
|
111
|
+
end
|
112
|
+
|
113
|
+
# Swaps rows and columns and returns new spread sheet with result
|
114
|
+
def transpose
|
115
|
+
SpreadSheet.new(*rows.transpose, row_labels: col_labels,
|
116
|
+
col_labels: row_labels)
|
117
|
+
end
|
118
|
+
|
119
|
+
# Returns a subset of the spread sheet and returns a new spread sheet with
|
120
|
+
# the result and the corresponding row and column labels
|
121
|
+
def [](*range)
|
122
|
+
r, c = range
|
123
|
+
r ||= 0..(nrows-1)
|
124
|
+
c ||= 0..(ncols-1)
|
125
|
+
|
126
|
+
row_selection = rows.values_at(*r)
|
127
|
+
col_selection = []
|
128
|
+
|
129
|
+
if rows_are_arrays?(row_selection)
|
130
|
+
row_selection.each do |row|
|
131
|
+
values = row.values_at(*c)
|
132
|
+
col_selection << (values.respond_to?(:to_ary) ? values : [values])
|
133
|
+
end
|
134
|
+
else
|
135
|
+
col_selection << row_selection[*c]
|
136
|
+
end
|
137
|
+
|
138
|
+
SpreadSheet.new(*col_selection,
|
139
|
+
row_labels: row_labels.values_at(*r),
|
140
|
+
col_labels: col_labels.values_at(*c))
|
141
|
+
end
|
142
|
+
|
143
|
+
# Binds spread sheets column wise
|
144
|
+
#
|
145
|
+
# 1 2 3 10 20 30
|
146
|
+
# A = 4 5 6 B = 40 50 60
|
147
|
+
# 7 8 9 70 80 90
|
148
|
+
#
|
149
|
+
# C = SpreadSheet.bind_columns(A,B)
|
150
|
+
#
|
151
|
+
# 1 2 3 10 20 30
|
152
|
+
# C = 4 5 6 40 50 60
|
153
|
+
# 7 8 9 70 80 90
|
154
|
+
#
|
155
|
+
# If the spread sheets have different row sizes the columns of the spread
|
156
|
+
# sheet with fewer rows are filled with NotAvailable
|
157
|
+
#
|
158
|
+
# 1 2 3 10 20 30
|
159
|
+
# A = 4 5 6 B = 40 50 60
|
160
|
+
# 7 8 9
|
161
|
+
#
|
162
|
+
# C = SpreadSheet.bind_columns(A,B)
|
163
|
+
#
|
164
|
+
# 1 2 3 10 20 30
|
165
|
+
# C = 4 5 6 40 50 60
|
166
|
+
# 7 8 9 NA NA NA
|
167
|
+
#
|
168
|
+
# The column labels are also combined from the spread sheets and the row
|
169
|
+
# labels of the spread sheet with the higher row count are used
|
170
|
+
#
|
171
|
+
# Returns the result in a new spread sheet
|
172
|
+
def self.bind_columns(*sheets)
|
173
|
+
row_count = sheets.collect { |s| s.nrows }.max
|
174
|
+
binds = Array.new(row_count, [])
|
175
|
+
0.upto(row_count - 1) do |r|
|
176
|
+
sheets.each do |sheet|
|
177
|
+
sheet_row = sheet.rows[r]
|
178
|
+
binds[r] += sheet_row.nil? ? [NotAvailable] * sheet.ncols : sheet_row
|
179
|
+
end
|
180
|
+
end
|
181
|
+
c_labels = sheets.collect { |s| s.col_labels }.inject(:+)
|
182
|
+
r_labels = sheets.collect { |s|
|
183
|
+
s.row_labels if s.row_labels.size == row_count
|
184
|
+
}.first
|
185
|
+
SpreadSheet.new(*binds, col_labels: c_labels, row_labels: r_labels)
|
186
|
+
end
|
187
|
+
|
188
|
+
# Binds spread sheets row wise
|
189
|
+
#
|
190
|
+
# 1 2 3 10 20 30
|
191
|
+
# A = 4 5 6 B = 40 50 60
|
192
|
+
# 7 8 9
|
193
|
+
#
|
194
|
+
# C = SpreadSheet.bind_rows(A,B)
|
195
|
+
#
|
196
|
+
# 1 2 3
|
197
|
+
# 4 5 6
|
198
|
+
# C = 7 8 9
|
199
|
+
# 10 20 30
|
200
|
+
# 40 50 60
|
201
|
+
#
|
202
|
+
# If the spread sheets have different column sizes the rows of the spread
|
203
|
+
# sheet with fewer columns are filled with NotAvailable
|
204
|
+
#
|
205
|
+
# 1 2 3 10 20
|
206
|
+
# A = 4 5 6 B = 40 50
|
207
|
+
# 7 8 9
|
208
|
+
#
|
209
|
+
# C = SpreadSheet.bind_rows(A,B)
|
210
|
+
#
|
211
|
+
# 1 2 3
|
212
|
+
# 4 5 6
|
213
|
+
# C = 7 8 9
|
214
|
+
# 10 20 NA
|
215
|
+
# 40 50 NA
|
216
|
+
#
|
217
|
+
# The row labels are also combined from the spread sheets and the column
|
218
|
+
# labels of the spread sheet with the higher column count are used
|
219
|
+
def self.bind_rows(*sheets)
|
220
|
+
col_count = sheets.collect { |s| s.ncols }.max
|
221
|
+
binds = []
|
222
|
+
sheets.each do |sheet|
|
223
|
+
binds << sheet.rows.collect { |r|
|
224
|
+
r + [NotAvailable] * ((col_count - r.size) % col_count)
|
225
|
+
}
|
226
|
+
end
|
227
|
+
r_labels = sheets.collect { |s| s.col_labels }.inject(:+)
|
228
|
+
c_labels = sheets.collect { |s| s.col_labels if s.ncols == col_count }.first
|
229
|
+
SpreadSheet.new(*binds.flatten(1),
|
230
|
+
row_labels: r_labels,
|
231
|
+
col_labels: c_labels)
|
232
|
+
end
|
233
|
+
|
234
|
+
# Returns the result in a new spread sheet
|
235
|
+
# Multiplies two spreadsheets column by column and returns a new spread
|
236
|
+
# sheet with the result
|
237
|
+
# 1 2 3 3 2 1 3 4 3
|
238
|
+
# 4 5 6 * 6 5 4 = 24 25 24
|
239
|
+
# 7 8 9 9 8 7 63 64 63
|
240
|
+
def *(s)
|
241
|
+
process("*", s)
|
242
|
+
end
|
243
|
+
|
244
|
+
# Divides two spreadsheets column by column and returns a new spread
|
245
|
+
# sheet with the result
|
246
|
+
# 1 2 3 3 2 1 1/3 1 3
|
247
|
+
# 4 5 6 / 6 5 4 = 2/3 1 6/4
|
248
|
+
# 7 8 9 9 8 7 7/9 1 9/7
|
249
|
+
def /(s)
|
250
|
+
process("/", s)
|
251
|
+
end
|
252
|
+
|
253
|
+
# Adds two spreadsheets column by column and returns a new spread
|
254
|
+
# sheet with the result
|
255
|
+
# 1 2 3 3 2 1 4 4 4
|
256
|
+
# 4 5 6 + 6 5 4 = 10 10 10
|
257
|
+
# 7 8 9 9 8 7 16 16 16
|
258
|
+
def +(s)
|
259
|
+
process("+", s)
|
260
|
+
end
|
261
|
+
|
262
|
+
# Subtracts two spreadsheets column by column and returns a new spread
|
263
|
+
# sheet with the result
|
264
|
+
# 1 2 3 3 2 1 -2 0 2
|
265
|
+
# 4 5 6 - 6 5 4 = -2 0 2
|
266
|
+
# 7 8 9 9 8 7 -2 0 2
|
267
|
+
def -(s)
|
268
|
+
process("-", s)
|
269
|
+
end
|
270
|
+
|
271
|
+
# Compares if two spread sheets are equal. Two spread sheets are equal
|
272
|
+
# if for the spread sheets A and B Aij = Bij holds, that is the elements at
|
273
|
+
# the same position are equal
|
274
|
+
def ==(other)
|
275
|
+
return false unless other.instance_of?(SpreadSheet)
|
276
|
+
return false unless dim == other.dim
|
277
|
+
row_count, col_count = dim
|
278
|
+
0.upto(row_count - 1) do |r|
|
279
|
+
0.upto(col_count - 1) do |c|
|
280
|
+
return false unless rows[r][c] == other.rows[r][c]
|
281
|
+
end
|
282
|
+
end
|
283
|
+
true
|
284
|
+
end
|
285
|
+
|
286
|
+
# Yields each column
|
287
|
+
def each_column
|
288
|
+
0.upto(ncols-1) { |i| yield self[nil,i] }
|
289
|
+
end
|
290
|
+
|
291
|
+
# Collects the operation on each column and returns the result in an array
|
292
|
+
def column_collect(&block)
|
293
|
+
result = []
|
294
|
+
0.upto(ncols-1) { |i| result << block.call(self[nil,i]) }
|
295
|
+
result
|
296
|
+
end
|
297
|
+
|
298
|
+
# Renames the row and column labels
|
299
|
+
#
|
300
|
+
# sheet.rename(rows: ['Row 1', 'Row 2'], cols: ['Col 1', 'Col 2'])
|
301
|
+
#
|
302
|
+
# If the provided rows and columns are larger than the spread sheet's rows
|
303
|
+
# and columns then only the respective row and column values are used. If
|
304
|
+
# the row and column labels are fewer than the respective row and column
|
305
|
+
# sizes the old labels are left untouched for the missing new labels
|
306
|
+
def rename(opts = {})
|
307
|
+
if opts[:rows]
|
308
|
+
opts[:rows] = opts[:rows][0,nrows]
|
309
|
+
opts[:rows] += row_labels[opts[:rows].size, nrows]
|
310
|
+
end
|
311
|
+
|
312
|
+
if opts[:cols]
|
313
|
+
opts[:cols] = opts[:cols][0,ncols]
|
314
|
+
opts[:cols] += col_labels[opts[:cols].size, ncols]
|
315
|
+
end
|
316
|
+
|
317
|
+
@row_labels = opts[:rows] if opts[:rows]
|
318
|
+
@col_labels = opts[:cols] if opts[:cols]
|
319
|
+
end
|
320
|
+
|
321
|
+
# Writes spread sheet to a file separated with ';'
|
322
|
+
def write(file)
|
323
|
+
File.open(file, 'w') do |out|
|
324
|
+
out.puts ";#{col_labels.join(';')}"
|
325
|
+
rows.each_with_index do |row, i|
|
326
|
+
out.puts "#{row_labels[i]};#{row.join(';')}"
|
327
|
+
end
|
328
|
+
end
|
329
|
+
end
|
330
|
+
|
331
|
+
# Prints a summary of the spread sheet
|
332
|
+
def summary
|
333
|
+
puts "\nSummary"
|
334
|
+
puts "-------\n"
|
335
|
+
puts "rows: #{nrows}, columns: #{ncols}, dimension: #{dim}, size: #{size}"
|
336
|
+
puts
|
337
|
+
puts "row labels:\n #{row_labels}"
|
338
|
+
puts "column labels:\n #{col_labels}\n"
|
339
|
+
end
|
340
|
+
|
341
|
+
# Prints the spread sheet in a matrix with column labels and row labels. If
|
342
|
+
# no labels are available the column number and row number is printed
|
343
|
+
def to_s
|
344
|
+
col_label_sizes = col_labels.collect { |c| c.to_s.size + 2 }
|
345
|
+
row_label_size = row_labels.collect { |r| r.to_s.size + 2 }.max
|
346
|
+
|
347
|
+
row_col_sizes = rows.transpose.collect { |r| r.collect { |c| c.to_s.size } }
|
348
|
+
|
349
|
+
i = -1
|
350
|
+
col_sizes = col_label_sizes.collect do |s|
|
351
|
+
i += 1
|
352
|
+
[row_col_sizes[i],s].flatten.max + 1
|
353
|
+
end
|
354
|
+
|
355
|
+
s = (sprintf("%#{row_label_size}s", " "))
|
356
|
+
col_labels.each_with_index { |l,i| s << (sprintf("%#{col_sizes[i]}s",
|
357
|
+
"[#{l}]")) }
|
358
|
+
s << "\n"
|
359
|
+
|
360
|
+
rows.each_with_index do |row, i|
|
361
|
+
s << (sprintf("%#{row_label_size}s", "[#{row_labels[i]}]"))
|
362
|
+
row.each_with_index { |c,j| s << (sprintf("%#{col_sizes[j]}s", c)) }
|
363
|
+
s << "\n"
|
364
|
+
end
|
365
|
+
|
366
|
+
s
|
367
|
+
end
|
368
|
+
|
369
|
+
private
|
370
|
+
|
371
|
+
# Creates rows from provided array or file. If array doesn't provide
|
372
|
+
# equal column sizes the array is extended with NotAvailable values
|
373
|
+
def rows_from_params(opts)
|
374
|
+
col_count = opts[:cols]
|
375
|
+
row_count = opts[:rows]
|
376
|
+
|
377
|
+
size = row_count * col_count if row_count && col_count
|
378
|
+
|
379
|
+
rows = []
|
380
|
+
|
381
|
+
if values = opts[:values]
|
382
|
+
if size
|
383
|
+
values += [NotAvailable] * (size - values.size)
|
384
|
+
elsif col_count
|
385
|
+
values += [NotAvailable] * ((col_count - values.size) % col_count)
|
386
|
+
elsif row_count
|
387
|
+
values += [NotAvailable] * ((row_count - values.size) % row_count)
|
388
|
+
col_count = values.size / row_count
|
389
|
+
else
|
390
|
+
col_count = Math.sqrt(values.size).ceil
|
391
|
+
values += [NotAvailable] * ((col_count - values.size) % col_count)
|
392
|
+
end
|
393
|
+
values.each_slice(col_count) { |row| rows << row }
|
394
|
+
elsif opts[:file]
|
395
|
+
File.readlines(opts[:file]).each do |line|
|
396
|
+
row = line.split(';')
|
397
|
+
rows << row.collect { |v|
|
398
|
+
v.strip.empty? ? NotAvailable : Float(v.chomp) rescue v.chomp
|
399
|
+
}
|
400
|
+
end
|
401
|
+
end
|
402
|
+
|
403
|
+
rows
|
404
|
+
end
|
405
|
+
|
406
|
+
# Checks whether the rows are valid, that is
|
407
|
+
# * same size
|
408
|
+
# * not nil
|
409
|
+
# * at least one row
|
410
|
+
def check_validity_of(rows)
|
411
|
+
raise "rows need to be arrays" if !rows_are_arrays?(rows)
|
412
|
+
raise "needs at least one row" if rows.empty?
|
413
|
+
raise "rows must be of same column size" if !same_column_size?(rows)
|
414
|
+
end
|
415
|
+
|
416
|
+
# Checks whether all rows have the same column size. Returns true if
|
417
|
+
# all columns have the same column size
|
418
|
+
def same_column_size?(rows)
|
419
|
+
offset = opts[:c] ? 1 : 0
|
420
|
+
return true if rows.size == 1 + offset
|
421
|
+
(0 + offset).upto(rows.size - 2) do |i|
|
422
|
+
return false unless rows[i].size == rows[i+1].size
|
423
|
+
end
|
424
|
+
true
|
425
|
+
end
|
426
|
+
|
427
|
+
# Checks whether the rows are provided as arrays. If a non array element
|
428
|
+
# is found false is returned otherwise true
|
429
|
+
def rows_are_arrays?(rows)
|
430
|
+
rows.each { |row| return false unless row.respond_to?(:to_ary) }
|
431
|
+
true
|
432
|
+
end
|
433
|
+
|
434
|
+
def create_labels(rows)
|
435
|
+
if opts[:c]
|
436
|
+
col_labels = extract_col_labels(rows)
|
437
|
+
end
|
438
|
+
if opts[:r]
|
439
|
+
row_labels = extract_row_labels(rows)
|
440
|
+
end
|
441
|
+
|
442
|
+
if opts[:row_labels]
|
443
|
+
row_labels = opts[:row_labels]
|
444
|
+
opts[:r] = true
|
445
|
+
end
|
446
|
+
if opts[:col_labels]
|
447
|
+
col_labels = opts[:col_labels]
|
448
|
+
opts[:c] = true
|
449
|
+
end
|
450
|
+
|
451
|
+
if opts[:c]
|
452
|
+
if col_labels.size > rows[0].size
|
453
|
+
col_labels = col_labels[col_labels.size - rows[0].size,
|
454
|
+
rows[0].size]
|
455
|
+
else
|
456
|
+
col_labels = col_labels + (0..rows[0].size-1).to_a[col_labels.size,
|
457
|
+
rows[0].size]
|
458
|
+
end
|
459
|
+
end
|
460
|
+
|
461
|
+
if opts[:r]
|
462
|
+
if row_labels.size > rows.size
|
463
|
+
row_labels = row_labels[row_labels.size - rows.size,
|
464
|
+
rows.size]
|
465
|
+
else
|
466
|
+
row_labels = row_labels + (0..rows.size-1).to_a[row_labels.size,
|
467
|
+
rows.size]
|
468
|
+
end
|
469
|
+
end
|
470
|
+
|
471
|
+
row_labels = (0..rows.size-1).to_a unless row_labels
|
472
|
+
col_labels = (0..rows[0].size-1).to_a unless col_labels
|
473
|
+
[row_labels, col_labels]
|
474
|
+
end
|
475
|
+
|
476
|
+
def extract_col_labels(rows)
|
477
|
+
col_labels = rows.shift
|
478
|
+
end
|
479
|
+
|
480
|
+
def extract_row_labels(rows)
|
481
|
+
row_labels = []
|
482
|
+
rows.each { |row| row_labels << row.shift }
|
483
|
+
row_labels
|
484
|
+
end
|
485
|
+
|
486
|
+
# Coerces a number or an array to a spread sheet
|
487
|
+
def coerce(value)
|
488
|
+
return SpreadSheet.new([value]) if value.is_a?(Numeric)
|
489
|
+
return SpreadSheet.new(value) if value.is_a?(Array)
|
490
|
+
end
|
491
|
+
|
492
|
+
# Conducts the calculation of this spread sheet with the provided value
|
493
|
+
# based on the operator. If s is a number or an array it is coerced into
|
494
|
+
# a spread sheet
|
495
|
+
def process(operator, s)
|
496
|
+
s = coerce(s) || s
|
497
|
+
raise "operand needs to be a SpreadSheet, Numeric or Array" unless s.is_a?(SpreadSheet)
|
498
|
+
result = []
|
499
|
+
rlabel = []
|
500
|
+
clabel = []
|
501
|
+
s1_row_count, s1_col_count = dim
|
502
|
+
s2_row_count, s2_col_count = s.dim
|
503
|
+
row_count = [s1_row_count, s2_row_count].max
|
504
|
+
col_count = [s1_col_count, s2_col_count].max
|
505
|
+
0.upto(row_count - 1) do |r|
|
506
|
+
r1 = r % s1_row_count
|
507
|
+
r2 = r % s2_row_count
|
508
|
+
rlabel << "#{row_labels[r1]}#{operator}#{s.row_labels[r2]}"
|
509
|
+
element = []
|
510
|
+
0.upto(col_count - 1) do |c|
|
511
|
+
c1 = c % s1_col_count
|
512
|
+
c2 = c % s2_col_count
|
513
|
+
clabel << "#{col_labels[c1]}#{operator}#{s.col_labels[c2]}"
|
514
|
+
element << rows[r1][c1].send(operator, s.rows[r2][c2])
|
515
|
+
end
|
516
|
+
result << element
|
517
|
+
end
|
518
|
+
SpreadSheet.new(*result, row_labels: rlabel, col_labels: clabel)
|
519
|
+
end
|
520
|
+
|
521
|
+
end
|
522
|
+
|
523
|
+
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
# Operating csv files
|
2
|
+
module Sycsvpro
|
3
|
+
|
4
|
+
# SpreadSheetBuilder is used in the command line interface of sycsvpro to
|
5
|
+
# use SpreadSheet from the command line
|
6
|
+
class SpreadSheetBuilder
|
7
|
+
|
8
|
+
# The result of the SpreadSheet operation is written to this file
|
9
|
+
attr_reader :outfile
|
10
|
+
# The operands, that is the spread sheets that are used in the arithmetic
|
11
|
+
# operation
|
12
|
+
attr_reader :operands
|
13
|
+
# The spread sheet operation where the operands are used
|
14
|
+
attr_reader :operation
|
15
|
+
# Indicates whether the result should be printed
|
16
|
+
attr_reader :print
|
17
|
+
|
18
|
+
# A spread sheet builder is doing arithmetic operations and can be called
|
19
|
+
# like this:
|
20
|
+
#
|
21
|
+
# SpreadSheetBuilder.new(outfile: "out.csv",
|
22
|
+
# files: "f1.csv,f2.csv",
|
23
|
+
# rlabels: "true,false",
|
24
|
+
# clabels: "false,true",
|
25
|
+
# aliases: "a,b",
|
26
|
+
# operation: "(a*b).transpose",
|
27
|
+
# print: "true").execute
|
28
|
+
#
|
29
|
+
# outfile: file where the result of the operation is written to
|
30
|
+
# files: files that hold the spread sheet data
|
31
|
+
# rlabels: indication whether the corresponding file has row labels
|
32
|
+
# clabels: indication whether the corresponding file has column labels
|
33
|
+
# aliases: symbols that correspond to the spread sheet created from the
|
34
|
+
# files. The symbols are used in the operation. The symbols have
|
35
|
+
# to be chosen carefully not to conflict with existing methods
|
36
|
+
# and variables
|
37
|
+
# operation: arithmetic operation on spread sheets using the aliases as
|
38
|
+
# place holders for the spread sheets. The last evaluated
|
39
|
+
# operation is returned as result and saved to outfile in case
|
40
|
+
# the result is a spread sheet. In all other cases the result can
|
41
|
+
# be printed with the print flag.
|
42
|
+
# print: print the result
|
43
|
+
def initialize(opts = {})
|
44
|
+
@print = opts[:print]
|
45
|
+
@operands = create_operands(opts)
|
46
|
+
@outfile = opts[:outfile]
|
47
|
+
@operation = opts[:operation]
|
48
|
+
end
|
49
|
+
|
50
|
+
# Returns the spread sheet operands when called in the arithmetic operation
|
51
|
+
def method_missing(name, *args, &block)
|
52
|
+
super unless operands.keys.index(name.to_s)
|
53
|
+
operands[name.to_s]
|
54
|
+
end
|
55
|
+
|
56
|
+
# Executes the operation and writes the result to the outfile
|
57
|
+
def execute
|
58
|
+
result = eval(operation)
|
59
|
+
if outfile
|
60
|
+
if result.is_a?(SpreadSheet)
|
61
|
+
result.write(outfile)
|
62
|
+
else
|
63
|
+
puts
|
64
|
+
puts "Warning: Result is no spread sheet and not written to file!"
|
65
|
+
puts " To view the result use -p flag" unless print
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
if print
|
70
|
+
puts
|
71
|
+
puts "Operation"
|
72
|
+
puts "---------"
|
73
|
+
operation.split(';').each { |o| puts o }
|
74
|
+
puts
|
75
|
+
puts "Result"
|
76
|
+
puts "------"
|
77
|
+
if result.nil? || result.empty?
|
78
|
+
puts result.inspect
|
79
|
+
else
|
80
|
+
puts result
|
81
|
+
end
|
82
|
+
puts
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
private
|
87
|
+
|
88
|
+
# Creates the spread sheet operands for the arithmetic operation
|
89
|
+
def create_operands(opts)
|
90
|
+
files = opts[:files].split(',')
|
91
|
+
rlabels = opts[:rlabels].split(',').collect { |l| l.upcase == "TRUE" }
|
92
|
+
clabels = opts[:clabels].split(',').collect { |l| l.upcase == "TRUE" }
|
93
|
+
|
94
|
+
operands = {}
|
95
|
+
opts[:aliases].split(',').each_with_index do |a,i|
|
96
|
+
operands[a] = SpreadSheet.new(file: files[i],
|
97
|
+
r: rlabels[i], c: clabels[i])
|
98
|
+
end
|
99
|
+
|
100
|
+
operands
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
end
|
data/lib/sycsvpro/transposer.rb
CHANGED
@@ -6,22 +6,22 @@ module Sycsvpro
|
|
6
6
|
# Example
|
7
7
|
#
|
8
8
|
# infile.csv
|
9
|
-
#
|
10
|
-
#
|
11
|
-
#
|
12
|
-
#
|
13
|
-
#
|
14
|
-
#
|
9
|
+
# | Year | SP | RP | Total | SP-O | RP-O | O |
|
10
|
+
# | ---- | -- | -- | ----- | ---- | ---- | --- |
|
11
|
+
# | | 10 | 20 | 30 | 100 | 40 | 140 |
|
12
|
+
# | 2008 | 5 | 10 | 15 | 10 | 20 | 10 |
|
13
|
+
# | 2009 | 2 | 5 | 5 | 20 | 10 | 30 |
|
14
|
+
# | 2010 | 3 | 5 | 10 | 70 | 10 | 100 |
|
15
15
|
#
|
16
16
|
# outfile.csv
|
17
|
-
#
|
18
|
-
#
|
19
|
-
#
|
20
|
-
#
|
21
|
-
#
|
22
|
-
#
|
23
|
-
#
|
24
|
-
#
|
17
|
+
# | Year | | 2008 | 2009 | 2010 |
|
18
|
+
# | ----- | --- | ---- | ---- | ---- |
|
19
|
+
# | SP | 10 | 5 | 5 | 3 |
|
20
|
+
# | RP | 20 | 10 | 10 | 5 |
|
21
|
+
# | Total | 30 | 15 | 15 | 10 |
|
22
|
+
# | SP-O | 100 | 10 | 10 | 70 |
|
23
|
+
# | RP-O | 40 | 20 | 20 | 10 |
|
24
|
+
# | O | 140 | 10 | 30 | 100 |
|
25
25
|
#
|
26
26
|
class Transposer
|
27
27
|
|
@@ -37,7 +37,6 @@ module Sycsvpro
|
|
37
37
|
attr_reader :col_filter
|
38
38
|
|
39
39
|
# Create a new Transpose
|
40
|
-
# :call-seq:
|
41
40
|
# Sycsvpro::Transpose(infile: "infile.csv",
|
42
41
|
# outfile: "outfile.csv",
|
43
42
|
# rows: "0,3-5",
|