extcsv 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/gemspec +21 -0
- data/lib/extcsv.rb +733 -0
- data/lib/lsmodel.rb +289 -0
- data/rakefile +134 -0
- data/test/data/file00.txt +116 -0
- data/test/data/file01.txt +121 -0
- data/test/data/file02.txt +90 -0
- data/test/data/file03.txt +90 -0
- data/test/data/file04.csv +31 -0
- data/test/test_extcsv.rb +491 -0
- data/test/test_lsmodel.rb +150 -0
- metadata +62 -0
data/lib/extcsv.rb
ADDED
@@ -0,0 +1,733 @@
require 'rubygems'
require 'csv'
require 'ostruct'

################################################################################
# Author:: Ralf Mueller
################################################################################
class Nil
  def to_s; ''; end
  def to_a; []; end
  def empty?; true; end
end

class ExtCsv < OpenStruct
  VERSION = '0.10.0'

  include Comparable
  include Enumerable

  # Allowed data types
  TYPES = %w{csv tsv psv txt plain}

  # Allowed input modes, db and url are not supported, yet
  MODES = %w{file db url hash array string}

  # column names from different file type, which that have the same
  # meaning
  DOUBLE_COLUMNS = {}

  # Non-Data fields
  METADATA = %w{mode datatype datacolumns cellsep rowsep filename filemtime}

  # mode can be one of the allowed MODES
  # datatype can be one of the TYPES
  #
  # === Example
  # ExtCsv.new("file","txt","Data.txt")
  # ExtCsv.new("file","csv","Ergebniss.csv")
  #
  #
  def initialize(mode, datatype, params)
    obj_hash = {}
    obj_hash[:mode] = mode
    obj_hash[:datatype] = datatype
    obj_hash[:datacolumns] = []

    if not MODES.include?(mode) or not TYPES.include?(datatype)
      puts "use '#{MODES.join("','")}' for first " +
           "and '#{TYPES.join(",")}' for second parameter " +
           "datatype was '#{datatype}', mode was '#{mode}'"
      raise
    end

    # Grep data from the given source, e.g. database or file
    case obj_hash[:mode]
    when "string"
      set_separators(obj_hash)
      parse_content(params,obj_hash)
    when "file"
      if File.exist?(params)
        obj_hash[:filename] = params
      else
        $stdout << "The input file '#{params}' cannot be found!\n"
        $stdout << "Please check path and filename." << "\n"
        return
      end
      obj_hash[:filemtime] = File.mtime(obj_hash[:filename]).strftime("%Y-%m-%d %H:%M:%S")
      set_separators(obj_hash)
      parse_content(IO.read(obj_hash[:filename]),obj_hash)
    when "hash"
      obj_hash = params
      # update the metacolumns
      #test $stdout << obj_hash.keys.join("\t")
      obj_hash[:datacolumns] = (obj_hash.keys.collect {|dc| dc.to_s} - METADATA)
    when "array"
      params.each {|v|
        key = v[0]
        obj_hash[:datacolumns] << key
        obj_hash[key] = v[1..-1]
      }
    end
    super(obj_hash)
  end
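Usage sketch (not part of the packaged file): the constructor above is the main entry point, so a minimal example may help. It assumes a Ruby 1.8-era interpreter (the case/when colon syntax used further down requires one), the gem's usual require path, and invented file and column names.

    require 'rubygems'
    require 'extcsv'

    # from a tab-separated file whose first line carries the column names
    data = ExtCsv.new("file", "txt", "measurements.txt")

    # from an in-memory string in csv layout (';'-separated, see set_separators below)
    obj = ExtCsv.new("string", "csv", "temperature;pressure\r\n20.0;1013\r\n21.5;990\r\n")

    # each column name becomes an OpenStruct reader returning an array of strings
    obj.temperature
    obj.datacolumns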

  def set_separators(obj_hash)
    obj_hash[:cellsep] = case obj_hash[:datatype]
                         when "txt","tsv": "\t"
                         when "csv": ';'
                         when "psv": "|"
                         end
    obj_hash[:rowsep] = "\r\n"
  end

  # Main method for parsing input strings. Comments and other special
  # signs are treated as follows
  # * first line is taken to be the header with columns names. If that
  #   line starts with a comment sign (#), this sign is removed.
  # * any other line which starts with '#' is ignored
  # * german umlaute are translated into asci-conform versions for
  #   columns names
  #   TODO: This is some kind of arbitrary, there should be a more general
  #   solution
  # * spaces are removed from columns names
  # * brackets are translated into underscores
  # * '+' and '-' are changed into the correspondig words
  # * empty lines are removed
  # * dots are changed into underscores for columns names
  # * the greek sign µ is changes into mu
  def parse_content(filecontent,obj_hash)
    content = []
    # special treatement of emission/bloostblank data switch decimal sign
    filecontent = filecontent.gsub(',','.')
    # remove blank lines
    filecontent = filecontent.gsub(/\r\r/,"\r").gsub(/(\r\n){2,}/,"\r\n").gsub(/\n{2,}/,"\n")
    csv = CSV::StringReader.parse(filecontent, obj_hash[:cellsep])#, obj_hash[:rowsep])

    # read @datatype specific header
    header = csv.shift
    # remove comments sign from the header
    header[0].gsub!(/^#+/,'') if /^#/.match(header[0])

    header.each_with_index {|key,i|
      key = "test" if key.nil?
      header[i] = key.downcase.tr(' ','').tr('"','').tr('ü',"ue").tr('ä',"ae")\
                     .tr('ö',"oe").gsub(/\[\w*\]/,"")\
                     .tr('Ü',"ue").gsub(/^\+/,"plus_")\
                     .gsub('µm','mu')\
                     .gsub(/^-/,"minus_").tr('-','_').tr('+','_').gsub(/(\(|\))/,'_').tr('.','_').chomp
    }
    content << header
    # read the data itself
    csv.each {|row| content << row if row.to_a.nitems > 0 }

    # further processing according to the input type
    case obj_hash[:datatype]
    when "csv"
      # check if rows have the same lenght
      contents_size = content.collect {|row| row.size}
      content.each_with_index {|row,i|
        content[i] = row[0...contents_size.min]
      } unless contents_size.min == contents_size.max
    end
    content = content.transpose

    # file specific changement of the column names: for each physical meaning
    # their should be only one column
    content.each {|item|
      key = nil
      DOUBLE_COLUMNS.keys.each {|k|
        md = /#{k}/.match(item[0])
        unless md.nil?
          key = DOUBLE_COLUMNS[k]
          break
        end
      }
      key = item[0] if key.nil?
      value = item[1..-1]
      value.each_index {|i| value[i] = (value[i].nil?) ? '' : value[i].to_s}
      obj_hash[key.to_sym] = value
      obj_hash[:datacolumns] << key
      # TODO: the following is some kind of german specific DateTime
      # conversion, see change_time_format definition for more info.
      # Maybe there is a more general version using the Time.parse method
      change_time_format(value) if key == "zeit"
    }
  end
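The header normalization in parse_content is easiest to follow on a concrete cell; the trace below is illustrative only (the column name is invented) and simply walks the tr/gsub chain above.

    # "+U [V]"  ->  downcase              ->  "+u [v]"
    #           ->  spaces removed        ->  "+u[v]"
    #           ->  unit "[...]" dropped  ->  "+u"
    #           ->  leading '+' renamed   ->  "plus_u"
    # the column is then reachable as obj.plus_u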

  # Create an auto index
  def index
    (0...rsize).to_a
  end

  # Do a selection by the index of the dataset inside the receiver. This does
  # not change the receiver.
  def selectBy_index(indexes)
    new_table = {}
    @table.each {|key, value|
      if METADATA.include?(key.to_s) or not value.kind_of?(Array)
        new_table[key] = value
      else
        new_table[key] = value.values_at(*indexes)
      end
    }
    self.class.new("hash","plain",new_table)
  end

  # Selection can be made by regular expressions. This method decides,
  # with method is used.
  def is_regexp?(pattern, key)
    return false unless /(<|<=|>=|>)\s*/.match(pattern).nil?
    case key
    when "zeit"
      pattern = pattern.gsub(/(-|\.\d)/,'')
    else
      pattern = pattern.gsub(/\.\d/,'')
    end
    pattern != Regexp.escape(pattern)
  end

  # This Function uses a hash parameter, where the key must be the name of an
  # instance variable, i.g. params =
  # * {:col1 => "4", :col2 => "100", :col3> "80"}
  # * {:col1 => /(4|5)/, :col2 => "<500", :col3> ">=80"}
  # Searching can be done directly, which uses '==' to match, via regular
  # expressions of by simple mathematical operarions:
  # * <
  # * <=
  # * >
  # * >=
  def selectBy(selection)
    operations = %w{<= >= == < > !=}
    type = nil

    # transform selection keys into symbols. This make the further usage
    # a lot easyer and allows to take strings or symbols for columns
    # names
    # ATTENTION: DO NOT MIX THE USAGE OF STRING AND SYMBOLS!
    # This can lead to a data loss, because e.g. {:k => 4, "k" => 3} will be
    # transformed into {:k=>3}
    selection.each_key {|k|
      if k.kind_of?(String)
        v = selection.delete(k)
        selection[k.to_sym] = v
      end
    }
    vars = selection.keys
    # test for unknown selection variables
    vars.each {|attribute|
      unless @table.has_key?(attribute)
        $stdout << "Object does NOT hav the attribute '#{attribute}'!"
        raise
      end
    }
    # default is the lookup in the whole array of values for each var
    lookup = (0..@table[vars[0]].size-1).to_a

    vars.each { |var|
      operation = nil
      value = nil
      # needle can be a real value, a math. comparision or a regular expression
      needle = selection[var]

      if needle.kind_of?(Numeric)
        operation = "=="
        value = needle
        type = :numeric
        #test stdout << needle << " #### #{needle.class} ####\n"
        #test stdout << type.to_s << "\n"
      elsif needle.kind_of?(Regexp)
        operation = Regexp.new(needle)
        type = :regexp
        #test stdout << needle << " #### #{needle.class} ####\n"
        #test stdout << type.to_s << "\n"
      elsif needle.kind_of?(String)
        if (md = /(#{operations.join("|")})([^=].*)/.match(needle); not md.nil?)
          # separate the operation
          operation = md[1]
          value = md[2].strip
        else
          operation = '=='
          value = needle
        end
        if (value == "")
          # value is missing
          $stdout << "value for variable '#{var}' is missing\n"
          raise
        elsif ( (value != "0" and (value.to_f.to_s == value or value.to_i.to_s == value)) or (value == "0") )
          # A: numerical compare
          value = value.to_f
          type = :numeric
          #test stdout << value << " #### #{value.class} ####\n"
          #test stdout << type.to_s << "\n"
        else
          # B: String-like compare
          # quoted if not allready quoted
          value = "'" + value + "'" unless ( /'(.*[^']?.*)'/.match(value) or /"(.*[^"]?.*)"/.match(value) )
          type = :string
          #test $stdout << value << " #### #{value.class} ####\n"
          #test $stdout << type.to_s << "\n"
        end
      else
        $stdout << "The Parameter '#{needle}' has the wrong Type. " +
                   "Please use numeric values, stings or regular expressions (e.g. /(^50$|200)/)\n"
        raise
      end
      #test stdout << "\n NEW VALUE :::::::::::::::\n"
      obj_values = @table[var]
      obj_values = [(0..obj_values.size-1).to_a, obj_values].transpose.values_at(*lookup)

      if operation.kind_of?(Regexp)
        lookup = lookup & obj_values.find_all {|i,v| operation.match(v.to_s)}.transpose[0].to_a
      else
        lookup = lookup & obj_values.find_all {|i,v|
          v = "'" + v + "'" if type == :string
          #test $stdout <<[v,operation,value].join(" ") << "\n"
          eval([v,operation,value].join(" "))
        }.transpose[0].to_a
      end
    }
    selectBy_index(lookup)
  end
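Selection sketch (not part of the packaged file); the object and column names are invented, the three forms follow the comment above selectBy.

    # exact value
    sub = data.selectBy(:version => "1.2")

    # numerical comparison and regular expression, combined column by column
    sub = data.selectBy(:temperature => ">= 20.5", :version => /^1\./)

    # the result is again an ExtCsv object, so selections can be chained
    sub.selectBy(:temperature => "< 30").rsize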

  # Find the dataset, with the values of key closest to he value-parameter
  def closest_to(key, value)
    # try to select directly
    _ret = selectBy(key => value)
    return _ret unless _ret.empty?

    # grabbing for numerics
    # the operation '<=' and '>=' can be left out, because, they would have
    # been matcher before
    _smaller = selectBy(key => " < #{value}")[-1]
    _greater = selectBy(key => " > #{value}")[0]

    _smaller_diff = (_smaller.send(key)[0].to_f - value).abs
    _greater_diff = (_greater.send(key)[0].to_f - value).abs
    return (_smaller_diff < _greater_diff) ? _smaller : _greater
  end
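A sketch for closest_to (column name invented): when no row matches the value exactly, the nearest neighbour below or above is returned.

    nearest = data.closest_to(:temperature, 21.3)
    nearest.temperature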

  # Transform the time from "dd.mm.yyyy hh:mm:ss" to "yyyy-mm-dd hh:mm:ss"
  # For the comparison the timestamps this format is usefull, because the '<=>'
  # comparison of the strings coincides with the temporal order
  def change_time_format(times)
    times.each_with_index {|time,i|
      # if there is no space in time, it is considered a time in
      # format hh:mm
      if time.count(" ") == 0
        time = Time.new.strftime("%d.%m.%Y ") + time
      end
      dATE, tIME = time.split(" ")
      day, month, year = dATE.split(".")
      if tIME.nil?
        times[i] = [year,month,day].join('-')
      else
        hour, minute, second = tIME.split(":")
        if second.nil?
          times[i] = [year,month,day].join('-') + " " + [hour,minute].join(':')
        else
          times[i] = [year,month,day].join('-') + " " + [hour,minute,second].join(':')
        end
      end
    }
  end

  # Return an array of datasets, which contain of the values of the gives
  # columns in order of these columns, e.g.
  # [[col0_val0,col1_val0,...],...,[col0_valN, col1_valN,...]]
  def datasets(*columns)
    retval = []
    columns.each {|col| retval << @table[col.to_sym]}
    retval.transpose
  end
  def clear
    @table.each {|k,v| @table[k] = [] if v.kind_of?(Array)}
  end
  def empty?
    return true if @table.empty?
    @table.each {|k,v|
      if ( v.kind_of?(Array) and v == [])
        return true
      end
    }
    false
  end

  #
  # Different size definitions
  def size
    @table[datacolumns[0].to_sym].size
  end

  def numberOfRows
    @table[datacolumns[-1].to_sym].size
  end
  alias :rsize :numberOfRows

  def numberOfColumns
    datacolumns.size
  end
  alias :csize :numberOfColumns

  def globalsize
    numberOfRows*numberOfColumns
  end

  def deep_copy
    copy = {}
    @table.each {|k,v| copy[k] = v.clone}
    copy
  end

  #
  # Perform a persistent change on the receiver. Usage like change.
  def operate_on!(column, operation)
    values = send(column)
    send(column).each_index {|i|
      newval = eval("#{values[i]} #{operation}")
      send(column)[i] = newval.to_s unless newval.nil?
    }
    self
  end

  #
  # Perform a change on a object copy. column can be any attribute of the
  # object and the operation has to be a string, which can be evaluated by the
  # interpreter, e.g. "+ 0.883" or "*Math.sin(#{myvar})"
  def operate_on(column, operation)
    self.class.new("hash","plain",deep_copy).operate_on!(column,operation)
  end

  def set_column!(column, expression)
    values = send(column)
    send(column).each_index {|i|
      send(column)[i] = eval(expression).to_s
    }
    self
  end
  def set_column(column, expression)
    self.class.new("hash","plain",deep_copy).set_column!(column,expression)
  end
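Sketch for the column operations above (column names invented). The operation string is handed to eval for every cell, so it should only ever come from trusted code.

    # non-destructive: returns a copy with the pressure column rescaled
    in_hpa = data.operate_on(:pressure, "* 0.01")

    # destructive counterpart changes the receiver in place
    data.operate_on!(:pressure, "* 0.01")

    # set_column! overwrites every cell of an existing column with an expression's value
    data.set_column!(:comment, "'ok'")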

  #
  # Iteration over datasets containing values of all columns
  def each(&block)
    objects = []
    (0...size).each {|i| objects << selectBy_index([i])}
    objects.each(&block)
  end

  #
  # iterator over different values of key
  def each_by(key,sort_uniq=true, &block)
    if sort_uniq
      send(key).uniq.sort.each(&block)
    else
      send(key).each(&block)
    end
  end

  #
  # each_obj iterates over the subobject of the receiver, which belong to the
  # certain value of key
  def each_obj(key, &block)
    retval = []
    send(key).sort.uniq.each {|value|
      retval << selectBy(key => value)
    }
    if block_given?
      retval.each(&block)
    else
      retval
    end
  end

  # :call-seq:
  #   split.(:col0,...,:colN) {|obj| ...}
  #   splot.(:col0,...,:coln) -> [obj0,...,objM]
  #
  # split is a multi-key-version of each_obj. the receiver is splitted into
  # subobject, which have constant values in all given columns
  #
  # eg.
  # <tt>qpol.split(:kv, :focus) {|little_qp| little_qp.kv == little_kv.uniq}</tt>
  #
  # or
  #
  # <tt>qpol.split(:kv, :focus) = [qpol_0,...,qpol_N]</tt>
  def split(*columns, &block)
    retval = []
    deep_split(columns, retval)
    if block_given?
      retval.each(&block)
    else
      retval
    end
  end

  # really perform the splitting necessary for split
  def deep_split(columns, retval)
    case
    when (columns.nil? or columns.empty? or size == 1)
      retval << self
    when (columns.size == 1 and send(columns[0]).uniq.size == 1)
      retval << self
    else
      each_obj(columns[0]) {|obj| obj.deep_split(columns[1..-1], retval)}
    end
  end
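Sketch for split, following the :kv/:focus example from the call-seq comment above (everything else is invented):

    # one sub-object per distinct (kv, focus) combination
    qpol.split(:kv, :focus) {|part|
      puts [part.kv.first, part.focus.first, part.rsize].join("\t")
    }

    # without a block the sub-objects are returned as an array
    parts = qpol.split(:kv, :focus)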

  # hash representation of the data
  def to_hash
    @table
  end

  def add(name, value)
    new_ostruct_member(name)
    self.send(name.to_s+"=", value)
    self.datacolumns << name.to_s unless self.datacolumns.include?(name.to_s)
    return
  end

  # array representatio nof the data
  def to_ary
    @table.to_a
  end
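Sketch for add and datasets above (column names and values invented); the added array has to match the existing row count.

    data.add(:station, ["A", "B", "A"])
    data.datasets(:temperature, :station)   # row-wise [[t0,"A"], [t1,"B"], [t2,"A"]]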

  # Texcode for the table with vertical and horzontal lines, which contains
  # values of the given columns
  def to_texTable(cols,col_align="c",math=false)
    hline = '\\hline'
    # tex << '$' + cols.each {|col| col.sub(/(.+)_(.+)/,"\\1_\{\\2\}")}.join("$&$") + '$' + "\\\\\n"
    tex = ''
    tab_align = ''
    cols.size.times { tab_align << '|' + col_align }
    tab_align << '|'
    tex << '\begin{tabular}{' + tab_align + '}' + hline + "\n"
    if math
      tex << '$' + cols.join("$&$").gsub(/(\w+)_(\w+)/,"\\1_\{\\2\}") + '$' + '\\\\' + hline + "\n"
    else
      tex << cols.join(" & ") + '\\\\' + hline +"\n"
    end
    datasets(cols).each {|dataset|
      tex << dataset.join(" & ") + '\\\\' + hline + "\n"
    }
    tex << '\end{tabular}' + "\n"
    tex
  end

  # String output. See ExtCsvExporter.to_string
  def to_string(stype)
    ExtCsvExporter.new("extcsv",
                       ([datacolumns.sort] +
                        datasets(*datacolumns.sort)).transpose
                      ).to_string(stype)
  end
  def to_file(filename, filetype="txt")
    File.open(filename,"w") do |f|
      f << to_string(filetype)
    end
  end
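Export sketch (file name invented); the recognised type strings are those listed above ExtCsvExporter#to_string further down.

    data.to_file("subset.txt", "txt")     # tab separated text
    xml = data.to_string("xml")           # serialised string instead of a file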

  # Equality if the datacolumns have the save values,i.e. as float for numeric
  # data and as strings otherwise
  # the time-column is exceptional, because the e.g. the seconds could be left
  # out when file is saved with MSExcel
  def eql?(other)
    return false unless (
      self.datatype == other.datatype or self.datatype == other.datatype
    )

    omitted = %w|bfe_vers version zeit time|
    return false unless self.datacolumns == other.datacolumns

    # split between textual and numeric values
    text_columns = %w|anlage kommentar dateiname| & self.datacolumns
    num_columns = self.datacolumns - text_columns - omitted
    text_columns.each {|c| return false if send(c) != other.send(c)}
    num_columns.each {|c|
      a_ = send(c)
      a__ = a_.collect {|v| v.to_f}
      b_ = other.send(c)
      b__ = b_.collect {|v| v.to_f}
      #$stdout << c << "\n" << filename << "\n";
      #$stdout << c << "\n" << other.filename << "\n";
      return false if (b__ != a__)
      #if (send(c).collect {|v| v.to_f} != other.send(c).collect {|v| v.to_f})}
    }
    return true
  end

  def <=>(other)
    compare = (self.size <=> other.size)
    #test $stdout << compare.to_s << "\n"
    compare = (datacolumns.size <=> other.datacolumns.size) if compare.zero?
    #test $stdout << compare.to_s << "\n" if compare.zero?
    compare = (to_s.size <=> other.to_s.size) if compare.zero?
    #test $stdout << compare.to_s << "\n" if compare.zero?
    compare = (to_s <=> other.to_s) if compare.zero?
    #test $stdout << compare.to_s << "\n" if compare.zero?
    #test $stdout << "##################################\n"
    compare
  end

  # has to be defined for using eql? in uniq
  def hash;0;end

  def [](*argv)
    copy = @table.dup
    copy.each {|k,v| copy[k] = (argv.size == 1 and argv[0].kind_of?(Fixnum)) ? [v[*argv]] : v[*argv] if v.kind_of?(Array) }
    ExtCsv.new("hash","plain",copy)
  end
  alias :slice :[]

  def concat(other)
    ExtCsv.concat(self,other)
  end
  alias :+ :concat
  alias :<< :concat

  def combine(other)
    return self unless other.kind_of?(self.class)
    1.times do
      warn "Both object should have the same number of datasets to be combined"
      warn "Size of first Object (#{filename}): #{rsize}"
      warn "Size of second Object (#{other.filename}): #{other.rsize}"
      return nil
    end unless rsize == other.rsize
    objects, datatypes = [self, other],[datatype,other.datatype]
    udatatypes = datatypes.uniq
    #
    case udatatypes.size
    when 1
      hash = marshal_dump.merge(other.marshal_dump)
    else
      if datatypes.include?("csv")
        csv_index = datatypes.index("csv")
        qpol_index = csv_index - 1
        objects[csv_index].modyfy_time_column
        hash = objects[csv_index].marshal_dump.merge(objects[qpol_index].marshal_dump)
        hash[:filename] = []
        hash[:filename] << objects[csv_index].filename << objects[qpol_index].filename
      else
        hash = marshal_dump.merge(other.marshal_dump)
        hash[:filename] = []
        hash[:filename] << other.filename << filename
      end
    end
    # preserving the filenames
    hash[:filemtime] = [self.filemtime.to_s, other.filemtime.to_s].min
    ExtCsv.new("hash","plain",hash)
  end
  alias :& :combine

  # Objects in ary_of_objs are glues in a new ExtCsv object. They should have
  # the same datatype
  # TODO: if at least two objects have different columns, the composite objetc
  # should have empty values at the corresponding dataset. So be carefull with
  # this version of concat!
  def ExtCsv.concat(*ary_of_objs)
    return unless ary_of_objs.collect{|obj| obj.datatype}.uniq.size == 1
    ary_of_objs.flatten! if ary_of_objs[0].kind_of?(Array)
    new_obj_hash = {}
    ary_of_objs.each {|obj|
      obj.to_hash.each {|k,v|
        new_obj_hash[k] = v.class.new unless new_obj_hash[k].kind_of?(v.class)
        new_obj_hash[k] += v
      }
    }
    new_obj_hash[:filename] = ary_of_objs.collect{|td| td.filename}
    new_obj_hash[:filemtime] = ary_of_objs.collect{|td| td.filemtime}
    ExtCsv.new("hash","plain",new_obj_hash)
  end

  def ExtCsv.combine(obj, obj_=nil)
    obj.combine(obj_)
  end
  private :deep_copy, :set_separators, :parse_content
end
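Sketch for the glue methods, using the operator aliases defined above (file names invented); per the comments, concat expects identical columns and datatypes, combine expects an equal number of rows.

    a = ExtCsv.new("file", "txt", "monday.txt")
    b = ExtCsv.new("file", "txt", "tuesday.txt")

    both = a + b    # rows of b appended to a (alias of concat)
    wide = a & b    # columns of both merged  (alias of combine)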

class ExtCsvExporter
  DEFAULT_FILENAME = "measurement.txt"

  # See to_string for allowed data types. <em>data_content</em> accepts the
  # output from ExtCsv.to_ary.
  def initialize(data_type, data_content)
    @line_sep = "\n"
    @data_type = data_type
    @content = data_content
  end

  # Optional string types are:
  # * csv , separation by ','
  # * ssv, separation by ';'
  # * tsv, separation by '\t'
  # * psv, separation by '|'
  # * xml, see to_xml
  def to_string(string_type,enc="en")
    string_type = "xml" if string_type.nil? or string_type.empty?
    out = ''
    case string_type
    when "csv"
      sep = ","
    when "ssv"
      sep = ";"
    when "tsv" , "txt"
      sep = "\t"
    when "psv"
      sep = "|"
    when "xml"
      out = to_xml
    when "tex"
    else
      puts "Wrong type! Use xml, tex, csv, ssv, psv, txt or tsv instead."
      raise
    end
    @content.transpose.each {|data_set|
      out << data_set.join(sep) + @line_sep
    } unless string_type == "xml"
    #out.gsub(/\./,",") if enc == "de"
    out
  end

  # XML-Documents must be treated separately: tags are named like the attributes.
  def to_xml
    xml = "<?xml version='1.0' encoding='ISO-8859-1'?>\n"
    xml << "<" + @data_type + ">\n"
    output_array = @content.transpose
    tags = output_array.first
    data = output_array[1..-1]
    data.each {|values|
      xml << " <record>\n"
      values.each_with_index {|value,i|
        xml << " <#{tags[i]}>#{value}</#{tags[i]}>\n"
      }
      xml << " </record>\n"
    }
    xml << "</" + @data_type + ">"
    xml
  end
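ExtCsvExporter can also be driven directly; a sketch with invented data, passing the columns as [name, value0, value1, ...] arrays the way ExtCsv#to_string does above.

    exporter = ExtCsvExporter.new("measurement",
                                  [["temperature", "20.0", "21.5"],
                                   ["pressure",    "1013", "990"]])
    puts exporter.to_string("csv")          # comma separated rows
    exporter.to_file("measurement.xml")     # type inferred from the extension, here xml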

  # Create files of types, that are allowed by ExtCsvExporter.to_string
  def to_file(file, filetype=nil)
    # Create the output directory
    dir = File.dirname(File.expand_path(file))
    FileUtils.mkdir_p(dir) unless File.directory?(dir)

    filename = File.directory?(file) ? DEFAULT_FILENAME : File.basename(file)
    filetype = File.extname(filename)[1..-1] if filetype.nil? or filetype.empty?
    File.open(file,"w") {|f|
      f << to_string(filetype)
    }
  end
end