extcsv 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/extcsv.rb ADDED
@@ -0,0 +1,733 @@
1
+ require 'rubygems'
2
+ require 'csv'
3
+ require 'ostruct'
4
+
5
+ ################################################################################
6
+ # Author:: Ralf Mueller
7
+ ################################################################################
8
# Small null-object style helper: an instance renders as an empty string,
# converts to an empty array and always reports itself as empty.
#
# NOTE: this defines a standalone class called +Nil+. It does not reopen
# Ruby's +NilClass+, so the behaviour of +nil+ itself is not changed by it.
class Nil
  # Returns the empty string.
  def to_s
    ''
  end

  # Returns a new empty array.
  def to_a
    []
  end

  # Always true.
  def empty?
    true
  end
end
13
+
14
# Column-oriented table built on OpenStruct. Every data column is stored as
# an array of cell strings under its (normalized) column name and is
# reachable through an accessor of that name; the names listed in METADATA
# describe the source instead of holding data.
#
# Objects can be created from delimited text (mode "string" or "file"), from
# a hash of columns (mode "hash") or from an array of columns (mode "array").
class ExtCsv < OpenStruct
  VERSION = '0.10.0'

  include Comparable
  include Enumerable

  # Allowed data types
  TYPES = %w{csv tsv psv txt plain}

  # Allowed input modes, db and url are not supported, yet
  MODES = %w{file db url hash array string}

  # Column names from different file types which have the same meaning:
  # each key is used as a pattern against the normalized header name, the
  # value is the column name that is stored instead.
  DOUBLE_COLUMNS = {}

  # Non-Data fields
  METADATA = %w{mode datatype datacolumns cellsep rowsep filename filemtime}

  # Comparison operators understood by selectBy (two-character ones first,
  # so that "<=" is not read as "<").
  COMPARISONS = %w{<= >= == < > !=}

  # German umlauts and their ASCII replacements used for column names.
  HEADER_TRANSLITERATIONS = {
    "\u00FC" => 'ue', "\u00DC" => 'ue',
    "\u00E4" => 'ae',
    "\u00F6" => 'oe'
  }
  HEADER_TRANSLITERATION_PATTERN = Regexp.union(HEADER_TRANSLITERATIONS.keys)

  # A date written as dd.mm.yyyy
  GERMAN_DATE = /\A\d{1,2}\.\d{1,2}\.\d{2,4}\z/

  private_constant :COMPARISONS, :HEADER_TRANSLITERATIONS,
                   :HEADER_TRANSLITERATION_PATTERN, :GERMAN_DATE

  # mode can be one of the allowed MODES
  # datatype can be one of the TYPES
  # params is the data source: the text for "string", a path for "file", a
  # hash of columns for "hash" and an array of [name, value, ...] columns
  # for "array".
  #
  # Raises a RuntimeError if mode or datatype is not allowed. A missing
  # input file is reported on $stdout and yields an object without columns.
  #
  # === Example
  #   ExtCsv.new("file","txt","Data.txt")
  #   ExtCsv.new("file","csv","Ergebniss.csv")
  def initialize(mode, datatype, params)
    unless MODES.include?(mode) && TYPES.include?(datatype)
      raise "use '#{MODES.join("','")}' for first " +
            "and '#{TYPES.join(",")}' for second parameter " +
            "datatype was '#{datatype}', mode was '#{mode}'"
    end

    obj_hash = { :mode => mode, :datatype => datatype, :datacolumns => [] }

    # Grep data from the given source
    case mode
    when "string"
      set_separators(obj_hash)
      parse_content(params, obj_hash)
    when "file"
      if File.exist?(params)
        obj_hash[:filename]  = params
        obj_hash[:filemtime] = File.mtime(params).strftime("%Y-%m-%d %H:%M:%S")
        set_separators(obj_hash)
        parse_content(File.read(params), obj_hash)
      else
        # Keep going with an empty table so that the object stays usable.
        $stdout << "The input file '#{params}' cannot be found!\n"
        $stdout << "Please check path and filename." << "\n"
      end
    when "hash"
      # Work on a copy: the caller's hash must not be changed.
      obj_hash = params.dup
      obj_hash[:datacolumns] = obj_hash.keys.collect { |key| key.to_s } - METADATA
    when "array"
      params.each do |column|
        name = column[0].to_s
        obj_hash[:datacolumns] << name
        obj_hash[name.to_sym] = column[1..-1]
      end
    end
    super(obj_hash)
  end

  # Store the cell and row separators that belong to the datatype in
  # obj_hash. The datatype "plain" has no cell separator (nil).
  def set_separators(obj_hash)
    obj_hash[:cellsep] = case obj_hash[:datatype]
                         when "txt", "tsv" then "\t"
                         when "csv"        then ';'
                         when "psv"        then "|"
                         end
    obj_hash[:rowsep] = "\r\n"
  end

  # Main method for parsing input strings; the columns found are written
  # into obj_hash. Comments and other special signs are treated as follows
  # * every ',' is turned into '.' (decimal sign)
  # * empty lines are removed
  # * first line is taken to be the header with column names. If that
  #   line starts with a comment sign (#), this sign is removed.
  # * any other line which starts with '#' is ignored
  # * column names are normalized, see normalized_column_name
  # * for the datatype "csv" all rows are cut to the length of the shortest
  #   row; for other datatypes rows of different length raise an IndexError
  #
  # Raises a RuntimeError if the datatype has no cell separator.
  def parse_content(filecontent, obj_hash)
    separator = obj_hash[:cellsep]
    if separator.nil?
      raise "no cell separator is defined for datatype '#{obj_hash[:datatype]}'"
    end

    # special treatment of the decimal sign
    text = utf8_text(filecontent).gsub(',', '.')
    # unify the line ends and remove blank lines
    text = text.gsub(/\r\n?/, "\n").gsub(/\n{2,}/, "\n")
    rows = CSV.parse(text, :col_sep => separator, :row_sep => "\n")

    header = rows.shift
    return if header.nil?

    names = header.each_with_index.map do |cell, i|
      # remove the comment sign from the header
      cell = cell.sub(/\A#+/, '') if i.zero? && cell
      normalized_column_name(cell)
    end

    # read the data itself
    data = rows.reject do |row|
      row.compact.empty? || row.first.to_s.start_with?('#')
    end

    table = [names] + data
    if obj_hash[:datatype] == "csv"
      width = table.collect { |row| row.size }.min
      table = table.collect { |row| row[0...width] }
    end

    table.transpose.each do |column|
      name   = stored_column_name(column[0])
      values = column[1..-1].collect { |cell| cell.nil? ? '' : cell.to_s }
      obj_hash[name.to_sym] = values
      obj_hash[:datacolumns] << name unless obj_hash[:datacolumns].include?(name)
      # TODO: the following is some kind of german specific DateTime
      # conversion, see change_time_format definition for more info.
      change_time_format(values) if name == "zeit"
    end
  end

  # Create an auto index: the row numbers 0...rsize
  def index
    (0...rsize).to_a
  end

  # Do a selection by the index of the dataset inside the receiver. This does
  # not change the receiver. Metadata and non-array values are taken over
  # unchanged.
  def selectBy_index(indexes)
    new_table = {}
    @table.each do |key, value|
      keep = METADATA.include?(key.to_s) || !value.kind_of?(Array)
      new_table[key] = keep ? value : value.values_at(*indexes)
    end
    self.class.new("hash", "plain", new_table)
  end

  # Selection can be made by regular expressions. This method decides
  # whether pattern contains regular expression syntax: comparisons never
  # do, and decimal parts (for "zeit" also dashes) are not counted.
  def is_regexp?(pattern, key)
    return false if /(<|<=|>=|>)\s*/.match(pattern)
    stripped = if key == "zeit"
                 pattern.gsub(/(-|\.\d)/, '')
               else
                 pattern.gsub(/\.\d/, '')
               end
    stripped != Regexp.escape(stripped)
  end

  # Select the datasets that fulfil all given conditions and return them as
  # a new object. selection is a hash whose keys are column names (strings
  # or symbols, do not mix both for the same column), e.g.
  # * {:col1 => "4", :col2 => "100", :col3 => "80"}
  # * {:col1 => /(4|5)/, :col2 => "<500", :col3 => ">=80"}
  # A condition can be a number (numerical equality), a regular expression
  # or a string. Strings may start with one of the operators
  # <=, >=, ==, <, >, != ; without operator '==' is used. If the value of a
  # string condition reads like a number, cells are compared numerically
  # (cells that are not numbers never match), otherwise as strings.
  # The selection hash itself is not changed.
  #
  # Raises a RuntimeError for unknown columns, missing values and
  # conditions of the wrong type.
  def selectBy(selection)
    criteria = {}
    selection.each do |column, needle|
      criteria[column.kind_of?(String) ? column.to_sym : column] = needle
    end

    # test for unknown selection variables
    criteria.each_key do |column|
      unless @table.has_key?(column)
        raise "Object does NOT have the attribute '#{column}'!"
      end
    end

    # default is the lookup in the whole array of values
    lookup = if criteria.empty?
               index
             else
               (0...@table[criteria.keys.first].size).to_a
             end

    criteria.each do |column, needle|
      matcher = cell_matcher(column, needle)
      cells   = @table[column]
      lookup  = lookup.select { |i| matcher.call(cells[i]) }
    end
    selectBy_index(lookup)
  end

  # Find the dataset with the value of key closest to the value parameter.
  # Exact matches are returned as they are (possibly several datasets);
  # otherwise the single dataset with the smallest numerical distance is
  # returned. If a smaller and a greater value are equally close, the
  # greater one wins. Without any numerical cell the result is empty.
  def closest_to(key, value)
    # try to select directly
    exact = selectBy(key => value)
    return exact unless exact.empty?

    target = numeric_value(value)
    return exact if target.nil?

    candidates = []
    @table[key.to_sym].each_with_index do |cell, i|
      number = numeric_value(cell)
      next if number.nil?
      below = number < target
      # distance first, then "greater before smaller", then the position
      candidates << [(number - target).abs, below ? 1 : 0, below ? -i : i, i]
    end
    return exact if candidates.empty?
    selectBy_index([candidates.min.last])
  end

  # Transform the time from "dd.mm.yyyy hh:mm:ss" to "yyyy-mm-dd hh:mm:ss"
  # in place. For the comparison of timestamps this format is useful,
  # because the '<=>' comparison of the strings coincides with the temporal
  # order. A value without a space that is not a date is considered a time
  # (e.g. hh:mm) of the current day. Empty values and values whose date part
  # is not written as dd.mm.yyyy are left untouched.
  def change_time_format(times)
    times.each_with_index do |time, i|
      stamp = time.to_s
      next if stamp.strip.empty?
      unless stamp.include?(" ") || stamp =~ GERMAN_DATE
        stamp = Time.new.strftime("%d.%m.%Y ") + stamp
      end
      date_part, time_part = stamp.split(" ")
      next unless date_part =~ GERMAN_DATE
      day, month, year = date_part.split(".")
      iso_date = [year, month, day].join('-')
      if time_part.nil?
        times[i] = iso_date
      else
        hour, minute, second = time_part.split(":")
        times[i] = iso_date + " " + [hour, minute, second].compact.join(':')
      end
    end
  end

  # Return an array of datasets, which consist of the values of the given
  # columns in order of these columns, e.g.
  # [[col0_val0,col1_val0,...],...,[col0_valN, col1_valN,...]]
  # The columns can be given one by one or as a single array.
  def datasets(*columns)
    columns.flatten.collect { |col| @table[col.to_sym] }.transpose
  end

  # Replace every array inside the receiver by an empty one.
  def clear
    @table.each { |k, v| @table[k] = [] if v.kind_of?(Array) }
  end

  # True if there is nothing stored at all or if any stored array is empty.
  def empty?
    @table.empty? || @table.any? { |_, v| v.kind_of?(Array) && v.empty? }
  end

  #
  # Different size definitions
  #
  # Length of the first data column (0 without data columns).
  def size
    column_length(datacolumns.first)
  end

  # Length of the last data column (0 without data columns).
  def numberOfRows
    column_length(datacolumns.last)
  end
  alias :rsize :numberOfRows

  def numberOfColumns
    datacolumns.size
  end
  alias :csize :numberOfColumns

  def globalsize
    numberOfRows * numberOfColumns
  end

  # Copy of the internal table; arrays and their elements are duplicated.
  def deep_copy
    copy = {}
    @table.each do |key, value|
      copy[key] = if value.kind_of?(Array)
                    value.collect { |item| duplicate_value(item) }
                  else
                    duplicate_value(value)
                  end
    end
    copy
  end

  #
  # Perform a persistent change on the receiver. Usage like operate_on.
  #
  # SECURITY: the cell contents and operation are handed to eval, so both
  # must come from a trusted source. The locals +values+ (the column) and
  # +i+ (the row number) are visible to the evaluated code.
  def operate_on!(column, operation)
    values = send(column)
    values.each_index { |i|
      newval = eval("#{values[i]} #{operation}")
      values[i] = newval.to_s unless newval.nil?
    }
    self
  end

  #
  # Perform a change on a object copy. column can be any attribute of the
  # object and the operation has to be a string, which can be evaluated by the
  # interpreter, e.g. "+ 0.883" or "*Math.sin(#{myvar})"
  def operate_on(column, operation)
    self.class.new("hash", "plain", deep_copy).operate_on!(column, operation)
  end

  # Set every cell of column to the (stringified) result of expression.
  #
  # SECURITY: expression is handed to eval and must come from a trusted
  # source. The locals +values+ (the column) and +i+ (the row number) are
  # visible to the evaluated code.
  def set_column!(column, expression)
    values = send(column)
    values.each_index { |i|
      values[i] = eval(expression).to_s
    }
    self
  end

  # Like set_column!, but works on a copy of the receiver.
  def set_column(column, expression)
    self.class.new("hash", "plain", deep_copy).set_column!(column, expression)
  end

  #
  # Iteration over datasets containing values of all columns: each dataset
  # is handed over as an object with a single row.
  def each(&block)
    (0...size).collect { |i| selectBy_index([i]) }.each(&block)
  end

  #
  # iterator over different values of key
  def each_by(key, sort_uniq=true, &block)
    values = send(key)
    values = values.uniq.sort if sort_uniq
    values.each(&block)
  end

  #
  # each_obj iterates over the subobjects of the receiver, which belong to
  # the certain values of key. Without a block the subobjects are returned.
  def each_obj(key, &block)
    retval = send(key).sort.uniq.collect { |value| selectBy(key => value) }
    block_given? ? retval.each(&block) : retval
  end

  # :call-seq:
  #   split(:col0,...,:colN) {|obj| ...}
  #   split(:col0,...,:colN) -> [obj0,...,objM]
  #
  # split is a multi-key-version of each_obj. The receiver is split into
  # subobjects, which have constant values in all given columns
  #
  # eg.
  #   <tt>qpol.split(:kv, :focus) {|little_qp| little_qp.kv == little_kv.uniq}</tt>
  #
  # or
  #
  #   <tt>qpol.split(:kv, :focus) = [qpol_0,...,qpol_N]</tt>
  def split(*columns, &block)
    retval = []
    deep_split(columns, retval)
    block_given? ? retval.each(&block) : retval
  end

  # really perform the splitting necessary for split: the subobjects are
  # appended to retval
  def deep_split(columns, retval)
    if columns.nil? || columns.empty? || size == 1
      retval << self
    elsif columns.size == 1 && send(columns[0]).uniq.size == 1
      retval << self
    else
      each_obj(columns[0]) { |obj| obj.deep_split(columns[1..-1], retval) }
    end
  end

  # hash representation of the data (the internal table itself, no copy)
  def to_hash
    @table
  end

  # Add (or replace) the column name with the given values and register it
  # as data column. Returns nil.
  def add(name, value)
    # OpenStruct#[]= stores the value and creates the accessors.
    self[name.to_sym] = value
    datacolumns << name.to_s unless datacolumns.include?(name.to_s)
    return
  end

  # array representation of the data
  def to_ary
    @table.to_a
  end

  # Texcode for the table with vertical and horizontal lines, which contains
  # values of the given columns. With math set, the column titles are put
  # into math mode and "a_b" is written as "a_{b}".
  def to_texTable(cols, col_align="c", math=false)
    hline     = '\\hline'
    tab_align = ('|' + col_align) * cols.size + '|'
    titles    = if math
                  '$' + cols.join("$&$").gsub(/(\w+)_(\w+)/, '\1_{\2}') + '$'
                else
                  cols.join(" & ")
                end
    tex = ''.dup
    tex << '\begin{tabular}{' + tab_align + '}' + hline + "\n"
    tex << titles + '\\\\' + hline + "\n"
    datasets(*cols).each { |dataset|
      tex << dataset.join(" & ") + '\\\\' + hline + "\n"
    }
    tex << '\end{tabular}' + "\n"
    tex
  end

  # String output. See ExtCsvExporter.to_string
  def to_string(stype)
    sorted = datacolumns.sort
    ExtCsvExporter.new("extcsv",
                       ([sorted] + datasets(*sorted)).transpose
                      ).to_string(stype)
  end

  # Write the string output of the given type into filename.
  def to_file(filename, filetype="txt")
    File.open(filename, "w") do |f|
      f << to_string(filetype)
    end
  end

  # Equality if the datacolumns have the same values, i.e. as float for
  # numeric data and as strings otherwise.
  # The time-column is exceptional, because e.g. the seconds could be left
  # out when a file is saved with MSExcel
  def eql?(other)
    return false unless other.kind_of?(ExtCsv)
    return false unless datatype == other.datatype
    return false unless datacolumns == other.datacolumns

    omitted = %w|bfe_vers version zeit time|
    # split between textual and numeric values
    text_columns = %w|anlage kommentar dateiname| & datacolumns
    num_columns  = datacolumns - text_columns - omitted
    text_columns.each { |c| return false if send(c) != other.send(c) }
    num_columns.each { |c|
      mine   = send(c).collect { |v| v.to_f }
      theirs = other.send(c).collect { |v| v.to_f }
      return false if mine != theirs
    }
    true
  end

  # Order by number of datasets, number of columns, length of the string
  # representation and finally the string representation itself. Objects of
  # other kinds are not comparable (nil).
  def <=>(other)
    return nil unless other.kind_of?(ExtCsv)
    compare = (size <=> other.size)
    compare = (datacolumns.size <=> other.datacolumns.size) if compare.zero?
    compare = (to_s.size <=> other.to_s.size) if compare.zero?
    compare = (to_s <=> other.to_s) if compare.zero?
    compare
  end

  # has to be defined for using eql? in uniq
  def hash; 0; end

  # Slice all data columns like Array#[] and return the result as a new
  # object. A single integer index yields an object with one dataset.
  def [](*argv)
    single = (argv.size == 1 and argv[0].kind_of?(Integer))
    copy = {}
    @table.each do |key, value|
      copy[key] = if value.kind_of?(Array) && !METADATA.include?(key.to_s)
                    single ? [value[*argv]] : (value[*argv] || [])
                  else
                    value
                  end
    end
    self.class.new("hash", "plain", copy)
  end
  alias :slice :[]

  # New object with the datasets of the receiver followed by those of other.
  # The receiver is not changed. See ExtCsv.concat
  def concat(other)
    ExtCsv.concat(self, other)
  end
  alias :+ :concat
  alias :<< :concat

  # Put the columns of the receiver and of other side by side in a new
  # object. Both objects must have the same number of datasets, otherwise a
  # warning is printed and nil is returned. The receiver is returned, if
  # other is no object of the receiver's class.
  def combine(other)
    return self unless other.kind_of?(self.class)
    unless rsize == other.rsize
      warn "Both object should have the same number of datasets to be combined"
      warn "Size of first Object (#{to_hash[:filename]}): #{rsize}"
      warn "Size of second Object (#{other.to_hash[:filename]}): #{other.rsize}"
      return nil
    end

    datatypes = [datatype, other.datatype]
    if datatypes.uniq.size == 1
      merged = to_hash.merge(other.to_hash)
    elsif datatypes.include?("csv")
      pair      = [self, other]
      csv_index = datatypes.index("csv")
      csv_obj   = pair[csv_index]
      qpol_obj  = pair[1 - csv_index]
      # Hook that is not defined in this class; it is only called if
      # something (e.g. a subclass) provides it.
      csv_obj.modyfy_time_column if csv_obj.respond_to?(:modyfy_time_column)
      merged = csv_obj.to_hash.merge(qpol_obj.to_hash)
      # preserving the filenames
      merged[:filename] = [csv_obj.to_hash[:filename], qpol_obj.to_hash[:filename]]
    else
      merged = to_hash.merge(other.to_hash)
      # preserving the filenames
      merged[:filename] = [other.to_hash[:filename], to_hash[:filename]]
    end
    merged[:filemtime] = [to_hash[:filemtime].to_s, other.to_hash[:filemtime].to_s].min
    ExtCsv.new("hash", "plain", merged)
  end
  alias :& :combine

  # Objects in ary_of_objs (given one by one or as arrays) are glued in a new
  # ExtCsv object. They must have the same datatype, otherwise nil is
  # returned. Array values are appended in the given order, other values are
  # taken from the first object that has them; filename and filemtime
  # collect the values of all objects.
  # TODO: if at least two objects have different columns, the composite object
  # should have empty values at the corresponding dataset. So be careful with
  # this version of concat!
  def ExtCsv.concat(*ary_of_objs)
    # Do not use Array#flatten here: it would take the objects themselves
    # apart, because they respond to to_ary.
    objs = ary_of_objs.flat_map { |item| item.kind_of?(Array) ? item : [item] }
    return unless objs.collect { |obj| obj.datatype }.uniq.size == 1

    new_obj_hash = {}
    objs.each { |obj|
      obj.to_hash.each { |k, v|
        if v.kind_of?(Array)
          new_obj_hash[k] = [] unless new_obj_hash[k].kind_of?(Array)
          new_obj_hash[k] += v
        elsif !new_obj_hash.has_key?(k)
          new_obj_hash[k] = v
        end
      }
    }
    new_obj_hash[:filename]  = objs.collect { |td| td.to_hash[:filename] }
    new_obj_hash[:filemtime] = objs.collect { |td| td.to_hash[:filemtime] }
    ExtCsv.new("hash", "plain", new_obj_hash)
  end

  def ExtCsv.combine(obj, obj_=nil)
    obj.combine(obj_)
  end

  # Input text as valid UTF-8. Text that is not valid in its encoding is
  # taken to be ISO-8859-1.
  def utf8_text(raw)
    text = raw.to_s.dup
    text.force_encoding(Encoding::UTF_8) if text.encoding == Encoding::ASCII_8BIT
    text.force_encoding(Encoding::ISO_8859_1) unless text.valid_encoding?
    text.encode(Encoding::UTF_8)
  end

  # Column name made from a header cell:
  # * a missing name becomes "test"
  # * lower case, spaces and double quotes are removed
  # * german umlauts are translated into ascii-conform versions
  #   TODO: This is some kind of arbitrary, there should be a more general
  #   solution
  # * units in square brackets are removed
  # * leading '+' and '-' are changed into the corresponding words
  # * the micro sign in front of 'm' is changed into "mu"
  # * the remaining '-', '+', '(', ')' and '.' are changed into underscores
  def normalized_column_name(raw)
    name = (raw.nil? ? "test" : raw.to_s).downcase.delete(' "')
    name = name.gsub(HEADER_TRANSLITERATION_PATTERN, HEADER_TRANSLITERATIONS)
    name = name.gsub(/\[\w*\]/, "")
    name = name.gsub(/^\+/, "plus_")
    name = name.gsub("\u00B5m", 'mu')
    name = name.gsub(/^-/, "minus_")
    name.tr('-+().', '_').chomp
  end

  # File specific change of the column names: for each physical meaning
  # there should be only one column, see DOUBLE_COLUMNS.
  def stored_column_name(name)
    pattern = DOUBLE_COLUMNS.keys.find { |k| /#{k}/.match(name) }
    ((pattern && DOUBLE_COLUMNS[pattern]) || name).to_s
  end

  # Number of values stored for the column name; 0 if there is none.
  def column_length(name)
    return 0 if name.nil?
    values = @table[name.to_sym]
    values.nil? ? 0 : values.size
  end

  # Build the test for a single selectBy condition: a lambda which takes a
  # cell and tells whether it fulfils the condition.
  def cell_matcher(column, needle)
    case needle
    when Numeric
      numeric_matcher("==", needle)
    when Regexp
      lambda { |cell| !needle.match(cell.to_s).nil? }
    when String
      md = /\A\s*(#{COMPARISONS.join("|")})([^=].*)/.match(needle)
      # separate the operation
      operation, value = md.nil? ? ['==', needle] : [md[1], md[2].strip]
      if value == ""
        raise "value for variable '#{column}' is missing"
      elsif value.to_f.to_s == value || value.to_i.to_s == value
        # A: numerical compare
        numeric_matcher(operation, value.to_f)
      else
        # B: String-like compare
        string_matcher(operation, unquoted(value))
      end
    else
      raise "The Parameter '#{needle}' has the wrong Type. " +
            "Please use numeric values, stings or regular expressions (e.g. /(^50$|200)/)"
    end
  end

  # Numerical comparison of a cell with number; cells that are no numbers
  # do not match.
  def numeric_matcher(operation, number)
    lambda do |cell|
      value = numeric_value(cell)
      value.nil? ? false : value.public_send(operation, number)
    end
  end

  # Comparison of the string representation of a cell with text.
  def string_matcher(operation, text)
    lambda { |cell| cell.to_s.public_send(operation, text) }
  end

  # The float a cell stands for or nil, if it is not a number.
  def numeric_value(cell)
    return cell.to_f if cell.kind_of?(Numeric)
    Float(cell.to_s)
  rescue ArgumentError, TypeError
    nil
  end

  # value without surrounding single or double quotes
  def unquoted(value)
    md = /\A'(.*)'\z/m.match(value) || /\A"(.*)"\z/m.match(value)
    md.nil? ? value : md[1]
  end

  # Copy of value; objects that cannot be duplicated are returned as is.
  def duplicate_value(value)
    value.dup
  rescue TypeError
    value
  end

  private :deep_copy, :set_separators, :parse_content
  private :utf8_text, :normalized_column_name, :stored_column_name,
          :column_length, :cell_matcher, :numeric_matcher, :string_matcher,
          :numeric_value, :unquoted, :duplicate_value
end
659
+
660
# Writes tabular data as delimited text, TeX table rows or XML.
class ExtCsvExporter
  DEFAULT_FILENAME = "measurement.txt"

  # Cell separators of the line based string types
  SEPARATORS = {
    "csv" => ",",
    "ssv" => ";",
    "tsv" => "\t",
    "txt" => "\t",
    "psv" => "|",
    "tex" => " & "
  }

  # Characters which must not appear literally inside of XML text
  XML_ESCAPES = { '&' => '&amp;', '<' => '&lt;', '>' => '&gt;' }

  # See to_string for allowed data types. <em>data_content</em> is an array
  # of columns, each of them starting with the column name:
  # [[name0, val0_0, ...], [name1, val1_0, ...], ...]
  # <em>data_type</em> is used as name of the XML root element.
  def initialize(data_type, data_content)
    @line_sep  = "\n"
    @data_type = data_type
    @content   = data_content
  end

  # Optional string types are:
  # * csv, separation by ','
  # * ssv, separation by ';'
  # * tsv or txt, separation by '\t'
  # * psv, separation by '|'
  # * tex, separation by ' & ', each line closed by ' \\'
  # * xml, see to_xml (also used if string_type is nil or empty)
  # The first line holds the column names. <em>enc</em> is not used at the
  # moment.
  #
  # Raises a RuntimeError for any other string type.
  def to_string(string_type, enc="en")
    string_type = "xml" if string_type.nil? or string_type.empty?
    return to_xml if string_type == "xml"

    sep = SEPARATORS[string_type]
    if sep.nil?
      raise "Wrong type! Use xml, tex, csv, ssv, psv, txt or tsv instead."
    end
    line_end = (string_type == "tex") ? ' \\\\' + @line_sep : @line_sep

    out = ''.dup
    @content.transpose.each { |data_set|
      out << data_set.join(sep) << line_end
    }
    out
  end

  # XML-Documents must be treated separately: tags are named like the
  # attributes. Every dataset becomes a <record> element; '&', '<' and '>'
  # inside of values are escaped.
  # NOTE(review): the declaration always names ISO-8859-1, whatever the
  # encoding of the values is - confirm that this is intended.
  def to_xml
    xml = "<?xml version='1.0' encoding='ISO-8859-1'?>\n".dup
    xml << "<" + @data_type + ">\n"
    output_array = @content.transpose
    tags = output_array.first
    data = output_array[1..-1] || []
    data.each { |values|
      xml << " <record>\n"
      values.each_with_index { |value, i|
        text = value.to_s.gsub(/[&<>]/, XML_ESCAPES)
        xml << " <#{tags[i]}>#{text}</#{tags[i]}>\n"
      }
      xml << " </record>\n"
    }
    xml << "</" + @data_type + ">"
    xml
  end

  # Create files of types, that are allowed by ExtCsvExporter.to_string.
  # Without a filetype the extension of the file name is used. If file is
  # an existing directory, DEFAULT_FILENAME inside of it is written.
  # Missing directories are created.
  def to_file(file, filetype=nil)
    # Loaded here on purpose, so that this method works on its own.
    require 'fileutils'

    target = File.directory?(file) ? File.join(file, DEFAULT_FILENAME) : file
    # Create the output directory
    dir = File.dirname(File.expand_path(target))
    FileUtils.mkdir_p(dir) unless File.directory?(dir)

    filetype = File.extname(target)[1..-1] if filetype.nil? or filetype.empty?
    File.open(target, "w") { |f|
      f << to_string(filetype)
    }
  end
end