rbbt-util 4.0.2 → 4.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,7 +1,7 @@
1
1
  require 'rbbt/util/cmd'
2
2
  module TSV
3
3
  class Parser
4
- attr_accessor :header_hash, :sep, :sep2, :type, :key_position, :field_positions, :cast, :key_field, :fields, :fix, :select, :serializer, :straight
4
+ attr_accessor :header_hash, :sep, :sep2, :type, :key_position, :field_positions, :cast, :key_field, :fields, :fix, :select, :serializer, :straight, :take_all, :zipped
5
5
 
6
6
  class SKIP_LINE < Exception; end
7
7
  class END_PARSING < Exception; end
@@ -63,23 +63,64 @@ module TSV
63
63
  end
64
64
 
65
65
  def get_values_single(parts)
66
- return parts.shift, parts.first if field_positions.nil?
66
+ return parts.shift, parts.first if field_positions.nil? and key_position.nil?
67
67
  key = parts[key_position]
68
- value = parts[field_positions.first]
68
+ value = parts[field_positions.nil? ? 0 : field_positions.first]
69
69
  [key, value]
70
70
  end
71
71
 
72
72
  def get_values_list(parts)
73
- return parts.shift, parts if field_positions.nil?
73
+ return parts.shift, parts if field_positions.nil? and key_position.nil?
74
74
  key = parts[key_position]
75
- values = parts.values_at *field_positions
75
+
76
+ values = if field_positions.nil?
77
+ parts.tap{|o| o.delete_at key_position}
78
+ else
79
+ parts.values_at *field_positions
80
+ end
81
+
76
82
  [key, values]
77
83
  end
78
84
 
79
85
  def get_values_double(parts)
80
- return parts.shift.split(@sep2, -1), parts.collect{|value| value.split(@sep2, -1)} if field_positions.nil?
86
+ return parts.shift.split(@sep2, -1), parts.collect{|value| value.split(@sep2, -1)} if field_positions.nil? and key_position.nil?
87
+ keys = parts[key_position].split(@sep2, -1)
88
+ values = if field_positions.nil?
89
+ parts.tap{|o| o.delete_at key_position}
90
+ else
91
+ parts.values_at *field_positions
92
+ end.collect{|value| value.split(@sep2, -1)}
93
+ [keys, values]
94
+ end
95
+
96
# Splits one parsed row into keys and a value for the inverted flat layout:
# the first element of +parts+ is taken as the single value and every
# remaining element is treated as a key.
#
# parts - Array holding the row's fields; it is modified in place by +shift+.
#
# Returns a two-element Array: [keys, [value]].
+ def get_values_flat_inverse(parts)
97
+ value = parts.shift
98
# After the shift, +parts+ only holds the remaining fields, used as keys.
+ keys = parts
99
+ [keys, [value]]
100
+ end
101
+
102
+ def get_values_flat(parts)
103
+ if key_position and key_position != 0 and field_positions.nil?
104
+ value = parts.shift
105
+ keys = parts
106
+ return [keys, [value]]
107
+ end
108
+
109
+ return parts.shift.split(@sep2, -1), parts.collect{|value| value.split(@sep2, -1)} if
110
+ field_positions.nil? and (key_position.nil? or key_position == 0)
111
+
81
112
  keys = parts[key_position].split(@sep2, -1)
82
- values = parts.values_at(*field_positions).collect{|value| value.split(@sep2, -1)}
113
+
114
+ if @take_all
115
+ values = parts.collect{|value| value.split(@sep2, -1)}
116
+ else
117
+
118
+ values = if field_positions.nil?
119
+ parts.tap{|o| o.delete_at key_position}
120
+ else
121
+ parts.values_at *field_positions
122
+ end.collect{|value| value.split(@sep2, -1)}
123
+ end
83
124
  [keys, values]
84
125
  end
85
126
 
@@ -110,18 +151,52 @@ module TSV
110
151
  end
111
152
 
112
153
  def add_to_data_merge(data, keys, values)
113
- keys.each do |key|
154
+ keys.uniq.each do |key|
155
+ if data.include? key
156
+ #data[key] = data[key].zip(values).collect do |old, new|
157
+ # old.concat new
158
+ # old
159
+ #end
160
+ new = data[key]
161
+ new.each_with_index do |old, i|
162
+ old.concat values[i]
163
+ end
164
+ data[key] = new
165
+ else
166
+ data[key] = values
167
+ end
168
+ end
169
+ end
170
+
171
+ def add_to_data_merge_zipped(data, keys, values)
172
+ num = keys.length
173
+ values = values.collect{|v| v.length != num ? [v.first] * num : v}
174
+ all = values.unshift keys
175
+ Misc.zip_fields(all).each do |values|
176
+ key = values.shift
114
177
  if data.include? key
115
178
  data[key] = data[key].zip(values).collect do |old, new|
116
- old.concat new
179
+ old.push new
117
180
  old
118
181
  end
119
182
  else
120
- data[key] = values
183
+ data[key] = values.collect{|v| [v]}
121
184
  end
122
185
  end
123
186
  end
124
187
 
188
# Stores "zipped" values into +data+, one entry per key, without overwriting
# keys that are already present.
#
# data   - store being filled; only +include?+ and +[]=+ are used on it here.
# keys   - Array of the keys found on the row.
# values - Array of value lists, one list per field.
#
# A value list whose length differs from the number of keys is replaced by
# its first element repeated once per key, so every list lines up with +keys+.
# NOTE(review): Misc.zip_fields is defined elsewhere; presumably it transposes
# the lists so each yielded row is [key, value_1, value_2, ...] - confirm.
+ def add_to_data_zipped(data, keys, values)
189
+ num = keys.length
190
+ values = values.collect{|v| v.length != num ? [v.first] * num : v}
191
# Prepend the keys so that each zipped row starts with its own key.
+ all = values.unshift keys
192
+ Misc.zip_fields(all).each do |values|
193
+ key = values.shift
194
# First occurrence wins: keys already stored are left untouched.
+ next if data.include? key
195
# Each remaining value is wrapped in its own one-element Array.
+ data[key] = values.collect{|v| [v]}
196
+ end
197
+ end
198
+
199
+
125
200
  def cast_values_single(value)
126
201
  case
127
202
  when Symbol === cast
@@ -157,6 +232,7 @@ module TSV
157
232
  key_field = Misc.process_options options, :key_field
158
233
  fields = Misc.process_options options, :fields
159
234
 
235
+
160
236
  if (key_field.nil? or key_field == 0 or key_field == :key) and
161
237
  (fields.nil? or fields == @fields or (not @fields.nil? and fields == (1..@fields.length).to_a))
162
238
 
@@ -166,7 +242,7 @@ module TSV
166
242
  @straight = false
167
243
 
168
244
  case
169
- when (key_field.nil? or key_field == @key_field or key_field == 0)
245
+ when (key_field.nil? or (not Integer === key_field and @key_field.nil?) or key_field == @key_field or key_field == 0)
170
246
  @key_position = 0
171
247
  when Integer === key_field
172
248
  @key_position = key_field
@@ -176,9 +252,11 @@ module TSV
176
252
  raise "Format of key_field not understood: #{key_field.inspect}"
177
253
  end
178
254
 
179
- if (fields.nil? or fields == @fields or (not @fields.nil? and fields == (1..@fields.length).to_a))
180
- @field_positions = (0..@fields.length).to_a
181
- @field_positions.delete @key_position
255
+ if (fields.nil? or (not (Array === fields and Integer === fields.first) and @fields.nil?) or fields == @fields or (not @fields.nil? and fields == (1..@fields.length).to_a))
256
+ if not @fields.nil? and type != :flat
257
+ @field_positions = (0..@fields.length).to_a
258
+ @field_positions.delete @key_position
259
+ end
182
260
  else
183
261
  fields = [fields] if not Array === fields
184
262
  @field_positions = fields.collect{|field|
@@ -196,8 +274,12 @@ module TSV
196
274
  end
197
275
 
198
276
  new_key_field = @fields.dup.unshift(@key_field)[@key_position] if not @fields.nil?
199
- @fields = @fields.dup.unshift(@key_field).values_at *@field_positions if not @fields.nil?
200
- @key_field = new_key_field
277
+ @fields = @fields.dup.unshift(@key_field).values_at *@field_positions if not @fields.nil? and not @field_positions.nil?
278
+ @fields ||= fields if Array === fields and String === fields.first
279
+ @fields = [@key_field] if new_key_field != @key_field and type == :flat and @field_positions.nil?
280
+ @key_field = new_key_field
281
+ @key_field ||= key_field if String === key_field
282
+
201
283
  end
202
284
  end
203
285
 
@@ -215,13 +297,19 @@ module TSV
215
297
  @type ||= Misc.process_options options, :type
216
298
  @fix = Misc.process_options(options, :fix)
217
299
  @select= Misc.process_options options, :select
300
+ @zipped = Misc.process_options options, :zipped
218
301
 
219
302
  case @type
220
303
  when :double
221
304
  self.instance_eval do alias get_values get_values_double end
222
305
  self.instance_eval do alias cast_values cast_values_double end
223
- if merge
224
- self.instance_eval do alias add_to_data add_to_data_merge end
306
+ case
307
+ when (merge and not zipped)
308
+ self.instance_eval do alias add_to_data add_to_data_merge end
309
+ when (merge and zipped)
310
+ self.instance_eval do alias add_to_data add_to_data_merge_zipped end
311
+ when zipped
312
+ self.instance_eval do alias add_to_data add_to_data_zipped end
225
313
  else
226
314
  self.instance_eval do alias add_to_data add_to_data_no_merge_double end
227
315
  end
@@ -234,7 +322,8 @@ module TSV
234
322
  self.instance_eval do alias cast_values cast_values_list end
235
323
  self.instance_eval do alias add_to_data add_to_data_no_merge_list end
236
324
  when :flat
237
- self.instance_eval do alias get_values get_values_double end
325
+ @take_all = true if options[:fields].nil?
326
+ self.instance_eval do alias get_values get_values_flat end
238
327
  self.instance_eval do alias cast_values cast_values_double end
239
328
  if merge
240
329
  self.instance_eval do alias add_to_data add_to_data_flat_merge end
@@ -243,9 +332,10 @@ module TSV
243
332
  end
244
333
  end
245
334
 
335
+ fields = options[:fields]
246
336
  fix_fields(options)
247
337
 
248
- @straight = false if @sep != "\t" or not @cast.nil? or merge
338
+ @straight = false if @sep != "\t" or not @cast.nil? or merge or (@type == :flat and fields)
249
339
  end
250
340
 
251
341
  def setup(data)
@@ -14,6 +14,11 @@ module TSV
14
14
  def self.load(str); str.unpack("l*"); end
15
15
  end
16
16
 
17
# Serializer for arrays of floats: converts an Array to and from a binary
# String using the "d*" pack directive (double-precision floats, native
# format), so dumped strings are only portable between compatible platforms.
+ class FloatArraySerializer
18
# Packs the Array +a+ into a binary String.
+ def self.dump(a); a.pack("d*"); end
19
# Unpacks the binary String +str+ back into an Array of Floats.
+ def self.load(str); str.unpack("d*"); end
20
+ end
21
+
17
22
  class StringSerializer
18
23
  def self.dump(str); str.to_s; end
19
24
  def self.load(str); str; end
@@ -66,6 +71,7 @@ module TSV
66
71
  :integer => IntegerSerializer,
67
72
  :float => FloatSerializer,
68
73
  :integer_array => IntegerArraySerializer,
74
+ :float_array => FloatArraySerializer,
69
75
  :marshal => Marshal,
70
76
  :single => StringSerializer,
71
77
  :string => StringSerializer,
data/lib/rbbt/tsv/util.rb CHANGED
@@ -23,6 +23,38 @@ module TSV
23
23
 
24
24
  counts
25
25
  end
26
+
27
# Counts, for each field of a TSV, how many of its entries match any of the
# given +values+.
#
# file    - a TSV object or something TSV.open accepts.
# values  - list of strings to look for; joined with newlines into a pattern
#           file for grep.
# options - persistence options; :persist_prefix defaults to "Field_Matches".
#
# The TSV is first flattened into a persisted text with one "name<TAB>field"
# line per non-empty entry. That text is then filtered with
# `grep -w -F -f` (whole-word, fixed-string matches), reduced to the field
# column and tallied with `sort | uniq -c`; the tally is loaded as a :single
# TSV cast with :to_i, using column 1 (the field name) as key.
#
# Returns that TSV, or a TSV built from {nil => 0} if loading the command
# output raises.
#
# NOTE(review): +delete_if+ below removes empty names from the lists yielded
# by +through+ in place - confirm those lists are not shared with +file+.
# NOTE(review): +path+ and +value_file+ are interpolated inside single quotes
# in the shell command; a path containing a single quote would break it.
# NOTE(review): the bare +rescue+ hides every StandardError raised by the
# pipeline, not only the "no matches" case - confirm that is intended.
+ def self.field_match_counts(file, values, options = {})
28
+ options = Misc.add_defaults options, :persist_prefix => "Field_Matches"
29
+ persist_options = Misc.pull_keys options, :persist
30
+
31
+ filename = TSV === file ? file.filename : file
32
# The block's result is persisted; only the persistence path is used below.
+ text = Persist.persist filename, :string, persist_options do
33
+ tsv = TSV === file ? file : TSV.open(file)
34
+
35
+ text = ""
36
+ fields = nil
37
+ tsv.tap{|e| e.unnamed = true; fields = e.fields}.through do |gene, names|
38
+ names.zip(fields).each do |list, format|
39
+ list.delete_if do |name| name.empty? end
40
+ next if list.empty?
41
+ text << list.collect{|name| [name, format] * "\t"} * "\n" << "\n"
42
+ end
43
+ end
44
+ text
45
+ end
46
+
47
+ path = Persist.persistence_path(filename, persist_options)
48
+ TmpFile.with_file(values * "\n") do |value_file|
49
+ cmd = "cat '#{ path }' | grep -w -F -f '#{ value_file }' |cut -f 2 |sort|uniq -c|sed 's/^ *//;s/ /\\t/'"
50
+ begin
51
+ TSV.open(CMD.cmd(cmd), :key_field => 1, :type => :single, :cast => :to_i)
52
+ rescue
53
+ TSV.setup({nil => 0}, :type => :single, :cast => :to_i)
54
+ end
55
+ end
56
+ end
57
+
26
58
  def self.get_filename(file)
27
59
  case
28
60
  when String === file
@@ -55,7 +87,9 @@ module TSV
55
87
  when (field.nil? or field == :key or key_field == field)
56
88
  :key
57
89
  when String === field
58
- fields.index field
90
+ pos = fields.index field
91
+ Log.medium "Field #{ field } was not found. Options: #{fields * ", "}" if pos.nil?
92
+ pos
59
93
  end
60
94
  end
61
95
 
@@ -1,4 +1,5 @@
1
1
  require 'rbbt/util/log'
2
+ require 'set'
2
3
 
3
4
  module ChainMethods
4
5
  def self.chain_methods_extended(base)
@@ -19,18 +20,31 @@ module ChainMethods
19
20
 
20
21
  def setup_chains(base)
21
22
  raise "No prefix specified for #{self.to_s}" if self.chain_prefix.nil? or (String === self.chain_prefix and self.chain_prefix.empty?)
22
- #methods = self.instance_methods.select{|method| method =~ /^#{self.chain_prefix}/}
23
23
  methods = self.chained_methods
24
24
 
25
25
  return if methods.empty?
26
26
 
27
27
  prefix = self.chain_prefix
28
28
 
29
- new_method = methods.first
30
- original = new_method.sub(prefix.to_s + '_', '')
31
- first_clean_method = prefix.to_s + '_clean_' + original
29
+ #do_chain = true
30
+ #methods.collect{|new_method|
31
+ # original = new_method.sub(prefix.to_s + '_', '')
32
+ # clean = prefix.to_s + '_clean_' + original
33
+ # if base.respond_to? clean
34
+ # do_chain = false
35
+ # break
36
+ # end
37
+ #}
38
+
39
+
40
+ if not base.respond_to?(:processed_chains) or base.processed_chains.nil? or not base.processed_chains.include? prefix
41
+ class << base
42
+ attr_accessor :processed_chains
43
+ end if not base.respond_to? :processed_chains
44
+
45
+ base.processed_chains = Set.new if base.processed_chains.nil?
46
+ base.processed_chains << prefix
32
47
 
33
- if not base.respond_to? first_clean_method
34
48
  class << base; self; end.module_eval do
35
49
  methods.each do |new_method|
36
50
  original = new_method.sub(prefix.to_s + '_', '')
@@ -39,6 +53,10 @@ module ChainMethods
39
53
  original = "[]" if original == "get_brackets"
40
54
  original = "[]=" if original == "set_brackets"
41
55
 
56
+ if base.respond_to? clean_method
57
+ raise "Method already defined: #{clean_method}. #{ prefix }"
58
+ end
59
+
42
60
  begin
43
61
  alias_method clean_method, original
44
62
  rescue
@@ -52,10 +70,10 @@ module ChainMethods
52
70
 
53
71
  if not metaclass.respond_to? :extended
54
72
  metaclass.module_eval do
55
- alias prev_chain_methods_extended extended
73
+ alias prev_chain_methods_extended extended if methods.include? "extended"
56
74
 
57
75
  def extended(base)
58
- prev_chain_methods_extended(base)
76
+ prev_chain_methods_extended(base) if methods.include? "prev_chain_methods_extended"
59
77
  setup_chains(base)
60
78
  end
61
79
  end
@@ -68,7 +86,4 @@ module ChainMethods
68
86
  def self.extended(base)
69
87
  chain_methods_extended(base)
70
88
  end
71
-
72
-
73
-
74
89
  end
@@ -2,10 +2,54 @@ require 'lockfile'
2
2
  require 'rbbt/util/chain_methods'
3
3
  require 'rbbt/resource/path'
4
4
  require 'rbbt/annotations'
5
+ require 'net/smtp'
5
6
 
6
7
  module Misc
7
8
  class FieldNotFoundError < StandardError;end
8
9
 
10
# Folds +list+ into a single object by concatenating every later element
# onto the first one with +concat+.
#
# list - enumerable of objects responding to +concat+ (e.g. Arrays).
#
# Returns the accumulated object, or nil when +list+ is empty.
#
# NOTE(review): the accumulator IS the first element of +list+ (no copy is
# made), so for Arrays/Strings that first element is modified in place.
+ def self.consolidate(list)
11
+ list.inject(nil){|acc,e|
12
+ if acc.nil?
13
# First iteration: the first element becomes the accumulator.
+ acc = e
14
+ else
15
+ acc.concat e
16
+ acc
17
+ end
18
+ }
19
+ end
20
+
21
# Builds a Hash of named inputs from positional +values+, matched against
# +keys+ through Misc.zip2hash.
# NOTE(review): Misc.zip2hash is defined elsewhere; presumably it pairs each
# key with the value at the same position - confirm.
#
# keys   - list of input names.
# values - positional values; when the last one is a Hash it is taken as
#          extra named values instead of a positional value.
#
# With a trailing Hash: positional entries that are nil are dropped, the
# extra Hash is applied through Misc.add_defaults, and finally every entry
# whose key is not listed in +keys+ is removed.
#
# Returns the resulting Hash.
+ def self.positional2hash(keys, *values)
22
+ if Hash === values.last
23
+ extra = values.pop
24
+ inputs = Misc.zip2hash(keys, values)
25
# Drop unset positionals so the extra Hash can supply them.
+ inputs.delete_if{|k,v| v.nil?}
26
+ inputs = Misc.add_defaults inputs, extra
27
+ inputs.delete_if{|k,v| not keys.include? k}
28
+ else
29
+ Misc.zip2hash(keys, values)
30
+ end
31
+ end
32
+
33
# Sends a plain-text email through Net::SMTP.
#
# from, to - sender and recipient addresses, used both in the headers and in
#            the SMTP envelope.
# subject  - text of the Subject header.
# message  - message body.
# options  - :server (default 'localhost'), :port (default 25), :user, :pass,
#            :auth (default :login), :from_alias and :to_alias (display names
#            placed before the addresses in the From/To headers).
#
# The message is assembled in a heredoc (From, To, Subject, blank line, body)
# and delivered with +send_message+ inside a Net::SMTP.start block.
# The third argument given to Net::SMTP.start is the HELO domain; +server+ is
# passed for it as well as for the address.
# NOTE(review): +subject+, the aliases and the addresses are interpolated
# into the headers as-is - confirm callers never pass values with newlines.
+ def self.send_email(from, to, subject, message, options = {})
34
+ IndiferentHash.setup(options)
35
+ options = Misc.add_defaults options, :from_alias => nil, :to_alias => nil, :server => 'localhost', :port => 25, :user => nil, :pass => nil, :auth => :login
36
+ IndiferentHash.setup(options)
37
+
38
+ server, port, user, pass, from_alias, to_alias, auth = Misc.process_options options, :server, :port, :user, :pass, :from_alias, :to_alias, :auth
39
+
40
+ msg = <<-END_OF_MESSAGE
41
+ From: #{from_alias} <#{from}>
42
+ To: #{to_alias} <#{to}>
43
+ Subject: #{subject}
44
+
45
+ #{message}
46
+ END_OF_MESSAGE
47
+
48
+ Net::SMTP.start(server, port, server, user, pass, auth) do |smtp|
49
+ smtp.send_message msg, from, to
50
+ end
51
+ end
52
+
9
53
  def self.counts(array)
10
54
  counts = Hash.new 0
11
55
  array.each do |e|
@@ -13,7 +57,7 @@ module Misc
13
57
  end
14
58
  counts
15
59
  end
16
-
60
+
17
61
  IUPAC2BASE = {
18
62
  "A" => ["A"],
19
63
  "C" => ["C"],
@@ -55,19 +99,16 @@ module Misc
55
99
  e1, e2 = a1.shift, a2.shift
56
100
  intersect = []
57
101
  while true
58
- case
59
- when (e1 and e2)
60
- case e1 <=> e2
61
- when 0
62
- intersect << e1
63
- e1, e2 = a1.shift, a2.shift
64
- when -1
65
- e1 = a1.shift
66
- when 1
67
- e2 = a2.shift
68
- end
69
- else
70
- break
102
+ break if e1.nil? or e2.nil?
103
+ case e1 <=> e2
104
+ when 0
105
+ intersect << e1
106
+ e1, e2 = a1.shift, a2.shift
107
+ when -1
108
+ e1 = a1.shift while not e1.nil? and e1 < e2
109
+ when 1
110
+ e2 = a2.shift
111
+ e2 = a2.shift while not e2.nil? and e2 < e1
71
112
  end
72
113
  end
73
114
  intersect
@@ -167,6 +208,22 @@ module Misc
167
208
  res
168
209
  end
169
210
 
211
# Runs the given block under Memprof and prints the collected statistics.
#
# The 'memprof' library is required inside the method, so it is only loaded
# when this method is actually called.
#
# Returns the value of the block. Any exception raised by the block
# (including non-StandardError ones) is reported with "Profiling aborted" and
# re-raised; the profiler is stopped and its stats printed in either case.
+ def self.memprof
212
+ require 'memprof'
213
+ Memprof.start
214
+ begin
215
+ res = yield
216
+ rescue Exception
217
+ puts "Profiling aborted"
218
+ raise $!
219
+ ensure
220
# Always stop profiling and dump the stats, even when the block failed.
+ Memprof.stop
221
+ print Memprof.stats
222
+ end
223
+
224
+ res
225
+ end
226
+
170
227
  def self.insist(times = 3)
171
228
  try = 0
172
229
  begin
@@ -228,6 +285,8 @@ module Misc
228
285
  FileUtils.mkdir_p dir unless File.exists? dir
229
286
  FileUtils.cd dir
230
287
  res = yield
288
+ rescue
289
+ raise $!
231
290
  ensure
232
291
  FileUtils.cd old_pwd
233
292
  end
@@ -271,8 +330,9 @@ module Misc
271
330
  else
272
331
  raise "Format of '#{options.inspect}' not understood. It should be a hash"
273
332
  end
333
+
274
334
  defaults.each do |key, value|
275
- next unless new_options[key].nil?
335
+ next if options.include? key
276
336
 
277
337
  new_options[key] = value
278
338
  end
@@ -419,12 +479,15 @@ end
419
479
 
420
480
  module NamedArray
421
481
  extend ChainMethods
482
+
422
483
  self.chain_prefix = :named_array
423
484
  attr_accessor :fields
485
+ attr_accessor :key
424
486
 
425
- def self.setup(array, fields)
487
+ def self.setup(array, fields, key = nil)
426
488
  array.extend NamedArray
427
489
  array.fields = fields
490
+ array.key = key
428
491
  array
429
492
  end
430
493
 
@@ -455,11 +518,13 @@ module NamedArray
455
518
  if defined? Entity
456
519
  entity = (defined?(Entity) and Entity.respond_to?(:formats)) ? Entity.formats[key] : nil
457
520
  if entity
458
- if entity.annotations.first == :format
459
- entity.setup(named_array_clean_get_brackets(Misc.field_position(fields, key)), key)
460
- else
461
- entity.setup(named_array_clean_get_brackets(Misc.field_position(fields, key)))
462
- end
521
+ elem = if entity.annotations.first == :format
522
+ entity.setup(named_array_clean_get_brackets(Misc.field_position(fields, key)), key)
523
+ else
524
+ entity.setup(named_array_clean_get_brackets(Misc.field_position(fields, key)))
525
+ end
526
+ elem.context = self
527
+ elem
463
528
  else
464
529
  named_array_clean_get_brackets(Misc.field_position(fields, key))
465
530
  end
@@ -469,18 +534,21 @@ module NamedArray
469
534
  end
470
535
 
471
536
  def named_array_each(&block)
472
- if defined?(Entity) and not fields.nil? and not fields.empty?
473
- fields.zip(self) do |field,elem|
537
+ if defined?(Entity) and not @fields.nil? and not @fields.empty?
538
+ @fields.zip(self).each do |field,elem|
474
539
  entity = (defined?(Entity) and Entity.respond_to?(:formats)) ? Entity.formats[field] : nil
540
+
475
541
  if entity
476
542
  elem = elem.dup if elem.frozen?
477
543
  if entity.annotations.first == :format
478
- elem = entity.setup(elem, field)
544
+ entity.setup(elem, field)
479
545
  else
480
- elem = entity.setup(elem)
546
+ entity.setup(elem)
481
547
  end
482
- else
548
+ elem.context = self
549
+ elem
483
550
  end
551
+
484
552
  yield(elem)
485
553
  elem
486
554
  end
@@ -489,6 +557,20 @@ module NamedArray
489
557
  end
490
558
  end
491
559
 
560
# Collects the elements produced by +named_array_each+ into a new Array.
#
# With a block, each element is passed to the block and the block's result
# is stored; without a block the elements themselves are stored.
#
# Returns the new Array.
+ def named_array_collect
561
+ res = []
562
+
563
# Iterate through named_array_each so elements are the ones it yields.
+ named_array_each do |elem|
564
+ if block_given?
565
+ res << yield(elem)
566
+ else
567
+ res << elem
568
+ end
569
+ end
570
+
571
+ res
572
+ end
573
+
492
574
  def named_array_set_brackets(key,value)
493
575
  named_array_clean_set_brackets(Misc.field_position(fields, key), value)
494
576
  end
@@ -517,7 +599,7 @@ module NamedArray
517
599
 
518
600
  def report
519
601
  fields.zip(self).collect do |field,value|
520
- "\nAttributes:\n* #{ field }: #{ Array === value ? value * "|" : value }"
602
+ "#{ field }: #{ Array === value ? value * "|" : value }"
521
603
  end * "\n"
522
604
  end
523
605