rbbt-util 4.0.2 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  require 'rbbt/util/cmd'
2
2
  module TSV
3
3
  class Parser
4
- attr_accessor :header_hash, :sep, :sep2, :type, :key_position, :field_positions, :cast, :key_field, :fields, :fix, :select, :serializer, :straight
4
+ attr_accessor :header_hash, :sep, :sep2, :type, :key_position, :field_positions, :cast, :key_field, :fields, :fix, :select, :serializer, :straight, :take_all, :zipped
5
5
 
6
6
  class SKIP_LINE < Exception; end
7
7
  class END_PARSING < Exception; end
@@ -63,23 +63,64 @@ module TSV
63
63
  end
64
64
 
65
65
  def get_values_single(parts)
66
- return parts.shift, parts.first if field_positions.nil?
66
+ return parts.shift, parts.first if field_positions.nil? and key_position.nil?
67
67
  key = parts[key_position]
68
- value = parts[field_positions.first]
68
+ value = parts[field_positions.nil? ? 0 : field_positions.first]
69
69
  [key, value]
70
70
  end
71
71
 
72
72
  def get_values_list(parts)
73
- return parts.shift, parts if field_positions.nil?
73
+ return parts.shift, parts if field_positions.nil? and key_position.nil?
74
74
  key = parts[key_position]
75
- values = parts.values_at *field_positions
75
+
76
+ values = if field_positions.nil?
77
+ parts.tap{|o| o.delete_at key_position}
78
+ else
79
+ parts.values_at *field_positions
80
+ end
81
+
76
82
  [key, values]
77
83
  end
78
84
 
79
85
  def get_values_double(parts)
80
- return parts.shift.split(@sep2, -1), parts.collect{|value| value.split(@sep2, -1)} if field_positions.nil?
86
+ return parts.shift.split(@sep2, -1), parts.collect{|value| value.split(@sep2, -1)} if field_positions.nil? and key_position.nil?
87
+ keys = parts[key_position].split(@sep2, -1)
88
+ values = if field_positions.nil?
89
+ parts.tap{|o| o.delete_at key_position}
90
+ else
91
+ parts.values_at *field_positions
92
+ end.collect{|value| value.split(@sep2, -1)}
93
+ [keys, values]
94
+ end
95
+
96
+ def get_values_flat_inverse(parts)
97
+ value = parts.shift
98
+ keys = parts
99
+ [keys, [value]]
100
+ end
101
+
102
+ def get_values_flat(parts)
103
+ if key_position and key_position != 0 and field_positions.nil?
104
+ value = parts.shift
105
+ keys = parts
106
+ return [keys, [value]]
107
+ end
108
+
109
+ return parts.shift.split(@sep2, -1), parts.collect{|value| value.split(@sep2, -1)} if
110
+ field_positions.nil? and (key_position.nil? or key_position == 0)
111
+
81
112
  keys = parts[key_position].split(@sep2, -1)
82
- values = parts.values_at(*field_positions).collect{|value| value.split(@sep2, -1)}
113
+
114
+ if @take_all
115
+ values = parts.collect{|value| value.split(@sep2, -1)}
116
+ else
117
+
118
+ values = if field_positions.nil?
119
+ parts.tap{|o| o.delete_at key_position}
120
+ else
121
+ parts.values_at *field_positions
122
+ end.collect{|value| value.split(@sep2, -1)}
123
+ end
83
124
  [keys, values]
84
125
  end
85
126
 
@@ -110,18 +151,52 @@ module TSV
110
151
  end
111
152
 
112
153
  def add_to_data_merge(data, keys, values)
113
- keys.each do |key|
154
+ keys.uniq.each do |key|
155
+ if data.include? key
156
+ #data[key] = data[key].zip(values).collect do |old, new|
157
+ # old.concat new
158
+ # old
159
+ #end
160
+ new = data[key]
161
+ new.each_with_index do |old, i|
162
+ old.concat values[i]
163
+ end
164
+ data[key] = new
165
+ else
166
+ data[key] = values
167
+ end
168
+ end
169
+ end
170
+
171
+ def add_to_data_merge_zipped(data, keys, values)
172
+ num = keys.length
173
+ values = values.collect{|v| v.length != num ? [v.first] * num : v}
174
+ all = values.unshift keys
175
+ Misc.zip_fields(all).each do |values|
176
+ key = values.shift
114
177
  if data.include? key
115
178
  data[key] = data[key].zip(values).collect do |old, new|
116
- old.concat new
179
+ old.push new
117
180
  old
118
181
  end
119
182
  else
120
- data[key] = values
183
+ data[key] = values.collect{|v| [v]}
121
184
  end
122
185
  end
123
186
  end
124
187
 
188
+ def add_to_data_zipped(data, keys, values)
189
+ num = keys.length
190
+ values = values.collect{|v| v.length != num ? [v.first] * num : v}
191
+ all = values.unshift keys
192
+ Misc.zip_fields(all).each do |values|
193
+ key = values.shift
194
+ next if data.include? key
195
+ data[key] = values.collect{|v| [v]}
196
+ end
197
+ end
198
+
199
+
125
200
  def cast_values_single(value)
126
201
  case
127
202
  when Symbol === cast
@@ -157,6 +232,7 @@ module TSV
157
232
  key_field = Misc.process_options options, :key_field
158
233
  fields = Misc.process_options options, :fields
159
234
 
235
+
160
236
  if (key_field.nil? or key_field == 0 or key_field == :key) and
161
237
  (fields.nil? or fields == @fields or (not @fields.nil? and fields == (1..@fields.length).to_a))
162
238
 
@@ -166,7 +242,7 @@ module TSV
166
242
  @straight = false
167
243
 
168
244
  case
169
- when (key_field.nil? or key_field == @key_field or key_field == 0)
245
+ when (key_field.nil? or (not Integer === key_field and @key_field.nil?) or key_field == @key_field or key_field == 0)
170
246
  @key_position = 0
171
247
  when Integer === key_field
172
248
  @key_position = key_field
@@ -176,9 +252,11 @@ module TSV
176
252
  raise "Format of key_field not understood: #{key_field.inspect}"
177
253
  end
178
254
 
179
- if (fields.nil? or fields == @fields or (not @fields.nil? and fields == (1..@fields.length).to_a))
180
- @field_positions = (0..@fields.length).to_a
181
- @field_positions.delete @key_position
255
+ if (fields.nil? or (not (Array === fields and Integer === fields.first) and @fields.nil?) or fields == @fields or (not @fields.nil? and fields == (1..@fields.length).to_a))
256
+ if not @fields.nil? and type != :flat
257
+ @field_positions = (0..@fields.length).to_a
258
+ @field_positions.delete @key_position
259
+ end
182
260
  else
183
261
  fields = [fields] if not Array === fields
184
262
  @field_positions = fields.collect{|field|
@@ -196,8 +274,12 @@ module TSV
196
274
  end
197
275
 
198
276
  new_key_field = @fields.dup.unshift(@key_field)[@key_position] if not @fields.nil?
199
- @fields = @fields.dup.unshift(@key_field).values_at *@field_positions if not @fields.nil?
200
- @key_field = new_key_field
277
+ @fields = @fields.dup.unshift(@key_field).values_at *@field_positions if not @fields.nil? and not @field_positions.nil?
278
+ @fields ||= fields if Array === fields and String === fields.first
279
+ @fields = [@key_field] if new_key_field != @key_field and type == :flat and @field_positions.nil?
280
+ @key_field = new_key_field
281
+ @key_field ||= key_field if String === key_field
282
+
201
283
  end
202
284
  end
203
285
 
@@ -215,13 +297,19 @@ module TSV
215
297
  @type ||= Misc.process_options options, :type
216
298
  @fix = Misc.process_options(options, :fix)
217
299
  @select= Misc.process_options options, :select
300
+ @zipped = Misc.process_options options, :zipped
218
301
 
219
302
  case @type
220
303
  when :double
221
304
  self.instance_eval do alias get_values get_values_double end
222
305
  self.instance_eval do alias cast_values cast_values_double end
223
- if merge
224
- self.instance_eval do alias add_to_data add_to_data_merge end
306
+ case
307
+ when (merge and not zipped)
308
+ self.instance_eval do alias add_to_data add_to_data_merge end
309
+ when (merge and zipped)
310
+ self.instance_eval do alias add_to_data add_to_data_merge_zipped end
311
+ when zipped
312
+ self.instance_eval do alias add_to_data add_to_data_zipped end
225
313
  else
226
314
  self.instance_eval do alias add_to_data add_to_data_no_merge_double end
227
315
  end
@@ -234,7 +322,8 @@ module TSV
234
322
  self.instance_eval do alias cast_values cast_values_list end
235
323
  self.instance_eval do alias add_to_data add_to_data_no_merge_list end
236
324
  when :flat
237
- self.instance_eval do alias get_values get_values_double end
325
+ @take_all = true if options[:fields].nil?
326
+ self.instance_eval do alias get_values get_values_flat end
238
327
  self.instance_eval do alias cast_values cast_values_double end
239
328
  if merge
240
329
  self.instance_eval do alias add_to_data add_to_data_flat_merge end
@@ -243,9 +332,10 @@ module TSV
243
332
  end
244
333
  end
245
334
 
335
+ fields = options[:fields]
246
336
  fix_fields(options)
247
337
 
248
- @straight = false if @sep != "\t" or not @cast.nil? or merge
338
+ @straight = false if @sep != "\t" or not @cast.nil? or merge or (@type == :flat and fields)
249
339
  end
250
340
 
251
341
  def setup(data)
@@ -14,6 +14,11 @@ module TSV
14
14
  def self.load(str); str.unpack("l*"); end
15
15
  end
16
16
 
17
+ class FloatArraySerializer
18
+ def self.dump(a); a.pack("d*"); end
19
+ def self.load(str); str.unpack("d*"); end
20
+ end
21
+
17
22
  class StringSerializer
18
23
  def self.dump(str); str.to_s; end
19
24
  def self.load(str); str; end
@@ -66,6 +71,7 @@ module TSV
66
71
  :integer => IntegerSerializer,
67
72
  :float => FloatSerializer,
68
73
  :integer_array => IntegerArraySerializer,
74
+ :float_array => FloatArraySerializer,
69
75
  :marshal => Marshal,
70
76
  :single => StringSerializer,
71
77
  :string => StringSerializer,
data/lib/rbbt/tsv/util.rb CHANGED
@@ -23,6 +23,38 @@ module TSV
23
23
 
24
24
  counts
25
25
  end
26
+
27
+ def self.field_match_counts(file, values, options = {})
28
+ options = Misc.add_defaults options, :persist_prefix => "Field_Matches"
29
+ persist_options = Misc.pull_keys options, :persist
30
+
31
+ filename = TSV === file ? file.filename : file
32
+ text = Persist.persist filename, :string, persist_options do
33
+ tsv = TSV === file ? file : TSV.open(file)
34
+
35
+ text = ""
36
+ fields = nil
37
+ tsv.tap{|e| e.unnamed = true; fields = e.fields}.through do |gene, names|
38
+ names.zip(fields).each do |list, format|
39
+ list.delete_if do |name| name.empty? end
40
+ next if list.empty?
41
+ text << list.collect{|name| [name, format] * "\t"} * "\n" << "\n"
42
+ end
43
+ end
44
+ text
45
+ end
46
+
47
+ path = Persist.persistence_path(filename, persist_options)
48
+ TmpFile.with_file(values * "\n") do |value_file|
49
+ cmd = "cat '#{ path }' | grep -w -F -f '#{ value_file }' |cut -f 2 |sort|uniq -c|sed 's/^ *//;s/ /\\t/'"
50
+ begin
51
+ TSV.open(CMD.cmd(cmd), :key_field => 1, :type => :single, :cast => :to_i)
52
+ rescue
53
+ TSV.setup({nil => 0}, :type => :single, :cast => :to_i)
54
+ end
55
+ end
56
+ end
57
+
26
58
  def self.get_filename(file)
27
59
  case
28
60
  when String === file
@@ -55,7 +87,9 @@ module TSV
55
87
  when (field.nil? or field == :key or key_field == field)
56
88
  :key
57
89
  when String === field
58
- fields.index field
90
+ pos = fields.index field
91
+ Log.medium "Field #{ field } was not found. Options: #{fields * ", "}" if pos.nil?
92
+ pos
59
93
  end
60
94
  end
61
95
 
@@ -1,4 +1,5 @@
1
1
  require 'rbbt/util/log'
2
+ require 'set'
2
3
 
3
4
  module ChainMethods
4
5
  def self.chain_methods_extended(base)
@@ -19,18 +20,31 @@ module ChainMethods
19
20
 
20
21
  def setup_chains(base)
21
22
  raise "No prefix specified for #{self.to_s}" if self.chain_prefix.nil? or (String === self.chain_prefix and self.chain_prefix.empty?)
22
- #methods = self.instance_methods.select{|method| method =~ /^#{self.chain_prefix}/}
23
23
  methods = self.chained_methods
24
24
 
25
25
  return if methods.empty?
26
26
 
27
27
  prefix = self.chain_prefix
28
28
 
29
- new_method = methods.first
30
- original = new_method.sub(prefix.to_s + '_', '')
31
- first_clean_method = prefix.to_s + '_clean_' + original
29
+ #do_chain = true
30
+ #methods.collect{|new_method|
31
+ # original = new_method.sub(prefix.to_s + '_', '')
32
+ # clean = prefix.to_s + '_clean_' + original
33
+ # if base.respond_to? clean
34
+ # do_chain = false
35
+ # break
36
+ # end
37
+ #}
38
+
39
+
40
+ if not base.respond_to?(:processed_chains) or base.processed_chains.nil? or not base.processed_chains.include? prefix
41
+ class << base
42
+ attr_accessor :processed_chains
43
+ end if not base.respond_to? :processed_chains
44
+
45
+ base.processed_chains = Set.new if base.processed_chains.nil?
46
+ base.processed_chains << prefix
32
47
 
33
- if not base.respond_to? first_clean_method
34
48
  class << base; self; end.module_eval do
35
49
  methods.each do |new_method|
36
50
  original = new_method.sub(prefix.to_s + '_', '')
@@ -39,6 +53,10 @@ module ChainMethods
39
53
  original = "[]" if original == "get_brackets"
40
54
  original = "[]=" if original == "set_brackets"
41
55
 
56
+ if base.respond_to? clean_method
57
+ raise "Method already defined: #{clean_method}. #{ prefix }"
58
+ end
59
+
42
60
  begin
43
61
  alias_method clean_method, original
44
62
  rescue
@@ -52,10 +70,10 @@ module ChainMethods
52
70
 
53
71
  if not metaclass.respond_to? :extended
54
72
  metaclass.module_eval do
55
- alias prev_chain_methods_extended extended
73
+ alias prev_chain_methods_extended extended if methods.include? "extended"
56
74
 
57
75
  def extended(base)
58
- prev_chain_methods_extended(base)
76
+ prev_chain_methods_extended(base) if methods.include? "prev_chain_methods_extended"
59
77
  setup_chains(base)
60
78
  end
61
79
  end
@@ -68,7 +86,4 @@ module ChainMethods
68
86
  def self.extended(base)
69
87
  chain_methods_extended(base)
70
88
  end
71
-
72
-
73
-
74
89
  end
@@ -2,10 +2,54 @@ require 'lockfile'
2
2
  require 'rbbt/util/chain_methods'
3
3
  require 'rbbt/resource/path'
4
4
  require 'rbbt/annotations'
5
+ require 'net/smtp'
5
6
 
6
7
  module Misc
7
8
  class FieldNotFoundError < StandardError;end
8
9
 
10
+ def self.consolidate(list)
11
+ list.inject(nil){|acc,e|
12
+ if acc.nil?
13
+ acc = e
14
+ else
15
+ acc.concat e
16
+ acc
17
+ end
18
+ }
19
+ end
20
+
21
+ def self.positional2hash(keys, *values)
22
+ if Hash === values.last
23
+ extra = values.pop
24
+ inputs = Misc.zip2hash(keys, values)
25
+ inputs.delete_if{|k,v| v.nil?}
26
+ inputs = Misc.add_defaults inputs, extra
27
+ inputs.delete_if{|k,v| not keys.include? k}
28
+ else
29
+ Misc.zip2hash(keys, values)
30
+ end
31
+ end
32
+
33
+ def self.send_email(from, to, subject, message, options = {})
34
+ IndiferentHash.setup(options)
35
+ options = Misc.add_defaults options, :from_alias => nil, :to_alias => nil, :server => 'localhost', :port => 25, :user => nil, :pass => nil, :auth => :login
36
+ IndiferentHash.setup(options)
37
+
38
+ server, port, user, pass, from_alias, to_alias, auth = Misc.process_options options, :server, :port, :user, :pass, :from_alias, :to_alias, :auth
39
+
40
+ msg = <<-END_OF_MESSAGE
41
+ From: #{from_alias} <#{from}>
42
+ To: #{to_alias} <#{to}>
43
+ Subject: #{subject}
44
+
45
+ #{message}
46
+ END_OF_MESSAGE
47
+
48
+ Net::SMTP.start(server, port, server, user, pass, auth) do |smtp|
49
+ smtp.send_message msg, from, to
50
+ end
51
+ end
52
+
9
53
  def self.counts(array)
10
54
  counts = Hash.new 0
11
55
  array.each do |e|
@@ -13,7 +57,7 @@ module Misc
13
57
  end
14
58
  counts
15
59
  end
16
-
60
+
17
61
  IUPAC2BASE = {
18
62
  "A" => ["A"],
19
63
  "C" => ["C"],
@@ -55,19 +99,16 @@ module Misc
55
99
  e1, e2 = a1.shift, a2.shift
56
100
  intersect = []
57
101
  while true
58
- case
59
- when (e1 and e2)
60
- case e1 <=> e2
61
- when 0
62
- intersect << e1
63
- e1, e2 = a1.shift, a2.shift
64
- when -1
65
- e1 = a1.shift
66
- when 1
67
- e2 = a2.shift
68
- end
69
- else
70
- break
102
+ break if e1.nil? or e2.nil?
103
+ case e1 <=> e2
104
+ when 0
105
+ intersect << e1
106
+ e1, e2 = a1.shift, a2.shift
107
+ when -1
108
+ e1 = a1.shift while not e1.nil? and e1 < e2
109
+ when 1
110
+ e2 = a2.shift
111
+ e2 = a2.shift while not e2.nil? and e2 < e1
71
112
  end
72
113
  end
73
114
  intersect
@@ -167,6 +208,22 @@ module Misc
167
208
  res
168
209
  end
169
210
 
211
+ def self.memprof
212
+ require 'memprof'
213
+ Memprof.start
214
+ begin
215
+ res = yield
216
+ rescue Exception
217
+ puts "Profiling aborted"
218
+ raise $!
219
+ ensure
220
+ Memprof.stop
221
+ print Memprof.stats
222
+ end
223
+
224
+ res
225
+ end
226
+
170
227
  def self.insist(times = 3)
171
228
  try = 0
172
229
  begin
@@ -228,6 +285,8 @@ module Misc
228
285
  FileUtils.mkdir_p dir unless File.exists? dir
229
286
  FileUtils.cd dir
230
287
  res = yield
288
+ rescue
289
+ raise $!
231
290
  ensure
232
291
  FileUtils.cd old_pwd
233
292
  end
@@ -271,8 +330,9 @@ module Misc
271
330
  else
272
331
  raise "Format of '#{options.inspect}' not understood. It should be a hash"
273
332
  end
333
+
274
334
  defaults.each do |key, value|
275
- next unless new_options[key].nil?
335
+ next if options.include? key
276
336
 
277
337
  new_options[key] = value
278
338
  end
@@ -419,12 +479,15 @@ end
419
479
 
420
480
  module NamedArray
421
481
  extend ChainMethods
482
+
422
483
  self.chain_prefix = :named_array
423
484
  attr_accessor :fields
485
+ attr_accessor :key
424
486
 
425
- def self.setup(array, fields)
487
+ def self.setup(array, fields, key = nil)
426
488
  array.extend NamedArray
427
489
  array.fields = fields
490
+ array.key = key
428
491
  array
429
492
  end
430
493
 
@@ -455,11 +518,13 @@ module NamedArray
455
518
  if defined? Entity
456
519
  entity = (defined?(Entity) and Entity.respond_to?(:formats)) ? Entity.formats[key] : nil
457
520
  if entity
458
- if entity.annotations.first == :format
459
- entity.setup(named_array_clean_get_brackets(Misc.field_position(fields, key)), key)
460
- else
461
- entity.setup(named_array_clean_get_brackets(Misc.field_position(fields, key)))
462
- end
521
+ elem = if entity.annotations.first == :format
522
+ entity.setup(named_array_clean_get_brackets(Misc.field_position(fields, key)), key)
523
+ else
524
+ entity.setup(named_array_clean_get_brackets(Misc.field_position(fields, key)))
525
+ end
526
+ elem.context = self
527
+ elem
463
528
  else
464
529
  named_array_clean_get_brackets(Misc.field_position(fields, key))
465
530
  end
@@ -469,18 +534,21 @@ module NamedArray
469
534
  end
470
535
 
471
536
  def named_array_each(&block)
472
- if defined?(Entity) and not fields.nil? and not fields.empty?
473
- fields.zip(self) do |field,elem|
537
+ if defined?(Entity) and not @fields.nil? and not @fields.empty?
538
+ @fields.zip(self).each do |field,elem|
474
539
  entity = (defined?(Entity) and Entity.respond_to?(:formats)) ? Entity.formats[field] : nil
540
+
475
541
  if entity
476
542
  elem = elem.dup if elem.frozen?
477
543
  if entity.annotations.first == :format
478
- elem = entity.setup(elem, field)
544
+ entity.setup(elem, field)
479
545
  else
480
- elem = entity.setup(elem)
546
+ entity.setup(elem)
481
547
  end
482
- else
548
+ elem.context = self
549
+ elem
483
550
  end
551
+
484
552
  yield(elem)
485
553
  elem
486
554
  end
@@ -489,6 +557,20 @@ module NamedArray
489
557
  end
490
558
  end
491
559
 
560
+ def named_array_collect
561
+ res = []
562
+
563
+ named_array_each do |elem|
564
+ if block_given?
565
+ res << yield(elem)
566
+ else
567
+ res << elem
568
+ end
569
+ end
570
+
571
+ res
572
+ end
573
+
492
574
  def named_array_set_brackets(key,value)
493
575
  named_array_clean_set_brackets(Misc.field_position(fields, key), value)
494
576
  end
@@ -517,7 +599,7 @@ module NamedArray
517
599
 
518
600
  def report
519
601
  fields.zip(self).collect do |field,value|
520
- "\nAttributes:\n* #{ field }: #{ Array === value ? value * "|" : value }"
602
+ "#{ field }: #{ Array === value ? value * "|" : value }"
521
603
  end * "\n"
522
604
  end
523
605