rbbt-util 5.11.4 → 5.11.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1b14c182984b18f946f50ba4d5e07fafec05f8b5
4
- data.tar.gz: f6cdb39396d9bd4871e74ab0eb63015373e415d9
3
+ metadata.gz: 6ec4bc56a7d0d260d93be40ee61d23c08f58d5a8
4
+ data.tar.gz: 6f51a747302e936ed1e5953e929e5120fa5caa89
5
5
  SHA512:
6
- metadata.gz: a51d7e12fb6fa32385092372830f5f3349fd697c0263f561c287cf17860b604c9544254444d0148c214381639f76e4de396b6d32c1d4a3cbf0e8bd631b45f3c2
7
- data.tar.gz: d56ed3b1b3a048d5a68f1ad5b17f1548ed993387ff5fc7313b79716faeb31a96e5e7aa289b3a0d7821aeaa60ffbe2767ac771f9aba57c31b990b94186b1a77dc
6
+ metadata.gz: db5a97b2787f182444de536059beb027d038573c0c3ac3196ec44ce18d2561fa3204a38229d3054e5c92594309fc4a488cc6e9116e370c4f2d383b28a8ed9e71
7
+ data.tar.gz: 8a4fc523ae9266ceea9964f26f85834a8f5c0e0899a523bb938e2e813da191651215ec356432ee7b467f39b470328b32dda911d1b2cd9bd757fa467ad14d54ce
@@ -235,7 +235,8 @@ module TSV
235
235
  store.add *value
236
236
  when IO
237
237
  return if value.nil?
238
- store.puts value.strip
238
+ value.strip!
239
+ store.puts value
239
240
  else
240
241
  store << value
241
242
  end
data/lib/rbbt/util/cmd.rb CHANGED
@@ -72,7 +72,7 @@ module CMD
72
72
  def read(*args)
73
73
  data = original_read(*args) unless self.closed? or self.eof?
74
74
 
75
- self.close if self.eof? and not self.closed?
75
+ self.close if not self.closed? and self.eof?
76
76
 
77
77
  data || ""
78
78
  end
@@ -5,6 +5,17 @@ require 'cgi'
5
5
  require 'zlib'
6
6
  require 'rubygems/package'
7
7
  require 'rbbt/util/tar'
8
+ require 'rbbt/util/misc/exceptions'
9
+ require 'rbbt/util/misc/concurrent_stream'
10
+ require 'rbbt/util/misc/indiferent_hash'
11
+ require 'rbbt/util/misc/pipes'
12
+ require 'rbbt/util/misc/format'
13
+ require 'rbbt/util/misc/omics'
14
+ require 'rbbt/util/misc/inspect'
15
+ require 'rbbt/util/misc/math'
16
+ require 'rbbt/util/misc/development'
17
+ require 'rbbt/util/misc/lock'
18
+
8
19
 
9
20
  class Hash
10
21
  def chunked_values_at(keys, max = 5000)
@@ -16,372 +27,14 @@ class Hash
16
27
  end
17
28
  end
18
29
 
19
- class ParameterException < Exception; end
20
- class FieldNotFoundError < Exception;end
21
- class Aborted < Exception; end
22
- class TryAgain < Exception; end
23
- class ClosedStream < Exception; end
24
- class KeepLocked < Exception
25
- attr_accessor :payload
26
- def initialize(payload)
27
- @payload = payload
28
- end
29
- end
30
-
31
30
  module LaterString
32
31
  def to_s
33
32
  yield
34
33
  end
35
34
  end
36
35
 
37
- module ConcurrentStream
38
- attr_accessor :threads, :pids, :callback, :abort_callback, :filename, :joined
39
-
40
- def joined?
41
- @joined
42
- end
43
-
44
- def join
45
-
46
- if @threads and @threads.any?
47
- @threads.each do |t|
48
- t.join
49
- end
50
- @threads = []
51
- end
52
-
53
- if @pids and @pids.any?
54
- @pids.each do |pid|
55
- begin
56
- Process.waitpid(pid, Process::WUNTRACED)
57
- raise "Error joining process #{pid} in #{self.inspect}" unless $?.success?
58
- rescue Errno::ECHILD
59
- end
60
- end
61
- @pids = []
62
- end
63
-
64
- if @callback and not joined?
65
- @callback.call
66
- @callback = nil
67
- end
68
-
69
- @joined = true
70
- end
71
-
72
- def abort
73
- @threads.each{|t| t.raise Aborted.new } if @threads
74
- @threads.each{|t| t.join } if @threads
75
- @pids.each{|pid| Process.kill :INT, pid } if @pids
76
- @pids.each{|pid| Process.waitpid pid } if @pids
77
- @abort_callback.call if @abort_callback
78
- @abort_callback = nil
79
- end
80
-
81
- def self.setup(stream, options = {}, &block)
82
- threads, pids, callback, filename = Misc.process_options options, :threads, :pids, :callback, :filename
83
- stream.extend ConcurrentStream unless ConcurrentStream === stream
84
-
85
- stream.threads ||= []
86
- stream.pids ||= []
87
- stream.threads.concat(Array === threads ? threads : [threads]) unless threads.nil?
88
- stream.pids.concat(Array === pids ? pids : [pids]) unless pids.nil? or pids.empty?
89
-
90
- callback = block if block_given?
91
- if stream.callback and callback
92
- old_callback = stream.callback
93
- stream.callback = Proc.new do
94
- old_callback.call
95
- callback.call
96
- end
97
- else
98
- stream.callback = callback
99
- end
100
-
101
- stream.filename = filename unless filename.nil?
102
-
103
- stream
104
- end
105
- end
106
-
107
-
108
- Lockfile.refresh = false if ENV["RBBT_NO_LOCKFILE_REFRESH"] == "true"
109
36
  module Misc
110
37
 
111
-
112
- PIPE_MUTEX = Mutex.new
113
-
114
- OPEN_PIPE_IN = []
115
- def self.pipe
116
- OPEN_PIPE_IN.delete_if{|pipe| pipe.closed? }
117
- PIPE_MUTEX.synchronize do
118
- sout, sin = IO.pipe
119
- OPEN_PIPE_IN << sin
120
-
121
- [sout, sin]
122
- end
123
- end
124
-
125
- def self.release_pipes(*pipes)
126
- PIPE_MUTEX.synchronize do
127
- pipes.flatten.each do |pipe|
128
- pipe.close unless pipe.closed?
129
- end
130
- end
131
- end
132
-
133
-
134
- def self.purge_pipes(*save)
135
- PIPE_MUTEX.synchronize do
136
- OPEN_PIPE_IN.each do |pipe|
137
- next if save.include? pipe
138
- pipe.close unless pipe.closed?
139
- end
140
- end
141
- end
142
-
143
- def self.open_pipe(do_fork = false, close = true)
144
- raise "No block given" unless block_given?
145
-
146
- sout, sin = Misc.pipe
147
-
148
- if do_fork
149
- parent_pid = Process.pid
150
- pid = Process.fork {
151
- purge_pipes(sin)
152
- sout.close
153
- begin
154
- yield sin
155
- rescue
156
- Log.exception $!
157
- Process.kill :INT, parent_pid
158
- Kernel.exit! -1
159
- ensure
160
- sin.close if close and not sin.closed?
161
- end
162
- Kernel.exit! 0
163
- }
164
- sin.close #if close
165
- ConcurrentStream.setup sout, :pids => [pid]
166
- else
167
- thread = Thread.new(Thread.current) do |parent|
168
- begin
169
- yield sin
170
- rescue
171
- parent.raise $!
172
- ensure
173
- sin.close if close and not sin.closed?
174
- end
175
- end
176
- ConcurrentStream.setup sout, :threads => [thread]
177
- end
178
- sout
179
- end
180
-
181
- def self.tee_stream_fork(stream)
182
- stream_out1, stream_in1 = Misc.pipe
183
- stream_out2, stream_in2 = Misc.pipe
184
-
185
- splitter_pid = Process.fork do
186
- Misc.purge_pipes(stream_in1, stream_in2)
187
- stream_out1.close
188
- stream_out2.close
189
- begin
190
- filename = stream.respond_to?(:filename)? stream.filename : nil
191
- skip1 = skip2 = false
192
- while block = stream.read(2048)
193
- begin stream_in1.write block; rescue Exception; Log.exception $!; skip1 = true end unless skip1
194
- begin stream_in2.write block; rescue Exception; Log.exception $!; skip2 = true end unless skip2
195
- end
196
- raise "Error writing in stream_in2" if skip2
197
- raise "Error writing in stream_in2" if skip2
198
- rescue Aborted
199
- stream.abort if stream.respond_to? :abort
200
- raise $!
201
- rescue IOError
202
- Log.exception $!
203
- rescue Exception
204
- Log.exception $!
205
- ensure
206
- stream_in1.close
207
- stream_in2.close
208
- stream.join if stream.respond_to? :join
209
- end
210
- end
211
- stream.close
212
- stream_in1.close
213
- stream_in2.close
214
- #stream.join if stream.respond_to? :join
215
-
216
- ConcurrentStream.setup stream_out1, :pids => [splitter_pid]
217
- ConcurrentStream.setup stream_out2, :pids => [splitter_pid]
218
-
219
- [stream_out1, stream_out2]
220
- end
221
-
222
- def self.tee_stream_thread(stream)
223
- stream_out1, stream_in1 = Misc.pipe
224
- stream_out2, stream_in2 = Misc.pipe
225
-
226
- splitter_thread = Thread.new(Thread.current, stream_in1, stream_in2) do |parent,stream_in1,stream_in2|
227
- begin
228
- filename = stream.respond_to?(:filename)? stream.filename : nil
229
- skip1 = skip2 = false
230
- while block = stream.read(2048)
231
- begin stream_in1.write block; rescue Exception; Aborted === $! ? raise($!): Log.exception($!); skip1 = true end unless skip1
232
- begin stream_in2.write block; rescue Exception; Aborted === $! ? raise($!): Log.exception($!); skip2 = true end unless skip2
233
- end
234
- rescue Aborted
235
- stream.abort if stream.respond_to? :abort
236
- raise $!
237
- rescue IOError
238
- Log.exception $!
239
- rescue Exception
240
- Log.exception $!
241
- parent.raise $!
242
- ensure
243
- stream_in1.close
244
- stream_in2.close
245
- stream.join if stream.respond_to? :join
246
- end
247
- end
248
-
249
- ConcurrentStream.setup stream_out1, :threads => splitter_thread
250
- ConcurrentStream.setup stream_out2, :threads => splitter_thread
251
-
252
- [stream_out1, stream_out2]
253
- end
254
-
255
- class << self
256
- alias tee_stream tee_stream_thread
257
- end
258
-
259
- def self.read_full_stream(io)
260
- str = ""
261
- begin
262
- while block = io.read(2048)
263
- str << block
264
- end
265
- rescue
266
- io.abort if io.respond_to? :abort
267
- ensure
268
- io.join if io.respond_to? :join
269
- io.close if io.respond_to? :close
270
- end
271
- str
272
- end
273
-
274
- def self.consume_stream(io)
275
- begin
276
- Thread.pass while block = io.read(2048)
277
- rescue
278
- io.abort if io.respond_to? :abort
279
- ensure
280
- io.join if io.respond_to? :join
281
- io.close if io.respond_to? :close
282
- end
283
- end
284
-
285
- def self.format_paragraph(text, size = 80, indent = 0, offset = 0)
286
- i = 0
287
- re = /((?:\n\s*\n\s*)|(?:\n\s*(?=\*)))/
288
- text.split(re).collect do |paragraph|
289
- i += 1
290
- str = if i % 2 == 1
291
- words = paragraph.gsub(/\s+/, "\s").split(" ")
292
- lines = []
293
- line = " "*offset
294
- word = words.shift
295
- while word
296
- word = word[0..size-indent-offset-4] + '...' if word.length >= size - indent - offset
297
- while word and Log.uncolor(line).length + Log.uncolor(word).length <= size - indent
298
- line << word << " "
299
- word = words.shift
300
- end
301
- lines << ((" " * indent) << line[0..-2])
302
- line = ""
303
- end
304
- (lines * "\n")
305
- else
306
- paragraph
307
- end
308
- offset = 0
309
- str
310
- end*""
311
- end
312
-
313
- def self.format_definition_list_item(dt, dd, size = 80, indent = 20, color = :yellow)
314
- dd = "" if dd.nil?
315
- dt = dt.to_s + ":" unless dd.empty?
316
- dt = Log.color color, dt if color
317
- len = Log.uncolor(dt).length
318
-
319
- if indent < 0
320
- text = format_paragraph(dd, size, indent.abs+1, 0)
321
- text = dt << "\n" << text
322
- else
323
- offset = len - indent
324
- offset = 0 if offset < 0
325
- text = format_paragraph(dd, size, indent.abs+1, offset)
326
- text[0..len-1] = dt
327
- end
328
- text
329
- end
330
-
331
- def self.format_definition_list(defs, size = 80, indent = 20, color = :yellow)
332
- entries = []
333
- defs.each do |dt,dd|
334
- text = format_definition_list_item(dt,dd,size,indent,color)
335
- entries << text
336
- end
337
- entries * "\n\n"
338
- end
339
-
340
- def self.read_stream(stream, size)
341
- str = nil
342
- Thread.pass while IO.select([stream],nil,nil,1).nil?
343
- while not str = stream.read(size)
344
- IO.select([stream],nil,nil,1)
345
- Thread.pass
346
- raise ClosedStream if stream.eof?
347
- end
348
-
349
- while str.length < size
350
- raise ClosedStream if stream.eof?
351
- IO.select([stream],nil,nil,1)
352
- if new = stream.read(size-str.length)
353
- str << new
354
- end
355
- end
356
- str
357
- end
358
-
359
- def self.read_stream(stream, size)
360
- str = nil
361
- Thread.pass while IO.select([stream],nil,nil,1).nil?
362
- while not str = stream.read(size)
363
- IO.select([stream],nil,nil,1)
364
- Thread.pass
365
- raise ClosedStream if stream.eof?
366
- end
367
-
368
- while str.length < size
369
- raise ClosedStream if stream.eof?
370
- IO.select([stream],nil,nil,1)
371
- if new = stream.read(size-str.length)
372
- str << new
373
- end
374
- end
375
- str
376
- end
377
- def self._read_stream(stream, size)
378
- str = ""
379
- while (len=str.length) < size
380
- str << (stream.read(size-len) or break)
381
- end
382
- str
383
- end
384
-
385
38
  def self.parse_cmd_params(str)
386
39
  return str if Array === str
387
40
  str.scan(/
@@ -390,39 +43,6 @@ module Misc
390
43
  /x).flatten.compact
391
44
  end
392
45
 
393
- def self.correct_icgc_mutation(pos, ref, mut_str)
394
- mut = mut_str
395
- mut = '-' * (mut_str.length - 1) if mut =~/^-[ACGT]/
396
- mut = "+" << mut if ref == '-'
397
- [pos, [mut]]
398
- end
399
-
400
- def self.correct_vcf_mutation(pos, ref, mut_str)
401
- muts = mut_str.nil? ? [] : mut_str.split(',')
402
-
403
- while ref.length >= 1 and muts.reject{|m| m[0] == ref[0]}.empty?
404
- ref = ref[1..-1]
405
- pos = pos + 1
406
- muts = muts.collect{|m| m[1..-1]}
407
- end
408
-
409
- muts = muts.collect do |m|
410
- case
411
- when ref.empty?
412
- "+" << m
413
- when (m.length < ref.length and (m.empty? or ref.index(m)))
414
- "-" * (ref.length - m.length)
415
- when (ref.length == 1 and m.length == 1)
416
- m
417
- else
418
- Log.debug{"Cannot understand: #{[ref, m]} (#{ muts })"}
419
- '-' * ref.length + m
420
- end
421
- end
422
-
423
- [pos, muts]
424
- end
425
-
426
46
  def self.pid_exists?(pid)
427
47
  return false if pid.nil?
428
48
  begin
@@ -433,25 +53,6 @@ module Misc
433
53
  end
434
54
  end
435
55
 
436
- COLOR_LIST = %w(#BC80BD #CCEBC5 #FFED6F #8DD3C7 #FFFFB3 #BEBADA #FB8072 #80B1D3 #FDB462 #B3DE69 #FCCDE5 #D9D9D9)
437
-
438
- def self.colors_for(list)
439
- unused = COLOR_LIST.dup
440
-
441
- used = {}
442
- colors = list.collect do |elem|
443
- if used.include? elem
444
- used[elem]
445
- else
446
- color = unused.shift
447
- used[elem]=color
448
- color
449
- end
450
- end
451
-
452
- [colors, used]
453
- end
454
-
455
56
  def self.collapse_ranges(ranges)
456
57
  processed = []
457
58
  last = nil
@@ -527,10 +128,6 @@ module Misc
527
128
  end
528
129
  end
529
130
 
530
- Log2Multiplier = 1.0 / Math.log(2.0)
531
- def self.log2(x)
532
- Math.log(x) * Log2Multiplier
533
- end
534
131
 
535
132
  def self.prepare_entity(entity, field, options = {})
536
133
  return entity unless defined? Entity
@@ -555,116 +152,6 @@ module Misc
555
152
  entity
556
153
  end
557
154
 
558
- ARRAY_MAX_LENGTH = 1000
559
- STRING_MAX_LENGTH = ARRAY_MAX_LENGTH * 10
560
-
561
- def self.sanitize_filename(filename, length = 254)
562
- if filename.length > length
563
- if filename =~ /(\..{2,9})$/
564
- extension = $1
565
- else
566
- extension = ''
567
- end
568
-
569
- post_fix = "--#{filename.length}@#{length}_#{Misc.digest(filename)[0..4]}" + extension
570
-
571
- filename = filename[0..(length - post_fix.length - 1)] << post_fix
572
- else
573
- filename
574
- end
575
- filename
576
- end
577
-
578
- def self.fingerprint(obj)
579
- case obj
580
- when nil
581
- "nil"
582
- when (defined? Step and Step)
583
- obj.path || Misc.fingerprint([obj.task.name, obj.inputs])
584
- when TrueClass
585
- "true"
586
- when FalseClass
587
- "false"
588
- when Symbol
589
- ":" << obj.to_s
590
- when String
591
- if obj.length > 100
592
- "'" << obj.slice(0,20) << "<...#{obj.length}...>" << obj.slice(-10,10) << " " << "'"
593
- else
594
- "'" << obj << "'"
595
- end
596
- when (defined? AnnotatedArray and AnnotatedArray)
597
- "<A: #{fingerprint Annotated.purge(obj)} #{fingerprint obj.info}>"
598
- when (defined? TSV and TSV::Parser)
599
- "<TSVStream:" + obj.filename + "--" << Misc.fingerprint(obj.options) << ">"
600
- when IO
601
- "<IO:" + (obj.respond_to?(:filename) ? obj.filename : obj.inspect) + ">"
602
- when File
603
- "<File:" + obj.path + ">"
604
- when Array
605
- if (length = obj.length) > 10
606
- "[#{length}--" << (obj.values_at(0,1, length / 2, -2, -1).collect{|e| fingerprint(e)} * ",") << "]"
607
- else
608
- "[" << (obj.collect{|e| fingerprint(e) } * ",") << "]"
609
- end
610
- when (defined? TSV and TSV)
611
- obj.with_unnamed do
612
- "TSV:{"<< fingerprint(obj.all_fields|| []).inspect << ";" << fingerprint(obj.keys).inspect << "}"
613
- end
614
- when Hash
615
- if obj.length > 10
616
- "H:{"<< fingerprint(obj.keys) << ";" << fingerprint(obj.values) << "}"
617
- else
618
- new = "{"
619
- obj.each do |k,v|
620
- new << k.to_s << '=>' << fingerprint(v) << ' '
621
- end
622
- if new.length > 1
623
- new[-1] = "}"
624
- else
625
- new << '}'
626
- end
627
- new
628
- end
629
- else
630
- obj.to_s
631
- end
632
- end
633
-
634
-
635
- def self.remove_long_items(obj)
636
- case
637
- when IO === obj
638
- remove_long_items("IO: " + obj.filename)
639
- when obj.respond_to?(:path)
640
- remove_long_items("File: " + obj.path)
641
- when TSV::Parser === obj
642
- remove_long_items("TSV Stream: " + obj.filename + " -- " << Misc.fingerprint(obj.options))
643
- when TSV === obj
644
- remove_long_items((obj.all_fields || []) + obj.keys.sort)
645
- when (Array === obj and obj.length > ARRAY_MAX_LENGTH)
646
- remove_long_items(obj[0..ARRAY_MAX_LENGTH-2] << "TRUNCATED at #{ ARRAY_MAX_LENGTH } (#{obj.length})")
647
- when (Hash === obj and obj.length > ARRAY_MAX_LENGTH)
648
- remove_long_items(obj.collect.compact[0..ARRAY_MAX_LENGTH-2] << ["TRUNCATED", "at #{ ARRAY_MAX_LENGTH } (#{obj.length})"])
649
- when (String === obj and obj.length > STRING_MAX_LENGTH)
650
- obj[0..STRING_MAX_LENGTH-1] << " TRUNCATED at #{STRING_MAX_LENGTH} (#{obj.length})"
651
- when Hash === obj
652
- new = {}
653
- obj.each do |k,v|
654
- new[k] = remove_long_items(v)
655
- end
656
- new
657
- when Array === obj
658
- obj.collect do |e| remove_long_items(e) end
659
- else
660
- obj
661
- end
662
- end
663
-
664
- #def self.remove_long_items(obj)
665
- # return fingerprint(obj)
666
- #end
667
-
668
155
  def self.ensembl_server(organism)
669
156
  date = organism.split("/")[1]
670
157
  if date.nil?
@@ -674,18 +161,6 @@ module Misc
674
161
  end
675
162
  end
676
163
 
677
- def self.filename?(string)
678
- String === string and string.length > 0 and string.length < 250 and File.exists?(string)
679
- end
680
-
681
- def self.max(list)
682
- max = nil
683
- list.each do |v|
684
- next if v.nil?
685
- max = v if max.nil? or v > max
686
- end
687
- max
688
- end
689
164
 
690
165
  def self.google_venn(list1, list2, list3, name1 = nil, name2 = nil, name3 = nil, total = nil)
691
166
  name1 ||= "list 1"
@@ -710,20 +185,6 @@ module Misc
710
185
  url = "https://chart.googleapis.com/chart?cht=v&chs=500x300&chd=t:#{sizes * ","}&chco=FF6342,ADDE63,63C6DE,FFFFFF&chdl=#{label}"
711
186
  end
712
187
 
713
- def self.sum(list)
714
- list.compact.inject(0.0){|acc,e| acc += e}
715
- end
716
-
717
- def self.mean(list)
718
- sum(list) / list.compact.length
719
- end
720
-
721
- def self.sd(list)
722
- return nil if list.length < 3
723
- mean = mean(list)
724
- Math.sqrt(list.compact.inject(0.0){|acc,e| d = e - mean; acc += d * d}) / (list.compact.length - 1)
725
- end
726
-
727
188
  def self.consolidate(list)
728
189
  list.inject(nil){|acc,e|
729
190
  if acc.nil?
@@ -795,208 +256,6 @@ end
795
256
  proportions
796
257
  end
797
258
 
798
- IUPAC2BASE = {
799
- "A" => ["A"],
800
- "C" => ["C"],
801
- "G" => ["G"],
802
- "T" => ["T"],
803
- "U" => ["U"],
804
- "R" => "A or G".split(" or "),
805
- "Y" => "C or T".split(" or "),
806
- "S" => "G or C".split(" or "),
807
- "W" => "A or T".split(" or "),
808
- "K" => "G or T".split(" or "),
809
- "M" => "A or C".split(" or "),
810
- "B" => "C or G or T".split(" or "),
811
- "D" => "A or G or T".split(" or "),
812
- "H" => "A or C or T".split(" or "),
813
- "V" => "A or C or G".split(" or "),
814
- "N" => %w(A C T G),
815
- }
816
-
817
- BASE2COMPLEMENT = {
818
- "A" => "T",
819
- "C" => "G",
820
- "G" => "C",
821
- "T" => "A",
822
- "U" => "A",
823
- }
824
-
825
- THREE_TO_ONE_AA_CODE = {
826
- "ala" => "A",
827
- "arg" => "R",
828
- "asn" => "N",
829
- "asp" => "D",
830
- "cys" => "C",
831
- "glu" => "E",
832
- "gln" => "Q",
833
- "gly" => "G",
834
- "his" => "H",
835
- "ile" => "I",
836
- "leu" => "L",
837
- "lys" => "K",
838
- "met" => "M",
839
- "phe" => "F",
840
- "pro" => "P",
841
- "ser" => "S",
842
- "thr" => "T",
843
- "trp" => "W",
844
- "tyr" => "Y",
845
- "val" => "V"
846
- }
847
- CODON_TABLE = {
848
- "ATT" => "I",
849
- "ATC" => "I",
850
- "ATA" => "I",
851
- "CTT" => "L",
852
- "CTC" => "L",
853
- "CTA" => "L",
854
- "CTG" => "L",
855
- "TTA" => "L",
856
- "TTG" => "L",
857
- "GTT" => "V",
858
- "GTC" => "V",
859
- "GTA" => "V",
860
- "GTG" => "V",
861
- "TTT" => "F",
862
- "TTC" => "F",
863
- "ATG" => "M",
864
- "TGT" => "C",
865
- "TGC" => "C",
866
- "GCT" => "A",
867
- "GCC" => "A",
868
- "GCA" => "A",
869
- "GCG" => "A",
870
- "GGT" => "G",
871
- "GGC" => "G",
872
- "GGA" => "G",
873
- "GGG" => "G",
874
- "CCT" => "P",
875
- "CCC" => "P",
876
- "CCA" => "P",
877
- "CCG" => "P",
878
- "ACT" => "T",
879
- "ACC" => "T",
880
- "ACA" => "T",
881
- "ACG" => "T",
882
- "TCT" => "S",
883
- "TCC" => "S",
884
- "TCA" => "S",
885
- "TCG" => "S",
886
- "AGT" => "S",
887
- "AGC" => "S",
888
- "TAT" => "Y",
889
- "TAC" => "Y",
890
- "TGG" => "W",
891
- "CAA" => "Q",
892
- "CAG" => "Q",
893
- "AAT" => "N",
894
- "AAC" => "N",
895
- "CAT" => "H",
896
- "CAC" => "H",
897
- "GAA" => "E",
898
- "GAG" => "E",
899
- "GAT" => "D",
900
- "GAC" => "D",
901
- "AAA" => "K",
902
- "AAG" => "K",
903
- "CGT" => "R",
904
- "CGC" => "R",
905
- "CGA" => "R",
906
- "CGG" => "R",
907
- "AGA" => "R",
908
- "AGG" => "R",
909
- "TAA" => "*",
910
- "TAG" => "*",
911
- "TGA" => "*",
912
- }
913
-
914
- #def self.fast_align(reference, sequence)
915
- #
916
- #require 'narray'
917
- # init_gap = -1
918
- # gap = -2
919
- # diff = -2
920
- # same = 2
921
-
922
- # cols = sequence.length + 1
923
- # rows = reference.length + 1
924
-
925
- # a = NArray.int(cols, rows)
926
-
927
- # for spos in 0..cols-1 do a[spos, 0] = spos * init_gap end
928
- # for rpos in 0..rows-1 do a[0, rpos] = rpos * init_gap end
929
-
930
- # spos = 1
931
- # while spos < cols do
932
- # rpos = 1
933
- # while rpos < rows do
934
- # match = a[spos-1,rpos-1] + (sequence[spos-1] != reference[rpos-1] ? diff : same)
935
- # skip_sequence = a[spos-1,rpos] + gap
936
- # skip_reference = a[spos,rpos-1] + gap
937
- # a[spos,rpos] = [match, skip_sequence, skip_reference].max
938
- # rpos += 1
939
- # end
940
- # spos += 1
941
- # end
942
-
943
- # start = Misc.max(a[-1,0..rows-1])
944
- # start_pos = a[-1,0..rows-1].to_a.index start
945
-
946
- # ref = ''
947
- # seq = ''
948
- # rpos = start_pos
949
- # spos = cols - 1
950
-
951
- # while spos > 0 and rpos > 0
952
- # score = a[spos,rpos]
953
- # score_match = a[spos-1,rpos-1]
954
- # score_skip_reference = a[spos,rpos-1]
955
- # score_skip_sequence = a[spos-1,rpos]
956
-
957
- # case
958
- # when score == score_match + (sequence[spos-1] != reference[rpos-1] ? diff : same)
959
- # ref << reference[rpos-1]
960
- # seq << sequence[spos-1]
961
- # spos -= 1
962
- # rpos -= 1
963
- # when score == score_skip_reference + gap
964
- # ref << reference[rpos-1]
965
- # seq << '-'
966
- # rpos -= 1
967
- # when score == score_skip_sequence + gap
968
- # seq << sequence[spos-1]
969
- # ref << '-'
970
- # spos -= 1
971
- # else
972
- # raise "stop"
973
- # end
974
- # end
975
-
976
- # while (rpos > 0)
977
- # ref << reference[rpos-1]
978
- # seq = seq << '-'
979
- # rpos -= 1
980
- # end
981
-
982
- # while (spos > 0)
983
- # seq << sequence[spos-1]
984
- # ref = ref + '-'
985
- # spos -= 1
986
- # end
987
- #
988
- # [ref.reverse + reference[start_pos..-1], seq.reverse + '-' * (rows - start_pos - 1)]
989
- #end
990
-
991
- def self.IUPAC_to_base(iupac)
992
- IUPAC2BASE[iupac]
993
- end
994
-
995
- def self.is_filename?(string)
996
- return true if string.respond_to? :exists
997
- return true if String === string and string.length < 265 and File.exists? string
998
- return false
999
- end
1000
259
 
1001
260
  def self.sorted_array_hits(a1, a2)
1002
261
  e1, e2 = a1.shift, a2.shift
@@ -1132,100 +391,6 @@ end
1132
391
  end
1133
392
  end
1134
393
 
1135
- def self.benchmark(repeats = 1, message = nil)
1136
- require 'benchmark'
1137
- res = nil
1138
- begin
1139
- measure = Benchmark.measure do
1140
- repeats.times do
1141
- res = yield
1142
- end
1143
- end
1144
- if message
1145
- puts "#{message }: #{ repeats } repeats"
1146
- else
1147
- puts "Benchmark for #{ repeats } repeats"
1148
- end
1149
- puts measure
1150
- rescue Exception
1151
- puts "Benchmark aborted"
1152
- raise $!
1153
- end
1154
- res
1155
- end
1156
-
1157
- def self.profile_html(options = {})
1158
- require 'ruby-prof'
1159
- RubyProf.start
1160
- begin
1161
- res = yield
1162
- rescue Exception
1163
- puts "Profiling aborted"
1164
- raise $!
1165
- ensure
1166
- result = RubyProf.stop
1167
- printer = RubyProf::MultiPrinter.new(result)
1168
- TmpFile.with_file do |dir|
1169
- FileUtils.mkdir_p dir unless File.exists? dir
1170
- printer.print(:path => dir, :profile => 'profile')
1171
- CMD.cmd("firefox -no-remote '#{ dir }'")
1172
- end
1173
- end
1174
-
1175
- res
1176
- end
1177
-
1178
- def self.profile_graph(options = {})
1179
- require 'ruby-prof'
1180
- RubyProf.start
1181
- begin
1182
- res = yield
1183
- rescue Exception
1184
- puts "Profiling aborted"
1185
- raise $!
1186
- ensure
1187
- result = RubyProf.stop
1188
- #result.eliminate_methods!([/annotated_array_clean_/])
1189
- printer = RubyProf::GraphPrinter.new(result)
1190
- printer.print(STDOUT, options)
1191
- end
1192
-
1193
- res
1194
- end
1195
-
1196
- def self.profile(options = {})
1197
- require 'ruby-prof'
1198
- RubyProf.start
1199
- begin
1200
- res = yield
1201
- rescue Exception
1202
- puts "Profiling aborted"
1203
- raise $!
1204
- ensure
1205
- result = RubyProf.stop
1206
- printer = RubyProf::FlatPrinter.new(result)
1207
- printer.print(STDOUT, options)
1208
- end
1209
-
1210
- res
1211
- end
1212
-
1213
- def self.memprof
1214
- require 'memprof'
1215
- Memprof.start
1216
- begin
1217
- res = yield
1218
- rescue Exception
1219
- puts "Profiling aborted"
1220
- raise $!
1221
- ensure
1222
- Memprof.stop
1223
- print Memprof.stats
1224
- end
1225
-
1226
- res
1227
- end
1228
-
1229
394
  def self.do_once(&block)
1230
395
  return nil if $__did_once
1231
396
  $__did_once = true
@@ -1334,22 +499,6 @@ end
1334
499
  html
1335
500
  end
1336
501
 
1337
- #def self.path_relative_to(basedir, path)
1338
- # path = File.expand_path(path) unless path[0] == "/"
1339
- # basedir = File.expand_path(basedir) unless basedir[0] == "/"
1340
-
1341
- # basedir << "/" unless basedir[-1] == "/"
1342
- # case
1343
- # when path == basedir
1344
- # "."
1345
- # #when path =~ /#{Regexp.quote basedir}\/(.*)/
1346
- # when path.index(basedir) == 0
1347
- # return path[basedir.length..-1]
1348
- # else
1349
- # return nil
1350
- # end
1351
- #end
1352
-
1353
502
  def self.path_relative_to(basedir, path)
1354
503
  path = File.expand_path(path) unless path[0] == "/"
1355
504
  basedir = File.expand_path(basedir) unless basedir[0] == "/"
@@ -1369,88 +518,6 @@ end
1369
518
  @hostanem ||= `hostname`.strip
1370
519
  end
1371
520
 
1372
- LOCK_MUTEX = Mutex.new
1373
- def self.lock(file, unlock = true)
1374
- return yield if file.nil?
1375
- FileUtils.mkdir_p File.dirname(File.expand_path(file)) unless File.exists? File.dirname(File.expand_path(file))
1376
-
1377
- res = nil
1378
-
1379
- lock_path = File.expand_path(file + '.lock')
1380
- lockfile = Lockfile.new(lock_path)
1381
-
1382
- hostname = Misc.hostname
1383
- LOCK_MUTEX.synchronize do
1384
- Misc.insist 2, 0.1 do
1385
- Misc.insist 3, 0.1 do
1386
- begin
1387
- if File.exists? lock_path
1388
- info = Open.open(lock_path){|f| YAML.load(f) }
1389
- raise "No info" unless info
1390
-
1391
- if hostname == info["host"] and not Misc.pid_exists?(info["pid"])
1392
- Log.info("Removing lockfile: #{lock_path}. This pid #{Process.pid}. Content: #{info.inspect}")
1393
- FileUtils.rm lock_path
1394
- end
1395
- end
1396
- rescue Exception
1397
- FileUtils.rm lock_path if File.exists? lock_path
1398
- lockfile = Lockfile.new(lock_path) unless File.exists? lock_path
1399
- raise $!
1400
- end
1401
- end
1402
- end
1403
- end
1404
-
1405
- begin
1406
- lockfile.lock
1407
- res = yield lockfile
1408
- rescue Lockfile::StolenLockError
1409
- unlock = false
1410
- rescue KeepLocked
1411
- unlock = false
1412
- res = $!.payload
1413
- ensure
1414
- if unlock and lockfile.locked?
1415
- lockfile.unlock
1416
- end
1417
- end
1418
-
1419
- res
1420
- end
1421
-
1422
-
1423
- LOCK_REPO_SERIALIZER=Marshal
1424
- def self.lock_in_repo(repo, key, *args)
1425
- return yield file, *args if repo.nil? or key.nil?
1426
-
1427
- lock_key = "lock-" << key
1428
-
1429
- begin
1430
- if repo[lock_key] and
1431
- Misc.hostname == (info = LOCK_REPO_SERIALIZER.load(repo[lock_key]))["host"] and
1432
- info["pid"] and not Misc.pid_exists?(info["pid"])
1433
-
1434
- Log.info("Removing lockfile: #{lock_key}. This pid #{Process.pid}. Content: #{info.inspect}")
1435
- repo.out lock_key
1436
- end
1437
- rescue
1438
- Log.warn("Error checking lockfile #{lock_key}: #{$!.message}. Removing. Content: #{begin repo[lock_key] rescue "Could not open file" end}")
1439
- repo.out lock_key if repo.include? lock_key
1440
- end
1441
-
1442
- while repo[lock_key]
1443
- sleep 1
1444
- end
1445
-
1446
- repo[lock_key] = LOCK_REPO_SERIALIZER.dump({:hostname => Misc.hostname, :pid => Process.pid})
1447
-
1448
- res = yield lock_key, *args
1449
-
1450
- repo.delete lock_key
1451
-
1452
- res
1453
- end
1454
521
 
1455
522
  def self.common_path(dir, file)
1456
523
  file = File.expand_path file
@@ -1479,32 +546,6 @@ end
1479
546
  res
1480
547
  end
1481
548
 
1482
- def self.to_utf8(string)
1483
- string.encode("UTF-16BE", :invalid => :replace, :undef => :replace, :replace => "?").encode('UTF-8')
1484
- end
1485
-
1486
- def self.fixutf8(string)
1487
- return nil if string.nil?
1488
- return string if (string.respond_to? :valid_encoding? and string.valid_encoding?) or
1489
- (string.respond_to? :valid_encoding and string.valid_encoding)
1490
-
1491
- if string.respond_to?(:encode)
1492
- string.encode("UTF-16BE", :invalid => :replace, :undef => :replace, :replace => "?").encode('UTF-8')
1493
- else
1494
- require 'iconv'
1495
- @@ic ||= Iconv.new('UTF-8//IGNORE', 'UTF-8')
1496
- @@ic.iconv(string)
1497
- end
1498
- end
1499
-
1500
- def self.fixascii(string)
1501
- if string.respond_to?(:encode)
1502
- self.fixutf8(string).encode("ASCII-8BIT")
1503
- else
1504
- string
1505
- end
1506
- end
1507
-
1508
549
  def self.sensiblewrite(path, content = nil, &block)
1509
550
  return if File.exists? path
1510
551
  tmp_path = path + '.sensible_write'
@@ -1556,69 +597,6 @@ end
1556
597
  new_options
1557
598
  end
1558
599
 
1559
- def self.digest(text)
1560
- Digest::MD5.hexdigest(text)
1561
- end
1562
-
1563
- HASH2MD5_MAX_STRING_LENGTH = 1000
1564
- HASH2MD5_MAX_ARRAY_LENGTH = 100
1565
- def self.hash2md5(hash)
1566
- str = ""
1567
- keys = hash.keys
1568
- keys = keys.clean_annotations if keys.respond_to? :clean_annotations
1569
- keys = keys.sort_by{|k| k.to_s}
1570
-
1571
- if hash.respond_to? :unnamed
1572
- unnamed = hash.unnamed
1573
- hash.unnamed = true
1574
- end
1575
- keys.each do |k|
1576
- next if k == :monitor or k == "monitor" or k == :in_situ_persistence or k == "in_situ_persistence"
1577
- v = hash[k]
1578
- case
1579
- when TrueClass === v
1580
- str << k.to_s << "=>true"
1581
- when FalseClass === v
1582
- str << k.to_s << "=>false"
1583
- when Hash === v
1584
- str << k.to_s << "=>" << hash2md5(v)
1585
- when Symbol === v
1586
- str << k.to_s << "=>" << v.to_s
1587
- when (String === v and v.length > HASH2MD5_MAX_STRING_LENGTH)
1588
- str << k.to_s << "=>" << v[0..HASH2MD5_MAX_STRING_LENGTH] << "; #{ v.length }"
1589
- when String === v
1590
- str << k.to_s << "=>" << v
1591
- when (Array === v and v.length > HASH2MD5_MAX_ARRAY_LENGTH)
1592
- str << k.to_s << "=>[" << v[0..HASH2MD5_MAX_ARRAY_LENGTH] * "," << "; #{ v.length }]"
1593
- when TSV::Parser === v
1594
- str << remove_long_items(v)
1595
- when Array === v
1596
- str << k.to_s << "=>[" << v * "," << "]"
1597
- when File === v
1598
- str << k.to_s << "=>[File:" << v.path << "]"
1599
- else
1600
- v_ins = v.inspect
1601
-
1602
- case
1603
- when v_ins =~ /:0x0/
1604
- str << k.to_s << "=>" << v_ins.sub(/:0x[a-f0-9]+@/,'')
1605
- else
1606
- str << k.to_s << "=>" << v_ins
1607
- end
1608
-
1609
- end
1610
-
1611
- str << "_" << hash2md5(v.info) if defined? Annotated and Annotated === v
1612
- end
1613
- hash.unnamed = unnamed if hash.respond_to? :unnamed
1614
-
1615
- if str.empty?
1616
- ""
1617
- else
1618
- digest(str)
1619
- end
1620
- end
1621
-
1622
600
  def self.process_options(hash, *keys)
1623
601
  if keys.length == 1
1624
602
  hash.include?(keys.first.to_sym) ? hash.delete(keys.first.to_sym) : hash.delete(keys.first.to_s)
@@ -1815,7 +793,6 @@ end
1815
793
  chunks
1816
794
  end
1817
795
 
1818
-
1819
796
  def self.append_zipped(current, new)
1820
797
  current.each do |v|
1821
798
  n = new.shift
@@ -1832,145 +809,6 @@ end
1832
809
  return [] if array.empty? or (first = array.first).nil?
1833
810
  first.zip(*array[1..-1])
1834
811
  end
1835
-
1836
- def self.camel_case(string)
1837
- return string if string !~ /_/ && string =~ /[A-Z]+.*/
1838
- string.split(/_|(\d+)/).map{|e|
1839
- (e =~ /^[A-Z]{2,}$/ ? e : e.capitalize)
1840
- }.join
1841
- end
1842
-
1843
- def self.camel_case_lower(string)
1844
- string.split('_').inject([]){ |buffer,e|
1845
- buffer.push(buffer.empty? ? e.downcase : (e =~ /^[A-Z]{2,}$/ ? e : e.capitalize))
1846
- }.join
1847
- end
1848
-
1849
- def self.snake_case(string)
1850
- return nil if string.nil?
1851
- string = string.to_s if Symbol === string
1852
- string.
1853
- gsub(/([A-Z]{2,})([A-Z][a-z])/,'\1_\2').
1854
- gsub(/([a-z])([A-Z])/,'\1_\2').
1855
- gsub(/\s/,'_').gsub(/[^\w_]/, '').
1856
- split("_").collect{|p| p.match(/[A-Z]{2,}/) ? p : p.downcase } * "_"
1857
- end
1858
-
1859
- # source: https://gist.github.com/ekdevdes/2450285
1860
- # author: Ethan Kramer (https://github.com/ekdevdes)
1861
- def self.humanize(value, options = {})
1862
- if options.empty?
1863
- options[:format] = :sentence
1864
- end
1865
-
1866
- values = []
1867
- values = value.split('_')
1868
- values.each_index do |index|
1869
- # lower case each item in array
1870
- # Miguel Vazquez edit: Except for acronyms
1871
- values[index].downcase! unless values[index].match(/[a-zA-Z][A-Z]/)
1872
- end
1873
- if options[:format] == :allcaps
1874
- values.each do |value|
1875
- value.capitalize!
1876
- end
1877
-
1878
- if options.empty?
1879
- options[:seperator] = " "
1880
- end
1881
-
1882
- return values.join " "
1883
- end
1884
-
1885
- if options[:format] == :class
1886
- values.each do |value|
1887
- value.capitalize!
1888
- end
1889
-
1890
- return values.join ""
1891
- end
1892
-
1893
- if options[:format] == :sentence
1894
- values[0].capitalize! unless values[0].match(/[a-zA-Z][A-Z]/)
1895
-
1896
- return values.join " "
1897
- end
1898
-
1899
- if options[:format] == :nocaps
1900
- return values.join " "
1901
- end
1902
- end
1903
- end
1904
-
1905
- #TODO: REMOVE
1906
- #class RBBTError < StandardError
1907
- # attr_accessor :info
1908
- #
1909
- # alias old_to_s to_s
1910
- # def to_s
1911
- # str = old_to_s.dup
1912
- # if info
1913
- # str << "\n" << "Additional Info:\n---\n" << info << "---"
1914
- # end
1915
- # str
1916
- # end
1917
- #end
1918
-
1919
- module IndiferentHash
1920
-
1921
- def self.setup(hash)
1922
- hash.extend IndiferentHash
1923
- end
1924
-
1925
- def merge(other)
1926
- new = self.dup
1927
- IndiferentHash.setup(new)
1928
- other.each do |k,value|
1929
- new.delete k
1930
- new[k] = value
1931
- end
1932
- new
1933
- end
1934
-
1935
- def [](key)
1936
- res = super(key)
1937
- return res unless res.nil?
1938
-
1939
- case key
1940
- when Symbol, Module
1941
- super(key.to_s)
1942
- when String
1943
- super(key.to_sym)
1944
- else
1945
- super(key)
1946
- end
1947
- end
1948
-
1949
- def values_at(*key_list)
1950
- key_list.inject([]){|acc,key| acc << self[key]}
1951
- end
1952
-
1953
- def include?(key)
1954
- case key
1955
- when Symbol, Module
1956
- super(key) || super(key.to_s)
1957
- when String
1958
- super(key) || super(key.to_sym)
1959
- else
1960
- super(key)
1961
- end
1962
- end
1963
-
1964
- def delete(key)
1965
- case key
1966
- when Symbol, Module
1967
- super(key) || super(key.to_s)
1968
- when String
1969
- super(key) || super(key.to_sym)
1970
- else
1971
- super(key)
1972
- end
1973
- end
1974
812
  end
1975
813
 
1976
814
  module PDF2Text