rbbt-util 5.11.4 → 5.11.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1b14c182984b18f946f50ba4d5e07fafec05f8b5
4
- data.tar.gz: f6cdb39396d9bd4871e74ab0eb63015373e415d9
3
+ metadata.gz: 6ec4bc56a7d0d260d93be40ee61d23c08f58d5a8
4
+ data.tar.gz: 6f51a747302e936ed1e5953e929e5120fa5caa89
5
5
  SHA512:
6
- metadata.gz: a51d7e12fb6fa32385092372830f5f3349fd697c0263f561c287cf17860b604c9544254444d0148c214381639f76e4de396b6d32c1d4a3cbf0e8bd631b45f3c2
7
- data.tar.gz: d56ed3b1b3a048d5a68f1ad5b17f1548ed993387ff5fc7313b79716faeb31a96e5e7aa289b3a0d7821aeaa60ffbe2767ac771f9aba57c31b990b94186b1a77dc
6
+ metadata.gz: db5a97b2787f182444de536059beb027d038573c0c3ac3196ec44ce18d2561fa3204a38229d3054e5c92594309fc4a488cc6e9116e370c4f2d383b28a8ed9e71
7
+ data.tar.gz: 8a4fc523ae9266ceea9964f26f85834a8f5c0e0899a523bb938e2e813da191651215ec356432ee7b467f39b470328b32dda911d1b2cd9bd757fa467ad14d54ce
@@ -235,7 +235,8 @@ module TSV
235
235
  store.add *value
236
236
  when IO
237
237
  return if value.nil?
238
- store.puts value.strip
238
+ value.strip!
239
+ store.puts value
239
240
  else
240
241
  store << value
241
242
  end
data/lib/rbbt/util/cmd.rb CHANGED
@@ -72,7 +72,7 @@ module CMD
72
72
  def read(*args)
73
73
  data = original_read(*args) unless self.closed? or self.eof?
74
74
 
75
- self.close if self.eof? and not self.closed?
75
+ self.close if not self.closed? and self.eof?
76
76
 
77
77
  data || ""
78
78
  end
@@ -5,6 +5,17 @@ require 'cgi'
5
5
  require 'zlib'
6
6
  require 'rubygems/package'
7
7
  require 'rbbt/util/tar'
8
+ require 'rbbt/util/misc/exceptions'
9
+ require 'rbbt/util/misc/concurrent_stream'
10
+ require 'rbbt/util/misc/indiferent_hash'
11
+ require 'rbbt/util/misc/pipes'
12
+ require 'rbbt/util/misc/format'
13
+ require 'rbbt/util/misc/omics'
14
+ require 'rbbt/util/misc/inspect'
15
+ require 'rbbt/util/misc/math'
16
+ require 'rbbt/util/misc/development'
17
+ require 'rbbt/util/misc/lock'
18
+
8
19
 
9
20
  class Hash
10
21
  def chunked_values_at(keys, max = 5000)
@@ -16,372 +27,14 @@ class Hash
16
27
  end
17
28
  end
18
29
 
19
- class ParameterException < Exception; end
20
- class FieldNotFoundError < Exception;end
21
- class Aborted < Exception; end
22
- class TryAgain < Exception; end
23
- class ClosedStream < Exception; end
24
- class KeepLocked < Exception
25
- attr_accessor :payload
26
- def initialize(payload)
27
- @payload = payload
28
- end
29
- end
30
-
31
30
  module LaterString
32
31
  def to_s
33
32
  yield
34
33
  end
35
34
  end
36
35
 
37
- module ConcurrentStream
38
- attr_accessor :threads, :pids, :callback, :abort_callback, :filename, :joined
39
-
40
- def joined?
41
- @joined
42
- end
43
-
44
- def join
45
-
46
- if @threads and @threads.any?
47
- @threads.each do |t|
48
- t.join
49
- end
50
- @threads = []
51
- end
52
-
53
- if @pids and @pids.any?
54
- @pids.each do |pid|
55
- begin
56
- Process.waitpid(pid, Process::WUNTRACED)
57
- raise "Error joining process #{pid} in #{self.inspect}" unless $?.success?
58
- rescue Errno::ECHILD
59
- end
60
- end
61
- @pids = []
62
- end
63
-
64
- if @callback and not joined?
65
- @callback.call
66
- @callback = nil
67
- end
68
-
69
- @joined = true
70
- end
71
-
72
- def abort
73
- @threads.each{|t| t.raise Aborted.new } if @threads
74
- @threads.each{|t| t.join } if @threads
75
- @pids.each{|pid| Process.kill :INT, pid } if @pids
76
- @pids.each{|pid| Process.waitpid pid } if @pids
77
- @abort_callback.call if @abort_callback
78
- @abort_callback = nil
79
- end
80
-
81
- def self.setup(stream, options = {}, &block)
82
- threads, pids, callback, filename = Misc.process_options options, :threads, :pids, :callback, :filename
83
- stream.extend ConcurrentStream unless ConcurrentStream === stream
84
-
85
- stream.threads ||= []
86
- stream.pids ||= []
87
- stream.threads.concat(Array === threads ? threads : [threads]) unless threads.nil?
88
- stream.pids.concat(Array === pids ? pids : [pids]) unless pids.nil? or pids.empty?
89
-
90
- callback = block if block_given?
91
- if stream.callback and callback
92
- old_callback = stream.callback
93
- stream.callback = Proc.new do
94
- old_callback.call
95
- callback.call
96
- end
97
- else
98
- stream.callback = callback
99
- end
100
-
101
- stream.filename = filename unless filename.nil?
102
-
103
- stream
104
- end
105
- end
106
-
107
-
108
- Lockfile.refresh = false if ENV["RBBT_NO_LOCKFILE_REFRESH"] == "true"
109
36
  module Misc
110
37
 
111
-
112
- PIPE_MUTEX = Mutex.new
113
-
114
- OPEN_PIPE_IN = []
115
- def self.pipe
116
- OPEN_PIPE_IN.delete_if{|pipe| pipe.closed? }
117
- PIPE_MUTEX.synchronize do
118
- sout, sin = IO.pipe
119
- OPEN_PIPE_IN << sin
120
-
121
- [sout, sin]
122
- end
123
- end
124
-
125
- def self.release_pipes(*pipes)
126
- PIPE_MUTEX.synchronize do
127
- pipes.flatten.each do |pipe|
128
- pipe.close unless pipe.closed?
129
- end
130
- end
131
- end
132
-
133
-
134
- def self.purge_pipes(*save)
135
- PIPE_MUTEX.synchronize do
136
- OPEN_PIPE_IN.each do |pipe|
137
- next if save.include? pipe
138
- pipe.close unless pipe.closed?
139
- end
140
- end
141
- end
142
-
143
- def self.open_pipe(do_fork = false, close = true)
144
- raise "No block given" unless block_given?
145
-
146
- sout, sin = Misc.pipe
147
-
148
- if do_fork
149
- parent_pid = Process.pid
150
- pid = Process.fork {
151
- purge_pipes(sin)
152
- sout.close
153
- begin
154
- yield sin
155
- rescue
156
- Log.exception $!
157
- Process.kill :INT, parent_pid
158
- Kernel.exit! -1
159
- ensure
160
- sin.close if close and not sin.closed?
161
- end
162
- Kernel.exit! 0
163
- }
164
- sin.close #if close
165
- ConcurrentStream.setup sout, :pids => [pid]
166
- else
167
- thread = Thread.new(Thread.current) do |parent|
168
- begin
169
- yield sin
170
- rescue
171
- parent.raise $!
172
- ensure
173
- sin.close if close and not sin.closed?
174
- end
175
- end
176
- ConcurrentStream.setup sout, :threads => [thread]
177
- end
178
- sout
179
- end
180
-
181
- def self.tee_stream_fork(stream)
182
- stream_out1, stream_in1 = Misc.pipe
183
- stream_out2, stream_in2 = Misc.pipe
184
-
185
- splitter_pid = Process.fork do
186
- Misc.purge_pipes(stream_in1, stream_in2)
187
- stream_out1.close
188
- stream_out2.close
189
- begin
190
- filename = stream.respond_to?(:filename)? stream.filename : nil
191
- skip1 = skip2 = false
192
- while block = stream.read(2048)
193
- begin stream_in1.write block; rescue Exception; Log.exception $!; skip1 = true end unless skip1
194
- begin stream_in2.write block; rescue Exception; Log.exception $!; skip2 = true end unless skip2
195
- end
196
- raise "Error writing in stream_in2" if skip2
197
- raise "Error writing in stream_in2" if skip2
198
- rescue Aborted
199
- stream.abort if stream.respond_to? :abort
200
- raise $!
201
- rescue IOError
202
- Log.exception $!
203
- rescue Exception
204
- Log.exception $!
205
- ensure
206
- stream_in1.close
207
- stream_in2.close
208
- stream.join if stream.respond_to? :join
209
- end
210
- end
211
- stream.close
212
- stream_in1.close
213
- stream_in2.close
214
- #stream.join if stream.respond_to? :join
215
-
216
- ConcurrentStream.setup stream_out1, :pids => [splitter_pid]
217
- ConcurrentStream.setup stream_out2, :pids => [splitter_pid]
218
-
219
- [stream_out1, stream_out2]
220
- end
221
-
222
- def self.tee_stream_thread(stream)
223
- stream_out1, stream_in1 = Misc.pipe
224
- stream_out2, stream_in2 = Misc.pipe
225
-
226
- splitter_thread = Thread.new(Thread.current, stream_in1, stream_in2) do |parent,stream_in1,stream_in2|
227
- begin
228
- filename = stream.respond_to?(:filename)? stream.filename : nil
229
- skip1 = skip2 = false
230
- while block = stream.read(2048)
231
- begin stream_in1.write block; rescue Exception; Aborted === $! ? raise($!): Log.exception($!); skip1 = true end unless skip1
232
- begin stream_in2.write block; rescue Exception; Aborted === $! ? raise($!): Log.exception($!); skip2 = true end unless skip2
233
- end
234
- rescue Aborted
235
- stream.abort if stream.respond_to? :abort
236
- raise $!
237
- rescue IOError
238
- Log.exception $!
239
- rescue Exception
240
- Log.exception $!
241
- parent.raise $!
242
- ensure
243
- stream_in1.close
244
- stream_in2.close
245
- stream.join if stream.respond_to? :join
246
- end
247
- end
248
-
249
- ConcurrentStream.setup stream_out1, :threads => splitter_thread
250
- ConcurrentStream.setup stream_out2, :threads => splitter_thread
251
-
252
- [stream_out1, stream_out2]
253
- end
254
-
255
- class << self
256
- alias tee_stream tee_stream_thread
257
- end
258
-
259
- def self.read_full_stream(io)
260
- str = ""
261
- begin
262
- while block = io.read(2048)
263
- str << block
264
- end
265
- rescue
266
- io.abort if io.respond_to? :abort
267
- ensure
268
- io.join if io.respond_to? :join
269
- io.close if io.respond_to? :close
270
- end
271
- str
272
- end
273
-
274
- def self.consume_stream(io)
275
- begin
276
- Thread.pass while block = io.read(2048)
277
- rescue
278
- io.abort if io.respond_to? :abort
279
- ensure
280
- io.join if io.respond_to? :join
281
- io.close if io.respond_to? :close
282
- end
283
- end
284
-
285
- def self.format_paragraph(text, size = 80, indent = 0, offset = 0)
286
- i = 0
287
- re = /((?:\n\s*\n\s*)|(?:\n\s*(?=\*)))/
288
- text.split(re).collect do |paragraph|
289
- i += 1
290
- str = if i % 2 == 1
291
- words = paragraph.gsub(/\s+/, "\s").split(" ")
292
- lines = []
293
- line = " "*offset
294
- word = words.shift
295
- while word
296
- word = word[0..size-indent-offset-4] + '...' if word.length >= size - indent - offset
297
- while word and Log.uncolor(line).length + Log.uncolor(word).length <= size - indent
298
- line << word << " "
299
- word = words.shift
300
- end
301
- lines << ((" " * indent) << line[0..-2])
302
- line = ""
303
- end
304
- (lines * "\n")
305
- else
306
- paragraph
307
- end
308
- offset = 0
309
- str
310
- end*""
311
- end
312
-
313
- def self.format_definition_list_item(dt, dd, size = 80, indent = 20, color = :yellow)
314
- dd = "" if dd.nil?
315
- dt = dt.to_s + ":" unless dd.empty?
316
- dt = Log.color color, dt if color
317
- len = Log.uncolor(dt).length
318
-
319
- if indent < 0
320
- text = format_paragraph(dd, size, indent.abs+1, 0)
321
- text = dt << "\n" << text
322
- else
323
- offset = len - indent
324
- offset = 0 if offset < 0
325
- text = format_paragraph(dd, size, indent.abs+1, offset)
326
- text[0..len-1] = dt
327
- end
328
- text
329
- end
330
-
331
- def self.format_definition_list(defs, size = 80, indent = 20, color = :yellow)
332
- entries = []
333
- defs.each do |dt,dd|
334
- text = format_definition_list_item(dt,dd,size,indent,color)
335
- entries << text
336
- end
337
- entries * "\n\n"
338
- end
339
-
340
- def self.read_stream(stream, size)
341
- str = nil
342
- Thread.pass while IO.select([stream],nil,nil,1).nil?
343
- while not str = stream.read(size)
344
- IO.select([stream],nil,nil,1)
345
- Thread.pass
346
- raise ClosedStream if stream.eof?
347
- end
348
-
349
- while str.length < size
350
- raise ClosedStream if stream.eof?
351
- IO.select([stream],nil,nil,1)
352
- if new = stream.read(size-str.length)
353
- str << new
354
- end
355
- end
356
- str
357
- end
358
-
359
- def self.read_stream(stream, size)
360
- str = nil
361
- Thread.pass while IO.select([stream],nil,nil,1).nil?
362
- while not str = stream.read(size)
363
- IO.select([stream],nil,nil,1)
364
- Thread.pass
365
- raise ClosedStream if stream.eof?
366
- end
367
-
368
- while str.length < size
369
- raise ClosedStream if stream.eof?
370
- IO.select([stream],nil,nil,1)
371
- if new = stream.read(size-str.length)
372
- str << new
373
- end
374
- end
375
- str
376
- end
377
- def self._read_stream(stream, size)
378
- str = ""
379
- while (len=str.length) < size
380
- str << (stream.read(size-len) or break)
381
- end
382
- str
383
- end
384
-
385
38
  def self.parse_cmd_params(str)
386
39
  return str if Array === str
387
40
  str.scan(/
@@ -390,39 +43,6 @@ module Misc
390
43
  /x).flatten.compact
391
44
  end
392
45
 
393
- def self.correct_icgc_mutation(pos, ref, mut_str)
394
- mut = mut_str
395
- mut = '-' * (mut_str.length - 1) if mut =~/^-[ACGT]/
396
- mut = "+" << mut if ref == '-'
397
- [pos, [mut]]
398
- end
399
-
400
- def self.correct_vcf_mutation(pos, ref, mut_str)
401
- muts = mut_str.nil? ? [] : mut_str.split(',')
402
-
403
- while ref.length >= 1 and muts.reject{|m| m[0] == ref[0]}.empty?
404
- ref = ref[1..-1]
405
- pos = pos + 1
406
- muts = muts.collect{|m| m[1..-1]}
407
- end
408
-
409
- muts = muts.collect do |m|
410
- case
411
- when ref.empty?
412
- "+" << m
413
- when (m.length < ref.length and (m.empty? or ref.index(m)))
414
- "-" * (ref.length - m.length)
415
- when (ref.length == 1 and m.length == 1)
416
- m
417
- else
418
- Log.debug{"Cannot understand: #{[ref, m]} (#{ muts })"}
419
- '-' * ref.length + m
420
- end
421
- end
422
-
423
- [pos, muts]
424
- end
425
-
426
46
  def self.pid_exists?(pid)
427
47
  return false if pid.nil?
428
48
  begin
@@ -433,25 +53,6 @@ module Misc
433
53
  end
434
54
  end
435
55
 
436
- COLOR_LIST = %w(#BC80BD #CCEBC5 #FFED6F #8DD3C7 #FFFFB3 #BEBADA #FB8072 #80B1D3 #FDB462 #B3DE69 #FCCDE5 #D9D9D9)
437
-
438
- def self.colors_for(list)
439
- unused = COLOR_LIST.dup
440
-
441
- used = {}
442
- colors = list.collect do |elem|
443
- if used.include? elem
444
- used[elem]
445
- else
446
- color = unused.shift
447
- used[elem]=color
448
- color
449
- end
450
- end
451
-
452
- [colors, used]
453
- end
454
-
455
56
  def self.collapse_ranges(ranges)
456
57
  processed = []
457
58
  last = nil
@@ -527,10 +128,6 @@ module Misc
527
128
  end
528
129
  end
529
130
 
530
- Log2Multiplier = 1.0 / Math.log(2.0)
531
- def self.log2(x)
532
- Math.log(x) * Log2Multiplier
533
- end
534
131
 
535
132
  def self.prepare_entity(entity, field, options = {})
536
133
  return entity unless defined? Entity
@@ -555,116 +152,6 @@ module Misc
555
152
  entity
556
153
  end
557
154
 
558
- ARRAY_MAX_LENGTH = 1000
559
- STRING_MAX_LENGTH = ARRAY_MAX_LENGTH * 10
560
-
561
- def self.sanitize_filename(filename, length = 254)
562
- if filename.length > length
563
- if filename =~ /(\..{2,9})$/
564
- extension = $1
565
- else
566
- extension = ''
567
- end
568
-
569
- post_fix = "--#{filename.length}@#{length}_#{Misc.digest(filename)[0..4]}" + extension
570
-
571
- filename = filename[0..(length - post_fix.length - 1)] << post_fix
572
- else
573
- filename
574
- end
575
- filename
576
- end
577
-
578
- def self.fingerprint(obj)
579
- case obj
580
- when nil
581
- "nil"
582
- when (defined? Step and Step)
583
- obj.path || Misc.fingerprint([obj.task.name, obj.inputs])
584
- when TrueClass
585
- "true"
586
- when FalseClass
587
- "false"
588
- when Symbol
589
- ":" << obj.to_s
590
- when String
591
- if obj.length > 100
592
- "'" << obj.slice(0,20) << "<...#{obj.length}...>" << obj.slice(-10,10) << " " << "'"
593
- else
594
- "'" << obj << "'"
595
- end
596
- when (defined? AnnotatedArray and AnnotatedArray)
597
- "<A: #{fingerprint Annotated.purge(obj)} #{fingerprint obj.info}>"
598
- when (defined? TSV and TSV::Parser)
599
- "<TSVStream:" + obj.filename + "--" << Misc.fingerprint(obj.options) << ">"
600
- when IO
601
- "<IO:" + (obj.respond_to?(:filename) ? obj.filename : obj.inspect) + ">"
602
- when File
603
- "<File:" + obj.path + ">"
604
- when Array
605
- if (length = obj.length) > 10
606
- "[#{length}--" << (obj.values_at(0,1, length / 2, -2, -1).collect{|e| fingerprint(e)} * ",") << "]"
607
- else
608
- "[" << (obj.collect{|e| fingerprint(e) } * ",") << "]"
609
- end
610
- when (defined? TSV and TSV)
611
- obj.with_unnamed do
612
- "TSV:{"<< fingerprint(obj.all_fields|| []).inspect << ";" << fingerprint(obj.keys).inspect << "}"
613
- end
614
- when Hash
615
- if obj.length > 10
616
- "H:{"<< fingerprint(obj.keys) << ";" << fingerprint(obj.values) << "}"
617
- else
618
- new = "{"
619
- obj.each do |k,v|
620
- new << k.to_s << '=>' << fingerprint(v) << ' '
621
- end
622
- if new.length > 1
623
- new[-1] = "}"
624
- else
625
- new << '}'
626
- end
627
- new
628
- end
629
- else
630
- obj.to_s
631
- end
632
- end
633
-
634
-
635
- def self.remove_long_items(obj)
636
- case
637
- when IO === obj
638
- remove_long_items("IO: " + obj.filename)
639
- when obj.respond_to?(:path)
640
- remove_long_items("File: " + obj.path)
641
- when TSV::Parser === obj
642
- remove_long_items("TSV Stream: " + obj.filename + " -- " << Misc.fingerprint(obj.options))
643
- when TSV === obj
644
- remove_long_items((obj.all_fields || []) + obj.keys.sort)
645
- when (Array === obj and obj.length > ARRAY_MAX_LENGTH)
646
- remove_long_items(obj[0..ARRAY_MAX_LENGTH-2] << "TRUNCATED at #{ ARRAY_MAX_LENGTH } (#{obj.length})")
647
- when (Hash === obj and obj.length > ARRAY_MAX_LENGTH)
648
- remove_long_items(obj.collect.compact[0..ARRAY_MAX_LENGTH-2] << ["TRUNCATED", "at #{ ARRAY_MAX_LENGTH } (#{obj.length})"])
649
- when (String === obj and obj.length > STRING_MAX_LENGTH)
650
- obj[0..STRING_MAX_LENGTH-1] << " TRUNCATED at #{STRING_MAX_LENGTH} (#{obj.length})"
651
- when Hash === obj
652
- new = {}
653
- obj.each do |k,v|
654
- new[k] = remove_long_items(v)
655
- end
656
- new
657
- when Array === obj
658
- obj.collect do |e| remove_long_items(e) end
659
- else
660
- obj
661
- end
662
- end
663
-
664
- #def self.remove_long_items(obj)
665
- # return fingerprint(obj)
666
- #end
667
-
668
155
  def self.ensembl_server(organism)
669
156
  date = organism.split("/")[1]
670
157
  if date.nil?
@@ -674,18 +161,6 @@ module Misc
674
161
  end
675
162
  end
676
163
 
677
- def self.filename?(string)
678
- String === string and string.length > 0 and string.length < 250 and File.exists?(string)
679
- end
680
-
681
- def self.max(list)
682
- max = nil
683
- list.each do |v|
684
- next if v.nil?
685
- max = v if max.nil? or v > max
686
- end
687
- max
688
- end
689
164
 
690
165
  def self.google_venn(list1, list2, list3, name1 = nil, name2 = nil, name3 = nil, total = nil)
691
166
  name1 ||= "list 1"
@@ -710,20 +185,6 @@ module Misc
710
185
  url = "https://chart.googleapis.com/chart?cht=v&chs=500x300&chd=t:#{sizes * ","}&chco=FF6342,ADDE63,63C6DE,FFFFFF&chdl=#{label}"
711
186
  end
712
187
 
713
- def self.sum(list)
714
- list.compact.inject(0.0){|acc,e| acc += e}
715
- end
716
-
717
- def self.mean(list)
718
- sum(list) / list.compact.length
719
- end
720
-
721
- def self.sd(list)
722
- return nil if list.length < 3
723
- mean = mean(list)
724
- Math.sqrt(list.compact.inject(0.0){|acc,e| d = e - mean; acc += d * d}) / (list.compact.length - 1)
725
- end
726
-
727
188
  def self.consolidate(list)
728
189
  list.inject(nil){|acc,e|
729
190
  if acc.nil?
@@ -795,208 +256,6 @@ end
795
256
  proportions
796
257
  end
797
258
 
798
- IUPAC2BASE = {
799
- "A" => ["A"],
800
- "C" => ["C"],
801
- "G" => ["G"],
802
- "T" => ["T"],
803
- "U" => ["U"],
804
- "R" => "A or G".split(" or "),
805
- "Y" => "C or T".split(" or "),
806
- "S" => "G or C".split(" or "),
807
- "W" => "A or T".split(" or "),
808
- "K" => "G or T".split(" or "),
809
- "M" => "A or C".split(" or "),
810
- "B" => "C or G or T".split(" or "),
811
- "D" => "A or G or T".split(" or "),
812
- "H" => "A or C or T".split(" or "),
813
- "V" => "A or C or G".split(" or "),
814
- "N" => %w(A C T G),
815
- }
816
-
817
- BASE2COMPLEMENT = {
818
- "A" => "T",
819
- "C" => "G",
820
- "G" => "C",
821
- "T" => "A",
822
- "U" => "A",
823
- }
824
-
825
- THREE_TO_ONE_AA_CODE = {
826
- "ala" => "A",
827
- "arg" => "R",
828
- "asn" => "N",
829
- "asp" => "D",
830
- "cys" => "C",
831
- "glu" => "E",
832
- "gln" => "Q",
833
- "gly" => "G",
834
- "his" => "H",
835
- "ile" => "I",
836
- "leu" => "L",
837
- "lys" => "K",
838
- "met" => "M",
839
- "phe" => "F",
840
- "pro" => "P",
841
- "ser" => "S",
842
- "thr" => "T",
843
- "trp" => "W",
844
- "tyr" => "Y",
845
- "val" => "V"
846
- }
847
- CODON_TABLE = {
848
- "ATT" => "I",
849
- "ATC" => "I",
850
- "ATA" => "I",
851
- "CTT" => "L",
852
- "CTC" => "L",
853
- "CTA" => "L",
854
- "CTG" => "L",
855
- "TTA" => "L",
856
- "TTG" => "L",
857
- "GTT" => "V",
858
- "GTC" => "V",
859
- "GTA" => "V",
860
- "GTG" => "V",
861
- "TTT" => "F",
862
- "TTC" => "F",
863
- "ATG" => "M",
864
- "TGT" => "C",
865
- "TGC" => "C",
866
- "GCT" => "A",
867
- "GCC" => "A",
868
- "GCA" => "A",
869
- "GCG" => "A",
870
- "GGT" => "G",
871
- "GGC" => "G",
872
- "GGA" => "G",
873
- "GGG" => "G",
874
- "CCT" => "P",
875
- "CCC" => "P",
876
- "CCA" => "P",
877
- "CCG" => "P",
878
- "ACT" => "T",
879
- "ACC" => "T",
880
- "ACA" => "T",
881
- "ACG" => "T",
882
- "TCT" => "S",
883
- "TCC" => "S",
884
- "TCA" => "S",
885
- "TCG" => "S",
886
- "AGT" => "S",
887
- "AGC" => "S",
888
- "TAT" => "Y",
889
- "TAC" => "Y",
890
- "TGG" => "W",
891
- "CAA" => "Q",
892
- "CAG" => "Q",
893
- "AAT" => "N",
894
- "AAC" => "N",
895
- "CAT" => "H",
896
- "CAC" => "H",
897
- "GAA" => "E",
898
- "GAG" => "E",
899
- "GAT" => "D",
900
- "GAC" => "D",
901
- "AAA" => "K",
902
- "AAG" => "K",
903
- "CGT" => "R",
904
- "CGC" => "R",
905
- "CGA" => "R",
906
- "CGG" => "R",
907
- "AGA" => "R",
908
- "AGG" => "R",
909
- "TAA" => "*",
910
- "TAG" => "*",
911
- "TGA" => "*",
912
- }
913
-
914
- #def self.fast_align(reference, sequence)
915
- #
916
- #require 'narray'
917
- # init_gap = -1
918
- # gap = -2
919
- # diff = -2
920
- # same = 2
921
-
922
- # cols = sequence.length + 1
923
- # rows = reference.length + 1
924
-
925
- # a = NArray.int(cols, rows)
926
-
927
- # for spos in 0..cols-1 do a[spos, 0] = spos * init_gap end
928
- # for rpos in 0..rows-1 do a[0, rpos] = rpos * init_gap end
929
-
930
- # spos = 1
931
- # while spos < cols do
932
- # rpos = 1
933
- # while rpos < rows do
934
- # match = a[spos-1,rpos-1] + (sequence[spos-1] != reference[rpos-1] ? diff : same)
935
- # skip_sequence = a[spos-1,rpos] + gap
936
- # skip_reference = a[spos,rpos-1] + gap
937
- # a[spos,rpos] = [match, skip_sequence, skip_reference].max
938
- # rpos += 1
939
- # end
940
- # spos += 1
941
- # end
942
-
943
- # start = Misc.max(a[-1,0..rows-1])
944
- # start_pos = a[-1,0..rows-1].to_a.index start
945
-
946
- # ref = ''
947
- # seq = ''
948
- # rpos = start_pos
949
- # spos = cols - 1
950
-
951
- # while spos > 0 and rpos > 0
952
- # score = a[spos,rpos]
953
- # score_match = a[spos-1,rpos-1]
954
- # score_skip_reference = a[spos,rpos-1]
955
- # score_skip_sequence = a[spos-1,rpos]
956
-
957
- # case
958
- # when score == score_match + (sequence[spos-1] != reference[rpos-1] ? diff : same)
959
- # ref << reference[rpos-1]
960
- # seq << sequence[spos-1]
961
- # spos -= 1
962
- # rpos -= 1
963
- # when score == score_skip_reference + gap
964
- # ref << reference[rpos-1]
965
- # seq << '-'
966
- # rpos -= 1
967
- # when score == score_skip_sequence + gap
968
- # seq << sequence[spos-1]
969
- # ref << '-'
970
- # spos -= 1
971
- # else
972
- # raise "stop"
973
- # end
974
- # end
975
-
976
- # while (rpos > 0)
977
- # ref << reference[rpos-1]
978
- # seq = seq << '-'
979
- # rpos -= 1
980
- # end
981
-
982
- # while (spos > 0)
983
- # seq << sequence[spos-1]
984
- # ref = ref + '-'
985
- # spos -= 1
986
- # end
987
- #
988
- # [ref.reverse + reference[start_pos..-1], seq.reverse + '-' * (rows - start_pos - 1)]
989
- #end
990
-
991
- def self.IUPAC_to_base(iupac)
992
- IUPAC2BASE[iupac]
993
- end
994
-
995
- def self.is_filename?(string)
996
- return true if string.respond_to? :exists
997
- return true if String === string and string.length < 265 and File.exists? string
998
- return false
999
- end
1000
259
 
1001
260
  def self.sorted_array_hits(a1, a2)
1002
261
  e1, e2 = a1.shift, a2.shift
@@ -1132,100 +391,6 @@ end
1132
391
  end
1133
392
  end
1134
393
 
1135
- def self.benchmark(repeats = 1, message = nil)
1136
- require 'benchmark'
1137
- res = nil
1138
- begin
1139
- measure = Benchmark.measure do
1140
- repeats.times do
1141
- res = yield
1142
- end
1143
- end
1144
- if message
1145
- puts "#{message }: #{ repeats } repeats"
1146
- else
1147
- puts "Benchmark for #{ repeats } repeats"
1148
- end
1149
- puts measure
1150
- rescue Exception
1151
- puts "Benchmark aborted"
1152
- raise $!
1153
- end
1154
- res
1155
- end
1156
-
1157
- def self.profile_html(options = {})
1158
- require 'ruby-prof'
1159
- RubyProf.start
1160
- begin
1161
- res = yield
1162
- rescue Exception
1163
- puts "Profiling aborted"
1164
- raise $!
1165
- ensure
1166
- result = RubyProf.stop
1167
- printer = RubyProf::MultiPrinter.new(result)
1168
- TmpFile.with_file do |dir|
1169
- FileUtils.mkdir_p dir unless File.exists? dir
1170
- printer.print(:path => dir, :profile => 'profile')
1171
- CMD.cmd("firefox -no-remote '#{ dir }'")
1172
- end
1173
- end
1174
-
1175
- res
1176
- end
1177
-
1178
- def self.profile_graph(options = {})
1179
- require 'ruby-prof'
1180
- RubyProf.start
1181
- begin
1182
- res = yield
1183
- rescue Exception
1184
- puts "Profiling aborted"
1185
- raise $!
1186
- ensure
1187
- result = RubyProf.stop
1188
- #result.eliminate_methods!([/annotated_array_clean_/])
1189
- printer = RubyProf::GraphPrinter.new(result)
1190
- printer.print(STDOUT, options)
1191
- end
1192
-
1193
- res
1194
- end
1195
-
1196
- def self.profile(options = {})
1197
- require 'ruby-prof'
1198
- RubyProf.start
1199
- begin
1200
- res = yield
1201
- rescue Exception
1202
- puts "Profiling aborted"
1203
- raise $!
1204
- ensure
1205
- result = RubyProf.stop
1206
- printer = RubyProf::FlatPrinter.new(result)
1207
- printer.print(STDOUT, options)
1208
- end
1209
-
1210
- res
1211
- end
1212
-
1213
- def self.memprof
1214
- require 'memprof'
1215
- Memprof.start
1216
- begin
1217
- res = yield
1218
- rescue Exception
1219
- puts "Profiling aborted"
1220
- raise $!
1221
- ensure
1222
- Memprof.stop
1223
- print Memprof.stats
1224
- end
1225
-
1226
- res
1227
- end
1228
-
1229
394
  def self.do_once(&block)
1230
395
  return nil if $__did_once
1231
396
  $__did_once = true
@@ -1334,22 +499,6 @@ end
1334
499
  html
1335
500
  end
1336
501
 
1337
- #def self.path_relative_to(basedir, path)
1338
- # path = File.expand_path(path) unless path[0] == "/"
1339
- # basedir = File.expand_path(basedir) unless basedir[0] == "/"
1340
-
1341
- # basedir << "/" unless basedir[-1] == "/"
1342
- # case
1343
- # when path == basedir
1344
- # "."
1345
- # #when path =~ /#{Regexp.quote basedir}\/(.*)/
1346
- # when path.index(basedir) == 0
1347
- # return path[basedir.length..-1]
1348
- # else
1349
- # return nil
1350
- # end
1351
- #end
1352
-
1353
502
  def self.path_relative_to(basedir, path)
1354
503
  path = File.expand_path(path) unless path[0] == "/"
1355
504
  basedir = File.expand_path(basedir) unless basedir[0] == "/"
@@ -1369,88 +518,6 @@ end
1369
518
  @hostanem ||= `hostname`.strip
1370
519
  end
1371
520
 
1372
- LOCK_MUTEX = Mutex.new
1373
- def self.lock(file, unlock = true)
1374
- return yield if file.nil?
1375
- FileUtils.mkdir_p File.dirname(File.expand_path(file)) unless File.exists? File.dirname(File.expand_path(file))
1376
-
1377
- res = nil
1378
-
1379
- lock_path = File.expand_path(file + '.lock')
1380
- lockfile = Lockfile.new(lock_path)
1381
-
1382
- hostname = Misc.hostname
1383
- LOCK_MUTEX.synchronize do
1384
- Misc.insist 2, 0.1 do
1385
- Misc.insist 3, 0.1 do
1386
- begin
1387
- if File.exists? lock_path
1388
- info = Open.open(lock_path){|f| YAML.load(f) }
1389
- raise "No info" unless info
1390
-
1391
- if hostname == info["host"] and not Misc.pid_exists?(info["pid"])
1392
- Log.info("Removing lockfile: #{lock_path}. This pid #{Process.pid}. Content: #{info.inspect}")
1393
- FileUtils.rm lock_path
1394
- end
1395
- end
1396
- rescue Exception
1397
- FileUtils.rm lock_path if File.exists? lock_path
1398
- lockfile = Lockfile.new(lock_path) unless File.exists? lock_path
1399
- raise $!
1400
- end
1401
- end
1402
- end
1403
- end
1404
-
1405
- begin
1406
- lockfile.lock
1407
- res = yield lockfile
1408
- rescue Lockfile::StolenLockError
1409
- unlock = false
1410
- rescue KeepLocked
1411
- unlock = false
1412
- res = $!.payload
1413
- ensure
1414
- if unlock and lockfile.locked?
1415
- lockfile.unlock
1416
- end
1417
- end
1418
-
1419
- res
1420
- end
1421
-
1422
-
1423
- LOCK_REPO_SERIALIZER=Marshal
1424
- def self.lock_in_repo(repo, key, *args)
1425
- return yield file, *args if repo.nil? or key.nil?
1426
-
1427
- lock_key = "lock-" << key
1428
-
1429
- begin
1430
- if repo[lock_key] and
1431
- Misc.hostname == (info = LOCK_REPO_SERIALIZER.load(repo[lock_key]))["host"] and
1432
- info["pid"] and not Misc.pid_exists?(info["pid"])
1433
-
1434
- Log.info("Removing lockfile: #{lock_key}. This pid #{Process.pid}. Content: #{info.inspect}")
1435
- repo.out lock_key
1436
- end
1437
- rescue
1438
- Log.warn("Error checking lockfile #{lock_key}: #{$!.message}. Removing. Content: #{begin repo[lock_key] rescue "Could not open file" end}")
1439
- repo.out lock_key if repo.include? lock_key
1440
- end
1441
-
1442
- while repo[lock_key]
1443
- sleep 1
1444
- end
1445
-
1446
- repo[lock_key] = LOCK_REPO_SERIALIZER.dump({:hostname => Misc.hostname, :pid => Process.pid})
1447
-
1448
- res = yield lock_key, *args
1449
-
1450
- repo.delete lock_key
1451
-
1452
- res
1453
- end
1454
521
 
1455
522
  def self.common_path(dir, file)
1456
523
  file = File.expand_path file
@@ -1479,32 +546,6 @@ end
1479
546
  res
1480
547
  end
1481
548
 
1482
- def self.to_utf8(string)
1483
- string.encode("UTF-16BE", :invalid => :replace, :undef => :replace, :replace => "?").encode('UTF-8')
1484
- end
1485
-
1486
- def self.fixutf8(string)
1487
- return nil if string.nil?
1488
- return string if (string.respond_to? :valid_encoding? and string.valid_encoding?) or
1489
- (string.respond_to? :valid_encoding and string.valid_encoding)
1490
-
1491
- if string.respond_to?(:encode)
1492
- string.encode("UTF-16BE", :invalid => :replace, :undef => :replace, :replace => "?").encode('UTF-8')
1493
- else
1494
- require 'iconv'
1495
- @@ic ||= Iconv.new('UTF-8//IGNORE', 'UTF-8')
1496
- @@ic.iconv(string)
1497
- end
1498
- end
1499
-
1500
- def self.fixascii(string)
1501
- if string.respond_to?(:encode)
1502
- self.fixutf8(string).encode("ASCII-8BIT")
1503
- else
1504
- string
1505
- end
1506
- end
1507
-
1508
549
  def self.sensiblewrite(path, content = nil, &block)
1509
550
  return if File.exists? path
1510
551
  tmp_path = path + '.sensible_write'
@@ -1556,69 +597,6 @@ end
1556
597
  new_options
1557
598
  end
1558
599
 
1559
- def self.digest(text)
1560
- Digest::MD5.hexdigest(text)
1561
- end
1562
-
1563
- HASH2MD5_MAX_STRING_LENGTH = 1000
1564
- HASH2MD5_MAX_ARRAY_LENGTH = 100
1565
- def self.hash2md5(hash)
1566
- str = ""
1567
- keys = hash.keys
1568
- keys = keys.clean_annotations if keys.respond_to? :clean_annotations
1569
- keys = keys.sort_by{|k| k.to_s}
1570
-
1571
- if hash.respond_to? :unnamed
1572
- unnamed = hash.unnamed
1573
- hash.unnamed = true
1574
- end
1575
- keys.each do |k|
1576
- next if k == :monitor or k == "monitor" or k == :in_situ_persistence or k == "in_situ_persistence"
1577
- v = hash[k]
1578
- case
1579
- when TrueClass === v
1580
- str << k.to_s << "=>true"
1581
- when FalseClass === v
1582
- str << k.to_s << "=>false"
1583
- when Hash === v
1584
- str << k.to_s << "=>" << hash2md5(v)
1585
- when Symbol === v
1586
- str << k.to_s << "=>" << v.to_s
1587
- when (String === v and v.length > HASH2MD5_MAX_STRING_LENGTH)
1588
- str << k.to_s << "=>" << v[0..HASH2MD5_MAX_STRING_LENGTH] << "; #{ v.length }"
1589
- when String === v
1590
- str << k.to_s << "=>" << v
1591
- when (Array === v and v.length > HASH2MD5_MAX_ARRAY_LENGTH)
1592
- str << k.to_s << "=>[" << v[0..HASH2MD5_MAX_ARRAY_LENGTH] * "," << "; #{ v.length }]"
1593
- when TSV::Parser === v
1594
- str << remove_long_items(v)
1595
- when Array === v
1596
- str << k.to_s << "=>[" << v * "," << "]"
1597
- when File === v
1598
- str << k.to_s << "=>[File:" << v.path << "]"
1599
- else
1600
- v_ins = v.inspect
1601
-
1602
- case
1603
- when v_ins =~ /:0x0/
1604
- str << k.to_s << "=>" << v_ins.sub(/:0x[a-f0-9]+@/,'')
1605
- else
1606
- str << k.to_s << "=>" << v_ins
1607
- end
1608
-
1609
- end
1610
-
1611
- str << "_" << hash2md5(v.info) if defined? Annotated and Annotated === v
1612
- end
1613
- hash.unnamed = unnamed if hash.respond_to? :unnamed
1614
-
1615
- if str.empty?
1616
- ""
1617
- else
1618
- digest(str)
1619
- end
1620
- end
1621
-
1622
600
  def self.process_options(hash, *keys)
1623
601
  if keys.length == 1
1624
602
  hash.include?(keys.first.to_sym) ? hash.delete(keys.first.to_sym) : hash.delete(keys.first.to_s)
@@ -1815,7 +793,6 @@ end
1815
793
  chunks
1816
794
  end
1817
795
 
1818
-
1819
796
  def self.append_zipped(current, new)
1820
797
  current.each do |v|
1821
798
  n = new.shift
@@ -1832,145 +809,6 @@ end
1832
809
  return [] if array.empty? or (first = array.first).nil?
1833
810
  first.zip(*array[1..-1])
1834
811
  end
1835
-
1836
- def self.camel_case(string)
1837
- return string if string !~ /_/ && string =~ /[A-Z]+.*/
1838
- string.split(/_|(\d+)/).map{|e|
1839
- (e =~ /^[A-Z]{2,}$/ ? e : e.capitalize)
1840
- }.join
1841
- end
1842
-
1843
- def self.camel_case_lower(string)
1844
- string.split('_').inject([]){ |buffer,e|
1845
- buffer.push(buffer.empty? ? e.downcase : (e =~ /^[A-Z]{2,}$/ ? e : e.capitalize))
1846
- }.join
1847
- end
1848
-
1849
- def self.snake_case(string)
1850
- return nil if string.nil?
1851
- string = string.to_s if Symbol === string
1852
- string.
1853
- gsub(/([A-Z]{2,})([A-Z][a-z])/,'\1_\2').
1854
- gsub(/([a-z])([A-Z])/,'\1_\2').
1855
- gsub(/\s/,'_').gsub(/[^\w_]/, '').
1856
- split("_").collect{|p| p.match(/[A-Z]{2,}/) ? p : p.downcase } * "_"
1857
- end
1858
-
1859
- # source: https://gist.github.com/ekdevdes/2450285
1860
- # author: Ethan Kramer (https://github.com/ekdevdes)
1861
- def self.humanize(value, options = {})
1862
- if options.empty?
1863
- options[:format] = :sentence
1864
- end
1865
-
1866
- values = []
1867
- values = value.split('_')
1868
- values.each_index do |index|
1869
- # lower case each item in array
1870
- # Miguel Vazquez edit: Except for acronyms
1871
- values[index].downcase! unless values[index].match(/[a-zA-Z][A-Z]/)
1872
- end
1873
- if options[:format] == :allcaps
1874
- values.each do |value|
1875
- value.capitalize!
1876
- end
1877
-
1878
- if options.empty?
1879
- options[:seperator] = " "
1880
- end
1881
-
1882
- return values.join " "
1883
- end
1884
-
1885
- if options[:format] == :class
1886
- values.each do |value|
1887
- value.capitalize!
1888
- end
1889
-
1890
- return values.join ""
1891
- end
1892
-
1893
- if options[:format] == :sentence
1894
- values[0].capitalize! unless values[0].match(/[a-zA-Z][A-Z]/)
1895
-
1896
- return values.join " "
1897
- end
1898
-
1899
- if options[:format] == :nocaps
1900
- return values.join " "
1901
- end
1902
- end
1903
- end
1904
-
1905
- #TODO: REMOVE
1906
- #class RBBTError < StandardError
1907
- # attr_accessor :info
1908
- #
1909
- # alias old_to_s to_s
1910
- # def to_s
1911
- # str = old_to_s.dup
1912
- # if info
1913
- # str << "\n" << "Additional Info:\n---\n" << info << "---"
1914
- # end
1915
- # str
1916
- # end
1917
- #end
1918
-
1919
- module IndiferentHash
1920
-
1921
- def self.setup(hash)
1922
- hash.extend IndiferentHash
1923
- end
1924
-
1925
- def merge(other)
1926
- new = self.dup
1927
- IndiferentHash.setup(new)
1928
- other.each do |k,value|
1929
- new.delete k
1930
- new[k] = value
1931
- end
1932
- new
1933
- end
1934
-
1935
- def [](key)
1936
- res = super(key)
1937
- return res unless res.nil?
1938
-
1939
- case key
1940
- when Symbol, Module
1941
- super(key.to_s)
1942
- when String
1943
- super(key.to_sym)
1944
- else
1945
- super(key)
1946
- end
1947
- end
1948
-
1949
- def values_at(*key_list)
1950
- key_list.inject([]){|acc,key| acc << self[key]}
1951
- end
1952
-
1953
- def include?(key)
1954
- case key
1955
- when Symbol, Module
1956
- super(key) || super(key.to_s)
1957
- when String
1958
- super(key) || super(key.to_sym)
1959
- else
1960
- super(key)
1961
- end
1962
- end
1963
-
1964
- def delete(key)
1965
- case key
1966
- when Symbol, Module
1967
- super(key) || super(key.to_s)
1968
- when String
1969
- super(key) || super(key.to_sym)
1970
- else
1971
- super(key)
1972
- end
1973
- end
1974
812
  end
1975
813
 
1976
814
  module PDF2Text