rbbt-util 5.7.0 → 5.8.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (37) hide show
  1. checksums.yaml +4 -4
  2. data/lib/rbbt/annotations.rb +4 -1
  3. data/lib/rbbt/annotations/util.rb +11 -0
  4. data/lib/rbbt/persist.rb +8 -2
  5. data/lib/rbbt/resource/path.rb +1 -0
  6. data/lib/rbbt/tsv/accessor.rb +18 -15
  7. data/lib/rbbt/tsv/parallel.rb +89 -32
  8. data/lib/rbbt/tsv/util.rb +11 -0
  9. data/lib/rbbt/util/R.rb +0 -1
  10. data/lib/rbbt/util/concurrency.rb +2 -0
  11. data/lib/rbbt/util/concurrency/processes.rb +96 -0
  12. data/lib/rbbt/util/concurrency/processes/socket.rb +87 -0
  13. data/lib/rbbt/util/concurrency/processes/socket_old.rb +144 -0
  14. data/lib/rbbt/util/concurrency/processes/worker.rb +53 -0
  15. data/lib/rbbt/util/concurrency/threads.rb +76 -0
  16. data/lib/rbbt/util/log.rb +37 -5
  17. data/lib/rbbt/util/misc.rb +89 -4
  18. data/lib/rbbt/util/semaphore.rb +10 -4
  19. data/lib/rbbt/util/simpleopt/accessor.rb +5 -0
  20. data/lib/rbbt/util/simpleopt/doc.rb +2 -4
  21. data/lib/rbbt/workflow/accessor.rb +39 -12
  22. data/lib/rbbt/workflow/step.rb +5 -7
  23. data/share/rbbt_commands/benchmark/pthrough +18 -0
  24. data/share/rbbt_commands/color +41 -0
  25. data/share/rbbt_commands/stat/density +50 -0
  26. data/share/rbbt_commands/tsv/info +21 -3
  27. data/share/rbbt_commands/tsv/slice +46 -0
  28. data/share/rbbt_commands/tsv/subset +53 -0
  29. data/share/rbbt_commands/tsv/values +7 -1
  30. data/test/rbbt/annotations/test_util.rb +14 -0
  31. data/test/rbbt/tsv/test_parallel.rb +25 -3
  32. data/test/rbbt/tsv/test_util.rb +15 -0
  33. data/test/rbbt/util/concurrency/processes/test_socket.rb +37 -0
  34. data/test/rbbt/util/concurrency/test_processes.rb +53 -0
  35. data/test/rbbt/util/concurrency/test_threads.rb +42 -0
  36. data/test/rbbt/util/test_concurrency.rb +6 -0
  37. metadata +23 -2
@@ -0,0 +1,144 @@
1
class RbbtProcessQueue
  # Message channel between processes built on a pipe.
  #
  # Every message is framed as a 4-byte native-endian length ('L') followed
  # by a payload made of a one-character type tag and a Marshal dump:
  # "M" for plain objects and "T" for TSV objects (stored as info + hash).
  #
  # Writers and readers serialize access to the pipe through two lock files
  # (lockfile + '.in' and lockfile + '.out') taken with Filelock.
  # NOTE(review): Filelock is assumed to return the value of its block, as
  # the original code already relied on -- confirm against the filelock gem.
  class RbbtProcessSocket

    # Raised by #pop when the write end is closed and no message is left.
    class ClosedSocket < Exception; end

    attr_accessor :sin, :sout, :in_lockfile, :out_lockfile

    # lockfile:: base path for the two lock files; a temporary path is
    #            requested from TmpFile when omitted.
    #
    # Raises RuntimeError if either lock file already exists.
    def initialize(lockfile = nil)
      lockfile ||= TmpFile.tmp_file

      @lockfile = lockfile
      @in_lockfile = lockfile + '.in'
      @out_lockfile = lockfile + '.out'
      raise "in_lockfile exists?" if File.exist? @in_lockfile
      raise "out_lockfile exists?" if File.exist? @out_lockfile

      # Open the pipe only after the checks so a failure leaks no descriptors
      @sout, @sin = IO.pipe

      FileUtils.touch @in_lockfile
      FileUtils.touch @out_lockfile
    end

    # Returns the framed binary representation of obj (length + type + dump).
    # Errors are logged and re-raised.
    def self.serialize(obj)
      dump = nil
      begin
        case obj
        when TSV
          type = "T"
          info = obj.info
          info.delete_if{|k,v| v.nil?}
          dump = Marshal.dump([info, {}.merge(obj)])
        else
          type = "M"
          dump = Marshal.dump(obj)
        end
        payload = [type, dump].pack('A1a*')
        [payload.bytesize].pack('L') << payload
      rescue Exception
        Log.error "Serialize error for: #{Misc.fingerprint obj} - #{Misc.fingerprint dump}"
        raise
      end
    end

    # Rebuilds an object from a payload (type tag + dump, WITHOUT the length
    # header). Returns nil for an unknown type tag. Errors are logged and
    # re-raised.
    #
    # NOTE: uses Marshal.load, so it must only be fed data written by
    # trusted processes.
    def self.unserialize(str)
      begin
        c, dump = str.unpack("A1a*")
        case c
        when "M"
          return Marshal.load(dump)
        when "T"
          info, hash = Marshal.load(dump)
          return TSV.setup(hash, info)
        end
      rescue Exception
        Log.error "Unserialize error for: #{Misc.fingerprint str}"
        raise
      end
    end

    # Reads exactly length bytes from the read end, blocking as needed.
    # Raises EOFError (from readpartial) if the pipe ends first.
    def read_sout(length)
      str = String.new
      str << sout.readpartial(length - str.bytesize) while str.bytesize < length
      str
    end

    # Writes all of str to the write end using non-blocking writes, waiting
    # for the pipe to drain whenever it is full.
    def write_sin(str)
      total = str.bytesize
      wrote = 0
      while wrote < total
        begin
          # byteslice: write_nonblock reports bytes, not characters
          wrote += sin.write_nonblock(str.byteslice(wrote, total - wrote))
        rescue IO::WaitWritable
          IO.select(nil, [sin])
        end
      end
    end

    # Serializes obj and writes it to the pipe while holding the input lock.
    def push(obj)
      Filelock(in_lockfile) do
        payload = RbbtProcessSocket.serialize(obj)
        sin << payload
      end
    end

    # Reads and returns the next object from the pipe.
    #
    # The output lock is held only while a message is being read; when
    # nothing arrives within a second the lock is released and taken again,
    # so other readers are not starved.
    #
    # Raises ClosedSocket when the write end is closed and no data remains.
    def pop
      payload = nil

      while payload.nil?
        payload = Filelock(out_lockfile) do
          # IO.select returns nil on timeout
          if IO.select([sout], [], [], 1).nil?
            nil
          else
            raise ClosedSocket if sout.eof?
            length = read_sout(4).unpack('L').first
            read_sout(length)
          end
        end
      end

      RbbtProcessSocket.unserialize(payload)
    end

    # Closes the write end and returns every object still in the pipe, in
    # order, decoded with the same framing used by .serialize.
    def rest
      sin.close unless sin.closed?
      data = sout.read
      res = []

      offset = 0
      while offset < data.bytesize
        header = data.byteslice(offset, 4)
        raise "Truncated message header in socket" if header.bytesize < 4
        length = header.unpack('L').first
        res << RbbtProcessSocket.unserialize(data.byteslice(offset + 4, length))
        offset += 4 + length
      end

      res
    end

    # Removes the lock files and closes both ends of the pipe.
    def clean
      FileUtils.rm @in_lockfile if File.exist? @in_lockfile
      FileUtils.rm @out_lockfile if File.exist? @out_lockfile
      sin.close unless sin.closed?
      sout.close unless sout.closed?
    end
  end
end
@@ -0,0 +1,53 @@
1
+ require 'rbbt/util/concurrency/processes/socket'
2
class RbbtProcessQueue
  # Forked worker that pops items from +queue+, runs the block on each one
  # and, when a callback queue is given, pushes the result back to it.
  class RbbtProcessQueueWorker
    attr_accessor :pid, :queue, :callback_queue, :block

    # queue::          object responding to #swrite and #pop
    # callback_queue:: optional object responding to #sread and #push
    # block::          code run in the child for every popped item
    #
    # The child exits with status 0 when the queue raises ClosedStream or
    # the loop ends, and with a non-zero status when it is aborted (INT
    # signal) or any other exception is raised. Such an exception is logged
    # and also pushed to the callback queue.
    def initialize(queue, callback_queue = nil, &block)
      @queue, @callback_queue, @block = queue, callback_queue, block

      @pid = Process.fork do
        begin
          # The child only reads work and only writes results
          @queue.swrite.close
          @callback_queue.sread.close if @callback_queue

          Signal.trap(:INT){ raise Aborted; }
          loop do
            item = @queue.pop
            # An exception object sent through the queue is raised here
            raise item if Exception === item
            res = @block.call item
            @callback_queue.push res if @callback_queue
          end
        rescue ClosedStream
          exit 0
        rescue Aborted
          exit(-1)
        rescue Exception
          Log.exception $!
          @callback_queue.push($!) if @callback_queue
          exit(-1)
        end
        # Reached only if the loop ends (StopIteration). No explicit exit
        # inside the begin block: its SystemExit would be caught by the
        # `rescue Exception` clause and reported as a failure. Falling off
        # the fork block ends the child with status 0.
      end
    end

    # Blocks until the child process finishes.
    def join
      Process.waitpid @pid
    end

    # Asks the child to abort by sending it the INT signal.
    def abort
      Process.kill :INT, @pid
    end

    # Returns true when the child has finished (or was already reaped) and
    # false while it is still running or if the check itself fails.
    def done?
      begin
        # waitpid with WNOHANG returns nil while the child is running
        !!Process.waitpid(@pid, Process::WNOHANG)
      rescue Errno::ECHILD
        true
      rescue
        false
      end
    end
  end
end
@@ -0,0 +1,76 @@
1
# Fixed-size pool of worker threads fed through a Queue.
#
# Typical use: RbbtThreadQueue.each(list, num) { |elem| ... }
class RbbtThreadQueue
  attr_accessor :num_threads, :threads, :queue, :mutex, :block, :done

  # Thread that loops forever popping elements from the queue and calling
  # the block with them (array elements are splatted into arguments).
  #
  # When a mutex is given it is passed to the block as an extra last
  # argument. An exception raised by the block is re-raised in the thread
  # that created the worker; Aborted just ends the worker silently.
  class RbbtThreadQueueWorker < Thread
    def initialize(queue, mutex = nil, &block)
      super(Thread.current) do |parent|
        begin
          loop do
            item = queue.pop
            if mutex.nil?
              block.call(*item)
            else
              # Build a new argument list instead of appending to the queued
              # element: does not mutate the caller's object and also works
              # for elements that are not arrays
              block.call(*item, mutex)
            end
          end
        rescue Exception
          parent.raise $! unless Aborted === $!
        end
      end
    end

    # Stops the worker, if still running, by raising Aborted inside it
    # (Thread#raise on the worker itself).
    def clean
      self.raise Aborted if alive?
    end
  end

  # num_threads:: number of workers started by #init
  def initialize(num_threads)
    @num_threads = num_threads
    @threads = []
    @queue = Queue.new
    @mutex = Mutex.new
  end

  # Stops any previous workers and starts num_threads new ones running
  # block. With use_mutex the pool mutex is passed to the block as its last
  # argument.
  def init(use_mutex = false, &block)
    clean
    num_threads.times do |i|
      @threads << RbbtThreadQueueWorker.new(queue, use_mutex ? mutex : nil, &block)
    end
  end

  # Waits until the queue is empty and every live worker is idle (blocked
  # waiting for more work), then stops the workers.
  #
  # Raises RuntimeError if all workers died while elements remain queued.
  def join
    while queue.length > 0 or queue.num_waiting < @threads.length
      Thread.pass
      # Forget dead workers: they will never be counted as waiting
      @threads.delete_if{|t| not t.alive? }
      raise "No worker thread survived" if @threads.empty? and queue.length > 0
    end
    @threads.each{|t| t.raise Aborted if t.alive? }
    @threads.each{|t| t.join(0.1) }
  end

  # Stops all workers and empties the list of threads.
  def clean
    threads.each{ |t| t.clean }.clear
  end

  # Queues an element for processing.
  def process(e)
    queue << e
  end

  # Runs block over every element of list using num worker threads and
  # waits for all of them to finish.
  def self.each(list, num = 3, &block)
    q = RbbtThreadQueue.new num
    q.init(&block)
    list.each do |elem| q.process elem end
    q.join
  end
end
data/lib/rbbt/util/log.rb CHANGED
@@ -126,6 +126,14 @@ def ppp(message)
126
126
  puts ""
127
127
  end
128
128
 
129
# Debug helper: logs, at debug level, the location this method was called
# from followed by the Misc.fingerprint of object, with empty messages as
# separators. 'rbbt/util/misc' is required lazily, inside the log block.
def fff(object)
  origin = caller.first
  Log.debug { "#{Log.color :cyan, "FINGERPRINT:"} " + origin }
  Log.debug { "" }
  Log.debug do
    require 'rbbt/util/misc'
    "=> " + Misc.fingerprint(object)
  end
  Log.debug { "" }
end
136
+
129
137
  def ddd(message, file = $stdout)
130
138
  stack = caller
131
139
  Log.debug{"#{Log.color :cyan, "DEBUG:"} " << stack.first}
@@ -134,12 +142,36 @@ def ddd(message, file = $stdout)
134
142
  Log.debug{""}
135
143
  end
136
144
 
137
- def fff(object)
145
# Debug helper: logs, through Log.low, the location this method was called
# from followed by message.inspect, with empty messages as separators.
# The file parameter is accepted but not used.
def lll(message, file = $stdout)
  origin = caller.first
  Log.low { "#{Log.color :cyan, "LOW:"} " + origin }
  Log.low { "" }
  Log.low { "=> " + message.inspect }
  Log.low { "" }
end
152
+
153
# Debug helper: logs the location this method was called from followed by
# message.inspect, with empty messages as separators.
#
# Messages go through Log.medium, matching the "MEDIUM:" label (they were
# previously sent through Log.low, the level used by lll).
# NOTE(review): assumes Log.medium exists alongside Log.low / Log.high --
# confirm in rbbt/util/log.
# The file parameter is accepted but not used.
def mmm(message, file = $stdout)
  origin = caller.first
  Log.medium { "#{Log.color :cyan, "MEDIUM:"} #{origin}" }
  Log.medium { "" }
  Log.medium { "=> #{message.inspect}" }
  Log.medium { "" }
end
160
+
161
# Debug helper: logs, through Log.high, the location this method was called
# from followed by message.inspect, with empty messages as separators.
# The file parameter is accepted but not used.
def hhh(message, file = $stdout)
  origin = caller.first
  Log.high { "#{Log.color :cyan, "HIGH:"} " + origin }
  Log.high { "" }
  Log.high { "=> " + message.inspect }
  Log.high { "" }
end
168
+
169
+ def iii(message, file = $stdout)
170
+ stack = caller
171
+ Log.info{"#{Log.color :cyan, "INFO:"} " << stack.first}
172
+ Log.info{""}
173
+ Log.info{"=> " << message.inspect}
174
+ Log.info{""}
143
175
  end
144
176
 
145
177
 
@@ -16,8 +16,11 @@ class Hash
16
16
  end
17
17
  end
18
18
 
19
- class ParameterException < Exception; end
19
# Error classes that derive from StandardError, so a bare `rescue` catches them.
class ParameterException < StandardError
end

class FieldNotFoundError < StandardError
end

# Signal classes deriving directly from Exception: a bare `rescue` does not
# catch them, they have to be rescued by name (or with `rescue Exception`).
class Aborted < Exception
end

class TryAgain < Exception
end

class ClosedStream < Exception
end
21
24
 
22
25
  module LaterString
23
26
  def to_s
@@ -28,18 +31,34 @@ end
28
31
  Lockfile.refresh = false if ENV["RBBT_NO_LOCKFILE_REFRESH"] == "true"
29
32
  module Misc
30
33
 
34
# Reads size units (as counted by String#length on the data read) from
# stream and returns them as one string.
#
# While data is missing it waits up to a second at a time for the stream to
# become readable and tries again.
#
# Raises ClosedStream if the stream reaches EOF before enough data arrives.
def self.read_stream(stream, size)
  # First chunk: read returns nil when nothing could be read
  buffer = stream.read(size)
  until buffer
    IO.select([stream], nil, nil, 1)
    raise ClosedStream if stream.eof?
    buffer = stream.read(size)
  end

  # Keep reading until the requested amount is complete
  until buffer.length >= size
    raise ClosedStream if stream.eof?
    IO.select([stream], nil, nil, 1)
    chunk = stream.read(size - buffer.length)
    buffer << chunk if chunk
  end

  buffer
end
31
50
  def self.parse_cmd_params(str)
32
51
  return str if Array === str
33
52
  str.scan(/
34
- (?:["']([^"']*?)["']) |
35
- ([^"'\s]+)
53
+ (?:["']([^"']*?)["']) |
54
+ ([^"'\s]+)
36
55
  /x).flatten.compact
37
56
  end
38
57
 
39
58
  def self.correct_icgc_mutation(pos, ref, mut_str)
40
59
  mut = mut_str
41
60
  mut = '-' * (mut_str.length - 1) if mut =~/^-[ACGT]/
42
- mut = "+" << mut if ref == '-'
61
+ mut = "+" << mut if ref == '-'
43
62
  [pos, [mut]]
44
63
  end
45
64
 
@@ -504,6 +523,72 @@ end
504
523
  "tyr" => "Y",
505
524
  "val" => "V"
506
525
  }
526
# Maps each of the 64 DNA codons to a one-letter amino acid code, with "*"
# for the three stop codons. Entries are grouped by amino acid, one group
# per line, in the same order as before. Frozen so the shared table cannot
# be modified by accident.
CODON_TABLE = {
  "ATT" => "I", "ATC" => "I", "ATA" => "I",
  "CTT" => "L", "CTC" => "L", "CTA" => "L", "CTG" => "L", "TTA" => "L", "TTG" => "L",
  "GTT" => "V", "GTC" => "V", "GTA" => "V", "GTG" => "V",
  "TTT" => "F", "TTC" => "F",
  "ATG" => "M",
  "TGT" => "C", "TGC" => "C",
  "GCT" => "A", "GCC" => "A", "GCA" => "A", "GCG" => "A",
  "GGT" => "G", "GGC" => "G", "GGA" => "G", "GGG" => "G",
  "CCT" => "P", "CCC" => "P", "CCA" => "P", "CCG" => "P",
  "ACT" => "T", "ACC" => "T", "ACA" => "T", "ACG" => "T",
  "TCT" => "S", "TCC" => "S", "TCA" => "S", "TCG" => "S", "AGT" => "S", "AGC" => "S",
  "TAT" => "Y", "TAC" => "Y",
  "TGG" => "W",
  "CAA" => "Q", "CAG" => "Q",
  "AAT" => "N", "AAC" => "N",
  "CAT" => "H", "CAC" => "H",
  "GAA" => "E", "GAG" => "E",
  "GAT" => "D", "GAC" => "D",
  "AAA" => "K", "AAG" => "K",
  "CGT" => "R", "CGC" => "R", "CGA" => "R", "CGG" => "R", "AGA" => "R", "AGG" => "R",
  "TAA" => "*", "TAG" => "*", "TGA" => "*"
}.freeze
507
592
 
508
593
  #def self.fast_align(reference, sequence)
509
594
  #