rbbt-util 5.7.0 → 5.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/annotations.rb +4 -1
- data/lib/rbbt/annotations/util.rb +11 -0
- data/lib/rbbt/persist.rb +8 -2
- data/lib/rbbt/resource/path.rb +1 -0
- data/lib/rbbt/tsv/accessor.rb +18 -15
- data/lib/rbbt/tsv/parallel.rb +89 -32
- data/lib/rbbt/tsv/util.rb +11 -0
- data/lib/rbbt/util/R.rb +0 -1
- data/lib/rbbt/util/concurrency.rb +2 -0
- data/lib/rbbt/util/concurrency/processes.rb +96 -0
- data/lib/rbbt/util/concurrency/processes/socket.rb +87 -0
- data/lib/rbbt/util/concurrency/processes/socket_old.rb +144 -0
- data/lib/rbbt/util/concurrency/processes/worker.rb +53 -0
- data/lib/rbbt/util/concurrency/threads.rb +76 -0
- data/lib/rbbt/util/log.rb +37 -5
- data/lib/rbbt/util/misc.rb +89 -4
- data/lib/rbbt/util/semaphore.rb +10 -4
- data/lib/rbbt/util/simpleopt/accessor.rb +5 -0
- data/lib/rbbt/util/simpleopt/doc.rb +2 -4
- data/lib/rbbt/workflow/accessor.rb +39 -12
- data/lib/rbbt/workflow/step.rb +5 -7
- data/share/rbbt_commands/benchmark/pthrough +18 -0
- data/share/rbbt_commands/color +41 -0
- data/share/rbbt_commands/stat/density +50 -0
- data/share/rbbt_commands/tsv/info +21 -3
- data/share/rbbt_commands/tsv/slice +46 -0
- data/share/rbbt_commands/tsv/subset +53 -0
- data/share/rbbt_commands/tsv/values +7 -1
- data/test/rbbt/annotations/test_util.rb +14 -0
- data/test/rbbt/tsv/test_parallel.rb +25 -3
- data/test/rbbt/tsv/test_util.rb +15 -0
- data/test/rbbt/util/concurrency/processes/test_socket.rb +37 -0
- data/test/rbbt/util/concurrency/test_processes.rb +53 -0
- data/test/rbbt/util/concurrency/test_threads.rb +42 -0
- data/test/rbbt/util/test_concurrency.rb +6 -0
- metadata +23 -2
@@ -0,0 +1,144 @@
|
|
1
|
+
class RbbtProcessQueue
  # Queue of Ruby objects carried over a pipe.
  #
  # Every message is written as a 4-byte length header (`pack('L')`) followed
  # by a payload made of a one-character type tag and a Marshal dump (see
  # .serialize). #push runs under Filelock on `in_lockfile` and #pop under
  # Filelock on `out_lockfile`.
  class RbbtProcessSocket

    # Raised by #pop when the pipe reports end-of-file.
    class ClosedSocket < Exception; end

    attr_accessor :sin, :sout, :in_lockfile, :out_lockfile

    # Creates the pipe and the two lock files.
    #
    # lockfile - base path for the lock files; '.in' and '.out' are appended.
    #            Defaults to TmpFile.tmp_file.
    #
    # Raises RuntimeError if either lock file already exists. The checks run
    # before the pipe is opened so a failed construction leaves nothing open.
    def initialize(lockfile = nil)
      lockfile ||= TmpFile.tmp_file

      @lockfile = lockfile
      @in_lockfile = lockfile + '.in'
      @out_lockfile = lockfile + '.out'
      raise "in_lockfile exists?" if File.exist? @in_lockfile
      raise "out_lockfile exists?" if File.exist? @out_lockfile

      @sout, @sin = IO.pipe
      FileUtils.touch @in_lockfile
      FileUtils.touch @out_lockfile
    end

    # Encodes obj as a framed message: 4-byte length header + type tag + dump.
    #
    # TSV objects are tagged "T" and dumped as [info, plain hash]; anything
    # else is tagged "M" and dumped with Marshal directly.
    # Errors are logged and re-raised.
    def self.serialize(obj)
      dump = nil
      begin
        case obj
        when TSV
          type = "T"
          info = obj.info
          info.delete_if{|k,v| v.nil?}
          dump = Marshal.dump([info, {}.merge(obj)])
        else
          type = "M"
          dump = Marshal.dump(obj)
        end
        payload = [type, dump].pack('A1a*')
        [payload.bytesize].pack('L') << payload
      rescue Exception
        Log.error "Serialize error for: #{Misc.fingerprint obj} - #{Misc.fingerprint dump}"
        raise
      end
    end

    # Decodes a payload (type tag + dump, without the length header).
    #
    # Returns the object, or nil when the type tag is neither "M" nor "T".
    # Errors are logged and re-raised.
    def self.unserialize(str)
      begin
        c, dump = str.unpack("A1a*")
        case c
        when "M"
          return Marshal.load(dump)
        when "T"
          info, hash = Marshal.load(dump)
          return TSV.setup(hash, info)
        end
      rescue Exception
        Log.error "Unserialize error for: #{Misc.fingerprint str}"
        raise
      end
    end

    # Reads exactly `length` bytes from the read end of the pipe.
    # Sizes are counted in bytes because the data is a binary dump.
    # readpartial raises EOFError if the pipe closes first.
    def read_sout(length)
      str = String.new
      str << sout.readpartial(length - str.bytesize) while str.bytesize < length
      str
    end

    # Writes all of str to the write end using non-blocking writes.
    #
    # write_nonblock reports a byte count, so progress is tracked in bytes;
    # when the pipe is full we wait for it to become writable and retry.
    def write_sin(str)
      total = str.bytesize
      wrote = 0
      while wrote < total
        begin
          wrote += sin.write_nonblock(str.byteslice(wrote, total - wrote))
        rescue IO::WaitWritable
          IO.select(nil, [sin])
        end
      end
    end

    # Serializes obj and appends it to the pipe while holding in_lockfile.
    def push(obj)
      Filelock(in_lockfile) do
        payload = RbbtProcessSocket.serialize(obj)
        sin << payload
      end
    end

    # Blocks until a message is available and returns the decoded object.
    #
    # Each attempt holds out_lockfile and waits up to one second for data;
    # on timeout (IO.select returns nil) the lock is released and the
    # attempt is repeated. Relies on Filelock returning the block's value.
    #
    # Raises ClosedSocket when the pipe is at end-of-file.
    def pop
      payload = nil
      until payload
        payload = Filelock(out_lockfile) do
          ready, = IO.select([sout], [], [], 1)
          if ready
            raise ClosedSocket if sout.eof?
            length = read_sout(4).unpack('L').first
            read_sout(length)
          end
        end
      end

      RbbtProcessSocket.unserialize(payload)
    end

    # Closes the write end, drains the pipe and returns every remaining
    # message decoded, using the same framing that .serialize produces.
    #
    # Raises ClosedSocket if the drained data ends in the middle of a frame.
    def rest
      sin.close
      data = sout.read.force_encoding(Encoding::BINARY)
      res = []
      offset = 0

      while offset < data.bytesize
        header = data.byteslice(offset, 4)
        raise ClosedSocket, "Truncated message header" if header.bytesize < 4
        length = header.unpack('L').first
        payload = data.byteslice(offset + 4, length)
        raise ClosedSocket, "Truncated message payload" if payload.nil? || payload.bytesize < length
        res << RbbtProcessSocket.unserialize(payload)
        offset += 4 + length
      end

      res
    end

    # Removes both lock files (if present) and closes both pipe ends.
    def clean
      FileUtils.rm @in_lockfile if File.exist? @in_lockfile
      FileUtils.rm @out_lockfile if File.exist? @out_lockfile
      sin.close unless sin.closed?
      sout.close unless sout.closed?
    end
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'rbbt/util/concurrency/processes/socket'
|
2
|
+
class RbbtProcessQueue
  # A forked child process that pops jobs from `queue`, runs the block on
  # each one and, when a callback queue is given, pushes the result to it.
  class RbbtProcessQueueWorker
    attr_accessor :pid, :queue, :callback_queue, :block

    # Forks the worker process immediately.
    #
    # queue          - job source; must respond to #pop and #swrite.
    # callback_queue - optional result sink; must respond to #push and #sread.
    # block          - called with each job popped from the queue.
    #
    # Child exit status: 0 when the loop ends or the queue raises
    # ClosedStream; -1 on Aborted (raised from the INT trap) or on any other
    # exception, which is also logged and pushed to the callback queue.
    def initialize(queue, callback_queue = nil, &block)
      @queue, @callback_queue, @block = queue, callback_queue, block

      @pid = Process.fork do
        begin
          # The child only reads jobs and only writes results.
          @queue.swrite.close
          @callback_queue.sread.close if @callback_queue

          Signal.trap(:INT){ raise Aborted; }
          loop do
            job = @queue.pop
            # An exception object received as a job is raised in the worker.
            raise job if Exception === job
            res = @block.call job
            @callback_queue.push res if @callback_queue
          end

          exit 0
        rescue SystemExit
          # `exit` raises SystemExit; let it through so the clean exit above
          # is not reported as a failure by the generic handler below.
          raise
        rescue ClosedStream
          exit 0
        rescue Aborted
          exit -1
        rescue Exception
          Log.exception $!
          @callback_queue.push($!) if @callback_queue
          exit -1
        end

      end
    end

    # Waits for the worker process to exit.
    def join
      Process.waitpid @pid
    end

    # Sends INT to the worker process.
    def abort
      Process.kill :INT, @pid
    end

    # Non-blocking check on the worker process.
    #
    # Returns the value of Process.waitpid with WNOHANG (the pid once the
    # child has exited, nil while it is still running), true when there is
    # no such child (Errno::ECHILD), and false on any other StandardError.
    def done?
      begin
        Process.waitpid @pid, Process::WNOHANG
      rescue Errno::ECHILD
        true
      rescue
        false
      end
    end
  end
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
# Fixed-size pool of worker threads that consume elements from a shared Queue.
class RbbtThreadQueue
  attr_accessor :num_threads, :threads, :queue, :mutex, :block, :done

  # Worker thread: pops elements from the queue forever and calls the block
  # with each element splatted into arguments.
  class RbbtThreadQueueWorker < Thread
    # queue - the Queue to pop elements from.
    # mutex - when given, passed to the block as an extra trailing argument.
    # block - work to run for each element.
    #
    # Any exception other than Aborted is re-raised in the thread that
    # created the worker; Aborted just ends the worker.
    def initialize(queue, mutex = nil, &block)
      # Built once; appended at call time so queued elements are not mutated.
      extra = mutex.nil? ? [] : [mutex]

      super(Thread.current) do |current|
        begin
          loop do
            job = queue.pop
            block.call(*job, *extra)
          end
        rescue Exception
          current.raise $! unless Aborted === $!
        end
      end
    end

    # Raises Aborted in this worker if it is still alive
    # (inside a Thread subclass, `raise` resolves to Thread#raise).
    def clean
      raise Aborted if alive?
    end
  end

  # num_threads - number of workers that #init will start.
  def initialize(num_threads)
    @num_threads = num_threads
    @threads = []
    @queue = Queue.new
    @mutex = Mutex.new
  end

  # Aborts any previous workers and starts num_threads new ones.
  #
  # use_mutex - when true, the pool mutex is passed to the block as a last
  #             argument.
  def init(use_mutex = false, &block)
    clean
    num_threads.times do |i|
      @threads << RbbtThreadQueueWorker.new(queue, use_mutex ? mutex : nil, &block)
    end
  end

  # Waits until the queue is empty and every live worker is idle (blocked
  # waiting on the queue), then aborts the workers and joins them briefly.
  #
  # Dead workers are pruned while waiting so the idle check compares against
  # live workers only.
  #
  # Raises RuntimeError if no worker is left while elements remain queued.
  def join
    loop do
      @threads.delete_if{|t| not t.alive? }
      raise "No worker thread survived" if @threads.empty? and queue.length > 0
      break if queue.length == 0 and queue.num_waiting >= @threads.length
      Thread.pass
    end
    @threads.each{|t| t.raise Aborted }
    @threads.each{|t| t.join(0.1) }
  end

  # Aborts every worker and empties the worker list.
  def clean
    threads.each{ |t| t.clean }.clear
  end

  # Enqueues one element for the workers.
  def process(e)
    queue << e
  end

  # Runs block over every element of list using num worker threads and
  # waits for completion.
  def self.each(list, num = 3, &block)
    q = RbbtThreadQueue.new num
    q.init(&block)
    list.each do |elem| q.process elem end
    q.join
  end
end
|
data/lib/rbbt/util/log.rb
CHANGED
@@ -126,6 +126,14 @@ def ppp(message)
|
|
126
126
|
puts ""
|
127
127
|
end
|
128
128
|
|
129
|
+
# Debug helper: emits four debug-level log entries -- the caller's location,
# a blank entry, the Misc.fingerprint of +object+, and another blank entry.
# 'rbbt/util/misc' is required lazily, inside the logging block.
def fff(object)
  trace = caller
  Log.debug { "#{Log.color(:cyan, "FINGERPRINT:")} " + trace.first }
  Log.debug { "" }
  Log.debug do
    require 'rbbt/util/misc'
    "=> " + Misc.fingerprint(object)
  end
  Log.debug { "" }
end
|
136
|
+
|
129
137
|
def ddd(message, file = $stdout)
|
130
138
|
stack = caller
|
131
139
|
Log.debug{"#{Log.color :cyan, "DEBUG:"} " << stack.first}
|
@@ -134,12 +142,36 @@ def ddd(message, file = $stdout)
|
|
134
142
|
Log.debug{""}
|
135
143
|
end
|
136
144
|
|
137
|
-
def
|
145
|
+
# Debug helper: emits four low-level log entries -- the caller's location,
# a blank entry, `message.inspect`, and another blank entry.
# The +file+ parameter is accepted but not used here.
def lll(message, file = $stdout)
  trace = caller
  Log.low { "#{Log.color(:cyan, "LOW:")} " + trace.first }
  Log.low { "" }
  Log.low { "=> " + message.inspect }
  Log.low { "" }
end
|
152
|
+
|
153
|
+
# Debug helper: emits four medium-level log entries -- the caller's location,
# a blank entry, `message.inspect`, and another blank entry.
# The +file+ parameter is accepted but not used here.
#
# Logs through Log.medium so the level matches the "MEDIUM:" label, the same
# way lll/hhh/iii use Log.low/Log.high/Log.info.
# NOTE(review): assumes Log.medium exists alongside Log.low and Log.high -- confirm.
def mmm(message, file = $stdout)
  stack = caller
  Log.medium{"#{Log.color :cyan, "MEDIUM:"} " << stack.first}
  Log.medium{""}
  Log.medium{"=> " << message.inspect}
  Log.medium{""}
end
|
160
|
+
|
161
|
+
# Debug helper: emits four high-level log entries -- the caller's location,
# a blank entry, `message.inspect`, and another blank entry.
# The +file+ parameter is accepted but not used here.
def hhh(message, file = $stdout)
  trace = caller
  Log.high { "#{Log.color(:cyan, "HIGH:")} " + trace.first }
  Log.high { "" }
  Log.high { "=> " + message.inspect }
  Log.high { "" }
end
|
168
|
+
|
169
|
+
# Debug helper: emits four info-level log entries -- the caller's location,
# a blank entry, `message.inspect`, and another blank entry.
# The +file+ parameter is accepted but not used here.
def iii(message, file = $stdout)
  trace = caller
  Log.info { "#{Log.color(:cyan, "INFO:")} " + trace.first }
  Log.info { "" }
  Log.info { "=> " + message.inspect }
  Log.info { "" }
end
|
144
176
|
|
145
177
|
|
data/lib/rbbt/util/misc.rb
CHANGED
@@ -16,8 +16,11 @@ class Hash
|
|
16
16
|
end
|
17
17
|
end
|
18
18
|
|
19
|
-
class ParameterException <
|
19
|
+
# Error classes that derive from StandardError.
class ParameterException < StandardError
end

class FieldNotFoundError < StandardError
end

# Signal-style classes that derive directly from Exception, so a bare
# `rescue` (which only catches StandardError) does not intercept them.
class Aborted < Exception
end

class TryAgain < Exception
end

class ClosedStream < Exception
end
|
21
24
|
|
22
25
|
module LaterString
|
23
26
|
def to_s
|
@@ -28,18 +31,34 @@ end
|
|
28
31
|
Lockfile.refresh = false if ENV["RBBT_NO_LOCKFILE_REFRESH"] == "true"
|
29
32
|
module Misc
|
30
33
|
|
34
|
+
# Reads `size` units (as measured by String#length) from `stream`.
#
# Keeps calling stream.read until it yields data, then keeps reading until
# the accumulated string reaches `size`. Between attempts it waits up to one
# second for the stream to become readable.
#
# Raises ClosedStream if the stream reaches end-of-file before `size` has
# been read (any partial data is discarded).
def self.read_stream(stream, size)
  buffer = stream.read(size)
  until buffer
    IO.select([stream], nil, nil, 1)
    raise ClosedStream if stream.eof?
    buffer = stream.read(size)
  end

  until buffer.length >= size
    raise ClosedStream if stream.eof?
    IO.select([stream], nil, nil, 1)
    chunk = stream.read(size - buffer.length)
    buffer << chunk if chunk
  end

  buffer
end
|
31
50
|
# Splits a command-line style string into its parameters.
#
# Arrays are returned untouched. For strings, each token is either the text
# between a pair of quote characters (quotes dropped) or a run of characters
# containing no quotes and no whitespace.
def self.parse_cmd_params(str)
  case str
  when Array
    str
  else
    tokens = str.scan(/
      (?:["']([^"']*?)["']) |
      ([^"'\s]+)
    /x)
    # Each match has one nil capture (the alternative that did not match).
    tokens.flatten.compact
  end
end
|
38
57
|
|
39
58
|
# Normalizes a mutation string and returns it as [pos, [mutation]].
#
# A mutation that starts with '-' followed by a base (A/C/G/T) is replaced by
# a run of dashes one shorter than the original string. When +ref+ is '-',
# the mutation is prefixed with '+'. `pos` is returned unchanged.
def self.correct_icgc_mutation(pos, ref, mut_str)
  mutation = mut_str =~ /^-[ACGT]/ ? '-' * (mut_str.length - 1) : mut_str
  mutation = "+" + mutation if ref == '-'
  [pos, [mutation]]
end
|
45
64
|
|
@@ -504,6 +523,72 @@ end
|
|
504
523
|
"tyr" => "Y",
|
505
524
|
"val" => "V"
|
506
525
|
}
|
526
|
+
# Maps each three-letter DNA codon to a one-letter amino-acid code.
# "*" is the value for TAA, TAG and TGA (presumably the stop codons).
# Entries are grouped one amino acid per line; insertion order is unchanged.
CODON_TABLE = {
  "ATT" => "I", "ATC" => "I", "ATA" => "I",
  "CTT" => "L", "CTC" => "L", "CTA" => "L", "CTG" => "L", "TTA" => "L", "TTG" => "L",
  "GTT" => "V", "GTC" => "V", "GTA" => "V", "GTG" => "V",
  "TTT" => "F", "TTC" => "F",
  "ATG" => "M",
  "TGT" => "C", "TGC" => "C",
  "GCT" => "A", "GCC" => "A", "GCA" => "A", "GCG" => "A",
  "GGT" => "G", "GGC" => "G", "GGA" => "G", "GGG" => "G",
  "CCT" => "P", "CCC" => "P", "CCA" => "P", "CCG" => "P",
  "ACT" => "T", "ACC" => "T", "ACA" => "T", "ACG" => "T",
  "TCT" => "S", "TCC" => "S", "TCA" => "S", "TCG" => "S", "AGT" => "S", "AGC" => "S",
  "TAT" => "Y", "TAC" => "Y",
  "TGG" => "W",
  "CAA" => "Q", "CAG" => "Q",
  "AAT" => "N", "AAC" => "N",
  "CAT" => "H", "CAC" => "H",
  "GAA" => "E", "GAG" => "E",
  "GAT" => "D", "GAC" => "D",
  "AAA" => "K", "AAG" => "K",
  "CGT" => "R", "CGC" => "R", "CGA" => "R", "CGG" => "R", "AGA" => "R", "AGG" => "R",
  "TAA" => "*", "TAG" => "*", "TGA" => "*",
}
|
507
592
|
|
508
593
|
#def self.fast_align(reference, sequence)
|
509
594
|
#
|