rbbt-util 5.11.4 → 5.11.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/tsv/parallel/traverse.rb +2 -1
- data/lib/rbbt/util/cmd.rb +1 -1
- data/lib/rbbt/util/misc.rb +11 -1173
- data/lib/rbbt/util/misc/concurrent_stream.rb +69 -0
- data/lib/rbbt/util/misc/development.rb +95 -0
- data/lib/rbbt/util/misc/exceptions.rb +11 -0
- data/lib/rbbt/util/misc/format.rb +170 -0
- data/lib/rbbt/util/misc/indiferent_hash.rb +56 -0
- data/lib/rbbt/util/misc/inspect.rb +181 -0
- data/lib/rbbt/util/misc/lock.rb +87 -0
- data/lib/rbbt/util/misc/math.rb +32 -0
- data/lib/rbbt/util/misc/objects.rb +0 -0
- data/lib/rbbt/util/misc/omics.rb +183 -0
- data/lib/rbbt/util/misc/pipes.rb +224 -0
- data/lib/rbbt/workflow/accessor.rb +1 -0
- data/lib/rbbt/workflow/step.rb +15 -9
- data/share/rbbt_commands/workflow/task +2 -0
- metadata +13 -2
@@ -0,0 +1,87 @@
|
|
1
|
+
# Set Lockfile.refresh to false when the RBBT_NO_LOCKFILE_REFRESH environment
# variable is exactly the string "true"; any other value leaves it untouched.
# NOTE(review): presumably this stops the lockfile gem from periodically
# refreshing held locks -- confirm against the gem's documentation.
Lockfile.refresh = false if ENV["RBBT_NO_LOCKFILE_REFRESH"] == "true"
|
2
|
+
|
3
|
+
module Misc
|
4
|
+
|
5
|
+
# Serializes the stale-lock inspection done by Misc.lock across the threads of
# this process.
LOCK_MUTEX = Mutex.new

# Runs the given block while holding a Lockfile lock on "<file>.lock".
#
# file   - path to protect. When nil the block is run immediately, without any
#          lock and without arguments, and its value is returned.
# unlock - when true (the default) the lock is released after the block ends,
#          provided the lockfile still reports itself as locked.
#
# Before acquiring the lock, an existing lock file is read as YAML. If it was
# written on this host ("host" entry) by a process that no longer exists
# ("pid" entry) it is deleted. If inspecting it fails, the lock file is
# deleted, the Lockfile object is rebuilt and the error is re-raised so that
# the surrounding Misc.insist calls can try again.
#
# The block receives the Lockfile instance. Returns the block's value. If the
# block raises KeepLocked, the exception's payload is returned and the lock is
# left in place. Lockfile::StolenLockError is swallowed: the result is nil and
# no unlock is attempted.
def self.lock(file, unlock = true)
  return yield if file.nil?

  # File.exist? rather than File.exists?: the latter was removed in Ruby 3.2.
  lock_dir = File.dirname(File.expand_path(file))
  FileUtils.mkdir_p lock_dir unless File.exist? lock_dir

  res = nil

  lock_path = File.expand_path(file + '.lock')
  lockfile = Lockfile.new(lock_path)

  hostname = Misc.hostname
  LOCK_MUTEX.synchronize do
    Misc.insist 2, 0.1 do
      Misc.insist 3, 0.1 do
        begin
          if File.exist? lock_path
            info = Open.open(lock_path){|f| YAML.load(f) }
            raise "No info" unless info

            # Only locks taken on this very host can be checked for a dead owner
            if hostname == info["host"] and not Misc.pid_exists?(info["pid"])
              Log.info("Removing lockfile: #{lock_path}. This pid #{Process.pid}. Content: #{info.inspect}")
              FileUtils.rm lock_path
            end
          end
        rescue Exception
          # Unreadable lock file: drop it, start from a fresh Lockfile and let
          # the enclosing Misc.insist decide whether to retry
          FileUtils.rm lock_path if File.exist? lock_path
          lockfile = Lockfile.new(lock_path) unless File.exist? lock_path
          raise
        end
      end
    end
  end

  begin
    lockfile.lock
    res = yield lockfile
  rescue Lockfile::StolenLockError
    unlock = false
  rescue KeepLocked
    unlock = false
    res = $!.payload
  ensure
    if unlock and lockfile.locked?
      lockfile.unlock
    end
  end

  res
end
|
54
|
+
|
55
|
+
|
56
|
+
# Serializer used for the lock records stored inside a repository.
# NOTE(review): Marshal.load must only ever see trusted data -- confirm that
# repositories passed to lock_in_repo are never fed by untrusted writers.
LOCK_REPO_SERIALIZER=Marshal

# Runs the given block while holding a lock record inside a key/value
# repository.
#
# repo - store responding to [], []=, include?, out and delete. When nil, no
#        locking is done.
# key  - name of the resource; the record is kept under "lock-<key>". When
#        nil, no locking is done.
# args - extra arguments handed to the block after the lock key.
#
# Without a repo or key the block is called with nil in place of the lock key
# (the previous code referenced an undefined local here and raised NameError).
#
# An existing record written on this host by a pid that no longer exists is
# removed; a record that cannot be inspected is removed as well, with a
# warning. The method then polls once per second until the record is gone,
# writes its own record, yields, and always deletes the record afterwards,
# even when the block raises.
#
# The check-then-write sequence is not atomic, so two processes can still race
# for the same key.
#
# Returns the value of the block.
def self.lock_in_repo(repo, key, *args)
  return yield(nil, *args) if repo.nil? or key.nil?

  lock_key = "lock-#{key}"

  begin
    if repo[lock_key]
      info = LOCK_REPO_SERIALIZER.load(repo[lock_key])
      # Records are written with "host"/"pid"; the symbol keys are what earlier
      # versions of this method stored, so they are still understood
      host = info["host"] || info[:hostname]
      pid = info["pid"] || info[:pid]

      if Misc.hostname == host and pid and not Misc.pid_exists?(pid)
        Log.info("Removing lockfile: #{lock_key}. This pid #{Process.pid}. Content: #{info.inspect}")
        repo.out lock_key
      end
    end
  rescue
    Log.warn("Error checking lockfile #{lock_key}: #{$!.message}. Removing. Content: #{begin repo[lock_key] rescue "Could not open file" end}")
    repo.out lock_key if repo.include? lock_key
  end

  while repo[lock_key]
    sleep 1
  end

  repo[lock_key] = LOCK_REPO_SERIALIZER.dump({"host" => Misc.hostname, "pid" => Process.pid})

  begin
    yield lock_key, *args
  ensure
    # Release even when the block fails, otherwise the key stays locked for
    # as long as this process is alive
    repo.delete lock_key
  end
end
|
87
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
|
2
|
+
# Small numeric helpers. All list-based helpers ignore nil entries.
module Misc

  # 1 / ln(2), so that a base-2 logarithm is a single multiplication.
  Log2Multiplier = 1.0 / Math.log(2.0)

  # Base-2 logarithm of x.
  def self.log2(x)
    Math.log(x) * Log2Multiplier
  end

  # Largest non-nil element of list (compared with >), or nil when there is
  # none.
  def self.max(list)
    max = nil
    list.each do |v|
      next if v.nil?
      max = v if max.nil? or v > max
    end
    max
  end

  # Sum of the non-nil elements of list, as a Float (0.0 for no elements).
  def self.sum(list)
    list.compact.inject(0.0){|acc,e| acc + e }
  end

  # Arithmetic mean of the non-nil elements of list. With no non-nil elements
  # this is 0.0 / 0, i.e. NaN.
  def self.mean(list)
    sum(list) / list.compact.length
  end

  # Sample standard deviation (n - 1 denominator) of the non-nil elements.
  #
  # Returns nil when list has fewer than three entries, as before, and also
  # when fewer than two non-nil values remain (which used to produce NaN).
  #
  # The square root is taken of the variance. The previous code divided the
  # square root of the summed squares by n - 1, which is not a standard
  # deviation.
  def self.sd(list)
    return nil if list.length < 3

    values = list.compact
    return nil if values.length < 2

    avg = mean(values)
    total_square_distance = values.inject(0.0){|acc,e| d = e - avg; acc + d * d }
    Math.sqrt(total_square_distance / (values.length - 1))
  end

end
|
File without changes
|
@@ -0,0 +1,183 @@
|
|
1
|
+
module Misc
|
2
|
+
|
3
|
+
# IUPAC nucleotide code => list of the concrete bases it may stand for.
IUPAC2BASE = {
  "A" => %w(A),
  "C" => %w(C),
  "G" => %w(G),
  "T" => %w(T),
  "U" => %w(U),
  "R" => %w(A G),
  "Y" => %w(C T),
  "S" => %w(G C),
  "W" => %w(A T),
  "K" => %w(G T),
  "M" => %w(A C),
  "B" => %w(C G T),
  "D" => %w(A G T),
  "H" => %w(A C T),
  "V" => %w(A C G),
  "N" => %w(A C T G)
}
|
21
|
+
|
22
|
+
# Base => complementary base; both T and U pair with A.
BASE2COMPLEMENT = Hash[%w(A C G T U).zip(%w(T G C A A))]
|
29
|
+
|
30
|
+
# Lower-case three-letter amino acid code => one-letter code.
# Written as a flat list of "three-letter one-letter" pairs.
THREE_TO_ONE_AA_CODE = Hash[*%w(
  ala A  arg R  asn N  asp D
  cys C  glu E  gln Q  gly G
  his H  ile I  leu L  lys K
  met M  phe F  pro P  ser S
  thr T  trp W  tyr Y  val V
)]
|
52
|
+
# DNA codon => one-letter amino acid code; "*" marks the TAA, TAG and TGA
# codons. Written as a flat list of "codon amino-acid" pairs.
CODON_TABLE = Hash[*%w(
  ATT I  ATC I  ATA I
  CTT L  CTC L  CTA L  CTG L  TTA L  TTG L
  GTT V  GTC V  GTA V  GTG V
  TTT F  TTC F
  ATG M
  TGT C  TGC C
  GCT A  GCC A  GCA A  GCG A
  GGT G  GGC G  GGA G  GGG G
  CCT P  CCC P  CCA P  CCG P
  ACT T  ACC T  ACA T  ACG T
  TCT S  TCC S  TCA S  TCG S  AGT S  AGC S
  TAT Y  TAC Y
  TGG W
  CAA Q  CAG Q
  AAT N  AAC N
  CAT H  CAC H
  GAA E  GAG E
  GAT D  GAC D
  AAA K  AAG K
  CGT R  CGC R  CGA R  CGG R  AGA R  AGG R
  TAA *  TAG *  TGA *
)]
|
118
|
+
|
119
|
+
# Rewrites an ICGC-style allele string.
#
# An allele starting with "-" followed by one of A, C, G or T becomes a run of
# dashes one shorter than the original string. When ref is "-" the resulting
# allele is prefixed with "+".
#
# Returns [pos, [allele]]; pos is passed through unchanged.
def self.correct_icgc_mutation(pos, ref, mut_str)
  allele = mut_str

  if allele =~ /^-[ACGT]/
    allele = '-' * (mut_str.length - 1)
  end

  if ref == '-'
    allele = "+" + allele
  end

  [pos, [allele]]
end
|
125
|
+
|
126
|
+
# Rewrites VCF-style REF/ALT alleles.
#
# mut_str is a comma separated list of alternative alleles (nil means none).
# While every alternative starts with the first base of ref, that base is
# dropped from ref and from each alternative and pos moves forward by one.
# Each remaining alternative is then rewritten:
#
# * ref used up                                   => "+" followed by the allele
# * allele shorter than ref and empty or found
#   inside ref                                    => one "-" per missing base
# * single base against single base               => the allele itself
# * anything else                                 => logged at debug level;
#   one "-" per ref base followed by the allele
#
# Returns [pos, alleles].
def self.correct_vcf_mutation(pos, ref, mut_str)
  alleles = mut_str.nil? ? [] : mut_str.split(',')

  while ref.length >= 1 and alleles.all?{|allele| allele[0] == ref[0] }
    ref = ref[1..-1]
    pos = pos + 1
    alleles = alleles.collect{|allele| allele[1..-1] }
  end

  alleles = alleles.collect do |allele|
    if ref.empty?
      "+" + allele
    elsif allele.length < ref.length and (allele.empty? or ref.index(allele))
      "-" * (ref.length - allele.length)
    elsif ref.length == 1 and allele.length == 1
      allele
    else
      Log.debug{"Cannot understand: #{[ref, allele]} (#{ alleles })"}
      '-' * ref.length + allele
    end
  end

  [pos, alleles]
end
|
151
|
+
|
152
|
+
# Looks up an IUPAC nucleotide code in IUPAC2BASE.
#
# Returns the entry stored for the code, or nil when the code is not listed.
def self.IUPAC_to_base(iupac)
  IUPAC2BASE.fetch(iupac, nil)
end
|
155
|
+
|
156
|
+
|
157
|
+
# Sorts mutation strings of the form "chr:pos:allele" by chromosome, then by
# position, then by allele.
#
# A leading "chr" (any case) is ignored for ordering. Chromosome names are
# ranked by their integer value; X, Y and MT/M are placed after them as 23, 24
# and 25. Any other non-numeric name ranks as 0, which is what String#to_i
# gives.
#
# Y used to be ranked 22, which made it indistinguishable from chromosome 22
# and interleaved both by position. A missing allele is now ordered as an
# empty string instead of making the comparison fail.
#
# The input strings are not modified. Returns a new array with the original
# strings in sorted order.
def self.sort_mutations(mutations)
  special_ranks = {"X" => 23, "Y" => 24, "MT" => 25, "M" => 25}

  keyed = mutations.collect do |mutation|
    chr, pos, mut = mutation.split ":"
    chr = chr.sub(/^chr/i, '')
    rank = special_ranks.fetch(chr) { chr.to_i }
    [[rank, pos.to_i, mut.to_s], mutation]
  end

  keyed.sort_by{|key, _mutation| key }.collect{|_key, mutation| mutation }
end
|
183
|
+
end
|
@@ -0,0 +1,224 @@
|
|
1
|
+
module Misc
|
2
|
+
|
3
|
+
# Guards OPEN_PIPE_IN and pipe creation/closing across threads.
PIPE_MUTEX = Mutex.new

# Write ends of the pipes handed out by Misc.pipe that have not been seen
# closed yet.
OPEN_PIPE_IN = []

# Creates a pipe and records its write end in OPEN_PIPE_IN.
#
# Write ends that are already closed are dropped from the registry first. This
# now happens inside the mutex: pruning used to run unsynchronized while other
# threads could be appending to the same array.
#
# Returns [read_end, write_end], like IO.pipe.
def self.pipe
  PIPE_MUTEX.synchronize do
    OPEN_PIPE_IN.delete_if{|pipe| pipe.closed? }

    sout, sin = IO.pipe
    OPEN_PIPE_IN << sin

    [sout, sin]
  end
end
|
15
|
+
|
16
|
+
# Closes every given stream that is still open, holding PIPE_MUTEX meanwhile.
#
# pipes - streams, or arbitrarily nested arrays of streams.
#
# Returns the flattened list of streams.
def self.release_pipes(*pipes)
  PIPE_MUTEX.synchronize do
    pipes.flatten.each do |io|
      next if io.closed?
      io.close
    end
  end
end
|
23
|
+
|
24
|
+
|
25
|
+
# Closes every write end recorded in OPEN_PIPE_IN that is still open, except
# the ones passed in save. Runs under PIPE_MUTEX. The registry itself is left
# untouched.
#
# Returns OPEN_PIPE_IN.
def self.purge_pipes(*save)
  PIPE_MUTEX.synchronize do
    OPEN_PIPE_IN.each do |io|
      io.close unless save.include?(io) or io.closed?
    end
  end
end
|
33
|
+
|
34
|
+
# Runs the block as a producer writing into a fresh pipe and returns the read
# end.
#
# do_fork - when true the block runs in a forked child process, otherwise in a
#           new thread of this process.
# close   - when true the producer closes the write end once the block is
#           done.
#
# Thread mode: an error raised by the block is re-raised in the calling
# thread.
# Fork mode: the child first closes the other registered write ends and its
# copy of the read end. On error it logs the exception, sends INT to the
# parent and exits with -1; otherwise it exits with 0. The parent always
# closes its copy of the write end.
#
# The read end is passed through ConcurrentStream.setup together with the
# producer's thread or pid before being returned.
#
# Raises RuntimeError when no block is given.
def self.open_pipe(do_fork = false, close = true)
  raise "No block given" unless block_given?

  reader, writer = Misc.pipe

  unless do_fork
    producer = Thread.new(Thread.current) do |caller_thread|
      begin
        yield writer
      rescue
        caller_thread.raise $!
      ensure
        writer.close if close and not writer.closed?
      end
    end
    ConcurrentStream.setup reader, :threads => [producer]
    return reader
  end

  parent_pid = Process.pid
  child_pid = Process.fork {
    purge_pipes(writer)
    reader.close
    begin
      yield writer
    rescue
      Log.exception $!
      Process.kill :INT, parent_pid
      Kernel.exit!(-1)
    ensure
      writer.close if close and not writer.closed?
    end
    Kernel.exit! 0
  }
  writer.close #if close
  ConcurrentStream.setup reader, :pids => [child_pid]

  reader
end
|
71
|
+
|
72
|
+
# Duplicates a stream into two pipes using a forked splitter process.
#
# The child reads stream in 2048 byte blocks and writes each block to both
# pipes. A pipe whose write fails is logged and skipped from then on, so the
# other reader still gets the rest of the data. Once the input is exhausted a
# failure on either pipe is raised; like any other error in the child it ends
# up being logged. The first check used to test the second pipe again, so a
# failure on the first pipe was never reported. On Aborted, stream.abort is
# called when available and the error is re-raised.
#
# The parent closes stream and its copies of both write ends.
#
# Returns the two read ends, each passed through ConcurrentStream.setup with
# the pid of the splitter.
def self.tee_stream_fork(stream)
  stream_out1, stream_in1 = Misc.pipe
  stream_out2, stream_in2 = Misc.pipe

  splitter_pid = Process.fork do
    Misc.purge_pipes(stream_in1, stream_in2)
    stream_out1.close
    stream_out2.close
    begin
      skip1 = skip2 = false
      while block = stream.read(2048)
        begin stream_in1.write block; rescue Exception; Log.exception $!; skip1 = true end unless skip1
        begin stream_in2.write block; rescue Exception; Log.exception $!; skip2 = true end unless skip2
      end
      raise "Error writing in stream_in1" if skip1
      raise "Error writing in stream_in2" if skip2
    rescue Aborted
      stream.abort if stream.respond_to? :abort
      raise $!
    rescue IOError
      Log.exception $!
    rescue Exception
      Log.exception $!
    ensure
      stream_in1.close
      stream_in2.close
      stream.join if stream.respond_to? :join
    end
  end
  stream.close
  stream_in1.close
  stream_in2.close
  #stream.join if stream.respond_to? :join

  ConcurrentStream.setup stream_out1, :pids => [splitter_pid]
  ConcurrentStream.setup stream_out2, :pids => [splitter_pid]

  [stream_out1, stream_out2]
end
|
112
|
+
|
113
|
+
# Duplicates a stream into two pipes using a splitter thread.
#
# The thread reads stream in 2048 byte blocks and writes each block to both
# pipes. A write that fails with anything but Aborted is logged and that pipe
# is skipped from then on; Aborted is re-raised. On Aborted, stream.abort is
# called when available. IOErrors are only logged; any other error is logged
# and raised in the calling thread. Both write ends are closed when the thread
# finishes, and stream.join is called when available.
#
# Returns the two read ends, each passed through ConcurrentStream.setup with
# the splitter thread.
def self.tee_stream_thread(stream)
  stream_out1, stream_in1 = Misc.pipe
  stream_out2, stream_in2 = Misc.pipe

  splitter_thread = Thread.new(Thread.current, stream_in1, stream_in2) do |parent, in1, in2|
    begin
      filename = stream.respond_to?(:filename) ? stream.filename : nil
      failed1 = failed2 = false

      while chunk = stream.read(2048)
        unless failed1
          begin
            in1.write chunk
          rescue Exception
            raise $! if Aborted === $!
            Log.exception($!)
            failed1 = true
          end
        end

        unless failed2
          begin
            in2.write chunk
          rescue Exception
            raise $! if Aborted === $!
            Log.exception($!)
            failed2 = true
          end
        end
      end
    rescue Aborted
      stream.abort if stream.respond_to? :abort
      raise $!
    rescue IOError
      Log.exception $!
    rescue Exception
      Log.exception $!
      parent.raise $!
    ensure
      in1.close
      in2.close
      stream.join if stream.respond_to? :join
    end
  end

  ConcurrentStream.setup stream_out1, :threads => splitter_thread
  ConcurrentStream.setup stream_out2, :threads => splitter_thread

  [stream_out1, stream_out2]
end

# tee_stream is the thread based implementation.
class << self
  alias_method :tee_stream, :tee_stream_thread
end
|
149
|
+
|
150
|
+
# Reads io to the end in 2048 byte blocks and returns everything read.
#
# If reading raises a StandardError, io.abort is called when available and
# whatever was read so far is returned. In every case io.join (when available)
# and io.close (when available) are called before returning.
def self.read_full_stream(io)
  contents = ""
  begin
    until (chunk = io.read(2048)).nil?
      contents << chunk
    end
  rescue
    io.abort if io.respond_to? :abort
  ensure
    io.join if io.respond_to? :join
    io.close if io.respond_to? :close
  end
  contents
end
|
164
|
+
|
165
|
+
# Reads io in 2048 byte blocks, discarding the data, until it reports eof or a
# read returns nothing. Thread.pass is called between blocks.
#
# If reading raises a StandardError, io.abort is called when available. In
# every case io.join (when available) and io.close (when available) are called
# before returning.
def self.consume_stream(io)
  while io.read(2048)
    return if io.eof?
    Thread.pass
  end
rescue
  io.abort if io.respond_to? :abort
ensure
  io.join if io.respond_to? :join
  io.close if io.respond_to? :close
end
|
178
|
+
|
179
|
+
# Reads exactly size bytes from stream.
#
# Waits, in one second IO.select slices, until the stream is readable, then
# keeps reading until size bytes have been collected.
#
# This method used to be defined twice in a row with identical bodies; the
# second definition silently replaced the first, so only one is kept.
#
# Returns the bytes read.
# Raises ClosedStream when the stream reaches its end before size bytes are
# available.
def self.read_stream(stream, size)
  str = nil
  Thread.pass while IO.select([stream],nil,nil,1).nil?
  while not str = stream.read(size)
    IO.select([stream],nil,nil,1)
    Thread.pass
    raise ClosedStream if stream.eof?
  end

  # A short read: keep asking for the missing part until it arrives
  while str.length < size
    raise ClosedStream if stream.eof?
    IO.select([stream],nil,nil,1)
    if new = stream.read(size-str.length)
      str << new
    end
  end
  str
end
|
216
|
+
# Reads up to size bytes from stream, asking again for the missing part after
# a short read and stopping as soon as a read returns nothing.
#
# Returns what was read, which is shorter than size when the stream ended
# early.
def self._read_stream(stream, size)
  buffer = ""
  until buffer.length >= size
    chunk = stream.read(size - buffer.length)
    break unless chunk
    buffer << chunk
  end
  buffer
end
|
223
|
+
|
224
|
+
end
|