bio-vcf 0.8.0 → 0.9.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.travis.yml +1 -11
- data/Gemfile +4 -5
- data/Gemfile.lock +28 -65
- data/LICENSE.txt +1 -1
- data/README.md +387 -107
- data/RELEASE_NOTES.md +20 -0
- data/RELEASE_NOTES.md~ +11 -0
- data/Rakefile +3 -40
- data/TAGS +115 -0
- data/VERSION +1 -1
- data/bin/bio-vcf +176 -109
- data/bio-vcf.gemspec +14 -70
- data/features/cli.feature +22 -4
- data/features/diff_count.feature +0 -1
- data/features/filter.feature +12 -0
- data/features/multisample.feature +25 -0
- data/features/somaticsniper.feature +2 -0
- data/features/step_definitions/cli-feature.rb +15 -6
- data/features/step_definitions/diff_count.rb +1 -1
- data/features/step_definitions/multisample.rb +19 -0
- data/features/step_definitions/somaticsniper.rb +9 -1
- data/features/step_definitions/vcf_header.rb +48 -0
- data/features/support/env.rb +0 -9
- data/features/vcf_header.feature +35 -0
- data/lib/bio-vcf.rb +2 -0
- data/lib/bio-vcf/bedfilter.rb +43 -0
- data/lib/bio-vcf/pcows.rb +303 -0
- data/lib/bio-vcf/template.rb +75 -0
- data/lib/bio-vcf/vcffile.rb +46 -0
- data/lib/bio-vcf/vcfgenotypefield.rb +25 -20
- data/lib/bio-vcf/vcfheader.rb +146 -6
- data/lib/bio-vcf/vcfheader_line.rb +778 -0
- data/lib/bio-vcf/vcfrecord.rb +56 -18
- data/lib/bio-vcf/vcfsample.rb +27 -3
- data/ragel/gen_vcfheaderline_parser.rl +165 -0
- data/ragel/generate.sh +8 -0
- data/template/vcf2json.erb +19 -7
- data/template/vcf2json_full_header.erb +22 -0
- data/template/vcf2json_use_meta.erb +41 -0
- data/template/vcf2rdf_header.erb +24 -0
- data/test/data/input/empty.vcf +2 -0
- data/test/data/input/gatk_exome.vcf +237 -0
- data/test/data/input/gatk_wgs.vcf +1000 -0
- data/test/data/input/test.bed +632 -0
- data/test/data/regression/empty-stderr.new +12 -0
- data/test/data/regression/empty.new +2 -0
- data/test/data/regression/empty.ref +2 -0
- data/test/data/regression/eval_once-stderr.new +2 -0
- data/test/data/regression/eval_once.new +1 -0
- data/test/data/regression/eval_once.ref +1 -0
- data/test/data/regression/eval_r.info.dp-stderr.new +10 -0
- data/test/data/regression/eval_r.info.dp.new +150 -0
- data/test/data/regression/ifilter_s.dp-stderr.new +34 -0
- data/test/data/regression/ifilter_s.dp.new +31 -0
- data/test/data/regression/pass1-stderr.new +10 -0
- data/test/data/regression/pass1.new +88 -0
- data/test/data/regression/pass1.ref +88 -0
- data/test/data/regression/r.info.dp-stderr.new +4 -0
- data/test/data/regression/r.info.dp.new +114 -0
- data/test/data/regression/rewrite.info.sample-stderr.new +10 -0
- data/test/data/regression/rewrite.info.sample.new +150 -0
- data/test/data/regression/s.dp-stderr.new +18 -0
- data/test/data/regression/s.dp.new +145 -0
- data/test/data/regression/seval_s.dp-stderr.new +10 -0
- data/test/data/regression/seval_s.dp.new +36 -0
- data/test/data/regression/sfilter_seval_s.dp-stderr.new +18 -0
- data/test/data/regression/sfilter_seval_s.dp.new +31 -0
- data/test/data/regression/thread4-stderr.new +10 -0
- data/test/data/regression/thread4.new +150 -0
- data/test/data/regression/thread4_4-stderr.new +25 -0
- data/test/data/regression/thread4_4.new +130 -0
- data/test/data/regression/thread4_4_failed_filter-stderr.new +5 -0
- data/test/data/regression/thread4_4_failed_filter-stderr.ref +5 -1
- data/test/data/regression/thread4_4_failed_filter.new +110 -0
- data/test/data/regression/vcf2json_full_header-stderr.new +10 -0
- data/test/data/regression/vcf2json_full_header.new +225 -0
- data/test/data/regression/vcf2json_full_header.ref +225 -0
- data/test/data/regression/vcf2json_use_meta-stderr.new +10 -0
- data/test/data/regression/vcf2json_use_meta.new +4697 -0
- data/test/data/regression/vcf2json_use_meta.ref +4697 -0
- data/test/performance/metrics.md +18 -1
- data/test/stress/stress_test.sh +15 -0
- data/test/tmp/test.vcf +12469 -0
- metadata +65 -64
@@ -0,0 +1,303 @@
|
|
1
|
+
# Parallel copy-on-write streaming (PCOWS)
|
2
|
+
|
3
|
+
require 'tempfile'
|
4
|
+
|
5
|
+
class PCOWS
|
6
|
+
|
7
|
+
RUNNINGEXT = 'part' # file extension
|
8
|
+
|
9
|
+
# Set up the bookkeeping for a PCOWS run.
#
# num_threads - number of workers; when nil/false the value of cpu_count is used
# chunk_size  - stored in @chunk_size
# name        - used for the temporary directory and chunk file names
# timeout     - seconds, handed to Timeout.timeout when waiting
# quiet       - suppresses progress messages on $stderr when true
# debug       - announces debug mode; the cleanup helpers keep files when set
#
# A temporary directory is only created when more than one thread is used.
def initialize(num_threads,chunk_size,name=File.basename(__FILE__),timeout=180,quiet=false,debug=false)
  # $stderr.print "Using ",num_threads,"threads \n"
  @num_threads = num_threads || cpu_count   # FIXME: set to cpu_num by default
  @chunk_size  = chunk_size
  @pid_list    = []
  @name        = name
  @timeout     = timeout
  @quiet       = quiet
  @debug       = debug
  $stderr.print "PCOWS running in DEBUG MODE\n" if @debug
  @tmpdir = Dir.mktmpdir(@name+'_') if multi_threaded
  @last_output   = 0 # counter
  @output_locked = false
end
|
28
|
+
|
29
|
+
# Feed the worker 'func and state' to COWS. Note that func is a
|
30
|
+
# lambda closure so it can pick up surrounding scope at invocation
|
31
|
+
# in addition to the data captured in 'state'.
|
32
|
+
|
33
|
+
# Run func.call(state) and print every element of the result.
#
# With more than one thread the call runs in a forked child whose STDOUT is
# reopened on "<chunk file>.part"; once the child has written everything the
# file is renamed to the chunk file name. Otherwise func is called in the
# current process and its output is printed immediately.
#
# Appends [pid, count, fn] to @pid_list (all nil when no child was forked)
# and returns true.
def submit_worker(func,state)
  pid = nil
  if multi_threaded
    count = @pid_list.size+1
    fn = mktmpfilename(count)
    # Flush before forking: the child gets a copy of any unwritten stdout
    # buffer and would otherwise write that data a second time.
    $stdout.flush
    pid = fork do
      # ---- This is running a new copy-on-write process
      tempfn = fn+'.'+RUNNINGEXT
      STDOUT.reopen(File.open(tempfn, 'w+'))
      func.call(state).each { | line | print line }
      STDOUT.flush
      STDOUT.close
      # The rename signals that the chunk is complete
      FileUtils::mv(tempfn,fn)
      exit(0)
    end
    Process.detach(pid)
  else
    # ---- Single threaded: call in main process and output immediately
    func.call(state).each { | line | print line }
  end
  @pid_list << [ pid,count,fn ]
  return true
end
|
63
|
+
|
64
|
+
# Flag that the final worker has been handed in (@final_worker = true) and
# submit it like any other worker. Returns whatever submit_worker returns.
def submit_final_worker(func,state)
  @final_worker = true
  submit_worker(func, state)
end
|
68
|
+
|
69
|
+
# Make sure no more than num_threads are running at the same time -
|
70
|
+
# this is achieved by checking the PID table and the running files
|
71
|
+
# in the tmpdir
|
72
|
+
|
73
|
+
# Block until fewer than @num_threads entries of @pid_list are still running
# (according to pid_or_file_running?). Polls every 0.1 seconds inside
# Timeout.timeout(@timeout), so Timeout::Error propagates when no slot
# appears in time. Does nothing in single threaded mode.
def wait_for_worker_slot()
  return if single_threaded
  Timeout.timeout(@timeout) do
    announced = false
    loop do
      # ---- count running pids
      busy = @pid_list.count { |(pid, _count, fn)| pid_or_file_running?(pid, fn) }
      return if busy < @num_threads
      unless announced
        $stderr.print "Waiting for slot (timeout=#{@timeout})\n" if not @quiet
        announced = true
      end
      sleep 0.1
    end
  end
end
|
96
|
+
|
97
|
+
# ---- In this section the output gets collected and passed on to a
|
98
|
+
# printer thread. This function makes sure the printing is
|
99
|
+
# ordered and that no printers are running at the same
|
100
|
+
# time. The printer thread should be doing as little processing
|
101
|
+
# as possible.
|
102
|
+
#
|
103
|
+
# In this implementation type==:by_line will call func for
|
104
|
+
# each line. Otherwise it is called once with the filename.
|
105
|
+
# Emit the next finished chunk file, in submission order.
#
# func     - called with the chunk file name when type is not :by_line
#            (it is not used for :by_line)
# type     - :by_line prints the chunk file line by line
# blocking - when true the chunk is emitted in this process, otherwise in a
#            forked child
#
# While a chunk is being emitted @output_locked holds its [pid,count,fn]
# entry. The emitter renames the chunk to "<fn>.keep" when it is done; the
# next call notices that fn is gone, removes the .keep file and advances
# @last_output. Does nothing in single threaded mode.
def process_output(func=nil,type=:by_line, blocking=false)
  return if single_threaded
  output = lambda { |fn|
    if type == :by_line
      # File.foreach closes the file when done (File.new(fn).each_line
      # left the handle open)
      File.foreach(fn) { |buf|
        print buf
      }
    else
      func.call(fn)
    end
  }
  if @output_locked
    # ---- is the other thread still running? We wait until it
    #      is finished to start the next one
    (pid,count,fn) = @output_locked
    $stderr.print "Checking for output_lock on existing #{fn}\n" if not @quiet
    return if File.exist?(fn) # continue because thread still processing
    # Now we should remove the .keep file
    cleanup_keep_file(fn)
    @last_output += 1 # get next one in line
    @output_locked = false
  end
  # ---- process the next output chunk. After completion it
  #      gets renamed to chunk.keep. This to avoid missing
  #      output (if we unlink the file prematurely)
  if info = @pid_list[@last_output]
    (pid,count,fn) = info
    $stderr.print "Testing (#{@last_output}) for output file ",[info],"\n" if @debug
    if File.exist?(fn)
      # Yes! We have the next output, create outputter
      @output_locked = info
      $stderr.print "Set lock on ",[info],"\n" if not @quiet
      if not blocking
        $stderr.print "Processing output file #{fn} (non-blocking)\n" if not @quiet
        # Flush before forking so the child does not re-emit buffered output
        $stdout.flush
        pid = fork do
          output.call(fn)
          # after finishing output move it to .keep
          FileUtils::mv(fn,fn+'.keep')
          exit(0)
        end
        Process.detach(pid)
      else
        $stderr.print "Processing output file #{fn} (blocking)\n" if not @quiet
        output.call(fn)
        FileUtils::mv(fn,fn+'.keep')
      end
    else
      sleep 0.2
    end
  end
end
|
156
|
+
|
157
|
+
# Wait for a worker slot to appear. When working the pid is writing
|
158
|
+
# a file with extension .part(ial). After completion the file is
|
159
|
+
# renamed without .part and a slot is free.
|
160
|
+
# Wait until the worker described by info ([pid,count,fn]) has produced its
# chunk file fn.
#
# Returns immediately when the worker is not running. Otherwise polls every
# 0.2 seconds, for at most @timeout seconds, until fn exists or the worker
# is gone. On timeout the child is killed and Timeout::Error is re-raised.
# Raises RuntimeError when fn is missing after the wait.
def wait_for_worker(info)
  (pid,count,fn) = info
  return unless pid_or_file_running?(pid,fn)
  $stderr.print "Waiting up to #{@timeout} seconds for pid=#{pid} to complete #{fn}\n" if not @quiet
  begin
    Timeout.timeout(@timeout) do
      while not File.exist?(fn)  # wait for the result to appear
        sleep 0.2
        return if not pid_or_file_running?(pid,fn) # worker is gone
      end
    end
    # Partial file should have been renamed:
    raise "FATAL: child process #{pid} appears to have crashed #{fn}" if not File.exist?(fn)
    $stderr.print "OK pid=#{pid}, processing starts of #{fn}\n" if not @quiet
  rescue Timeout::Error => timeout_error
    # Kill it to speed up exit. The child may already be gone or reaped
    # (workers are detached), which must not hide the timeout itself.
    begin
      Process.kill 9, pid
      Process.wait pid
    rescue Errno::ESRCH, Errno::ECHILD
      # nothing left to kill or reap
    end
    $stderr.print "FATAL: child process killed because it stopped responding, pid = #{pid}, fn = #{fn}, count = #{count}\n"
    $stderr.print "Bailing out\n"
    raise timeout_error
  end
end
|
184
|
+
|
185
|
+
# This is the final cleanup after the reader thread is done. All workers
|
186
|
+
# need to complete.
|
187
|
+
|
188
|
+
# Call wait_for_worker for every entry of @pid_list, in order.
# Does nothing in single threaded mode.
def wait_for_workers()
  return if single_threaded
  @pid_list.each { |entry| wait_for_worker(entry) }
end
|
194
|
+
|
195
|
+
# Drain all output that is still pending and remove the temporary directory.
#
# First lets a running (non-blocking) emitter finish, then emits every
# remaining chunk in blocking mode, then waits for the last lock to be
# released. Does nothing in single threaded mode.
def process_remaining_output()
  return if single_threaded
  $stderr.print "Processing remaining output...\n" if not @quiet
  loop do
    break unless @output_locked
    sleep 0.2
    process_output() # keep trying
  end
  @pid_list.each do |entry|
    worker_pid, _count, chunk_fn = entry
    while pid_or_file_running?(worker_pid, chunk_fn) || File.exist?(chunk_fn)
      $stderr.print "Trying: ",[entry],"\n" if not @quiet
      process_output(nil, :by_line, true)
      sleep 0.2
    end
  end
  loop do
    break unless @output_locked
    sleep 0.1
    process_output(nil, :by_line, true)
  end
  cleanup_tmpdir
end
|
216
|
+
|
217
|
+
# Emergency cleanup: kill every child that is still running and remove its
# chunk file, its .keep file and its .part file, then remove the temporary
# directory.
#
# Entries without a chunk file name (workers that ran in the main process)
# are skipped. A child that has already disappeared is reported on $stderr
# and otherwise ignored.
def cleanup()
  @pid_list.each do |info|
    (pid,count,fn) = info
    next unless fn # no child and no files were created for this entry
    if pid && pid_running?(pid)
      $stderr.print "Killing child ",[info],"\n"
      begin
        Process.kill 9, pid
        Process.wait pid
      rescue Errno::ESRCH, Errno::ECHILD, Errno::ENOENT => e
        # Report on $stderr; the previous handlers used undefined variables
        # and wrote to $stdout, which carries the data stream.
        $stderr.puts "INFO: The process #{pid} did not exist: #{e.class}" if not @quiet
      end
    end
    File.unlink(fn) if File.exist?(fn)
    cleanup_keep_file(fn,wait: false)
    tempfn = fn+'.'+RUNNINGEXT
    File.unlink(tempfn) if File.exist?(tempfn)
  end
  cleanup_tmpdir()
end
|
238
|
+
|
239
|
+
private
|
240
|
+
|
241
|
+
# Build the chunk file name inside @tmpdir: the number zero padded to six
# digits, a dash, @name and, when ext is given, a dot plus ext.
def mktmpfilename(num,ext=nil)
  numbered = sprintf("/%0.6d-", num)
  suffix   = ext ? '.'+ext : ''
  @tmpdir + numbered + @name + suffix
end
|
244
|
+
|
245
|
+
# True when the process pid is still running or when the partial output
# file (fn plus the RUNNINGEXT extension) still exists.
def pid_or_file_running?(pid,fn)
  return true if pid && pid_running?(pid)
  File.exist?(fn+'.'+RUNNINGEXT)
end
|
248
|
+
|
249
|
+
# Non-blocking check (Process.waitpid2 with WNOHANG) whether child pid is
# still alive. Returns false when the process is unknown or not a child
# (ECHILD/ESRCH) and true when waitpid2 reports nothing yet.
def pid_running?(pid)
  reaped_pid, exit_status =
    begin
      Process.waitpid2(pid, Process::WNOHANG)
    rescue Errno::ECHILD, Errno::ESRCH
      return false
    end
  return true if reaped_pid.nil? && exit_status.nil?
  !exit_status.exited? && !exit_status.signaled?
end
|
258
|
+
|
259
|
+
# True when exactly one thread was requested (@num_threads == 1).
def single_threaded
  @num_threads == 1
end
|
262
|
+
|
263
|
+
# True when more than one thread was requested (@num_threads > 1).
def multi_threaded
  @num_threads > 1
end
|
266
|
+
|
267
|
+
# Best-effort detection of the number of CPUs. Tries, in order:
#
#   1. the number of "processor :" lines in /proc/cpuinfo
#   2. the JVM runtime, when Java::Java is defined
#   3. `sysctl -n hw.ncpu`
#
# A source that yields no positive number is skipped. Falls back to 1, with
# a warning on $stderr unless @quiet is set.
def cpu_count
  if File.exist?('/proc/cpuinfo')
    found = File.read('/proc/cpuinfo').scan(/^processor\s*:/).size
    return found if found > 0
  end
  # Actually, the JVM does not allow fork...
  return Java::Java.lang.Runtime.getRuntime.availableProcessors if defined? Java::Java
  begin
    # Count on MAC. This used to sit behind a rescue LoadError that could
    # never trigger, so it was never reached.
    found = `sysctl -n hw.ncpu 2>/dev/null`.to_i
    return found if found > 0
  rescue SystemCallError
    # sysctl could not be run; fall through to the default
  end
  $stderr.print "Could not determine number of CPUs\n" if not @quiet
  1
end
|
279
|
+
|
280
|
+
# Remove "<fn>.keep". Does nothing in debug mode.
#
# With opts[:wait] set (the default) this polls every 0.1 seconds until the
# .keep file exists and then deletes it; without it the call returns at once
# when the file is not there.
def cleanup_keep_file(fn, opts = { wait: true })
  return if @debug
  kept = fn + '.keep'
  return if !opts[:wait] && !File.exist?(kept)
  $stderr.print "Trying to remove #{kept}\n" if not @quiet
  sleep 0.1 until File.exist?(kept)
  $stderr.print "Removing #{kept}\n" if not @quiet
  File.unlink(kept)
  nil
end
|
295
|
+
|
296
|
+
# Remove the temporary directory @tmpdir. Skipped in debug mode, when no
# directory was created, or when it has already been removed, so the method
# can safely be called more than once. Dir.unlink still raises when the
# directory is not empty.
def cleanup_tmpdir
  return if @debug
  return unless @tmpdir && Dir.exist?(@tmpdir)
  $stderr.print "Removing dir #{@tmpdir}\n" if not @quiet
  Dir.unlink(@tmpdir)
end
|
302
|
+
|
303
|
+
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
require 'erb'
|
2
|
+
|
3
|
+
module Bio

  # ERB template that may be split into a header, a body and a footer by
  # marker lines containing =HEADER, =BODY and =FOOTER. A template without a
  # =BODY section is treated as body only.
  class Template

    # fn - path of the template file. Raises RuntimeError when it is missing.
    def initialize fn
      raise "Can not find template #{fn}!" if not File.exist?(fn)
      parse(File.read(fn))
    end

    # Split buf into its sections and compile each non-empty section with ERB.
    def parse buf
      header = []
      body   = []
      footer = []
      where = :header
      buf.split("\n").each do | line |
        case where
        when :header
          next if line =~ /=HEADER/
          if line =~ /=BODY/
            where = :body
            next
          end
          header << line
        when :body
          if line =~ /=FOOTER/
            where = :footer
            next
          end
          body << line
        else
          footer << line
        end
      end
      # No body section: everything collected so far is the body
      if body.empty?
        body = header
        header = []
      end
      # Note: Array#size is always truthy in Ruby (0 included), so test for
      # emptiness explicitly. An absent section renders as "".
      @erb_header = ERB.new(header.join("\n")) unless header.empty?
      @erb_body   = ERB.new(body.join("\n"))   unless body.empty?
      @erb_footer = ERB.new(footer.join("\n")) unless footer.empty?
    end

    # Render the body section. This method used to read @erb, which is never
    # assigned, and therefore always failed.
    def result env
      body(env)
    end

    # Render the header section with binding env ("" when there is none)
    def header env
      if @erb_header
        @erb_header.result(env)
      else
        ""
      end
    end

    # Render the body section with binding env ("" when there is none)
    def body env
      if @erb_body
        @erb_body.result(env)
      else
        ""
      end
    end

    # Render the footer section with binding env ("" when there is none)
    def footer env
      if @erb_footer
        @erb_footer.result(env)
      else
        ""
      end
    end
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module BioVcf
  # This class abstracts a VCF file that can be iterated.
  # The VCF can be plain text or compressed with gzip.
  # Note that files compressed with bgzip will not work, as the Ruby
  # implementation of Zlib does not allow concatenated files.
  class VCFfile

    # file  - path of the VCF file
    # is_gz - true when the file is gzip compressed
    def initialize(file: "", is_gz: true)
      @file = file
      @is_gz = is_gz
    end

    # Parse a ##INFO header line into a hash with the keys :id, :number,
    # :type and :desc. Returns nil when the line is not an INFO line.
    def parseVCFheader(head_line="")
      m=/##INFO=<ID=(.+),Number=(.+),Type=(.+),Description="(.+)">/.match(head_line)
      return nil unless m
      {:id=>m[1],:number=>m[2],:type=>m[3],:desc=>m[4]}
    end


    # Yields a BioVcf::VcfRecord for every data line; lines starting with '#'
    # are added to the header that is handed to each record.
    # Returns an enum that can be used as an iterator when no block is given.
    def each
      return enum_for(:each) unless block_given?
      # File.open/GzipReader.open instead of Kernel#open: the latter treats
      # a name starting with "|" as a command to run.
      io = @is_gz ? Zlib::GzipReader.open(@file) : File.open(@file)
      begin
        header = BioVcf::VcfHeader.new
        io.each_line do |line|
          line.chomp!
          if line =~ /^#/
            header.add(line)
            next
          end
          fields = BioVcf::VcfLine.parse(line)
          rec    = BioVcf::VcfRecord.new(fields,header)
          yield rec
        end
      ensure
        # Also runs when the caller stops iterating early
        io.close
      end
    end
  end
end
|
@@ -11,7 +11,7 @@ module BioVcf
|
|
11
11
|
end
|
12
12
|
end
|
13
13
|
|
14
|
-
# Helper class for a list of (variant) values, such as A,G.
|
14
|
+
# Helper class for a list of (variant) values, such as A,G.
|
15
15
|
# The [] function does the hard work. You can pass in an index (integer)
|
16
16
|
# or nucleotide which translates to an index.
|
17
17
|
# (see ./features for examples)
|
@@ -20,7 +20,7 @@ module BioVcf
|
|
20
20
|
@alt = alt
|
21
21
|
@list = list.split(/,/).map{|i| i.to_i}
|
22
22
|
end
|
23
|
-
|
23
|
+
|
24
24
|
def [] idx
|
25
25
|
if idx.kind_of?(Integer)
|
26
26
|
# return a value
|
@@ -67,7 +67,7 @@ module BioVcf
|
|
67
67
|
@alt = alt
|
68
68
|
@list = list.split(/,/).map{|i| i.to_i}
|
69
69
|
end
|
70
|
-
|
70
|
+
|
71
71
|
def [] idx
|
72
72
|
if idx.kind_of?(Integer)
|
73
73
|
@list[idx].to_i
|
@@ -87,15 +87,15 @@ module BioVcf
|
|
87
87
|
end
|
88
88
|
|
89
89
|
# Return the max value on the nucleotides in the list (typically rec.alt)
|
90
|
-
def max
|
90
|
+
def max
|
91
91
|
@list.reduce(0){ |memo,v| (v>memo ? v : memo) }
|
92
92
|
end
|
93
93
|
|
94
|
-
def min
|
94
|
+
def min
|
95
95
|
@list.reduce(MAXINT){ |memo,v| (v<memo ? v : memo) }
|
96
96
|
end
|
97
97
|
|
98
|
-
def sum
|
98
|
+
def sum
|
99
99
|
@list.reduce(0){ |memo,v| v+memo }
|
100
100
|
end
|
101
101
|
end
|
@@ -129,14 +129,14 @@ module BioVcf
|
|
129
129
|
!empty?
|
130
130
|
end
|
131
131
|
|
132
|
-
def dp4
|
133
|
-
ilist('DP4')
|
132
|
+
def dp4
|
133
|
+
ilist('DP4')
|
134
134
|
end
|
135
|
-
def ad
|
136
|
-
ilist('AD')
|
135
|
+
def ad
|
136
|
+
ilist('AD')
|
137
137
|
end
|
138
|
-
def pl
|
139
|
-
ilist('PL')
|
138
|
+
def pl
|
139
|
+
ilist('PL')
|
140
140
|
end
|
141
141
|
|
142
142
|
def bcount
|
@@ -156,7 +156,7 @@ module BioVcf
|
|
156
156
|
end
|
157
157
|
|
158
158
|
def gti
|
159
|
-
gt.split(
|
159
|
+
gt.split(/[\/\|]/).map { |g| g.to_i }
|
160
160
|
end
|
161
161
|
|
162
162
|
def gts?
|
@@ -178,11 +178,11 @@ module BioVcf
|
|
178
178
|
else
|
179
179
|
v = values[fetch(m.to_s.upcase)]
|
180
180
|
return nil if VcfValue::empty?(v)
|
181
|
-
|
182
|
-
|
181
|
+
return v.to_i if v =~ /^\d+$/
|
182
|
+
return v.to_f if v =~ /^\d+\.\d+$/
|
183
183
|
v
|
184
184
|
end
|
185
|
-
end
|
185
|
+
end
|
186
186
|
|
187
187
|
private
|
188
188
|
|
@@ -200,7 +200,7 @@ module BioVcf
|
|
200
200
|
def ilist name
|
201
201
|
v = fetch_value(name)
|
202
202
|
return nil if not v
|
203
|
-
v.split(',').map{|i| i.to_i}
|
203
|
+
v.split(',').map{|i| i.to_i}
|
204
204
|
end
|
205
205
|
|
206
206
|
end
|
@@ -218,10 +218,15 @@ module BioVcf
|
|
218
218
|
end
|
219
219
|
|
220
220
|
def [] name
|
221
|
-
|
221
|
+
begin
|
222
|
+
@samples[name] ||= VcfGenotypeField.new(@fields[@sample_index[name]],@format,@header,@ref,@alt)
|
223
|
+
rescue TypeError
|
224
|
+
$stderr.print "Unknown field name <#{name}> in record, did you mean r.info.#{name}?\n"
|
225
|
+
raise
|
226
|
+
end
|
222
227
|
end
|
223
228
|
|
224
|
-
def method_missing(m, *args, &block)
|
229
|
+
def method_missing(m, *args, &block)
|
225
230
|
name = m.to_s
|
226
231
|
if name =~ /\?$/
|
227
232
|
# test for valid sample
|
@@ -229,7 +234,7 @@ module BioVcf
|
|
229
234
|
else
|
230
235
|
@samples[name] ||= VcfGenotypeField.new(@fields[@sample_index[name]],@format,@header,@ref,@alt)
|
231
236
|
end
|
232
|
-
end
|
237
|
+
end
|
233
238
|
|
234
239
|
end
|
235
240
|
end
|