bio-vcf 0.8.0 → 0.9.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.travis.yml +1 -11
- data/Gemfile +4 -5
- data/Gemfile.lock +28 -65
- data/LICENSE.txt +1 -1
- data/README.md +387 -107
- data/RELEASE_NOTES.md +20 -0
- data/RELEASE_NOTES.md~ +11 -0
- data/Rakefile +3 -40
- data/TAGS +115 -0
- data/VERSION +1 -1
- data/bin/bio-vcf +176 -109
- data/bio-vcf.gemspec +14 -70
- data/features/cli.feature +22 -4
- data/features/diff_count.feature +0 -1
- data/features/filter.feature +12 -0
- data/features/multisample.feature +25 -0
- data/features/somaticsniper.feature +2 -0
- data/features/step_definitions/cli-feature.rb +15 -6
- data/features/step_definitions/diff_count.rb +1 -1
- data/features/step_definitions/multisample.rb +19 -0
- data/features/step_definitions/somaticsniper.rb +9 -1
- data/features/step_definitions/vcf_header.rb +48 -0
- data/features/support/env.rb +0 -9
- data/features/vcf_header.feature +35 -0
- data/lib/bio-vcf.rb +2 -0
- data/lib/bio-vcf/bedfilter.rb +43 -0
- data/lib/bio-vcf/pcows.rb +303 -0
- data/lib/bio-vcf/template.rb +75 -0
- data/lib/bio-vcf/vcffile.rb +46 -0
- data/lib/bio-vcf/vcfgenotypefield.rb +25 -20
- data/lib/bio-vcf/vcfheader.rb +146 -6
- data/lib/bio-vcf/vcfheader_line.rb +778 -0
- data/lib/bio-vcf/vcfrecord.rb +56 -18
- data/lib/bio-vcf/vcfsample.rb +27 -3
- data/ragel/gen_vcfheaderline_parser.rl +165 -0
- data/ragel/generate.sh +8 -0
- data/template/vcf2json.erb +19 -7
- data/template/vcf2json_full_header.erb +22 -0
- data/template/vcf2json_use_meta.erb +41 -0
- data/template/vcf2rdf_header.erb +24 -0
- data/test/data/input/empty.vcf +2 -0
- data/test/data/input/gatk_exome.vcf +237 -0
- data/test/data/input/gatk_wgs.vcf +1000 -0
- data/test/data/input/test.bed +632 -0
- data/test/data/regression/empty-stderr.new +12 -0
- data/test/data/regression/empty.new +2 -0
- data/test/data/regression/empty.ref +2 -0
- data/test/data/regression/eval_once-stderr.new +2 -0
- data/test/data/regression/eval_once.new +1 -0
- data/test/data/regression/eval_once.ref +1 -0
- data/test/data/regression/eval_r.info.dp-stderr.new +10 -0
- data/test/data/regression/eval_r.info.dp.new +150 -0
- data/test/data/regression/ifilter_s.dp-stderr.new +34 -0
- data/test/data/regression/ifilter_s.dp.new +31 -0
- data/test/data/regression/pass1-stderr.new +10 -0
- data/test/data/regression/pass1.new +88 -0
- data/test/data/regression/pass1.ref +88 -0
- data/test/data/regression/r.info.dp-stderr.new +4 -0
- data/test/data/regression/r.info.dp.new +114 -0
- data/test/data/regression/rewrite.info.sample-stderr.new +10 -0
- data/test/data/regression/rewrite.info.sample.new +150 -0
- data/test/data/regression/s.dp-stderr.new +18 -0
- data/test/data/regression/s.dp.new +145 -0
- data/test/data/regression/seval_s.dp-stderr.new +10 -0
- data/test/data/regression/seval_s.dp.new +36 -0
- data/test/data/regression/sfilter_seval_s.dp-stderr.new +18 -0
- data/test/data/regression/sfilter_seval_s.dp.new +31 -0
- data/test/data/regression/thread4-stderr.new +10 -0
- data/test/data/regression/thread4.new +150 -0
- data/test/data/regression/thread4_4-stderr.new +25 -0
- data/test/data/regression/thread4_4.new +130 -0
- data/test/data/regression/thread4_4_failed_filter-stderr.new +5 -0
- data/test/data/regression/thread4_4_failed_filter-stderr.ref +5 -1
- data/test/data/regression/thread4_4_failed_filter.new +110 -0
- data/test/data/regression/vcf2json_full_header-stderr.new +10 -0
- data/test/data/regression/vcf2json_full_header.new +225 -0
- data/test/data/regression/vcf2json_full_header.ref +225 -0
- data/test/data/regression/vcf2json_use_meta-stderr.new +10 -0
- data/test/data/regression/vcf2json_use_meta.new +4697 -0
- data/test/data/regression/vcf2json_use_meta.ref +4697 -0
- data/test/performance/metrics.md +18 -1
- data/test/stress/stress_test.sh +15 -0
- data/test/tmp/test.vcf +12469 -0
- metadata +65 -64
@@ -0,0 +1,303 @@
|
|
1
|
+
# Parallel copy-on-write streaming (PCOWS)
|
2
|
+
|
3
|
+
require 'fileutils'
require 'tempfile'
require 'timeout'
require 'tmpdir'
|
4
|
+
|
5
|
+
# Runs worker lambdas in forked child processes (or inline when only
# one thread is requested), has each child write its output to a
# numbered file in a private temporary directory, and replays those
# files to stdout in submission order.
#
# File naming protocol used between parent and children:
#
#   NNNNNN-name.part   child is still writing
#   NNNNNN-name        child finished, output is ready to be printed
#   NNNNNN-name.keep   output has been printed and can be removed
class PCOWS

  RUNNINGEXT = 'part' # file extension

  # num_threads:: number of concurrent workers; nil/false means "use
  #               cpu_count". A value of 1 disables forking.
  # chunk_size::  stored for the caller; not used inside this class
  # name::        used in the tmpdir name and in every chunk file name
  # timeout::     seconds to wait for a slot or for a worker
  # quiet::       suppress progress messages on stderr
  # debug::       keep the .keep files and the tmpdir for inspection
  def initialize(num_threads,chunk_size,name=File.basename(__FILE__),timeout=180,quiet=false,debug=false)
    @chunk_size = chunk_size
    @pid_list = []
    @name = name
    @timeout = timeout
    @quiet = quiet
    @debug = debug
    # @quiet is assigned first because cpu_count consults it
    @num_threads = num_threads || cpu_count
    $stderr.print "PCOWS running in DEBUG MODE\n" if @debug
    @tmpdir = Dir::mktmpdir(@name+'_') if multi_threaded
    @last_output = 0 # index into @pid_list of the next chunk to print
    @output_locked = false
    @final_worker = false
  end

  # Feed the worker 'func and state' to COWS. Note that func is a
  # lambda closure so it can pick up surrounding scope at invocation
  # in addition to the data captured in 'state'. func.call(state) must
  # return something that responds to each and yields printable lines.
  #
  # Always returns true.
  def submit_worker(func,state)
    pid = nil
    count = nil
    fn = nil
    if multi_threaded
      count = @pid_list.size+1
      fn = mktmpfilename(count)
      pid = fork do
        # ---- This is running in the forked child process
        tempfn = fn+'.'+RUNNINGEXT
        STDOUT.reopen(tempfn, 'w+')
        func.call(state).each { | line | print line }
        STDOUT.flush
        STDOUT.close
        # The rename signals the parent that the chunk is complete
        FileUtils::mv(tempfn,fn)
        exit(0)
      end
      Process.detach(pid)
    else
      # ---- Single threaded: call in main process and output immediately
      func.call(state).each { | line | print line }
    end
    # In single threaded mode this records [nil,nil,nil]
    @pid_list << [ pid,count,fn ]
    return true
  end

  # Same as submit_worker, but flags that the last worker was submitted.
  def submit_final_worker(func,state)
    @final_worker = true
    submit_worker(func,state)
  end

  # Make sure no more than num_threads are running at the same time -
  # this is achieved by checking the PID table and the running files
  # in the tmpdir. Polls every 0.1s and raises Timeout::Error when no
  # slot frees up within @timeout seconds.
  def wait_for_worker_slot()
    return if single_threaded
    Timeout.timeout(@timeout) do
      printed_timeout_message = false
      loop do
        # ---- count running pids
        running = @pid_list.count { |pid,_count,fn| pid_or_file_running?(pid,fn) }
        return if running < @num_threads
        unless printed_timeout_message
          $stderr.print "Waiting for slot (timeout=#{@timeout})\n" if not @quiet
          printed_timeout_message = true
        end
        sleep 0.1
      end
    end
  end

  # ---- In this section the output gets collected and passed on to a
  #      printer process. This function makes sure the printing is
  #      ordered and that no printers are running at the same
  #      time. The printer should be doing as little processing
  #      as possible.
  #
  #      With type==:by_line every line of the chunk file is printed
  #      to stdout (func is not consulted). Otherwise func is called
  #      once with the chunk filename. With blocking==false the
  #      printing happens in a forked child.
  def process_output(func=nil,type=:by_line, blocking=false)
    return if single_threaded
    output = lambda { |fn|
      if type == :by_line
        # File.foreach closes the file when done
        File.foreach(fn) { |buf| print buf }
      else
        func.call(fn)
      end
    }
    if @output_locked
      # ---- is the other printer still running? We wait until it
      #      is finished to start the next one
      (pid,count,fn) = @output_locked
      $stderr.print "Checking for output_lock on existing #{fn}\n" if not @quiet
      return if File.exist?(fn) # continue because printer still processing
      # Now we should remove the .keep file
      cleanup_keep_file(fn)
      @last_output += 1 # get next one in line
      @output_locked = false
    end
    # ---- process the next output chunk. After completion it
    #      gets renamed to chunk.keep. This to avoid missing
    #      output (if we unlink the file prematurely)
    if info = @pid_list[@last_output]
      (pid,count,fn) = info
      $stderr.print "Testing (#{@last_output}) for output file ",[info],"\n" if @debug
      if File.exist?(fn)
        # Yes! We have the next output, create outputter
        @output_locked = info
        $stderr.print "Set lock on ",[info],"\n" if not @quiet
        if not blocking
          $stderr.print "Processing output file #{fn} (non-blocking)\n" if not @quiet
          pid = fork do
            output.call(fn)
            # after finishing output move it to .keep
            FileUtils::mv(fn,fn+'.keep')
            exit(0)
          end
          Process.detach(pid)
        else
          $stderr.print "Processing output file #{fn} (blocking)\n" if not @quiet
          output.call(fn)
          FileUtils::mv(fn,fn+'.keep')
        end
      else
        sleep 0.2
      end
    end
  end

  # Wait for one worker to complete. When working the pid is writing
  # a file with extension .part(ial). After completion the file is
  # renamed without .part.
  #
  # info is a [pid,count,fn] entry from the PID table. On timeout the
  # child is killed and Timeout::Error is re-raised.
  def wait_for_worker(info)
    (pid,count,fn) = info
    return unless pid_or_file_running?(pid,fn)
    $stderr.print "Waiting up to #{@timeout} seconds for pid=#{pid} to complete #{fn}\n" if not @quiet
    begin
      Timeout.timeout(@timeout) do
        while not File.exist?(fn)  # wait for the result to appear
          sleep 0.2
          return if not pid_or_file_running?(pid,fn) # worker is gone
        end
      end
      # Partial file should have been renamed:
      # NOTE(review): the loop above only falls through once fn exists,
      # so this check looks unreachable - confirm whether the 'return'
      # above was meant to be a 'break'.
      raise "FATAL: child process #{pid} appears to have crashed #{fn}" if not File.exist?(fn)
      $stderr.print "OK pid=#{pid}, processing starts of #{fn}\n" if not @quiet
    rescue Timeout::Error
      # Kill it to speed up exit. The pid was detached, so it may have
      # been reaped already; that must not mask the timeout itself.
      kill_and_reap(pid) if pid
      $stderr.print "FATAL: child process killed because it stopped responding, pid = #{pid}, fn = #{fn}, count = #{count}\n"
      $stderr.print "Bailing out\n"
      raise
    end
  end

  # This is the final cleanup after the reader thread is done. All workers
  # need to complete.
  def wait_for_workers()
    return if single_threaded
    @pid_list.each { |info| wait_for_worker(info) }
  end

  # Print every chunk that has not been printed yet, in order, and
  # remove the tmpdir afterwards.
  def process_remaining_output()
    return if single_threaded
    $stderr.print "Processing remaining output...\n" if not @quiet
    while @output_locked
      sleep 0.2
      process_output() # keep trying
    end
    @pid_list.each do |info|
      (pid,count,fn) = info
      while pid_or_file_running?(pid,fn) or File.exist?(fn)
        $stderr.print "Trying: ",[info],"\n" if not @quiet
        process_output(nil,:by_line,true)
        sleep 0.2
      end
    end
    while @output_locked
      sleep 0.1
      process_output(nil,:by_line,true)
    end
    cleanup_tmpdir()
  end

  # Kill every child that is still running and remove all chunk files
  # (finished, .keep and .part) followed by the tmpdir.
  def cleanup()
    @pid_list.each do |info|
      (pid,count,fn) = info
      # Single threaded submissions are recorded without pid or file
      next if pid.nil? or fn.nil?
      if pid_running?(pid)
        $stderr.print "Killing child ",[info],"\n"
        if not kill_and_reap(pid)
          # Reported on stderr: stdout carries the data stream
          $stderr.print "INFO: The process #{pid} did not exist\n" if not @quiet
        end
      end
      File.unlink(fn) if File.exist?(fn)
      cleanup_keep_file(fn,wait: false)
      tempfn = fn+'.'+RUNNINGEXT
      File.unlink(tempfn) if File.exist?(tempfn)
    end
    cleanup_tmpdir()
  end

  private

  # Build the chunk file name for chunk number num, e.g.
  # <tmpdir>/000003-<name> or <tmpdir>/000003-<name>.<ext>
  def mktmpfilename(num,ext=nil)
    @tmpdir+sprintf("/%0.6d-",num)+@name+(ext ? '.'+ext : '')
  end

  # True when the child is alive or its .part file is still present.
  def pid_or_file_running?(pid,fn)
    (pid && pid_running?(pid)) or File.exist?(fn+'.'+RUNNINGEXT)
  end

  # Non-blocking check on a child process. A pid that is not (or no
  # longer) a child of this process counts as not running.
  def pid_running?(pid)
    begin
      fpid,status=Process.waitpid2(pid,Process::WNOHANG)
    rescue Errno::ECHILD, Errno::ESRCH
      return false
    end
    # WNOHANG yields nil while the child is still running
    return true if nil == fpid && nil == status
    return ! (status.exited? || status.signaled?)
  end

  # Send SIGKILL to pid and collect it. Returns false when the process
  # was already gone or already reaped, true otherwise.
  def kill_and_reap(pid)
    Process.kill 9, pid
    Process.wait pid
    true
  rescue Errno::ESRCH, Errno::ECHILD
    false
  end

  def single_threaded
    @num_threads == 1
  end

  def multi_threaded
    @num_threads > 1
  end

  # Best-effort CPU count: /proc/cpuinfo, then the JVM, then sysctl.
  # Falls back to 1 (with a warning unless quiet).
  def cpu_count
    if File.exist? '/proc/cpuinfo'
      num = File.read('/proc/cpuinfo').scan(/^processor\s*:/).size
      return num if num > 0
    end
    # Actually, the JVM does not allow fork...
    return Java::Java.lang.Runtime.getRuntime.availableProcessors if defined? Java::Java
    begin
      # Count on MAC
      num = Integer(`sysctl -n hw.ncpu 2>/dev/null`.strip)
      return num if num > 0
    rescue ArgumentError, SystemCallError
      # sysctl missing or printed nothing usable
    end
    $stderr.print "Could not determine number of CPUs\n" if not @quiet
    1
  end

  # Remove fn.keep (nothing is removed in debug mode). With
  # opts[:wait] true (the default) this polls every 0.1s until the
  # file shows up; there is no timeout on that wait.
  def cleanup_keep_file(fn, opts = { wait: true })
    return if @debug
    keep = fn+'.keep'
    return if not opts[:wait] and !File.exist?(keep)
    $stderr.print "Trying to remove #{keep}\n" if not @quiet
    loop do
      if File.exist?(keep)
        $stderr.print "Removing #{keep}\n" if not @quiet
        File.unlink(keep)
        break
      end
      sleep 0.1
    end
  end

  # Remove the tmpdir (kept in debug mode). Safe to call when no
  # tmpdir was created or when it has been removed already.
  def cleanup_tmpdir
    return if @debug
    return unless @tmpdir && Dir.exist?(@tmpdir)
    $stderr.print "Removing dir #{@tmpdir}\n" if not @quiet
    Dir.unlink(@tmpdir)
  end

end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
require 'erb'
|
2
|
+
|
3
|
+
module Bio

  # An ERB template that may be split into up to three sections by
  # marker lines containing =HEADER, =BODY and =FOOTER. A template
  # without a =BODY marker is treated as body only.
  class Template

    # fn:: path of the template file. Raises RuntimeError when the
    #      file does not exist.
    def initialize fn
      raise "Can not find template #{fn}!" if not File.exist?(fn)
      parse(File.read(fn))
    end

    # Split buf into header, body and footer sections and compile each
    # non-empty section into its own ERB object.
    def parse buf
      header = []
      body = []
      footer = []
      where = :header
      seen_body = false
      buf.split("\n").each do | line |
        case where
        when :header
          next if line =~ /=HEADER/
          if line =~ /=BODY/
            where = :body
            seen_body = true
            next
          end
          header << line
        when :body
          if line =~ /=FOOTER/
            where = :footer
            next
          end
          body << line
        else
          footer << line
        end
      end
      # No =BODY marker at all: everything collected so far is the body.
      # (An explicit but empty =BODY section is left empty.)
      unless seen_body
        body = header
        header = []
      end
      # Array#size is always truthy in Ruby, so test for emptiness
      @erb_header = header.empty? ? nil : ERB.new(header.join("\n"))
      @erb_body   = body.empty?   ? nil : ERB.new(body.join("\n"))
      @erb_footer = footer.empty? ? nil : ERB.new(footer.join("\n"))
    end

    # Render the body section with binding env. Same as #body.
    def result env
      body(env)
    end

    # Render the header section with binding env ("" when absent).
    def header env
      @erb_header ? @erb_header.result(env) : ""
    end

    # Render the body section with binding env ("" when absent).
    def body env
      @erb_body ? @erb_body.result(env) : ""
    end

    # Render the footer section with binding env ("" when absent).
    def footer env
      @erb_footer ? @erb_footer.result(env) : ""
    end
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module BioVcf
  # This class abstracts a VCF file that can be iterated.
  # The VCF can be plain text or compressed with gzip.
  # Note that files compressed with bgzip will not work, as the Ruby
  # implementation of Zlib does not allow concatenated files.
  class VCFfile

    # file::  path of the VCF file
    # is_gz:: true when the file is gzip compressed
    def initialize(file: "", is_gz: true)
      @file = file
      @is_gz = is_gz
    end

    # Parse a ##INFO meta line into a hash with the keys :id, :number,
    # :type and :desc (all strings). Returns nil when head_line is not
    # a well-formed ##INFO line.
    def parseVCFheader(head_line="")
      m=/##INFO=<ID=(.+),Number=(.+),Type=(.+),Description="(.+)">/.match(head_line)
      return nil if not m
      {:id=>m[1],:number=>m[2],:type=>m[3],:desc=>m[4]}
    end

    # Yields a BioVcf::VcfRecord for every data line. Lines starting
    # with '#' are added to the header that is passed to each record.
    # Returns an enum that can be used as an iterator when no block
    # is given.
    def each
      return enum_for(:each) unless block_given?
      # File.open/GzipReader.open only ever treat @file as a path
      io = @is_gz ? Zlib::GzipReader.open(@file) : File.open(@file)
      begin
        header = BioVcf::VcfHeader.new
        io.each_line do |line|
          line.chomp!
          if line =~ /^#/
            header.add(line)
            next
          end
          fields = BioVcf::VcfLine.parse(line)
          rec = BioVcf::VcfRecord.new(fields,header)
          yield rec
        end
      ensure
        # Release the handle, also when the block raises or breaks
        io.close
      end
    end
  end
end
|
@@ -11,7 +11,7 @@ module BioVcf
|
|
11
11
|
end
|
12
12
|
end
|
13
13
|
|
14
|
-
# Helper class for a list of (variant) values, such as A,G.
|
14
|
+
# Helper class for a list of (variant) values, such as A,G.
|
15
15
|
# The [] function does the hard work. You can pass in an index (integer)
|
16
16
|
# or nucleotide which translates to an index.
|
17
17
|
# (see ./features for examples)
|
@@ -20,7 +20,7 @@ module BioVcf
|
|
20
20
|
@alt = alt
|
21
21
|
@list = list.split(/,/).map{|i| i.to_i}
|
22
22
|
end
|
23
|
-
|
23
|
+
|
24
24
|
def [] idx
|
25
25
|
if idx.kind_of?(Integer)
|
26
26
|
# return a value
|
@@ -67,7 +67,7 @@ module BioVcf
|
|
67
67
|
@alt = alt
|
68
68
|
@list = list.split(/,/).map{|i| i.to_i}
|
69
69
|
end
|
70
|
-
|
70
|
+
|
71
71
|
def [] idx
|
72
72
|
if idx.kind_of?(Integer)
|
73
73
|
@list[idx].to_i
|
@@ -87,15 +87,15 @@ module BioVcf
|
|
87
87
|
end
|
88
88
|
|
89
89
|
# Return the max value on the nucleotides in the list (typically rec.alt)
|
90
|
-
def max
|
90
|
+
def max
|
91
91
|
@list.reduce(0){ |memo,v| (v>memo ? v : memo) }
|
92
92
|
end
|
93
93
|
|
94
|
-
def min
|
94
|
+
def min
|
95
95
|
@list.reduce(MAXINT){ |memo,v| (v<memo ? v : memo) }
|
96
96
|
end
|
97
97
|
|
98
|
-
def sum
|
98
|
+
def sum
|
99
99
|
@list.reduce(0){ |memo,v| v+memo }
|
100
100
|
end
|
101
101
|
end
|
@@ -129,14 +129,14 @@ module BioVcf
|
|
129
129
|
!empty?
|
130
130
|
end
|
131
131
|
|
132
|
-
def dp4
|
133
|
-
ilist('DP4')
|
132
|
+
def dp4
|
133
|
+
ilist('DP4')
|
134
134
|
end
|
135
|
-
def ad
|
136
|
-
ilist('AD')
|
135
|
+
def ad
|
136
|
+
ilist('AD')
|
137
137
|
end
|
138
|
-
def pl
|
139
|
-
ilist('PL')
|
138
|
+
def pl
|
139
|
+
ilist('PL')
|
140
140
|
end
|
141
141
|
|
142
142
|
def bcount
|
@@ -156,7 +156,7 @@ module BioVcf
|
|
156
156
|
end
|
157
157
|
|
158
158
|
def gti
|
159
|
-
gt.split(
|
159
|
+
gt.split(/[\/\|]/).map { |g| g.to_i }
|
160
160
|
end
|
161
161
|
|
162
162
|
def gts?
|
@@ -178,11 +178,11 @@ module BioVcf
|
|
178
178
|
else
|
179
179
|
v = values[fetch(m.to_s.upcase)]
|
180
180
|
return nil if VcfValue::empty?(v)
|
181
|
-
|
182
|
-
|
181
|
+
return v.to_i if v =~ /^\d+$/
|
182
|
+
return v.to_f if v =~ /^\d+\.\d+$/
|
183
183
|
v
|
184
184
|
end
|
185
|
-
end
|
185
|
+
end
|
186
186
|
|
187
187
|
private
|
188
188
|
|
@@ -200,7 +200,7 @@ module BioVcf
|
|
200
200
|
def ilist name
|
201
201
|
v = fetch_value(name)
|
202
202
|
return nil if not v
|
203
|
-
v.split(',').map{|i| i.to_i}
|
203
|
+
v.split(',').map{|i| i.to_i}
|
204
204
|
end
|
205
205
|
|
206
206
|
end
|
@@ -218,10 +218,15 @@ module BioVcf
|
|
218
218
|
end
|
219
219
|
|
220
220
|
def [] name
|
221
|
-
|
221
|
+
begin
|
222
|
+
@samples[name] ||= VcfGenotypeField.new(@fields[@sample_index[name]],@format,@header,@ref,@alt)
|
223
|
+
rescue TypeError
|
224
|
+
$stderr.print "Unknown field name <#{name}> in record, did you mean r.info.#{name}?\n"
|
225
|
+
raise
|
226
|
+
end
|
222
227
|
end
|
223
228
|
|
224
|
-
def method_missing(m, *args, &block)
|
229
|
+
def method_missing(m, *args, &block)
|
225
230
|
name = m.to_s
|
226
231
|
if name =~ /\?$/
|
227
232
|
# test for valid sample
|
@@ -229,7 +234,7 @@ module BioVcf
|
|
229
234
|
else
|
230
235
|
@samples[name] ||= VcfGenotypeField.new(@fields[@sample_index[name]],@format,@header,@ref,@alt)
|
231
236
|
end
|
232
|
-
end
|
237
|
+
end
|
233
238
|
|
234
239
|
end
|
235
240
|
end
|