bio-vcf 0.8.2 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +8 -2
- data/Gemfile +4 -6
- data/README.md +92 -57
- data/Rakefile +3 -41
- data/TAGS +115 -0
- data/VERSION +1 -1
- data/bin/bio-vcf +58 -70
- data/bio-vcf.gemspec +23 -75
- data/features/cli.feature +6 -1
- data/features/multisample.feature +12 -0
- data/features/step_definitions/cli-feature.rb +2 -2
- data/features/step_definitions/multisample.rb +19 -0
- data/features/step_definitions/vcf_header.rb +1 -1
- data/features/support/env.rb +0 -9
- data/lib/bio-vcf/pcows.rb +210 -0
- data/lib/bio-vcf/vcfheader.rb +28 -9
- data/lib/bio-vcf/vcfheader_line.rb +455 -160
- data/lib/bio-vcf/vcfrecord.rb +30 -15
- data/ragel/gen_vcfheaderline_parser.rl +68 -25
- data/ragel/generate.sh +4 -1
- data/template/vcf2json.erb +16 -16
- data/template/vcf2json_full_header.erb +16 -17
- data/template/vcf2json_use_meta.erb +35 -35
- data/test/data/input/gatk_exome.vcf +237 -0
- data/test/data/input/gatk_wgs.vcf +1000 -0
- data/test/data/input/test.bed +632 -0
- data/test/data/regression/eval_once-stderr.new +1 -0
- data/test/data/regression/eval_once.new +1 -0
- data/test/data/regression/eval_once.ref +1 -0
- data/test/data/regression/eval_r.info.dp-stderr.new +4 -0
- data/test/data/regression/eval_r.info.dp.new +150 -0
- data/test/data/regression/ifilter_s.dp-stderr.new +28 -0
- data/test/data/regression/ifilter_s.dp.new +31 -0
- data/test/data/regression/r.info.dp-stderr.new +4 -0
- data/test/data/regression/r.info.dp.new +147 -0
- data/test/data/regression/rewrite.info.sample-stderr.new +4 -0
- data/test/data/regression/rewrite.info.sample.new +150 -0
- data/test/data/regression/s.dp-stderr.new +12 -0
- data/test/data/regression/s.dp.new +145 -0
- data/test/data/regression/seval_s.dp-stderr.new +4 -0
- data/test/data/regression/seval_s.dp.new +36 -0
- data/test/data/regression/sfilter_seval_s.dp-stderr.new +12 -0
- data/test/data/regression/sfilter_seval_s.dp.new +31 -0
- data/test/data/regression/thread4-stderr.new +4 -0
- data/test/data/regression/thread4.new +150 -0
- data/test/data/regression/thread4_4-stderr.new +15 -0
- data/test/data/regression/thread4_4.new +150 -0
- data/test/data/regression/thread4_4_failed_filter-stderr.new +5 -0
- data/test/data/regression/thread4_4_failed_filter-stderr.ref +5 -2
- data/test/data/regression/thread4_4_failed_filter.new +110 -0
- data/test/data/regression/vcf2json_full_header-stderr.new +4 -0
- data/test/data/regression/vcf2json_full_header.new +225 -0
- data/test/data/regression/vcf2json_full_header.ref +222 -258
- data/test/data/regression/vcf2json_use_meta-stderr.new +4 -0
- data/test/data/regression/vcf2json_use_meta.new +4697 -0
- data/test/data/regression/vcf2json_use_meta.ref +4697 -0
- data/test/performance/metrics.md +18 -1
- data/test/tmp/test.vcf +12469 -0
- metadata +38 -62
- data/Gemfile.lock +0 -81
- data/ragel/gen_vcfheaderline_parser.rb +0 -483
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.8.2
|
1
|
+
0.9.0
|
data/bin/bio-vcf
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
# Author:: Pjotr Prins
|
5
5
|
# License:: MIT
|
6
6
|
#
|
7
|
-
# Copyright (C) 2014 Pjotr Prins <pjotr.prins@thebird.nl>
|
7
|
+
# Copyright (C) 2014-2015 Pjotr Prins <pjotr.prins@thebird.nl>
|
8
8
|
|
9
9
|
USAGE = "Vcf parser"
|
10
10
|
|
@@ -15,6 +15,7 @@ VERSION_FILENAME=File.join(gempath,'VERSION')
|
|
15
15
|
version = File.new(VERSION_FILENAME).read.chomp
|
16
16
|
|
17
17
|
require 'bio-vcf'
|
18
|
+
require 'bio-vcf/pcows'
|
18
19
|
require 'optparse'
|
19
20
|
require 'timeout'
|
20
21
|
require 'fileutils'
|
@@ -26,7 +27,7 @@ require 'fileutils'
|
|
26
27
|
# Bio::Log::CLI.logger('stderr')
|
27
28
|
# Bio::Log::CLI.trace('info')
|
28
29
|
|
29
|
-
options = { show_help: false, source: 'https://github.com/CuppenResearch/bioruby-vcf', version: version+' (Pjotr Prins)', date: Time.now.to_s, thread_lines: 40_000 }
|
30
|
+
options = { show_help: false, source: 'https://github.com/CuppenResearch/bioruby-vcf', version: version+' (Pjotr Prins)', date: Time.now.to_s, thread_lines: 40_000, timeout: 180 }
|
30
31
|
opts = OptionParser.new do |o|
|
31
32
|
o.banner = "Usage: #{File.basename($0)} [options] filename\ne.g. #{File.basename($0)} < test/data/input/somaticsniper.vcf"
|
32
33
|
|
@@ -68,6 +69,9 @@ opts = OptionParser.new do |o|
|
|
68
69
|
o.on('--eval-once cmd',String, 'Evaluate command once (usually for header info)') do |cmd|
|
69
70
|
options[:eval_once] = true
|
70
71
|
options[:eval] = cmd
|
72
|
+
# options[:num_threads] = 1
|
73
|
+
# options[:thread_lines] = 1
|
74
|
+
options[:skip_header] = true
|
71
75
|
end
|
72
76
|
o.on('--seval cmd',String, 'Evaluate command on each sample') do |cmd|
|
73
77
|
options[:seval] = cmd
|
@@ -112,7 +116,14 @@ opts = OptionParser.new do |o|
|
|
112
116
|
options[:template] = s
|
113
117
|
options[:skip_header] = true
|
114
118
|
end
|
115
|
-
|
119
|
+
|
120
|
+
o.on("--add-header-tag", "Add bio-vcf status tag to header output") do |t|
|
121
|
+
options[:tag] = true
|
122
|
+
end
|
123
|
+
|
124
|
+
o.on("--timeout [num]", Integer, "Timeout waiting for thread to complete (default #{options[:timeout]})") do |i|
|
125
|
+
options[:timeout] = i
|
126
|
+
end
|
116
127
|
|
117
128
|
# Uncomment the following when using the bio-logger
|
118
129
|
# o.separator ""
|
@@ -137,9 +148,10 @@ opts = OptionParser.new do |o|
|
|
137
148
|
options[:verbose] = true
|
138
149
|
end
|
139
150
|
|
140
|
-
|
141
|
-
|
142
|
-
|
151
|
+
o.on("--debug", "Show debug messages") do |v|
|
152
|
+
# Bio::Log::CLI.trace('debug')
|
153
|
+
options[:debug] = true
|
154
|
+
end
|
143
155
|
|
144
156
|
o.separator ""
|
145
157
|
o.on_tail('-h', '--help', 'display this help and exit') do
|
@@ -150,8 +162,8 @@ end
|
|
150
162
|
opts.parse!(ARGV)
|
151
163
|
|
152
164
|
BIOVCF_VERSION=version
|
153
|
-
BIOVCF_BANNER = "vcf #{version} (biogem Ruby #{RUBY_VERSION}) by Pjotr Prins
|
154
|
-
$stderr.print BIOVCF_BANNER
|
165
|
+
BIOVCF_BANNER = "bio-vcf #{version} (biogem Ruby #{RUBY_VERSION} with pcows) by Pjotr Prins 2015\n"
|
166
|
+
$stderr.print BIOVCF_BANNER if !options[:quiet]
|
155
167
|
|
156
168
|
if options[:show_help]
|
157
169
|
print opts
|
@@ -174,15 +186,6 @@ if options[:template]
|
|
174
186
|
template = Bio::Template.new(fn)
|
175
187
|
end
|
176
188
|
|
177
|
-
if options[:num_threads] != 1
|
178
|
-
begin
|
179
|
-
require 'parallel'
|
180
|
-
rescue LoadError
|
181
|
-
$stderr.print "Error: Missing 'parallel' module. Install with command 'gem install parallel' if you want multiple threads\n"
|
182
|
-
options[:num_threads] = 1
|
183
|
-
end
|
184
|
-
end
|
185
|
-
|
186
189
|
stats = nil
|
187
190
|
if options[:statistics]
|
188
191
|
options[:num_threads] = nil
|
@@ -202,7 +205,7 @@ include BioVcf
|
|
202
205
|
|
203
206
|
# Parse the header section of a VCF file (chomping STDIN)
|
204
207
|
def parse_header line, samples, options
|
205
|
-
header = VcfHeader.new
|
208
|
+
header = VcfHeader.new(options[:debug])
|
206
209
|
header.add(line)
|
207
210
|
print line if not options[:skip_header]
|
208
211
|
STDIN.each_line do | headerline |
|
@@ -214,7 +217,7 @@ def parse_header line, samples, options
|
|
214
217
|
if not options[:skip_header]
|
215
218
|
if headerline =~ /^#CHR/
|
216
219
|
# The header before actual data contains the sample names, first inject the BioVcf meta information
|
217
|
-
print header.tag(options),"\n" if not options[:skip_header]
|
220
|
+
print header.tag(options),"\n" if options[:tag] and not options[:skip_header]
|
218
221
|
selected = header.column_names
|
219
222
|
if samples
|
220
223
|
newfields = selected[0..8]
|
@@ -234,7 +237,7 @@ def parse_header line, samples, options
|
|
234
237
|
return header,line
|
235
238
|
end
|
236
239
|
|
237
|
-
# Parse a VCF line and return the result as a string
|
240
|
+
# Parse a VCF line and return the (template) result as a string buffer
|
238
241
|
def parse_line line,header,options,bedfilter,samples,template,stats=nil
|
239
242
|
fields = VcfLine.parse(line)
|
240
243
|
rec = VcfRecord.new(fields,header)
|
@@ -261,7 +264,7 @@ def parse_line line,header,options,bedfilter,samples,template,stats=nil
|
|
261
264
|
return if not bed
|
262
265
|
end
|
263
266
|
|
264
|
-
return if filter and not rec.
|
267
|
+
return if filter and not rec.gfilter(filter,ignore_missing_data: ignore_missing,quiet: quiet)
|
265
268
|
|
266
269
|
if sfilter
|
267
270
|
rec.each_sample(options[:sfilter_samples]) do | sample |
|
@@ -320,13 +323,12 @@ def parse_line line,header,options,bedfilter,samples,template,stats=nil
|
|
320
323
|
exit 1
|
321
324
|
end
|
322
325
|
return results.to_s+"\n" if results
|
323
|
-
exit(1) if options[:eval_once] # <--- can this be reached?
|
324
326
|
else
|
325
327
|
if options[:rdf]
|
326
328
|
# Output Turtle RDF
|
327
329
|
VcfRdf::record(options[:id],rec,options[:tags])
|
328
330
|
elsif options[:template]
|
329
|
-
#
|
331
|
+
# Use ERB template
|
330
332
|
begin
|
331
333
|
template.body(binding)
|
332
334
|
rescue Exception => e
|
@@ -347,13 +349,11 @@ def parse_line line,header,options,bedfilter,samples,template,stats=nil
|
|
347
349
|
end
|
348
350
|
end
|
349
351
|
|
352
|
+
pcows = PCOWS.new(options[:num_threads],'bio-vcf',options[:timeout])
|
350
353
|
header = nil
|
351
354
|
header_output_completed = false
|
352
|
-
NUM_THREADS = options[:num_threads]
|
353
355
|
CHUNK_SIZE = options[:thread_lines]
|
354
|
-
|
355
|
-
chunks = []
|
356
|
-
lines = []
|
356
|
+
chunk_lines = []
|
357
357
|
line_number=0
|
358
358
|
|
359
359
|
if options[:bed]
|
@@ -361,6 +361,7 @@ if options[:bed]
|
|
361
361
|
end
|
362
362
|
|
363
363
|
begin
|
364
|
+
# Define linear parser function (going through one chunk)
|
364
365
|
process = lambda { | lines |
|
365
366
|
res = []
|
366
367
|
lines.each do | line |
|
@@ -368,20 +369,17 @@ begin
|
|
368
369
|
end
|
369
370
|
res
|
370
371
|
}
|
371
|
-
output = lambda { |collection|
|
372
|
-
collection.each do | result |
|
373
|
-
result.each { |line| print line }
|
374
|
-
end
|
375
|
-
} # end output
|
376
372
|
|
377
373
|
# ---- Main loop
|
378
374
|
STDIN.each_line do | line |
|
379
375
|
line_number += 1
|
380
|
-
# ---- In this section header information is handled
|
381
376
|
|
382
377
|
# ---- Skip embedded headers down the line...
|
383
378
|
next if header_output_completed and line =~ /^#/
|
384
379
|
|
380
|
+
# ---- In the following section header information is handled -
|
381
|
+
# this only happens once.
|
382
|
+
|
385
383
|
# ---- Parse the header lines (chomps from STDIN)
|
386
384
|
# and returns header info and the current line
|
387
385
|
if line =~ /^#/
|
@@ -399,50 +397,40 @@ begin
|
|
399
397
|
header_output_completed = true
|
400
398
|
end
|
401
399
|
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
exit 0
|
425
|
-
end
|
426
|
-
end
|
427
|
-
lines = []
|
428
|
-
end
|
400
|
+
if options[:eval_once]
|
401
|
+
# this happens if we only want one line evaluated - say to get
|
402
|
+
# the number of samples
|
403
|
+
print parse_line(line,header,options,bedfilter,samples,template,stats)
|
404
|
+
exit 0
|
405
|
+
end
|
406
|
+
|
407
|
+
# ---- Lines are collected in one buffer and the lines buffer
|
408
|
+
# is added to the chunks list (for the threads)
|
409
|
+
chunk_lines << line
|
410
|
+
|
411
|
+
# ---- In the following section the VCF lines are parsed by chunks
|
412
|
+
# The chunks may go into different threads
|
413
|
+
|
414
|
+
if chunk_lines.size > CHUNK_SIZE
|
415
|
+
# ---- process one chunk
|
416
|
+
$stderr.print '.' if not options[:quiet]
|
417
|
+
pcows.wait_for_worker_slot()
|
418
|
+
pcows.submit_worker(process,chunk_lines)
|
419
|
+
pcows.process_output()
|
420
|
+
|
421
|
+
chunk_lines = []
|
429
422
|
end
|
430
423
|
end
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
chunks << lines
|
436
|
-
output.call Parallel.map(chunks, :in_processes => NUM_THREADS) { | chunk |
|
437
|
-
process.call(chunk)
|
438
|
-
}
|
439
|
-
end
|
424
|
+
pcows.submit_worker(process,chunk_lines)
|
425
|
+
pcows.wait_for_workers()
|
426
|
+
pcows.process_remaining_output()
|
427
|
+
|
440
428
|
print template.footer(binding) if template
|
441
429
|
stats.print if stats
|
442
430
|
|
443
431
|
rescue Exception => e
|
444
432
|
# $stderr.print line
|
445
|
-
$stderr.print e.message,"\n"
|
433
|
+
$stderr.print e.message,"\n" if e.message != 'exit'
|
446
434
|
raise if options[:verbose]
|
447
435
|
exit 1
|
448
436
|
end
|
data/bio-vcf.gemspec
CHANGED
@@ -1,15 +1,13 @@
|
|
1
|
-
#
|
2
|
-
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
-
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
1
|
+
# No longer generated by jeweler
|
4
2
|
# -*- encoding: utf-8 -*-
|
5
3
|
|
6
4
|
Gem::Specification.new do |s|
|
7
5
|
s.name = "bio-vcf"
|
8
|
-
s.version = "0.8.2"
|
6
|
+
s.version = "0.9.0"
|
9
7
|
|
10
8
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
9
|
s.authors = ["Pjotr Prins"]
|
12
|
-
s.date = "
|
10
|
+
# s.date = "2015-12-28"
|
13
11
|
s.description = "Smart lazy multi-threaded parser for VCF format with useful filtering and output rewriting"
|
14
12
|
s.email = "pjotr.public01@thebird.nl"
|
15
13
|
s.executables = ["bio-vcf"]
|
@@ -20,66 +18,19 @@ Gem::Specification.new do |s|
|
|
20
18
|
s.files = [
|
21
19
|
".travis.yml",
|
22
20
|
"Gemfile",
|
23
|
-
"Gemfile.lock",
|
24
21
|
"LICENSE.txt",
|
25
22
|
"README.md",
|
26
23
|
"Rakefile",
|
27
24
|
"VERSION",
|
28
25
|
"bin/bio-vcf",
|
29
26
|
"bio-vcf.gemspec",
|
30
|
-
"features/cli.feature",
|
31
|
-
"features/diff_count.feature",
|
32
|
-
"features/multisample.feature",
|
33
|
-
"features/sfilter.feature",
|
34
|
-
"features/somaticsniper.feature",
|
35
|
-
"features/step_definitions/bio-vcf_steps.rb",
|
36
|
-
"features/step_definitions/cli-feature.rb",
|
37
|
-
"features/step_definitions/diff_count.rb",
|
38
|
-
"features/step_definitions/multisample.rb",
|
39
|
-
"features/step_definitions/sfilter.rb",
|
40
|
-
"features/step_definitions/somaticsniper.rb",
|
41
|
-
"features/step_definitions/vcf_header.rb",
|
42
|
-
"features/support/env.rb",
|
43
|
-
"features/vcf_header.feature",
|
44
|
-
"lib/bio-vcf.rb",
|
45
|
-
"lib/bio-vcf/bedfilter.rb",
|
46
|
-
"lib/bio-vcf/template.rb",
|
47
|
-
"lib/bio-vcf/utils.rb",
|
48
|
-
"lib/bio-vcf/variant.rb",
|
49
|
-
"lib/bio-vcf/vcf.rb",
|
50
|
-
"lib/bio-vcf/vcfgenotypefield.rb",
|
51
|
-
"lib/bio-vcf/vcfheader.rb",
|
52
|
-
"lib/bio-vcf/vcfheader_line.rb",
|
53
|
-
"lib/bio-vcf/vcfline.rb",
|
54
|
-
"lib/bio-vcf/vcfrdf.rb",
|
55
|
-
"lib/bio-vcf/vcfrecord.rb",
|
56
|
-
"lib/bio-vcf/vcfsample.rb",
|
57
|
-
"lib/bio-vcf/vcfstatistics.rb",
|
58
|
-
"ragel/gen_vcfheaderline_parser.rb",
|
59
27
|
"ragel/gen_vcfheaderline_parser.rl",
|
60
28
|
"ragel/generate.sh",
|
61
|
-
"template/gatk_vcf2rdf.erb",
|
62
|
-
"template/vcf2json.erb",
|
63
|
-
"template/vcf2json_full_header.erb",
|
64
|
-
"template/vcf2json_use_meta.erb",
|
65
|
-
"template/vcf2rdf.erb",
|
66
|
-
"template/vcf2rdf_header.erb",
|
67
|
-
"test/data/input/dbsnp.vcf",
|
68
|
-
"test/data/input/multisample.vcf",
|
69
|
-
"test/data/input/somaticsniper.vcf",
|
70
|
-
"test/data/regression/eval_r.info.dp.ref",
|
71
|
-
"test/data/regression/ifilter_s.dp.ref",
|
72
|
-
"test/data/regression/r.info.dp.ref",
|
73
|
-
"test/data/regression/rewrite.info.sample.ref",
|
74
|
-
"test/data/regression/s.dp.ref",
|
75
|
-
"test/data/regression/seval_s.dp.ref",
|
76
|
-
"test/data/regression/sfilter_seval_s.dp.ref",
|
77
|
-
"test/data/regression/thread4.ref",
|
78
|
-
"test/data/regression/thread4_4.ref",
|
79
|
-
"test/data/regression/thread4_4_failed_filter-stderr.ref",
|
80
|
-
"test/data/regression/vcf2json_full_header.ref",
|
81
|
-
"test/performance/metrics.md"
|
82
29
|
]
|
30
|
+
s.files += Dir['lib/**/*.rb'] + Dir['bin/*']
|
31
|
+
s.files += Dir['[A-Z]*'] + Dir['test/**/*'] + Dir['features/**/*'] +
|
32
|
+
Dir['template/**/*']
|
33
|
+
|
83
34
|
s.homepage = "http://github.com/pjotrp/bioruby-vcf"
|
84
35
|
s.licenses = ["MIT"]
|
85
36
|
s.require_paths = ["lib"]
|
@@ -87,25 +38,22 @@ Gem::Specification.new do |s|
|
|
87
38
|
s.rubygems_version = "2.0.3"
|
88
39
|
s.summary = "Fast multi-threaded VCF parser"
|
89
40
|
|
90
|
-
if s.respond_to? :specification_version then
|
91
|
-
|
41
|
+
# if s.respond_to? :specification_version then
|
42
|
+
# s.specification_version = 4
|
92
43
|
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
s.add_dependency(%q<jeweler>, [">= 2.0.1"])
|
108
|
-
s.add_dependency(%q<regressiontest>, [">= 0.0.3"])
|
109
|
-
end
|
44
|
+
# if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
45
|
+
# s.add_development_dependency(%q<rspec>, [">= 2.14.0"])
|
46
|
+
# s.add_development_dependency(%q<cucumber>, [">= 1.3.11"])
|
47
|
+
# s.add_development_dependency(%q<regressiontest>, [">= 0.0.3"])
|
48
|
+
# else
|
49
|
+
# s.add_dependency(%q<rspec>, [">= 2.14.0"])
|
50
|
+
# s.add_dependency(%q<cucumber>, [">= 1.3.11"])
|
51
|
+
# s.add_dependency(%q<regressiontest>, [">= 0.0.3"])
|
52
|
+
# end
|
53
|
+
# else
|
54
|
+
# s.add_dependency(%q<rspec>, [">= 2.14.0"])
|
55
|
+
# s.add_dependency(%q<cucumber>, [">= 1.3.11"])
|
56
|
+
# s.add_dependency(%q<regressiontest>, [">= 0.0.3"])
|
57
|
+
# end
|
110
58
|
end
|
111
59
|
|
data/features/cli.feature
CHANGED
@@ -48,6 +48,11 @@ Feature: Command-line interface (CLI)
|
|
48
48
|
When I execute "./bin/bio-vcf --rewrite rec.info[\'sample\']=\'XXXXX\'"
|
49
49
|
Then I expect the named output to match the named output "rewrite.info.sample"
|
50
50
|
|
51
|
+
Scenario: Test eval-once
|
52
|
+
Given I have input file(s) named "test/data/input/multisample.vcf"
|
53
|
+
When I execute "./bin/bio-vcf --eval-once header.meta[\'GATKCommandLine\']"
|
54
|
+
Then I expect the named output to match the named output "eval_once"
|
55
|
+
|
51
56
|
Scenario: Test JSON output with header meta data
|
52
57
|
Given I have input file(s) named "test/data/input/multisample.vcf"
|
53
58
|
When I execute "./bin/bio-vcf --template template/vcf2json_full_header.erb"
|
@@ -60,7 +65,7 @@ Feature: Command-line interface (CLI)
|
|
60
65
|
|
61
66
|
Scenario: Test deadlock on failed filter with threads
|
62
67
|
Given I have input file(s) named "test/data/input/multisample.vcf"
|
63
|
-
When I execute "./bin/bio-vcf --num-threads 4 --thread-lines 4 --filter 't.info.dp>2'"
|
68
|
+
When I execute "./bin/bio-vcf -q --timeout 2 --num-threads 4 --thread-lines 4 --filter 't.info.dp>2'"
|
64
69
|
Then I expect an error and the named output to match the named output "thread4_4_failed_filter" in under 30 seconds
|
65
70
|
|
66
71
|
|
data/features/multisample.feature
CHANGED
@@ -25,7 +25,10 @@ Feature: Multi-sample VCF
|
|
25
25
|
And I expect rec.info.ac to be 5
|
26
26
|
And I expect rec.info.af to be 0.357
|
27
27
|
And I expect rec.info.dp to be 1537
|
28
|
+
And I expect rec.info['dp'] to be 1537
|
28
29
|
And I expect rec.info.readposranksum to be 0.815
|
30
|
+
And I expect rec.info['ReadPosRankSum'] to be 0.815
|
31
|
+
And I expect rec.info.fields to contain ["AC", "AF", "AN", "BASEQRANKSUM", "DP", "DELS", "FS", "HAPLOTYPESCORE", "MLEAC", "MLEAF", "MQ", "MQ0", "MQRANKSUM", "QD", "READPOSRANKSUM"]
|
29
32
|
And I expect rec.sample['Original'].ad to be [189,25]
|
30
33
|
And I expect rec.sample['Original'].gt to be "0/1"
|
31
34
|
And I expect rec.sample['s3t2'].ad to be [167,26]
|
@@ -76,3 +79,12 @@ Feature: Multi-sample VCF
|
|
76
79
|
And I expect r.original.gts to be ["C","G"]
|
77
80
|
And I expect r.original.gts[0] to be "C"
|
78
81
|
And I expect r.original.gts[1] to be "G"
|
82
|
+
|
83
|
+
# INFO fields with matching tails
|
84
|
+
Given multisample vcf line
|
85
|
+
"""
|
86
|
+
1 10723 . C G 73.85 . AC=4;AF=0.667;CIEND=999;END=111;AN=6;BaseQRankSum=1.300;DP=18;Dels=0.00;FS=3.680;HaplotypeScore=0.0000;MLEAC=4;MLEAF=0.667;MQ=20.49;MQ0=11;MQRankSum=1.754;QD=8.21;ReadPosRankSum=0.000 GT:AD:DP:GQ:PL 0|1 ./. 1/1:2,2:4:6:66,6,0 1/1:4,1:5:3:36,3,0 ./. ./. 0/0:6,0:6:3:0,3,33
|
87
|
+
"""
|
88
|
+
When I parse the record
|
89
|
+
Then I expect r.info.end to be 111
|
90
|
+
And I expect r.info.ciend to be 999
|
data/features/step_definitions/cli-feature.rb
CHANGED
@@ -8,9 +8,9 @@ When /^I execute "(.*?)"$/ do |arg1|
|
|
8
8
|
end
|
9
9
|
|
10
10
|
Then(/^I expect the named output to match the named output "(.*?)"$/) do |arg1|
|
11
|
-
RegressionTest::CliExec::exec(@cmd,arg1,ignore: '(##BioVcf|date|"version":)').should
|
11
|
+
RegressionTest::CliExec::exec(@cmd,arg1,ignore: '(##BioVcf|date|"version":)').should be_truthy
|
12
12
|
end
|
13
13
|
|
14
14
|
Then(/^I expect an error and the named output to match the named output "(.*?)" in under (\d+) seconds$/) do |arg1,arg2|
|
15
|
-
RegressionTest::CliExec::exec(@cmd,arg1,ignore: '(FATAL|Waiting|from|vcf|Options|Final pid)',should_fail: true,timeout:arg2.to_i).should
|
15
|
+
RegressionTest::CliExec::exec(@cmd,arg1,ignore: '(FATAL|Waiting|from|vcf|Options|Final pid)',should_fail: true,timeout:arg2.to_i).should be_truthy
|
16
16
|
end
|