bio-vcf 0.8.2 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +8 -2
- data/Gemfile +4 -6
- data/README.md +92 -57
- data/Rakefile +3 -41
- data/TAGS +115 -0
- data/VERSION +1 -1
- data/bin/bio-vcf +58 -70
- data/bio-vcf.gemspec +23 -75
- data/features/cli.feature +6 -1
- data/features/multisample.feature +12 -0
- data/features/step_definitions/cli-feature.rb +2 -2
- data/features/step_definitions/multisample.rb +19 -0
- data/features/step_definitions/vcf_header.rb +1 -1
- data/features/support/env.rb +0 -9
- data/lib/bio-vcf/pcows.rb +210 -0
- data/lib/bio-vcf/vcfheader.rb +28 -9
- data/lib/bio-vcf/vcfheader_line.rb +455 -160
- data/lib/bio-vcf/vcfrecord.rb +30 -15
- data/ragel/gen_vcfheaderline_parser.rl +68 -25
- data/ragel/generate.sh +4 -1
- data/template/vcf2json.erb +16 -16
- data/template/vcf2json_full_header.erb +16 -17
- data/template/vcf2json_use_meta.erb +35 -35
- data/test/data/input/gatk_exome.vcf +237 -0
- data/test/data/input/gatk_wgs.vcf +1000 -0
- data/test/data/input/test.bed +632 -0
- data/test/data/regression/eval_once-stderr.new +1 -0
- data/test/data/regression/eval_once.new +1 -0
- data/test/data/regression/eval_once.ref +1 -0
- data/test/data/regression/eval_r.info.dp-stderr.new +4 -0
- data/test/data/regression/eval_r.info.dp.new +150 -0
- data/test/data/regression/ifilter_s.dp-stderr.new +28 -0
- data/test/data/regression/ifilter_s.dp.new +31 -0
- data/test/data/regression/r.info.dp-stderr.new +4 -0
- data/test/data/regression/r.info.dp.new +147 -0
- data/test/data/regression/rewrite.info.sample-stderr.new +4 -0
- data/test/data/regression/rewrite.info.sample.new +150 -0
- data/test/data/regression/s.dp-stderr.new +12 -0
- data/test/data/regression/s.dp.new +145 -0
- data/test/data/regression/seval_s.dp-stderr.new +4 -0
- data/test/data/regression/seval_s.dp.new +36 -0
- data/test/data/regression/sfilter_seval_s.dp-stderr.new +12 -0
- data/test/data/regression/sfilter_seval_s.dp.new +31 -0
- data/test/data/regression/thread4-stderr.new +4 -0
- data/test/data/regression/thread4.new +150 -0
- data/test/data/regression/thread4_4-stderr.new +15 -0
- data/test/data/regression/thread4_4.new +150 -0
- data/test/data/regression/thread4_4_failed_filter-stderr.new +5 -0
- data/test/data/regression/thread4_4_failed_filter-stderr.ref +5 -2
- data/test/data/regression/thread4_4_failed_filter.new +110 -0
- data/test/data/regression/vcf2json_full_header-stderr.new +4 -0
- data/test/data/regression/vcf2json_full_header.new +225 -0
- data/test/data/regression/vcf2json_full_header.ref +222 -258
- data/test/data/regression/vcf2json_use_meta-stderr.new +4 -0
- data/test/data/regression/vcf2json_use_meta.new +4697 -0
- data/test/data/regression/vcf2json_use_meta.ref +4697 -0
- data/test/performance/metrics.md +18 -1
- data/test/tmp/test.vcf +12469 -0
- metadata +38 -62
- data/Gemfile.lock +0 -81
- data/ragel/gen_vcfheaderline_parser.rb +0 -483
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.8.2
|
1
|
+
0.9.0
|
data/bin/bio-vcf
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
# Author:: Pjotr Prins
|
5
5
|
# License:: MIT
|
6
6
|
#
|
7
|
-
# Copyright (C) 2014 Pjotr Prins <pjotr.prins@thebird.nl>
|
7
|
+
# Copyright (C) 2014-2015 Pjotr Prins <pjotr.prins@thebird.nl>
|
8
8
|
|
9
9
|
USAGE = "Vcf parser"
|
10
10
|
|
@@ -15,6 +15,7 @@ VERSION_FILENAME=File.join(gempath,'VERSION')
|
|
15
15
|
version = File.new(VERSION_FILENAME).read.chomp
|
16
16
|
|
17
17
|
require 'bio-vcf'
|
18
|
+
require 'bio-vcf/pcows'
|
18
19
|
require 'optparse'
|
19
20
|
require 'timeout'
|
20
21
|
require 'fileutils'
|
@@ -26,7 +27,7 @@ require 'fileutils'
|
|
26
27
|
# Bio::Log::CLI.logger('stderr')
|
27
28
|
# Bio::Log::CLI.trace('info')
|
28
29
|
|
29
|
-
options = { show_help: false, source: 'https://github.com/CuppenResearch/bioruby-vcf', version: version+' (Pjotr Prins)', date: Time.now.to_s, thread_lines: 40_000 }
|
30
|
+
options = { show_help: false, source: 'https://github.com/CuppenResearch/bioruby-vcf', version: version+' (Pjotr Prins)', date: Time.now.to_s, thread_lines: 40_000, timeout: 180 }
|
30
31
|
opts = OptionParser.new do |o|
|
31
32
|
o.banner = "Usage: #{File.basename($0)} [options] filename\ne.g. #{File.basename($0)} < test/data/input/somaticsniper.vcf"
|
32
33
|
|
@@ -68,6 +69,9 @@ opts = OptionParser.new do |o|
|
|
68
69
|
o.on('--eval-once cmd',String, 'Evaluate command once (usually for header info)') do |cmd|
|
69
70
|
options[:eval_once] = true
|
70
71
|
options[:eval] = cmd
|
72
|
+
# options[:num_threads] = 1
|
73
|
+
# options[:thread_lines] = 1
|
74
|
+
options[:skip_header] = true
|
71
75
|
end
|
72
76
|
o.on('--seval cmd',String, 'Evaluate command on each sample') do |cmd|
|
73
77
|
options[:seval] = cmd
|
@@ -112,7 +116,14 @@ opts = OptionParser.new do |o|
|
|
112
116
|
options[:template] = s
|
113
117
|
options[:skip_header] = true
|
114
118
|
end
|
115
|
-
|
119
|
+
|
120
|
+
o.on("--add-header-tag", "Add bio-vcf status tag to header output") do |t|
|
121
|
+
options[:tag] = true
|
122
|
+
end
|
123
|
+
|
124
|
+
o.on("--timeout [num]", Integer, "Timeout waiting for thread to complete (default #{options[:timeout]})") do |i|
|
125
|
+
options[:timeout] = i
|
126
|
+
end
|
116
127
|
|
117
128
|
# Uncomment the following when using the bio-logger
|
118
129
|
# o.separator ""
|
@@ -137,9 +148,10 @@ opts = OptionParser.new do |o|
|
|
137
148
|
options[:verbose] = true
|
138
149
|
end
|
139
150
|
|
140
|
-
|
141
|
-
|
142
|
-
|
151
|
+
o.on("--debug", "Show debug messages") do |v|
|
152
|
+
# Bio::Log::CLI.trace('debug')
|
153
|
+
options[:debug] = true
|
154
|
+
end
|
143
155
|
|
144
156
|
o.separator ""
|
145
157
|
o.on_tail('-h', '--help', 'display this help and exit') do
|
@@ -150,8 +162,8 @@ end
|
|
150
162
|
opts.parse!(ARGV)
|
151
163
|
|
152
164
|
BIOVCF_VERSION=version
|
153
|
-
BIOVCF_BANNER = "vcf #{version} (biogem Ruby #{RUBY_VERSION}) by Pjotr Prins
|
154
|
-
$stderr.print BIOVCF_BANNER
|
165
|
+
BIOVCF_BANNER = "bio-vcf #{version} (biogem Ruby #{RUBY_VERSION} with pcows) by Pjotr Prins 2015\n"
|
166
|
+
$stderr.print BIOVCF_BANNER if !options[:quiet]
|
155
167
|
|
156
168
|
if options[:show_help]
|
157
169
|
print opts
|
@@ -174,15 +186,6 @@ if options[:template]
|
|
174
186
|
template = Bio::Template.new(fn)
|
175
187
|
end
|
176
188
|
|
177
|
-
if options[:num_threads] != 1
|
178
|
-
begin
|
179
|
-
require 'parallel'
|
180
|
-
rescue LoadError
|
181
|
-
$stderr.print "Error: Missing 'parallel' module. Install with command 'gem install parallel' if you want multiple threads\n"
|
182
|
-
options[:num_threads] = 1
|
183
|
-
end
|
184
|
-
end
|
185
|
-
|
186
189
|
stats = nil
|
187
190
|
if options[:statistics]
|
188
191
|
options[:num_threads] = nil
|
@@ -202,7 +205,7 @@ include BioVcf
|
|
202
205
|
|
203
206
|
# Parse the header section of a VCF file (chomping STDIN)
|
204
207
|
def parse_header line, samples, options
|
205
|
-
header = VcfHeader.new
|
208
|
+
header = VcfHeader.new(options[:debug])
|
206
209
|
header.add(line)
|
207
210
|
print line if not options[:skip_header]
|
208
211
|
STDIN.each_line do | headerline |
|
@@ -214,7 +217,7 @@ def parse_header line, samples, options
|
|
214
217
|
if not options[:skip_header]
|
215
218
|
if headerline =~ /^#CHR/
|
216
219
|
# The header before actual data contains the sample names, first inject the BioVcf meta information
|
217
|
-
print header.tag(options),"\n" if not options[:skip_header]
|
220
|
+
print header.tag(options),"\n" if options[:tag] and not options[:skip_header]
|
218
221
|
selected = header.column_names
|
219
222
|
if samples
|
220
223
|
newfields = selected[0..8]
|
@@ -234,7 +237,7 @@ def parse_header line, samples, options
|
|
234
237
|
return header,line
|
235
238
|
end
|
236
239
|
|
237
|
-
# Parse a VCF line and return the result as a string
|
240
|
+
# Parse a VCF line and return the (template) result as a string buffer
|
238
241
|
def parse_line line,header,options,bedfilter,samples,template,stats=nil
|
239
242
|
fields = VcfLine.parse(line)
|
240
243
|
rec = VcfRecord.new(fields,header)
|
@@ -261,7 +264,7 @@ def parse_line line,header,options,bedfilter,samples,template,stats=nil
|
|
261
264
|
return if not bed
|
262
265
|
end
|
263
266
|
|
264
|
-
return if filter and not rec.
|
267
|
+
return if filter and not rec.gfilter(filter,ignore_missing_data: ignore_missing,quiet: quiet)
|
265
268
|
|
266
269
|
if sfilter
|
267
270
|
rec.each_sample(options[:sfilter_samples]) do | sample |
|
@@ -320,13 +323,12 @@ def parse_line line,header,options,bedfilter,samples,template,stats=nil
|
|
320
323
|
exit 1
|
321
324
|
end
|
322
325
|
return results.to_s+"\n" if results
|
323
|
-
exit(1) if options[:eval_once] # <--- can this be reached?
|
324
326
|
else
|
325
327
|
if options[:rdf]
|
326
328
|
# Output Turtle RDF
|
327
329
|
VcfRdf::record(options[:id],rec,options[:tags])
|
328
330
|
elsif options[:template]
|
329
|
-
#
|
331
|
+
# Use ERB template
|
330
332
|
begin
|
331
333
|
template.body(binding)
|
332
334
|
rescue Exception => e
|
@@ -347,13 +349,11 @@ def parse_line line,header,options,bedfilter,samples,template,stats=nil
|
|
347
349
|
end
|
348
350
|
end
|
349
351
|
|
352
|
+
pcows = PCOWS.new(options[:num_threads],'bio-vcf',options[:timeout])
|
350
353
|
header = nil
|
351
354
|
header_output_completed = false
|
352
|
-
NUM_THREADS = options[:num_threads]
|
353
355
|
CHUNK_SIZE = options[:thread_lines]
|
354
|
-
|
355
|
-
chunks = []
|
356
|
-
lines = []
|
356
|
+
chunk_lines = []
|
357
357
|
line_number=0
|
358
358
|
|
359
359
|
if options[:bed]
|
@@ -361,6 +361,7 @@ if options[:bed]
|
|
361
361
|
end
|
362
362
|
|
363
363
|
begin
|
364
|
+
# Define linear parser function (going through one chunk)
|
364
365
|
process = lambda { | lines |
|
365
366
|
res = []
|
366
367
|
lines.each do | line |
|
@@ -368,20 +369,17 @@ begin
|
|
368
369
|
end
|
369
370
|
res
|
370
371
|
}
|
371
|
-
output = lambda { |collection|
|
372
|
-
collection.each do | result |
|
373
|
-
result.each { |line| print line }
|
374
|
-
end
|
375
|
-
} # end output
|
376
372
|
|
377
373
|
# ---- Main loop
|
378
374
|
STDIN.each_line do | line |
|
379
375
|
line_number += 1
|
380
|
-
# ---- In this section header information is handled
|
381
376
|
|
382
377
|
# ---- Skip embedded headers down the line...
|
383
378
|
next if header_output_completed and line =~ /^#/
|
384
379
|
|
380
|
+
# ---- In the following section header information is handled -
|
381
|
+
# this only happens once.
|
382
|
+
|
385
383
|
# ---- Parse the header lines (chomps from STDIN)
|
386
384
|
# and returns header info and the current line
|
387
385
|
if line =~ /^#/
|
@@ -399,50 +397,40 @@ begin
|
|
399
397
|
header_output_completed = true
|
400
398
|
end
|
401
399
|
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
exit 0
|
425
|
-
end
|
426
|
-
end
|
427
|
-
lines = []
|
428
|
-
end
|
400
|
+
if options[:eval_once]
|
401
|
+
# this happens if we only want one line evaluated - say to get
|
402
|
+
# the number of samples
|
403
|
+
print parse_line(line,header,options,bedfilter,samples,template,stats)
|
404
|
+
exit 0
|
405
|
+
end
|
406
|
+
|
407
|
+
# ---- Lines are collected in one buffer and the lines buffer
|
408
|
+
# is added to the chunks list (for the threads)
|
409
|
+
chunk_lines << line
|
410
|
+
|
411
|
+
# ---- In the following section the VCF lines are parsed by chunks
|
412
|
+
# The chunks may go into different threads
|
413
|
+
|
414
|
+
if chunk_lines.size > CHUNK_SIZE
|
415
|
+
# ---- process one chunk
|
416
|
+
$stderr.print '.' if not options[:quiet]
|
417
|
+
pcows.wait_for_worker_slot()
|
418
|
+
pcows.submit_worker(process,chunk_lines)
|
419
|
+
pcows.process_output()
|
420
|
+
|
421
|
+
chunk_lines = []
|
429
422
|
end
|
430
423
|
end
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
chunks << lines
|
436
|
-
output.call Parallel.map(chunks, :in_processes => NUM_THREADS) { | chunk |
|
437
|
-
process.call(chunk)
|
438
|
-
}
|
439
|
-
end
|
424
|
+
pcows.submit_worker(process,chunk_lines)
|
425
|
+
pcows.wait_for_workers()
|
426
|
+
pcows.process_remaining_output()
|
427
|
+
|
440
428
|
print template.footer(binding) if template
|
441
429
|
stats.print if stats
|
442
430
|
|
443
431
|
rescue Exception => e
|
444
432
|
# $stderr.print line
|
445
|
-
$stderr.print e.message,"\n"
|
433
|
+
$stderr.print e.message,"\n" if e.message != 'exit'
|
446
434
|
raise if options[:verbose]
|
447
435
|
exit 1
|
448
436
|
end
|
data/bio-vcf.gemspec
CHANGED
@@ -1,15 +1,13 @@
|
|
1
|
-
#
|
2
|
-
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
-
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
1
|
+
# No longer generated by jeweler
|
4
2
|
# -*- encoding: utf-8 -*-
|
5
3
|
|
6
4
|
Gem::Specification.new do |s|
|
7
5
|
s.name = "bio-vcf"
|
8
|
-
s.version = "0.8.2"
|
6
|
+
s.version = "0.9.0"
|
9
7
|
|
10
8
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
9
|
s.authors = ["Pjotr Prins"]
|
12
|
-
s.date = "
|
10
|
+
# s.date = "2015-12-28"
|
13
11
|
s.description = "Smart lazy multi-threaded parser for VCF format with useful filtering and output rewriting"
|
14
12
|
s.email = "pjotr.public01@thebird.nl"
|
15
13
|
s.executables = ["bio-vcf"]
|
@@ -20,66 +18,19 @@ Gem::Specification.new do |s|
|
|
20
18
|
s.files = [
|
21
19
|
".travis.yml",
|
22
20
|
"Gemfile",
|
23
|
-
"Gemfile.lock",
|
24
21
|
"LICENSE.txt",
|
25
22
|
"README.md",
|
26
23
|
"Rakefile",
|
27
24
|
"VERSION",
|
28
25
|
"bin/bio-vcf",
|
29
26
|
"bio-vcf.gemspec",
|
30
|
-
"features/cli.feature",
|
31
|
-
"features/diff_count.feature",
|
32
|
-
"features/multisample.feature",
|
33
|
-
"features/sfilter.feature",
|
34
|
-
"features/somaticsniper.feature",
|
35
|
-
"features/step_definitions/bio-vcf_steps.rb",
|
36
|
-
"features/step_definitions/cli-feature.rb",
|
37
|
-
"features/step_definitions/diff_count.rb",
|
38
|
-
"features/step_definitions/multisample.rb",
|
39
|
-
"features/step_definitions/sfilter.rb",
|
40
|
-
"features/step_definitions/somaticsniper.rb",
|
41
|
-
"features/step_definitions/vcf_header.rb",
|
42
|
-
"features/support/env.rb",
|
43
|
-
"features/vcf_header.feature",
|
44
|
-
"lib/bio-vcf.rb",
|
45
|
-
"lib/bio-vcf/bedfilter.rb",
|
46
|
-
"lib/bio-vcf/template.rb",
|
47
|
-
"lib/bio-vcf/utils.rb",
|
48
|
-
"lib/bio-vcf/variant.rb",
|
49
|
-
"lib/bio-vcf/vcf.rb",
|
50
|
-
"lib/bio-vcf/vcfgenotypefield.rb",
|
51
|
-
"lib/bio-vcf/vcfheader.rb",
|
52
|
-
"lib/bio-vcf/vcfheader_line.rb",
|
53
|
-
"lib/bio-vcf/vcfline.rb",
|
54
|
-
"lib/bio-vcf/vcfrdf.rb",
|
55
|
-
"lib/bio-vcf/vcfrecord.rb",
|
56
|
-
"lib/bio-vcf/vcfsample.rb",
|
57
|
-
"lib/bio-vcf/vcfstatistics.rb",
|
58
|
-
"ragel/gen_vcfheaderline_parser.rb",
|
59
27
|
"ragel/gen_vcfheaderline_parser.rl",
|
60
28
|
"ragel/generate.sh",
|
61
|
-
"template/gatk_vcf2rdf.erb",
|
62
|
-
"template/vcf2json.erb",
|
63
|
-
"template/vcf2json_full_header.erb",
|
64
|
-
"template/vcf2json_use_meta.erb",
|
65
|
-
"template/vcf2rdf.erb",
|
66
|
-
"template/vcf2rdf_header.erb",
|
67
|
-
"test/data/input/dbsnp.vcf",
|
68
|
-
"test/data/input/multisample.vcf",
|
69
|
-
"test/data/input/somaticsniper.vcf",
|
70
|
-
"test/data/regression/eval_r.info.dp.ref",
|
71
|
-
"test/data/regression/ifilter_s.dp.ref",
|
72
|
-
"test/data/regression/r.info.dp.ref",
|
73
|
-
"test/data/regression/rewrite.info.sample.ref",
|
74
|
-
"test/data/regression/s.dp.ref",
|
75
|
-
"test/data/regression/seval_s.dp.ref",
|
76
|
-
"test/data/regression/sfilter_seval_s.dp.ref",
|
77
|
-
"test/data/regression/thread4.ref",
|
78
|
-
"test/data/regression/thread4_4.ref",
|
79
|
-
"test/data/regression/thread4_4_failed_filter-stderr.ref",
|
80
|
-
"test/data/regression/vcf2json_full_header.ref",
|
81
|
-
"test/performance/metrics.md"
|
82
29
|
]
|
30
|
+
s.files += Dir['lib/**/*.rb'] + Dir['bin/*']
|
31
|
+
s.files += Dir['[A-Z]*'] + Dir['test/**/*'] + Dir['features/**/*'] +
|
32
|
+
Dir['template/**/*']
|
33
|
+
|
83
34
|
s.homepage = "http://github.com/pjotrp/bioruby-vcf"
|
84
35
|
s.licenses = ["MIT"]
|
85
36
|
s.require_paths = ["lib"]
|
@@ -87,25 +38,22 @@ Gem::Specification.new do |s|
|
|
87
38
|
s.rubygems_version = "2.0.3"
|
88
39
|
s.summary = "Fast multi-threaded VCF parser"
|
89
40
|
|
90
|
-
if s.respond_to? :specification_version then
|
91
|
-
|
41
|
+
# if s.respond_to? :specification_version then
|
42
|
+
# s.specification_version = 4
|
92
43
|
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
s.add_dependency(%q<jeweler>, [">= 2.0.1"])
|
108
|
-
s.add_dependency(%q<regressiontest>, [">= 0.0.3"])
|
109
|
-
end
|
44
|
+
# if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
45
|
+
# s.add_development_dependency(%q<rspec>, [">= 2.14.0"])
|
46
|
+
# s.add_development_dependency(%q<cucumber>, [">= 1.3.11"])
|
47
|
+
# s.add_development_dependency(%q<regressiontest>, [">= 0.0.3"])
|
48
|
+
# else
|
49
|
+
# s.add_dependency(%q<rspec>, [">= 2.14.0"])
|
50
|
+
# s.add_dependency(%q<cucumber>, [">= 1.3.11"])
|
51
|
+
# s.add_dependency(%q<regressiontest>, [">= 0.0.3"])
|
52
|
+
# end
|
53
|
+
# else
|
54
|
+
# s.add_dependency(%q<rspec>, [">= 2.14.0"])
|
55
|
+
# s.add_dependency(%q<cucumber>, [">= 1.3.11"])
|
56
|
+
# s.add_dependency(%q<regressiontest>, [">= 0.0.3"])
|
57
|
+
# end
|
110
58
|
end
|
111
59
|
|
data/features/cli.feature
CHANGED
@@ -48,6 +48,11 @@ Feature: Command-line interface (CLI)
|
|
48
48
|
When I execute "./bin/bio-vcf --rewrite rec.info[\'sample\']=\'XXXXX\'"
|
49
49
|
Then I expect the named output to match the named output "rewrite.info.sample"
|
50
50
|
|
51
|
+
Scenario: Test eval-once
|
52
|
+
Given I have input file(s) named "test/data/input/multisample.vcf"
|
53
|
+
When I execute "./bin/bio-vcf --eval-once header.meta[\'GATKCommandLine\']"
|
54
|
+
Then I expect the named output to match the named output "eval_once"
|
55
|
+
|
51
56
|
Scenario: Test JSON output with header meta data
|
52
57
|
Given I have input file(s) named "test/data/input/multisample.vcf"
|
53
58
|
When I execute "./bin/bio-vcf --template template/vcf2json_full_header.erb"
|
@@ -60,7 +65,7 @@ Feature: Command-line interface (CLI)
|
|
60
65
|
|
61
66
|
Scenario: Test deadlock on failed filter with threads
|
62
67
|
Given I have input file(s) named "test/data/input/multisample.vcf"
|
63
|
-
When I execute "./bin/bio-vcf --num-threads 4 --thread-lines 4 --filter 't.info.dp>2'"
|
68
|
+
When I execute "./bin/bio-vcf -q --timeout 2 --num-threads 4 --thread-lines 4 --filter 't.info.dp>2'"
|
64
69
|
Then I expect an error and the named output to match the named output "thread4_4_failed_filter" in under 30 seconds
|
65
70
|
|
66
71
|
|
@@ -25,7 +25,10 @@ Feature: Multi-sample VCF
|
|
25
25
|
And I expect rec.info.ac to be 5
|
26
26
|
And I expect rec.info.af to be 0.357
|
27
27
|
And I expect rec.info.dp to be 1537
|
28
|
+
And I expect rec.info['dp'] to be 1537
|
28
29
|
And I expect rec.info.readposranksum to be 0.815
|
30
|
+
And I expect rec.info['ReadPosRankSum'] to be 0.815
|
31
|
+
And I expect rec.info.fields to contain ["AC", "AF", "AN", "BASEQRANKSUM", "DP", "DELS", "FS", "HAPLOTYPESCORE", "MLEAC", "MLEAF", "MQ", "MQ0", "MQRANKSUM", "QD", "READPOSRANKSUM"]
|
29
32
|
And I expect rec.sample['Original'].ad to be [189,25]
|
30
33
|
And I expect rec.sample['Original'].gt to be "0/1"
|
31
34
|
And I expect rec.sample['s3t2'].ad to be [167,26]
|
@@ -76,3 +79,12 @@ Feature: Multi-sample VCF
|
|
76
79
|
And I expect r.original.gts to be ["C","G"]
|
77
80
|
And I expect r.original.gts[0] to be "C"
|
78
81
|
And I expect r.original.gts[1] to be "G"
|
82
|
+
|
83
|
+
# INFO fields with matching tails
|
84
|
+
Given multisample vcf line
|
85
|
+
"""
|
86
|
+
1 10723 . C G 73.85 . AC=4;AF=0.667;CIEND=999;END=111;AN=6;BaseQRankSum=1.300;DP=18;Dels=0.00;FS=3.680;HaplotypeScore=0.0000;MLEAC=4;MLEAF=0.667;MQ=20.49;MQ0=11;MQRankSum=1.754;QD=8.21;ReadPosRankSum=0.000 GT:AD:DP:GQ:PL 0|1 ./. 1/1:2,2:4:6:66,6,0 1/1:4,1:5:3:36,3,0 ./. ./. 0/0:6,0:6:3:0,3,33
|
87
|
+
"""
|
88
|
+
When I parse the record
|
89
|
+
Then I expect r.info.end to be 111
|
90
|
+
And I expect r.info.ciend to be 999
|
@@ -8,9 +8,9 @@ When /^I execute "(.*?)"$/ do |arg1|
|
|
8
8
|
end
|
9
9
|
|
10
10
|
Then(/^I expect the named output to match the named output "(.*?)"$/) do |arg1|
|
11
|
-
RegressionTest::CliExec::exec(@cmd,arg1,ignore: '(##BioVcf|date|"version":)').should
|
11
|
+
RegressionTest::CliExec::exec(@cmd,arg1,ignore: '(##BioVcf|date|"version":)').should be_truthy
|
12
12
|
end
|
13
13
|
|
14
14
|
Then(/^I expect an error and the named output to match the named output "(.*?)" in under (\d+) seconds$/) do |arg1,arg2|
|
15
|
-
RegressionTest::CliExec::exec(@cmd,arg1,ignore: '(FATAL|Waiting|from|vcf|Options|Final pid)',should_fail: true,timeout:arg2.to_i).should
|
15
|
+
RegressionTest::CliExec::exec(@cmd,arg1,ignore: '(FATAL|Waiting|from|vcf|Options|Final pid)',should_fail: true,timeout:arg2.to_i).should be_truthy
|
16
16
|
end
|