bio-vcf 0.8.2 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. checksums.yaml +4 -4
  2. data/.travis.yml +8 -2
  3. data/Gemfile +4 -6
  4. data/README.md +92 -57
  5. data/Rakefile +3 -41
  6. data/TAGS +115 -0
  7. data/VERSION +1 -1
  8. data/bin/bio-vcf +58 -70
  9. data/bio-vcf.gemspec +23 -75
  10. data/features/cli.feature +6 -1
  11. data/features/multisample.feature +12 -0
  12. data/features/step_definitions/cli-feature.rb +2 -2
  13. data/features/step_definitions/multisample.rb +19 -0
  14. data/features/step_definitions/vcf_header.rb +1 -1
  15. data/features/support/env.rb +0 -9
  16. data/lib/bio-vcf/pcows.rb +210 -0
  17. data/lib/bio-vcf/vcfheader.rb +28 -9
  18. data/lib/bio-vcf/vcfheader_line.rb +455 -160
  19. data/lib/bio-vcf/vcfrecord.rb +30 -15
  20. data/ragel/gen_vcfheaderline_parser.rl +68 -25
  21. data/ragel/generate.sh +4 -1
  22. data/template/vcf2json.erb +16 -16
  23. data/template/vcf2json_full_header.erb +16 -17
  24. data/template/vcf2json_use_meta.erb +35 -35
  25. data/test/data/input/gatk_exome.vcf +237 -0
  26. data/test/data/input/gatk_wgs.vcf +1000 -0
  27. data/test/data/input/test.bed +632 -0
  28. data/test/data/regression/eval_once-stderr.new +1 -0
  29. data/test/data/regression/eval_once.new +1 -0
  30. data/test/data/regression/eval_once.ref +1 -0
  31. data/test/data/regression/eval_r.info.dp-stderr.new +4 -0
  32. data/test/data/regression/eval_r.info.dp.new +150 -0
  33. data/test/data/regression/ifilter_s.dp-stderr.new +28 -0
  34. data/test/data/regression/ifilter_s.dp.new +31 -0
  35. data/test/data/regression/r.info.dp-stderr.new +4 -0
  36. data/test/data/regression/r.info.dp.new +147 -0
  37. data/test/data/regression/rewrite.info.sample-stderr.new +4 -0
  38. data/test/data/regression/rewrite.info.sample.new +150 -0
  39. data/test/data/regression/s.dp-stderr.new +12 -0
  40. data/test/data/regression/s.dp.new +145 -0
  41. data/test/data/regression/seval_s.dp-stderr.new +4 -0
  42. data/test/data/regression/seval_s.dp.new +36 -0
  43. data/test/data/regression/sfilter_seval_s.dp-stderr.new +12 -0
  44. data/test/data/regression/sfilter_seval_s.dp.new +31 -0
  45. data/test/data/regression/thread4-stderr.new +4 -0
  46. data/test/data/regression/thread4.new +150 -0
  47. data/test/data/regression/thread4_4-stderr.new +15 -0
  48. data/test/data/regression/thread4_4.new +150 -0
  49. data/test/data/regression/thread4_4_failed_filter-stderr.new +5 -0
  50. data/test/data/regression/thread4_4_failed_filter-stderr.ref +5 -2
  51. data/test/data/regression/thread4_4_failed_filter.new +110 -0
  52. data/test/data/regression/vcf2json_full_header-stderr.new +4 -0
  53. data/test/data/regression/vcf2json_full_header.new +225 -0
  54. data/test/data/regression/vcf2json_full_header.ref +222 -258
  55. data/test/data/regression/vcf2json_use_meta-stderr.new +4 -0
  56. data/test/data/regression/vcf2json_use_meta.new +4697 -0
  57. data/test/data/regression/vcf2json_use_meta.ref +4697 -0
  58. data/test/performance/metrics.md +18 -1
  59. data/test/tmp/test.vcf +12469 -0
  60. metadata +38 -62
  61. data/Gemfile.lock +0 -81
  62. data/ragel/gen_vcfheaderline_parser.rb +0 -483
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.8.2
1
+ 0.9.0
@@ -4,7 +4,7 @@
4
4
  # Author:: Pjotr Prins
5
5
  # License:: MIT
6
6
  #
7
- # Copyright (C) 2014 Pjotr Prins <pjotr.prins@thebird.nl>
7
+ # Copyright (C) 2014-2015 Pjotr Prins <pjotr.prins@thebird.nl>
8
8
 
9
9
  USAGE = "Vcf parser"
10
10
 
@@ -15,6 +15,7 @@ VERSION_FILENAME=File.join(gempath,'VERSION')
15
15
  version = File.new(VERSION_FILENAME).read.chomp
16
16
 
17
17
  require 'bio-vcf'
18
+ require 'bio-vcf/pcows'
18
19
  require 'optparse'
19
20
  require 'timeout'
20
21
  require 'fileutils'
@@ -26,7 +27,7 @@ require 'fileutils'
26
27
  # Bio::Log::CLI.logger('stderr')
27
28
  # Bio::Log::CLI.trace('info')
28
29
 
29
- options = { show_help: false, source: 'https://github.com/CuppenResearch/bioruby-vcf', version: version+' (Pjotr Prins)', date: Time.now.to_s, thread_lines: 40_000 }
30
+ options = { show_help: false, source: 'https://github.com/CuppenResearch/bioruby-vcf', version: version+' (Pjotr Prins)', date: Time.now.to_s, thread_lines: 40_000, timeout: 180 }
30
31
  opts = OptionParser.new do |o|
31
32
  o.banner = "Usage: #{File.basename($0)} [options] filename\ne.g. #{File.basename($0)} < test/data/input/somaticsniper.vcf"
32
33
 
@@ -68,6 +69,9 @@ opts = OptionParser.new do |o|
68
69
  o.on('--eval-once cmd',String, 'Evaluate command once (usually for header info)') do |cmd|
69
70
  options[:eval_once] = true
70
71
  options[:eval] = cmd
72
+ # options[:num_threads] = 1
73
+ # options[:thread_lines] = 1
74
+ options[:skip_header] = true
71
75
  end
72
76
  o.on('--seval cmd',String, 'Evaluate command on each sample') do |cmd|
73
77
  options[:seval] = cmd
@@ -112,7 +116,14 @@ opts = OptionParser.new do |o|
112
116
  options[:template] = s
113
117
  options[:skip_header] = true
114
118
  end
115
-
119
+
120
+ o.on("--add-header-tag", "Add bio-vcf status tag to header output") do |t|
121
+ options[:tag] = true
122
+ end
123
+
124
+ o.on("--timeout [num]", Integer, "Timeout waiting for thread to complete (default #{options[:timeout]})") do |i|
125
+ options[:timeout] = i
126
+ end
116
127
 
117
128
  # Uncomment the following when using the bio-logger
118
129
  # o.separator ""
@@ -137,9 +148,10 @@ opts = OptionParser.new do |o|
137
148
  options[:verbose] = true
138
149
  end
139
150
 
140
- # o.on("--debug", "Show debug messages") do |v|
141
- # Bio::Log::CLI.trace('debug')
142
- # end
151
+ o.on("--debug", "Show debug messages") do |v|
152
+ # Bio::Log::CLI.trace('debug')
153
+ options[:debug] = true
154
+ end
143
155
 
144
156
  o.separator ""
145
157
  o.on_tail('-h', '--help', 'display this help and exit') do
@@ -150,8 +162,8 @@ end
150
162
  opts.parse!(ARGV)
151
163
 
152
164
  BIOVCF_VERSION=version
153
- BIOVCF_BANNER = "vcf #{version} (biogem Ruby #{RUBY_VERSION}) by Pjotr Prins 2014\n" if !options[:quiet]
154
- $stderr.print BIOVCF_BANNER
165
+ BIOVCF_BANNER = "bio-vcf #{version} (biogem Ruby #{RUBY_VERSION} with pcows) by Pjotr Prins 2015\n"
166
+ $stderr.print BIOVCF_BANNER if !options[:quiet]
155
167
 
156
168
  if options[:show_help]
157
169
  print opts
@@ -174,15 +186,6 @@ if options[:template]
174
186
  template = Bio::Template.new(fn)
175
187
  end
176
188
 
177
- if options[:num_threads] != 1
178
- begin
179
- require 'parallel'
180
- rescue LoadError
181
- $stderr.print "Error: Missing 'parallel' module. Install with command 'gem install parallel' if you want multiple threads\n"
182
- options[:num_threads] = 1
183
- end
184
- end
185
-
186
189
  stats = nil
187
190
  if options[:statistics]
188
191
  options[:num_threads] = nil
@@ -202,7 +205,7 @@ include BioVcf
202
205
 
203
206
  # Parse the header section of a VCF file (chomping STDIN)
204
207
  def parse_header line, samples, options
205
- header = VcfHeader.new
208
+ header = VcfHeader.new(options[:debug])
206
209
  header.add(line)
207
210
  print line if not options[:skip_header]
208
211
  STDIN.each_line do | headerline |
@@ -214,7 +217,7 @@ def parse_header line, samples, options
214
217
  if not options[:skip_header]
215
218
  if headerline =~ /^#CHR/
216
219
  # The header before actual data contains the sample names, first inject the BioVcf meta information
217
- print header.tag(options),"\n" if not options[:skip_header]
220
+ print header.tag(options),"\n" if options[:tag] and not options[:skip_header]
218
221
  selected = header.column_names
219
222
  if samples
220
223
  newfields = selected[0..8]
@@ -234,7 +237,7 @@ def parse_header line, samples, options
234
237
  return header,line
235
238
  end
236
239
 
237
- # Parse a VCF line and return the result as a string
240
+ # Parse a VCF line and return the (template) result as a string buffer
238
241
  def parse_line line,header,options,bedfilter,samples,template,stats=nil
239
242
  fields = VcfLine.parse(line)
240
243
  rec = VcfRecord.new(fields,header)
@@ -261,7 +264,7 @@ def parse_line line,header,options,bedfilter,samples,template,stats=nil
261
264
  return if not bed
262
265
  end
263
266
 
264
- return if filter and not rec.filter(filter,ignore_missing_data: ignore_missing,quiet: quiet)
267
+ return if filter and not rec.gfilter(filter,ignore_missing_data: ignore_missing,quiet: quiet)
265
268
 
266
269
  if sfilter
267
270
  rec.each_sample(options[:sfilter_samples]) do | sample |
@@ -320,13 +323,12 @@ def parse_line line,header,options,bedfilter,samples,template,stats=nil
320
323
  exit 1
321
324
  end
322
325
  return results.to_s+"\n" if results
323
- exit(1) if options[:eval_once] # <--- can this be reached?
324
326
  else
325
327
  if options[:rdf]
326
328
  # Output Turtle RDF
327
329
  VcfRdf::record(options[:id],rec,options[:tags])
328
330
  elsif options[:template]
329
- # Ruby ERB template
331
+ # Use ERB template
330
332
  begin
331
333
  template.body(binding)
332
334
  rescue Exception => e
@@ -347,13 +349,11 @@ def parse_line line,header,options,bedfilter,samples,template,stats=nil
347
349
  end
348
350
  end
349
351
 
352
+ pcows = PCOWS.new(options[:num_threads],'bio-vcf',options[:timeout])
350
353
  header = nil
351
354
  header_output_completed = false
352
- NUM_THREADS = options[:num_threads]
353
355
  CHUNK_SIZE = options[:thread_lines]
354
- CHUNK_NUM = (NUM_THREADS && NUM_THREADS>6 ? NUM_THREADS*4 : 24)
355
- chunks = []
356
- lines = []
356
+ chunk_lines = []
357
357
  line_number=0
358
358
 
359
359
  if options[:bed]
@@ -361,6 +361,7 @@ if options[:bed]
361
361
  end
362
362
 
363
363
  begin
364
+ # Define linear parser function (going through one chunk)
364
365
  process = lambda { | lines |
365
366
  res = []
366
367
  lines.each do | line |
@@ -368,20 +369,17 @@ begin
368
369
  end
369
370
  res
370
371
  }
371
- output = lambda { |collection|
372
- collection.each do | result |
373
- result.each { |line| print line }
374
- end
375
- } # end output
376
372
 
377
373
  # ---- Main loop
378
374
  STDIN.each_line do | line |
379
375
  line_number += 1
380
- # ---- In this section header information is handled
381
376
 
382
377
  # ---- Skip embedded headers down the line...
383
378
  next if header_output_completed and line =~ /^#/
384
379
 
380
+ # ---- In the following section header information is handled -
381
+ # this only happens once.
382
+
385
383
  # ---- Parse the header lines (chomps from STDIN)
386
384
  # and returns header info and the current line
387
385
  if line =~ /^#/
@@ -399,50 +397,40 @@ begin
399
397
  header_output_completed = true
400
398
  end
401
399
 
402
- # ---- In this section the VCF variant lines are parsed
403
- lines << line
404
- if NUM_THREADS == 1
405
- $stderr.print '.' if line_number % CHUNK_SIZE == 0 and not options[:quiet]
406
- if lines.size > CHUNK_SIZE
407
- process.call(lines).each { | l | print l }
408
- lines = []
409
- end
410
- else
411
- if lines.size > CHUNK_SIZE
412
- chunks << lines
413
- if chunks.size > CHUNK_NUM
414
- $stderr.print '.' if not options[:quiet]
415
- out = Parallel.map(chunks, :in_processes => NUM_THREADS) { | chunk |
416
- process.call(chunk)
417
- }
418
- chunks = []
419
- # Output is forked to a separate process too
420
- fork do
421
- output.call out
422
- STDOUT.flush
423
- STDOUT.close
424
- exit 0
425
- end
426
- end
427
- lines = []
428
- end
400
+ if options[:eval_once]
401
+ # this happens if we only want one line evaluated - say to get
402
+ # the number of samples
403
+ print parse_line(line,header,options,bedfilter,samples,template,stats)
404
+ exit 0
405
+ end
406
+
407
+ # ---- Lines are collected in one buffer and the lines buffer
408
+ # is added to the chunks list (for the threads)
409
+ chunk_lines << line
410
+
411
+ # ---- In the following section the VCF lines are parsed by chunks
412
+ # The chunks may go into different threads
413
+
414
+ if chunk_lines.size > CHUNK_SIZE
415
+ # ---- process one chunk
416
+ $stderr.print '.' if not options[:quiet]
417
+ pcows.wait_for_worker_slot()
418
+ pcows.submit_worker(process,chunk_lines)
419
+ pcows.process_output()
420
+
421
+ chunk_lines = []
429
422
  end
430
423
  end
431
- $stderr.print '.' if not options[:quiet]
432
- if NUM_THREADS == 1
433
- process.call(lines).each { |l| print l}
434
- else
435
- chunks << lines
436
- output.call Parallel.map(chunks, :in_processes => NUM_THREADS) { | chunk |
437
- process.call(chunk)
438
- }
439
- end
424
+ pcows.submit_worker(process,chunk_lines)
425
+ pcows.wait_for_workers()
426
+ pcows.process_remaining_output()
427
+
440
428
  print template.footer(binding) if template
441
429
  stats.print if stats
442
430
 
443
431
  rescue Exception => e
444
432
  # $stderr.print line
445
- $stderr.print e.message,"\n"
433
+ $stderr.print e.message,"\n" if e.message != 'exit'
446
434
  raise if options[:verbose]
447
435
  exit 1
448
436
  end
@@ -1,15 +1,13 @@
1
- # Generated by jeweler
2
- # DO NOT EDIT THIS FILE DIRECTLY
3
- # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
1
+ # No longer generated by jeweler
4
2
  # -*- encoding: utf-8 -*-
5
3
 
6
4
  Gem::Specification.new do |s|
7
5
  s.name = "bio-vcf"
8
- s.version = "0.8.2"
6
+ s.version = "0.9.0"
9
7
 
10
8
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
9
  s.authors = ["Pjotr Prins"]
12
- s.date = "2014-12-28"
10
+ # s.date = "2015-12-28"
13
11
  s.description = "Smart lazy multi-threaded parser for VCF format with useful filtering and output rewriting"
14
12
  s.email = "pjotr.public01@thebird.nl"
15
13
  s.executables = ["bio-vcf"]
@@ -20,66 +18,19 @@ Gem::Specification.new do |s|
20
18
  s.files = [
21
19
  ".travis.yml",
22
20
  "Gemfile",
23
- "Gemfile.lock",
24
21
  "LICENSE.txt",
25
22
  "README.md",
26
23
  "Rakefile",
27
24
  "VERSION",
28
25
  "bin/bio-vcf",
29
26
  "bio-vcf.gemspec",
30
- "features/cli.feature",
31
- "features/diff_count.feature",
32
- "features/multisample.feature",
33
- "features/sfilter.feature",
34
- "features/somaticsniper.feature",
35
- "features/step_definitions/bio-vcf_steps.rb",
36
- "features/step_definitions/cli-feature.rb",
37
- "features/step_definitions/diff_count.rb",
38
- "features/step_definitions/multisample.rb",
39
- "features/step_definitions/sfilter.rb",
40
- "features/step_definitions/somaticsniper.rb",
41
- "features/step_definitions/vcf_header.rb",
42
- "features/support/env.rb",
43
- "features/vcf_header.feature",
44
- "lib/bio-vcf.rb",
45
- "lib/bio-vcf/bedfilter.rb",
46
- "lib/bio-vcf/template.rb",
47
- "lib/bio-vcf/utils.rb",
48
- "lib/bio-vcf/variant.rb",
49
- "lib/bio-vcf/vcf.rb",
50
- "lib/bio-vcf/vcfgenotypefield.rb",
51
- "lib/bio-vcf/vcfheader.rb",
52
- "lib/bio-vcf/vcfheader_line.rb",
53
- "lib/bio-vcf/vcfline.rb",
54
- "lib/bio-vcf/vcfrdf.rb",
55
- "lib/bio-vcf/vcfrecord.rb",
56
- "lib/bio-vcf/vcfsample.rb",
57
- "lib/bio-vcf/vcfstatistics.rb",
58
- "ragel/gen_vcfheaderline_parser.rb",
59
27
  "ragel/gen_vcfheaderline_parser.rl",
60
28
  "ragel/generate.sh",
61
- "template/gatk_vcf2rdf.erb",
62
- "template/vcf2json.erb",
63
- "template/vcf2json_full_header.erb",
64
- "template/vcf2json_use_meta.erb",
65
- "template/vcf2rdf.erb",
66
- "template/vcf2rdf_header.erb",
67
- "test/data/input/dbsnp.vcf",
68
- "test/data/input/multisample.vcf",
69
- "test/data/input/somaticsniper.vcf",
70
- "test/data/regression/eval_r.info.dp.ref",
71
- "test/data/regression/ifilter_s.dp.ref",
72
- "test/data/regression/r.info.dp.ref",
73
- "test/data/regression/rewrite.info.sample.ref",
74
- "test/data/regression/s.dp.ref",
75
- "test/data/regression/seval_s.dp.ref",
76
- "test/data/regression/sfilter_seval_s.dp.ref",
77
- "test/data/regression/thread4.ref",
78
- "test/data/regression/thread4_4.ref",
79
- "test/data/regression/thread4_4_failed_filter-stderr.ref",
80
- "test/data/regression/vcf2json_full_header.ref",
81
- "test/performance/metrics.md"
82
29
  ]
30
+ s.files += Dir['lib/**/*.rb'] + Dir['bin/*']
31
+ s.files += Dir['[A-Z]*'] + Dir['test/**/*'] + Dir['features/**/*'] +
32
+ Dir['template/**/*']
33
+
83
34
  s.homepage = "http://github.com/pjotrp/bioruby-vcf"
84
35
  s.licenses = ["MIT"]
85
36
  s.require_paths = ["lib"]
@@ -87,25 +38,22 @@ Gem::Specification.new do |s|
87
38
  s.rubygems_version = "2.0.3"
88
39
  s.summary = "Fast multi-threaded VCF parser"
89
40
 
90
- if s.respond_to? :specification_version then
91
- s.specification_version = 4
41
+ # if s.respond_to? :specification_version then
42
+ # s.specification_version = 4
92
43
 
93
- if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
94
- s.add_development_dependency(%q<rspec>, [">= 2.14.0"])
95
- s.add_development_dependency(%q<cucumber>, [">= 1.3.11"])
96
- s.add_development_dependency(%q<jeweler>, [">= 2.0.1"])
97
- s.add_development_dependency(%q<regressiontest>, [">= 0.0.3"])
98
- else
99
- s.add_dependency(%q<rspec>, [">= 2.14.0"])
100
- s.add_dependency(%q<cucumber>, [">= 1.3.11"])
101
- s.add_dependency(%q<jeweler>, [">= 2.0.1"])
102
- s.add_dependency(%q<regressiontest>, [">= 0.0.3"])
103
- end
104
- else
105
- s.add_dependency(%q<rspec>, [">= 2.14.0"])
106
- s.add_dependency(%q<cucumber>, [">= 1.3.11"])
107
- s.add_dependency(%q<jeweler>, [">= 2.0.1"])
108
- s.add_dependency(%q<regressiontest>, [">= 0.0.3"])
109
- end
44
+ # if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
45
+ # s.add_development_dependency(%q<rspec>, [">= 2.14.0"])
46
+ # s.add_development_dependency(%q<cucumber>, [">= 1.3.11"])
47
+ # s.add_development_dependency(%q<regressiontest>, [">= 0.0.3"])
48
+ # else
49
+ # s.add_dependency(%q<rspec>, [">= 2.14.0"])
50
+ # s.add_dependency(%q<cucumber>, [">= 1.3.11"])
51
+ # s.add_dependency(%q<regressiontest>, [">= 0.0.3"])
52
+ # end
53
+ # else
54
+ # s.add_dependency(%q<rspec>, [">= 2.14.0"])
55
+ # s.add_dependency(%q<cucumber>, [">= 1.3.11"])
56
+ # s.add_dependency(%q<regressiontest>, [">= 0.0.3"])
57
+ # end
110
58
  end
111
59
 
@@ -48,6 +48,11 @@ Feature: Command-line interface (CLI)
48
48
  When I execute "./bin/bio-vcf --rewrite rec.info[\'sample\']=\'XXXXX\'"
49
49
  Then I expect the named output to match the named output "rewrite.info.sample"
50
50
 
51
+ Scenario: Test eval-once
52
+ Given I have input file(s) named "test/data/input/multisample.vcf"
53
+ When I execute "./bin/bio-vcf --eval-once header.meta[\'GATKCommandLine\']"
54
+ Then I expect the named output to match the named output "eval_once"
55
+
51
56
  Scenario: Test JSON output with header meta data
52
57
  Given I have input file(s) named "test/data/input/multisample.vcf"
53
58
  When I execute "./bin/bio-vcf --template template/vcf2json_full_header.erb"
@@ -60,7 +65,7 @@ Feature: Command-line interface (CLI)
60
65
 
61
66
  Scenario: Test deadlock on failed filter with threads
62
67
  Given I have input file(s) named "test/data/input/multisample.vcf"
63
- When I execute "./bin/bio-vcf --num-threads 4 --thread-lines 4 --filter 't.info.dp>2'"
68
+ When I execute "./bin/bio-vcf -q --timeout 2 --num-threads 4 --thread-lines 4 --filter 't.info.dp>2'"
64
69
  Then I expect an error and the named output to match the named output "thread4_4_failed_filter" in under 30 seconds
65
70
 
66
71
 
@@ -25,7 +25,10 @@ Feature: Multi-sample VCF
25
25
  And I expect rec.info.ac to be 5
26
26
  And I expect rec.info.af to be 0.357
27
27
  And I expect rec.info.dp to be 1537
28
+ And I expect rec.info['dp'] to be 1537
28
29
  And I expect rec.info.readposranksum to be 0.815
30
+ And I expect rec.info['ReadPosRankSum'] to be 0.815
31
+ And I expect rec.info.fields to contain ["AC", "AF", "AN", "BASEQRANKSUM", "DP", "DELS", "FS", "HAPLOTYPESCORE", "MLEAC", "MLEAF", "MQ", "MQ0", "MQRANKSUM", "QD", "READPOSRANKSUM"]
29
32
  And I expect rec.sample['Original'].ad to be [189,25]
30
33
  And I expect rec.sample['Original'].gt to be "0/1"
31
34
  And I expect rec.sample['s3t2'].ad to be [167,26]
@@ -76,3 +79,12 @@ Feature: Multi-sample VCF
76
79
  And I expect r.original.gts to be ["C","G"]
77
80
  And I expect r.original.gts[0] to be "C"
78
81
  And I expect r.original.gts[1] to be "G"
82
+
83
+ # INFO fields with matching tails
84
+ Given multisample vcf line
85
+ """
86
+ 1 10723 . C G 73.85 . AC=4;AF=0.667;CIEND=999;END=111;AN=6;BaseQRankSum=1.300;DP=18;Dels=0.00;FS=3.680;HaplotypeScore=0.0000;MLEAC=4;MLEAF=0.667;MQ=20.49;MQ0=11;MQRankSum=1.754;QD=8.21;ReadPosRankSum=0.000 GT:AD:DP:GQ:PL 0|1 ./. 1/1:2,2:4:6:66,6,0 1/1:4,1:5:3:36,3,0 ./. ./. 0/0:6,0:6:3:0,3,33
87
+ """
88
+ When I parse the record
89
+ Then I expect r.info.end to be 111
90
+ And I expect r.info.ciend to be 999
@@ -8,9 +8,9 @@ When /^I execute "(.*?)"$/ do |arg1|
8
8
  end
9
9
 
10
10
  Then(/^I expect the named output to match the named output "(.*?)"$/) do |arg1|
11
- RegressionTest::CliExec::exec(@cmd,arg1,ignore: '(##BioVcf|date|"version":)').should be_true
11
+ RegressionTest::CliExec::exec(@cmd,arg1,ignore: '(##BioVcf|date|"version":)').should be_truthy
12
12
  end
13
13
 
14
14
  Then(/^I expect an error and the named output to match the named output "(.*?)" in under (\d+) seconds$/) do |arg1,arg2|
15
- RegressionTest::CliExec::exec(@cmd,arg1,ignore: '(FATAL|Waiting|from|vcf|Options|Final pid)',should_fail: true,timeout:arg2.to_i).should be_true
15
+ RegressionTest::CliExec::exec(@cmd,arg1,ignore: '(FATAL|Waiting|from|vcf|Options|Final pid)',should_fail: true,timeout:arg2.to_i).should be_truthy
16
16
  end