bio-vcf 0.8.1 → 0.9.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (84) hide show
  1. checksums.yaml +5 -5
  2. data/.travis.yml +1 -11
  3. data/Gemfile +2 -8
  4. data/LICENSE.txt +1 -1
  5. data/README.md +467 -129
  6. data/RELEASE_NOTES.md +27 -0
  7. data/RELEASE_NOTES.md~ +11 -0
  8. data/Rakefile +9 -42
  9. data/TAGS +115 -0
  10. data/VERSION +1 -1
  11. data/bin/bio-vcf +156 -108
  12. data/bio-vcf.gemspec +13 -75
  13. data/features/cli.feature +22 -4
  14. data/features/diff_count.feature +0 -1
  15. data/features/filter.feature +12 -0
  16. data/features/multisample.feature +12 -0
  17. data/features/somaticsniper.feature +2 -0
  18. data/features/step_definitions/cli-feature.rb +15 -6
  19. data/features/step_definitions/diff_count.rb +1 -1
  20. data/features/step_definitions/multisample.rb +19 -0
  21. data/features/step_definitions/somaticsniper.rb +9 -1
  22. data/features/step_definitions/vcf_header.rb +48 -0
  23. data/features/support/env.rb +1 -11
  24. data/features/vcf_header.feature +35 -0
  25. data/lib/bio-vcf.rb +1 -0
  26. data/lib/bio-vcf/pcows.rb +303 -0
  27. data/lib/bio-vcf/vcffile.rb +46 -0
  28. data/lib/bio-vcf/vcfgenotypefield.rb +19 -19
  29. data/lib/bio-vcf/vcfheader.rb +137 -5
  30. data/lib/bio-vcf/vcfheader_line.rb +778 -0
  31. data/lib/bio-vcf/vcfrecord.rb +56 -18
  32. data/lib/bio-vcf/vcfsample.rb +26 -2
  33. data/lib/regressiontest.rb +11 -0
  34. data/lib/regressiontest/cli_exec.rb +101 -0
  35. data/ragel/gen_vcfheaderline_parser.rl +165 -0
  36. data/ragel/generate.sh +8 -0
  37. data/template/vcf2json.erb +16 -16
  38. data/template/vcf2json_full_header.erb +22 -0
  39. data/template/vcf2json_use_meta.erb +41 -0
  40. data/test/data/input/empty.vcf +2 -0
  41. data/test/data/input/gatk_exome.vcf +237 -0
  42. data/test/data/input/gatk_wgs.vcf +1000 -0
  43. data/test/data/input/test.bed +632 -0
  44. data/test/data/regression/empty-stderr.new +12 -0
  45. data/test/data/regression/empty.new +2 -0
  46. data/test/data/regression/empty.ref +2 -0
  47. data/test/data/regression/eval_once-stderr.new +2 -0
  48. data/test/data/regression/eval_once.new +1 -0
  49. data/test/data/regression/eval_once.ref +1 -0
  50. data/test/data/regression/eval_r.info.dp-stderr.new +10 -0
  51. data/test/data/regression/eval_r.info.dp.new +150 -0
  52. data/test/data/regression/ifilter_s.dp-stderr.new +34 -0
  53. data/test/data/regression/ifilter_s.dp.new +31 -0
  54. data/test/data/regression/pass1-stderr.new +10 -0
  55. data/test/data/regression/pass1.new +88 -0
  56. data/test/data/regression/pass1.ref +88 -0
  57. data/test/data/regression/r.info.dp-stderr.new +4 -0
  58. data/test/data/regression/r.info.dp.new +114 -0
  59. data/test/data/regression/rewrite.info.sample-stderr.new +10 -0
  60. data/test/data/regression/rewrite.info.sample.new +150 -0
  61. data/test/data/regression/s.dp-stderr.new +18 -0
  62. data/test/data/regression/s.dp.new +145 -0
  63. data/test/data/regression/seval_s.dp-stderr.new +10 -0
  64. data/test/data/regression/seval_s.dp.new +36 -0
  65. data/test/data/regression/sfilter_seval_s.dp-stderr.new +18 -0
  66. data/test/data/regression/sfilter_seval_s.dp.new +31 -0
  67. data/test/data/regression/thread4-stderr.new +10 -0
  68. data/test/data/regression/thread4.new +150 -0
  69. data/test/data/regression/thread4_4-stderr.new +25 -0
  70. data/test/data/regression/thread4_4.new +130 -0
  71. data/test/data/regression/thread4_4_failed_filter-stderr.new +5 -0
  72. data/test/data/regression/thread4_4_failed_filter-stderr.ref +5 -2
  73. data/test/data/regression/thread4_4_failed_filter.new +110 -0
  74. data/test/data/regression/vcf2json_full_header-stderr.new +10 -0
  75. data/test/data/regression/vcf2json_full_header.new +225 -0
  76. data/test/data/regression/vcf2json_full_header.ref +225 -0
  77. data/test/data/regression/vcf2json_use_meta-stderr.new +10 -0
  78. data/test/data/regression/vcf2json_use_meta.new +4697 -0
  79. data/test/data/regression/vcf2json_use_meta.ref +4697 -0
  80. data/test/performance/metrics.md +18 -1
  81. data/test/stress/stress_test.sh +15 -0
  82. data/test/tmp/test.vcf +12469 -0
  83. metadata +63 -64
  84. data/Gemfile.lock +0 -81
@@ -13,38 +13,49 @@ module BioVcf
13
13
  end
14
14
  end
15
15
 
16
- # Set INFO fields (used by --rewrite)
17
- def []= k, v
18
- split_fields if not @h
19
- kupper = k.upcase
20
- @h[kupper] = v
21
- @original_key[kupper] = k
22
- end
23
-
24
- def method_missing(m, *args, &block)
16
+ def [] k
25
17
  # split_fields if not @h
26
18
  # /#{m}=(?<value>[^;])/.@info
19
+ kupper = k.upcase
27
20
  v = if @h
28
- @h[m.to_s.upcase]
21
+ @h[kupper]
29
22
  else
30
- @info =~ /#{m.to_s}=([^;]+)/i
23
+ @info =~ /[\A;]#{k}=([^;]+)/i
31
24
  value = $1
32
25
  # p [m,value]
33
26
  # m = @info.match(/#{m.to_s.upcase}=(?<value>[^;]+)/) slower!
34
27
  # value = m[:value]
35
28
  if value == nil
36
29
  split_fields # no option but to split
37
- @h[m.to_s.upcase]
30
+ @h[kupper]
38
31
  else
39
32
  value
40
33
  end
41
34
  end
42
35
  ConvertStringToValue::convert(v)
36
+ end
37
+
38
+ # Set INFO fields (used by --rewrite)
39
+ def []= k, v
40
+ split_fields if not @h
41
+ kupper = k.upcase
42
+ @h[kupper] = v
43
+ @original_key[kupper] = k
44
+ end
45
+
46
+ def fields
47
+ split_fields
48
+ @h.keys
49
+ end
50
+
51
+ def method_missing(m, *args, &block)
52
+ self[m.to_s]
43
53
  end
44
54
 
45
55
  private
46
56
 
47
57
  def split_fields
58
+ return @h if @h
48
59
  @h = {}
49
60
  @original_key = {}
50
61
  @info.split(/;/).each do |f|
@@ -151,6 +162,10 @@ module BioVcf
151
162
  @qual ||= @fields[5].to_f
152
163
  end
153
164
 
165
+ def filter
166
+ @filter ||= @fields[6]
167
+ end
168
+
154
169
  def info
155
170
  @info ||= VcfRecordParser.get_info(@fields[7])
156
171
  end
@@ -184,15 +199,21 @@ module BioVcf
184
199
  end
185
200
 
186
201
  def sample_by_index i
187
- # p @fields
188
202
  raise "Can not index sample on parameter <#{i}>" if not i.kind_of?(Integer)
189
203
  @sample_by_index[i] ||= VcfGenotypeField.new(@fields[i+9],format,@header,ref,alt)
190
204
  end
191
205
 
192
206
  # Walk the samples. list contains an Array of int (the index)
193
207
  def each_sample(list = nil)
194
- list = @header.samples_index_array() if not list
195
- list.each { |i| yield VcfSample::Sample.new(self,sample_by_index(i.to_i)) }
208
+ @header.sample_subset_index(list).each { |i|
209
+ yield VcfSample::Sample.new(self,sample_by_index(i))
210
+ }
211
+ end
212
+
213
+ def samples
214
+ list = []
215
+ each_sample { |s| list << s }
216
+ list
196
217
  end
197
218
 
198
219
  def missing_samples?
@@ -229,6 +250,7 @@ module BioVcf
229
250
  $stderr.print "RECORD ERROR!\n"
230
251
  $stderr.print [@fields],"\n"
231
252
  $stderr.print expr,"\n"
253
+ $stderr.print "To ignore this error use the -i switch!\n"
232
254
  end
233
255
  if ignore_missing_data
234
256
  $stderr.print e.message if not quiet
@@ -239,19 +261,19 @@ module BioVcf
239
261
  end
240
262
  end
241
263
 
242
- def filter expr, ignore_missing_data: true, quiet: false
264
+ def gfilter expr, ignore_missing_data: true, quiet: false
243
265
  begin
244
266
  if not respond_to?(:call_cached_filter)
245
267
  code =
246
268
  """
247
- def call_cached_filter(rec,fields)
269
+ def call_cached_gfilter(rec,fields)
248
270
  r = rec
249
271
  #{expr}
250
272
  end
251
273
  """
252
274
  self.class.class_eval(code)
253
275
  end
254
- res = call_cached_filter(self,@fields)
276
+ res = call_cached_gfilter(self,@fields)
255
277
  if res.kind_of?(Array)
256
278
  res.join("\t")
257
279
  else
@@ -262,6 +284,7 @@ module BioVcf
262
284
  $stderr.print "RECORD ERROR!\n"
263
285
  $stderr.print [@fields],"\n"
264
286
  $stderr.print expr,"\n"
287
+ $stderr.print "To ignore this error use the -i switch!\n"
265
288
  end
266
289
  if ignore_missing_data
267
290
  $stderr.print e.message if not quiet
@@ -272,6 +295,21 @@ module BioVcf
272
295
  end
273
296
  end
274
297
 
298
+ def add_to_filter_field str
299
+ filter = @fields[6]
300
+ if not filter or filter == '.' or filter == 'PASS'
301
+ filter = str
302
+ else
303
+ values = filter.split(/;/)
304
+ if not values.include?(str)
305
+ filter = filter +';'+str
306
+ end
307
+ end
308
+ filter = '.' if filter == nil or filter == ''
309
+ @fields[6] = filter
310
+ filter
311
+ end
312
+
275
313
  # Return the sample
276
314
  def method_missing(m, *args, &block)
277
315
  name = m.to_s
@@ -3,7 +3,7 @@ module BioVcf
3
3
 
4
4
  # Check whether a sample is empty (on the raw string value)
5
5
  def VcfSample::empty? s
6
- s==nil or s == './.' or s == '' or s[0..2]=='./.'
6
+ s==nil or s == './.' or s == '' or s[0..2]=='./.' or s[0..1] == '.:'
7
7
  end
8
8
 
9
9
  class Sample
@@ -40,9 +40,24 @@ module BioVcf
40
40
  # Split GT into index values
41
41
  def gti
42
42
  v = fetch_values("GT")
43
+ v = './.' if v == '.' #In case that you have a single missing value, make both as missing.
43
44
  v.split(/[\/\|]/).map{ |v| (v=='.' ? nil : v.to_i) }
44
45
  end
45
46
 
47
+ def gtindex
48
+ v = fetch_values("GT")
49
+ return case v
50
+ when nil then nil
51
+ when '.' then nil
52
+ when './.' then nil
53
+ when '0/0' then 0
54
+ when '0/1' then 1
55
+ when '1/1' then 2
56
+ else
57
+ raise "Unknown genotype #{v}"
58
+ end
59
+ end
60
+
46
61
  # Split GT into a nucleotide sequence
47
62
  def gts
48
63
  gti.map { |i| (i ? @rec.get_gt(i) : nil) }
@@ -51,7 +66,16 @@ module BioVcf
51
66
  def cache_method(name, &block)
52
67
  self.class.send(:define_method, name, &block)
53
68
  end
54
-
69
+
70
+ def [] name
71
+ if @format[name]
72
+ v = fetch_values(name)
73
+ return nil if VcfValue::empty?(v)
74
+ return ConvertStringToValue::convert(v)
75
+ end
76
+ nil
77
+ end
78
+
55
79
  def method_missing(m, *args, &block)
56
80
  name = m.to_s.upcase
57
81
  # p [:here,name,m ,@values]
@@ -0,0 +1,11 @@
1
+ # Please require your code below, respecting the naming conventions in the
2
+ # bioruby directory tree.
3
+ #
4
+ # For example, say you have a plugin named bio-plugin, the only uncommented
5
+ # line in this file would be
6
+ #
7
+ # require 'bio/bio-plugin/plugin'
8
+ #
9
+ # In this file only require other files. Avoid other source code.
10
+
11
+ require 'regressiontest/cli_exec'
@@ -0,0 +1,101 @@
1
+ require 'fileutils'
2
+
3
+ module RegressionTest
4
+
5
+ DEFAULT_TESTDIR = "test/data/regression"
6
+
7
+ # Regression test runner compares output in ./test/data/regression
8
+ # (by default). The convention is to have a file with names .ref
9
+ # (reference) and create .new
10
+ #
11
+ # You can add an :ignore regex option which ignores lines in the
12
+ # comparison files matching a regex
13
+ #
14
+ # :timeout sets the time out for calling a system command
15
+ #
16
+ # :should_fail expects the system command to return a non-zero exit status
17
+ module CliExec
18
+ FilePair = Struct.new(:outfn,:reffn)
19
+
20
+ def CliExec::exec command, testname, options = {}
21
+ # ---- Find .ref file
22
+ fullname = DEFAULT_TESTDIR + "/" + testname
23
+ basefn = if File.exist?(testname+".ref") || File.exist?(testname+"-stderr.ref")
24
+ testname
25
+ elsif File.exist?(fullname + ".ref") || File.exist?(fullname+"-stderr.ref")
26
+ FileUtils.mkdir_p DEFAULT_TESTDIR
27
+ fullname
28
+ else
29
+ raise "Can not find reference file for #{testname} - expected #{fullname}.ref"
30
+ end
31
+ std_out = FilePair.new(basefn + ".new", basefn + ".ref")
32
+ std_err = FilePair.new(basefn + "-stderr.new", basefn + "-stderr.ref")
33
+ files = [std_out,std_err]
34
+ # ---- Create .new file
35
+ cmd = command + " > #{std_out.outfn} 2>#{std_err.outfn}"
36
+ $stderr.print cmd,"\n"
37
+ exec_ret = nil
38
+ if options[:timeout] && options[:timeout] > 0
39
+ Timeout.timeout(options[:timeout]) do
40
+ begin
41
+ exec_ret = Kernel.system(cmd)
42
+ rescue Timeout::Error
43
+ $stderr.print cmd, " failed to finish in under #{options[:timeout]}\n"
44
+ return false
45
+ end
46
+ end
47
+ else
48
+ exec_ret = Kernel.system(cmd)
49
+ end
50
+ expect_fail = (options[:should_fail] != nil)
51
+ if !expect_fail and exec_ret==0
52
+ $stderr.print cmd," returned an error\n"
53
+ return false
54
+ end
55
+ if expect_fail and exec_ret
56
+ $stderr.print cmd," did not return an error\n"
57
+ return false
58
+ end
59
+ if options[:ignore]
60
+ regex = options[:ignore]
61
+ files.each do |f|
62
+ outfn = f.outfn
63
+ outfn1 = outfn + ".1"
64
+ FileUtils.mv(outfn,outfn1)
65
+ f1 = File.open(outfn1)
66
+ f2 = File.open(outfn,"w")
67
+ f1.each_line do | line |
68
+ f2.print(line) if line !~ /#{regex}/
69
+ end
70
+ f1.close
71
+ f2.close
72
+ FileUtils::rm(outfn1)
73
+ end
74
+ end
75
+ # ---- Compare the two files
76
+ files.each do |f|
77
+ next unless File.exist?(f.reffn)
78
+ return false unless compare_files(f.outfn,f.reffn,options[:ignore])
79
+ end
80
+ return true
81
+ end
82
+
83
+ def CliExec::compare_files fn1, fn2, ignore = nil
84
+ if not File.exist?(fn2)
85
+ FileUtils::cp(fn1,fn2)
86
+ true
87
+ else
88
+ cmd = "diff #{fn2} #{fn1}"
89
+ $stderr.print cmd+"\n"
90
+ return true if Kernel.system(cmd) == true
91
+ # Hmmm. We have a different result. We are going to try again
92
+ # because sometimes threads have not completed
93
+ sleep 0.25
94
+ return true if Kernel.system(cmd) == true
95
+ $stderr.print "If it is correct, execute \"cp #{fn1} #{fn2}\", and run again"
96
+ false
97
+ end
98
+ end
99
+ end
100
+
101
+ end
@@ -0,0 +1,165 @@
1
+ # Ragel lexer for VCF-header
2
+ #
3
+ # This is a compact parser/lexer for the VCF header format. Bio-vcf
4
+ # uses the parser to generate meta information that can be output to
5
+ # (for example) JSON format. The advantage of using ragel as a state
6
+ # engine is that it allows for easy parsing of key-value pairs with
7
+ # syntax checking and, for example, escaped quotes in quoted string
8
+ # values. This ragel parser/lexer generates valid Ruby; it should be
9
+ # fairly trivial to generate python/C/JAVA instead. Note that this
10
+ # edition validates ID and Number fields only. Other fields are
11
+ # dumped 'AS IS'.
12
+ #
13
+ # Note the .rb version is generated from ./ragel/gen_vcfheaderline_parser.rl
14
+ #
15
+ # by Pjotr Prins (c) 2014/2015
16
+
17
+ module BioVcf
18
+
19
+ module VcfHeaderParser
20
+
21
+ module RagelKeyValues
22
+
23
+ def self.debug msg
24
+ # nothing
25
+ # $stderr.print "DEBUG: ",msg,"\n"
26
+ end
27
+
28
+ =begin
29
+ %%{
30
+
31
+ machine simple_lexer;
32
+
33
+ action mark { ts=p }
34
+ action endquoted {
35
+ emit.call(:value,data,ts,p)
36
+ }
37
+
38
+ action kw {
39
+ emit.call(:kw,data,ts,p)
40
+ }
41
+
42
+ squote = "'";
43
+ dquote = '"';
44
+ not_squote_or_escape = [^'\\];
45
+ not_dquote_or_escape = [^"\\];
46
+ escaped_something = /\\./;
47
+ ss = squote ( not_squote_or_escape | escaped_something )* >mark %endquoted squote;
48
+ dd = dquote ( not_dquote_or_escape | escaped_something )* >mark %endquoted dquote;
49
+
50
+ integer = ('+'|'-')?digit+;
51
+ float = ('+'|'-')?digit+'.'digit+;
52
+ assignment = '=';
53
+ identifier = ( alnum (alnum|'.'|'_')* );
54
+ version = ( digit (alnum|'.'|'_'|'-')* );
55
+ str = (ss|dd)* ;
56
+ boolean = '.';
57
+ date = str;
58
+ key_word = ( ('Type'|'Description'|'Source'|identifier - ('ID'|'Number'|'length'|'Version'|'assembly'|'Date'|'CommandLineOptions')) >mark %{ emit.call(:key_word,data,ts,p) } );
59
+ any_value = ( str|( integer|float|boolean|identifier >mark %{ emit.call(:value,data,ts,p) } ));
60
+ id_value = ( identifier >mark %{ emit.call(:value,data,ts,p) } );
61
+
62
+ version_value = ( str| ( version >mark %{ emit.call(:value,data,ts,p) } ));
63
+ date_value = ( date );
64
+ gatk_value = ( str );
65
+ number_value = ( ( integer|boolean|'A'|'R'|'G' ) >mark %{ emit.call(:value,data,ts,p) } );
66
+
67
+ id_kv = ( ( ('ID'|'assembly') %kw '=' id_value ) %{ debug("ID FOUND") } @!{ error_code="Malformed ID"} );
68
+ version_kv = ( ( ('Version') %kw '=' version_value ) @!{ error_code="Version"} );
69
+ number_kv = ( ( ('Number'|'length') %kw '=' number_value ) @!{ error_code="Number"} );
70
+ date_kv = ( ( ('Date') %kw '=' date_value ) %{ debug("DATE FOUND") } @!{ error_code="Date"} );
71
+ gatk_kv = ( ( ('CommandLineOptions') %kw '=' gatk_value ) @!{ error_code="GATK"} );
72
+ key_value = ( id_kv | version_kv | date_kv | number_kv | gatk_kv | (key_word '=' any_value) ) %{ debug("KEY_VALUE found") } >mark @!{ error_code="unknown key-value " };
73
+
74
+ main := ( '##' ('FILTER'|'FORMAT'|'contig'|'INFO'|'ALT'|'GATKCommandLine') '=') (('<'|',') key_value )* '>';
75
+ }%%
76
+ =end
77
+
78
+ %% write data;
79
+ # %% this just fixes syntax highlighting...
80
+
81
+ def self.run_lexer(buf, options = {})
82
+ do_debug = (options[:debug] == true)
83
+ $stderr.print "---> ",buf,"\n" if do_debug
84
+ data = buf.unpack("c*") if(buf.is_a?(String))
85
+ eof = data.length
86
+ values = []
87
+ stack = []
88
+
89
+ emit = lambda { |type, data, ts, p|
90
+ # Print the type and text of the last read token
91
+ # p ts,p
92
+ $stderr.print "EMITTED: #{type}: #{data[ts...p].pack('c*')}\n" if do_debug
93
+ values << [type,data[ts...p].pack('c*')]
94
+ }
95
+
96
+ error_code = nil
97
+
98
+ %% write init;
99
+ %% write exec;
100
+
101
+ raise "ERROR: "+error_code+" in "+buf if error_code
102
+
103
+ begin
104
+ res = {}
105
+ # p values
106
+ values.each_slice(2) do | a,b |
107
+ $stderr.print '*',a,b if do_debug
108
+ keyword = a[1]
109
+ value = b[1]
110
+ value = value.to_i if ['length','Epoch'].index(keyword)
111
+ res[keyword] = value
112
+ # p h[:value] if h[:name]==:identifier or h[:name]==:value or h[:name]==:string
113
+ end
114
+ rescue
115
+ print "ERROR: "
116
+ p values
117
+ raise
118
+ end
119
+ $stderr.print(res,"\n") if do_debug
120
+ res
121
+ end
122
+ end
123
+ end
124
+ end
125
+
126
+ if __FILE__ == $0
127
+
128
+ gatkcommandline = <<LINE1
129
+ ##GATKCommandLine=<ID=CombineVariants,Version=3.2-2-gec30cee,Date="Thu Oct 30 13:41:59 CET 2014",Epoch=1414672919266,CommandLineOptions="analysis_type=CombineVariants input_file=[] showFullBamList=false read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/hpc/cog_bioinf/GENOMES/Homo_sapiens.GRCh37.GATK.illumina/Homo_sapiens.GRCh37.GATK.illumina.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 refactor_NDN_cigar_string=false fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false variant_index_type=DYNAMIC_SEEK variant_index_parameter=-1 logging_level=INFO log_to_file=null help=false version=false variant=[(RodBindingCollection [(RodBinding name=variant source=/hpc/cog_bioinf/data/robert/testIAP/testSubsetExome/tmp/testSubsetExome.filtered_snps.vcf)]), (RodBindingCollection [(RodBinding name=variant2 source=/hpc/cog_bioinf/data/robert/testIAP/testSubsetExome/tmp/testSubsetExome.filtered_indels.vcf)])] out=org.broadinstitute.gatk.engine.io.stubs.VariantContextWriterStub 
no_cmdline_in_header=org.broadinstitute.gatk.engine.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.gatk.engine.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.gatk.engine.io.stubs.VariantContextWriterStub genotypemergeoption=UNSORTED filteredrecordsmergetype=KEEP_IF_ANY_UNFILTERED multipleallelesmergetype=BY_TYPE rod_priority_list=null printComplexMerges=false filteredAreUncalled=false minimalVCF=false excludeNonVariants=false setKey=set assumeIdenticalSamples=false minimumN=1 suppressCommandLineHeader=false mergeInfoWithMaxAC=false filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false">
130
+ LINE1
131
+
132
+ h = {}
133
+ s = gatkcommandline.strip
134
+ # print s,"\n"
135
+ result = BioVcf::VcfHeaderParser::RagelKeyValues.run_lexer(s, debug: true)
136
+ # h[result['ID']] = result
137
+ # p result
138
+
139
+ lines = <<LINES
140
+ ##FILTER=<ID=HaplotypeScoreHigh,Description="HaplotypeScore > 13.0">
141
+ ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
142
+ ##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Total read depth",Extra="Yes?">
143
+ ##FORMAT=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
144
+ ##INFO=<ID=PM,Number=0,Type=Flag,Description="Variant is Precious(Clinical,Pubmed Cited)">
145
+ ##INFO=<ID=VP,Number=1,Type=String,Description="Variation Property. Documentation is at ftp://ftp.ncbi.nlm.nih.gov/snp/specs/dbSNP_BitField_latest.pdf",Source="dbsnp",Version="138">
146
+ ##INFO=<ID=GENEINFO,Number=1,Type=String,Description="Pairs each of gene symbol:gene id. The gene symbol and id are delimited by a colon (:), and each pair is delimited by a vertical bar (|)">
147
+ ##INFO=<ID=CLNHGVS,Number=.,Type=String,Description="Variant names from HGVS. The order of these variants corresponds to the order of the info in the other clinical INFO tags.">
148
+ ##INFO=<ID=CLNHGVS1,Number=.,Type=String,Description="Variant names from \\"HGVS\\". The order of these 'variants' corresponds to the order of the info in the other clinical INFO tags.">
149
+ ##contig=<ID=XXXY12>
150
+ ##contig=<ID=Y,length=59373566>
151
+ LINES
152
+
153
+ h = {}
154
+ lines.strip.split("\n").each { |s|
155
+ # print s,"\n"
156
+ result = BioVcf::VcfHeaderParser::RagelKeyValues.run_lexer(s, debug: true)
157
+ h[result['ID']] = result
158
+ p result
159
+ }
160
+ p h
161
+
162
+ raise "ERROR" if h != {"HaplotypeScoreHigh"=>{"ID"=>"HaplotypeScoreHigh", "Description"=>"HaplotypeScore > 13.0"}, "GT"=>{"ID"=>"GT", "Number"=>"1", "Type"=>"String", "Description"=>"Genotype"}, "DP"=>{"ID"=>"DP", "Number"=>"1", "Type"=>"Integer", "Description"=>"Total read depth", "Extra"=>"Yes?"}, "DP4"=>{"ID"=>"DP4", "Number"=>"4", "Type"=>"Integer", "Description"=>"# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases"}, "PM"=>{"ID"=>"PM", "Number"=>"0", "Type"=>"Flag", "Description"=>"Variant is Precious(Clinical,Pubmed Cited)"}, "VP"=>{"ID"=>"VP", "Number"=>"1", "Type"=>"String", "Description"=>"Variation Property. Documentation is at ftp://ftp.ncbi.nlm.nih.gov/snp/specs/dbSNP_BitField_latest.pdf", "Source"=>"dbsnp", "Version"=>"138"}, "GENEINFO"=>{"ID"=>"GENEINFO", "Number"=>"1", "Type"=>"String", "Description"=>"Pairs each of gene symbol:gene id. The gene symbol and id are delimited by a colon (:), and each pair is delimited by a vertical bar (|)"}, "CLNHGVS"=>{"ID"=>"CLNHGVS", "Number"=>".", "Type"=>"String", "Description"=>"Variant names from HGVS. The order of these variants corresponds to the order of the info in the other clinical INFO tags."}, "CLNHGVS1"=>{"ID"=>"CLNHGVS1", "Number"=>".", "Type"=>"String", "Description"=>"Variant names from \\\"HGVS\\\". The order of these 'variants' corresponds to the order of the info in the other clinical INFO tags."}, "XXXY12"=>{"ID"=>"XXXY12"}, "Y"=>{"ID"=>"Y", "length"=>59373566}}
163
+
164
+
165
+ end # test