bio-vcf 0.8.0 → 0.9.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. checksums.yaml +5 -5
  2. data/.travis.yml +1 -11
  3. data/Gemfile +4 -5
  4. data/Gemfile.lock +28 -65
  5. data/LICENSE.txt +1 -1
  6. data/README.md +387 -107
  7. data/RELEASE_NOTES.md +20 -0
  8. data/RELEASE_NOTES.md~ +11 -0
  9. data/Rakefile +3 -40
  10. data/TAGS +115 -0
  11. data/VERSION +1 -1
  12. data/bin/bio-vcf +176 -109
  13. data/bio-vcf.gemspec +14 -70
  14. data/features/cli.feature +22 -4
  15. data/features/diff_count.feature +0 -1
  16. data/features/filter.feature +12 -0
  17. data/features/multisample.feature +25 -0
  18. data/features/somaticsniper.feature +2 -0
  19. data/features/step_definitions/cli-feature.rb +15 -6
  20. data/features/step_definitions/diff_count.rb +1 -1
  21. data/features/step_definitions/multisample.rb +19 -0
  22. data/features/step_definitions/somaticsniper.rb +9 -1
  23. data/features/step_definitions/vcf_header.rb +48 -0
  24. data/features/support/env.rb +0 -9
  25. data/features/vcf_header.feature +35 -0
  26. data/lib/bio-vcf.rb +2 -0
  27. data/lib/bio-vcf/bedfilter.rb +43 -0
  28. data/lib/bio-vcf/pcows.rb +303 -0
  29. data/lib/bio-vcf/template.rb +75 -0
  30. data/lib/bio-vcf/vcffile.rb +46 -0
  31. data/lib/bio-vcf/vcfgenotypefield.rb +25 -20
  32. data/lib/bio-vcf/vcfheader.rb +146 -6
  33. data/lib/bio-vcf/vcfheader_line.rb +778 -0
  34. data/lib/bio-vcf/vcfrecord.rb +56 -18
  35. data/lib/bio-vcf/vcfsample.rb +27 -3
  36. data/ragel/gen_vcfheaderline_parser.rl +165 -0
  37. data/ragel/generate.sh +8 -0
  38. data/template/vcf2json.erb +19 -7
  39. data/template/vcf2json_full_header.erb +22 -0
  40. data/template/vcf2json_use_meta.erb +41 -0
  41. data/template/vcf2rdf_header.erb +24 -0
  42. data/test/data/input/empty.vcf +2 -0
  43. data/test/data/input/gatk_exome.vcf +237 -0
  44. data/test/data/input/gatk_wgs.vcf +1000 -0
  45. data/test/data/input/test.bed +632 -0
  46. data/test/data/regression/empty-stderr.new +12 -0
  47. data/test/data/regression/empty.new +2 -0
  48. data/test/data/regression/empty.ref +2 -0
  49. data/test/data/regression/eval_once-stderr.new +2 -0
  50. data/test/data/regression/eval_once.new +1 -0
  51. data/test/data/regression/eval_once.ref +1 -0
  52. data/test/data/regression/eval_r.info.dp-stderr.new +10 -0
  53. data/test/data/regression/eval_r.info.dp.new +150 -0
  54. data/test/data/regression/ifilter_s.dp-stderr.new +34 -0
  55. data/test/data/regression/ifilter_s.dp.new +31 -0
  56. data/test/data/regression/pass1-stderr.new +10 -0
  57. data/test/data/regression/pass1.new +88 -0
  58. data/test/data/regression/pass1.ref +88 -0
  59. data/test/data/regression/r.info.dp-stderr.new +4 -0
  60. data/test/data/regression/r.info.dp.new +114 -0
  61. data/test/data/regression/rewrite.info.sample-stderr.new +10 -0
  62. data/test/data/regression/rewrite.info.sample.new +150 -0
  63. data/test/data/regression/s.dp-stderr.new +18 -0
  64. data/test/data/regression/s.dp.new +145 -0
  65. data/test/data/regression/seval_s.dp-stderr.new +10 -0
  66. data/test/data/regression/seval_s.dp.new +36 -0
  67. data/test/data/regression/sfilter_seval_s.dp-stderr.new +18 -0
  68. data/test/data/regression/sfilter_seval_s.dp.new +31 -0
  69. data/test/data/regression/thread4-stderr.new +10 -0
  70. data/test/data/regression/thread4.new +150 -0
  71. data/test/data/regression/thread4_4-stderr.new +25 -0
  72. data/test/data/regression/thread4_4.new +130 -0
  73. data/test/data/regression/thread4_4_failed_filter-stderr.new +5 -0
  74. data/test/data/regression/thread4_4_failed_filter-stderr.ref +5 -1
  75. data/test/data/regression/thread4_4_failed_filter.new +110 -0
  76. data/test/data/regression/vcf2json_full_header-stderr.new +10 -0
  77. data/test/data/regression/vcf2json_full_header.new +225 -0
  78. data/test/data/regression/vcf2json_full_header.ref +225 -0
  79. data/test/data/regression/vcf2json_use_meta-stderr.new +10 -0
  80. data/test/data/regression/vcf2json_use_meta.new +4697 -0
  81. data/test/data/regression/vcf2json_use_meta.ref +4697 -0
  82. data/test/performance/metrics.md +18 -1
  83. data/test/stress/stress_test.sh +15 -0
  84. data/test/tmp/test.vcf +12469 -0
  85. metadata +65 -64
@@ -1,3 +1,14 @@
1
+ # This module parses the VCF header. A header consists of lines
2
+ # containing fields. Most fields are of 'key=value' type and appear
3
+ # only once. These can be retrieved with the find_field method.
4
+ #
5
+ # INFO, FORMAT and contig fields are special as they appear multiple times
6
+ # and contain multiple key values (identified by an ID field).
7
+ # To retrieve these call the 'info', 'format' and 'contig' functions respectively,
8
+ # which return a hash on the contained ID.
9
+ #
10
+ # For the INFO and FORMAT fields a Ragel parser is used, mostly to
11
+ # deal with embedded quoted fields.
1
12
 
2
13
  module BioVcf
3
14
 
@@ -13,21 +24,30 @@ module BioVcf
13
24
  end
14
25
  nil
15
26
  end
27
+
28
+ def VcfHeaderParser.parse_field(line, debug)
29
+ BioVcf::VcfHeaderParser::RagelKeyValues.run_lexer(line, debug: debug)
30
+ end
16
31
  end
17
32
 
18
33
  class VcfHeader
19
34
 
20
- attr_reader :lines
35
+ attr_reader :lines, :field
21
36
 
22
- def initialize
37
+ def initialize(debug = false)
38
+ @debug = debug
23
39
  @lines = []
40
+ @field = {}
41
+ @meta = nil
42
+ @cached_filter_index = {}
24
43
  end
25
44
 
45
+ # Add a new field to the header
26
46
  def add line
27
- @lines << line.strip
47
+ @lines += line.split(/\n/)
28
48
  end
29
49
 
30
- # Add a key value list to the header
50
+ # Push a special key value list to the header
31
51
  def tag h
32
52
  h2 = h.dup
33
53
  [:show_help,:skip_header,:verbose,:quiet,:debug].each { |key| h2.delete(key) }
@@ -60,13 +80,21 @@ module BioVcf
60
80
  end
61
81
 
62
82
  def samples
63
- @samples ||= column_names[9..-1]
83
+ @samples ||= if column_names.size > 8
84
+ column_names[9..-1]
85
+ else
86
+ []
87
+ end
64
88
  end
65
89
 
66
90
  def samples_index_array
67
91
  @all_samples_index ||= column_names[9..-1].fill{|i| i}
68
92
  end
69
93
 
94
+ def num_samples
95
+ @num_samples ||= ( samples == nil ? 0 : samples.size )
96
+ end
97
+
70
98
  def sample_index
71
99
  return @sample_index if @sample_index
72
100
  index = {}
@@ -74,6 +102,118 @@ module BioVcf
74
102
  @sample_index = index
75
103
  index
76
104
  end
77
- end
78
105
 
106
+ # Give a list of samples (by index and/or name) and return 0-based index values
107
+ # The cache has to be able to handle multiple lists - that is why it is a hash.
108
+ def sample_subset_index list
109
+ cached = @cached_filter_index[list]
110
+ if cached
111
+ l = cached
112
+ else
113
+ l = []
114
+ list = samples_index_array() if not list
115
+ list.each { |i|
116
+ value =
117
+ begin
118
+ Integer(i)
119
+ rescue
120
+ idx = samples.index(i)
121
+ if idx != nil
122
+ idx
123
+ else
124
+ raise "Unknown sample name '#{i}'"
125
+ end
126
+ end
127
+ l << value
128
+ }
129
+ @cached_filter_index[list] = l
130
+ end
131
+ l
132
+ end
133
+
134
+ # Look for a line in the header with the field name and return the
135
+ # value, otherwise return nil
136
+ def find_field name
137
+ return field[name] if field[name]
138
+ @lines.each do | line |
139
+ value = line.scan(/###{name}=(.*)/)
140
+ if value[0]
141
+ v = value[0][0]
142
+ field[name] = v
143
+ return v
144
+ end
145
+ end
146
+ nil
147
+ end
148
+
149
+ # Look for all the lines that match the field name and return
150
+ # a hash of hashes. An empty hash is returned when there are
151
+ # no matches.
152
+ def find_fields name
153
+ res = {}
154
+ @lines.each do | line |
155
+ value = line.scan(/###{name}=<(.*)>/)
156
+ if value[0]
157
+ str = value[0][0]
158
+ # p str
159
+ v = VcfHeaderParser.parse_field(line,@debug)
160
+ id = v['ID']
161
+ res[id] = v
162
+ end
163
+ end
164
+ # p res
165
+ res
166
+ end
167
+
168
+ def format
169
+ find_fields('FORMAT')
170
+ end
171
+
172
+ def filter
173
+ find_fields('FILTER')
174
+ end
175
+
176
+ def contig
177
+ find_fields('contig')
178
+ end
179
+
180
+ def info
181
+ find_fields('INFO')
182
+ end
183
+
184
+ def gatkcommandline
185
+ find_fields('GATKCommandLine')
186
+ end
187
+
188
+ def meta
189
+ return @meta if @meta
190
+ res = { 'INFO' => {}, 'FORMAT' => {}, 'FILTER' => {}, 'contig' => {}, 'GATKCommandLine' => {} }
191
+ @lines.each do | line |
192
+ value = line.scan(/##(.*?)=(.*)/)
193
+ if value[0]
194
+ k,v = value[0]
195
+ if k != 'FORMAT' and k != 'INFO' and k != 'FILTER' and k != 'contig' and k != 'GATKCommandLine'
196
+ # p [k,v]
197
+ res[k] = v
198
+ end
199
+ end
200
+ end
201
+ res['INFO'] = info()
202
+ res['FORMAT'] = format()
203
+ res['FILTER'] = filter()
204
+ res['contig'] = contig()
205
+ res['GATKCommandLine'] = gatkcommandline()
206
+ # p [:res, res]
207
+ @meta = res # cache values
208
+ res
209
+ end
210
+
211
+ def method_missing(m, *args, &block)
212
+ name = m.to_s
213
+ value = find_field(name)
214
+ return value if value
215
+ raise "Unknown VCF header query '#{name}'"
216
+ end
217
+
218
+ end
79
219
  end
@@ -0,0 +1,778 @@
1
+
2
+ # line 1 "gen_vcfheaderline_parser.rl"
3
+ # Ragel lexer for VCF-header
4
+ #
5
+ # This is a compact parser/lexer for the VCF header format. Bio-vcf
6
+ # uses the parser to generate meta information that can be output to
7
+ # (for example) JSON format. The advantage of using ragel as a state
8
+ # engine is that it allows for easy parsing of key-value pairs with
9
+ # syntax checking and, for example, escaped quotes in quoted string
10
+ # values. This ragel parser/lexer generates valid Ruby; it should be
11
+ # fairly trivial to generate Python/C/Java instead. Note that this
12
+ # edition validates ID and Number fields only. Other fields are
13
+ # dumped 'AS IS'.
14
+ #
15
+ # Note the .rb version is generated from ./ragel/gen_vcfheaderline_parser.rl
16
+ #
17
+ # by Pjotr Prins (c) 2014/2015
18
+
19
+ module BioVcf
20
+
21
+ module VcfHeaderParser
22
+
23
+ module RagelKeyValues
24
+
25
+ def self.debug msg
26
+ # nothing
27
+ # $stderr.print "DEBUG: ",msg,"\n"
28
+ end
29
+
30
+ =begin
31
+
32
+ # line 75 "gen_vcfheaderline_parser.rl"
33
+
34
+ =end
35
+
36
+
37
+ # line 38 "gen_vcfheaderline_parser.rb"
38
+ class << self
39
+ attr_accessor :_simple_lexer_actions
40
+ private :_simple_lexer_actions, :_simple_lexer_actions=
41
+ end
42
+ self._simple_lexer_actions = [
43
+ 0, 1, 0, 1, 1, 1, 2, 1,
44
+ 3, 1, 15, 1, 16, 2, 0, 1,
45
+ 2, 4, 15, 2, 6, 15, 2, 7,
46
+ 15, 2, 9, 16, 2, 10, 16, 2,
47
+ 11, 16, 2, 12, 15, 2, 13, 16,
48
+ 2, 14, 16, 3, 5, 8, 15, 6,
49
+ 9, 10, 13, 11, 14, 16
50
+ ]
51
+
52
+ class << self
53
+ attr_accessor :_simple_lexer_key_offsets
54
+ private :_simple_lexer_key_offsets, :_simple_lexer_key_offsets=
55
+ end
56
+ self._simple_lexer_key_offsets = [
57
+ 0, 0, 1, 2, 7, 8, 9, 10,
58
+ 13, 26, 35, 49, 51, 53, 58, 60,
59
+ 62, 62, 62, 64, 70, 72, 77, 80,
60
+ 91, 101, 111, 121, 131, 141, 151, 161,
61
+ 171, 181, 191, 201, 211, 221, 231, 241,
62
+ 251, 261, 270, 275, 277, 279, 279, 281,
63
+ 283, 283, 293, 303, 313, 322, 327, 329,
64
+ 331, 331, 333, 335, 335, 345, 354, 360,
65
+ 371, 381, 391, 401, 411, 421, 430, 438,
66
+ 440, 445, 448, 458, 468, 478, 488, 498,
67
+ 508, 517, 524, 526, 528, 533, 535, 537,
68
+ 537, 537, 549, 559, 569, 579, 589, 599,
69
+ 609, 619, 629, 639, 649, 659, 669, 671,
70
+ 672, 673, 674, 675, 676, 677, 678, 679,
71
+ 680, 681, 682, 683, 684, 685, 686, 687,
72
+ 688, 689, 690, 691, 692, 693, 694, 695,
73
+ 696, 697, 698, 699, 700
74
+ ]
75
+
76
+ class << self
77
+ attr_accessor :_simple_lexer_trans_keys
78
+ private :_simple_lexer_trans_keys, :_simple_lexer_trans_keys=
79
+ end
80
+ self._simple_lexer_trans_keys = [
81
+ 35, 35, 65, 70, 71, 73, 99, 76,
82
+ 84, 61, 44, 60, 62, 67, 68, 73,
83
+ 78, 86, 97, 108, 48, 57, 65, 90,
84
+ 98, 122, 46, 61, 95, 48, 57, 65,
85
+ 90, 97, 122, 34, 39, 44, 46, 60,
86
+ 62, 43, 45, 48, 57, 65, 90, 97,
87
+ 122, 34, 92, 34, 92, 34, 39, 44,
88
+ 60, 62, 39, 92, 39, 92, 48, 57,
89
+ 44, 46, 60, 62, 48, 57, 48, 57,
90
+ 44, 60, 62, 48, 57, 44, 60, 62,
91
+ 44, 46, 60, 62, 95, 48, 57, 65,
92
+ 90, 97, 122, 46, 61, 95, 111, 48,
93
+ 57, 65, 90, 97, 122, 46, 61, 95,
94
+ 109, 48, 57, 65, 90, 97, 122, 46,
95
+ 61, 95, 109, 48, 57, 65, 90, 97,
96
+ 122, 46, 61, 95, 97, 48, 57, 65,
97
+ 90, 98, 122, 46, 61, 95, 110, 48,
98
+ 57, 65, 90, 97, 122, 46, 61, 95,
99
+ 100, 48, 57, 65, 90, 97, 122, 46,
100
+ 61, 76, 95, 48, 57, 65, 90, 97,
101
+ 122, 46, 61, 95, 105, 48, 57, 65,
102
+ 90, 97, 122, 46, 61, 95, 110, 48,
103
+ 57, 65, 90, 97, 122, 46, 61, 95,
104
+ 101, 48, 57, 65, 90, 97, 122, 46,
105
+ 61, 79, 95, 48, 57, 65, 90, 97,
106
+ 122, 46, 61, 95, 112, 48, 57, 65,
107
+ 90, 97, 122, 46, 61, 95, 116, 48,
108
+ 57, 65, 90, 97, 122, 46, 61, 95,
109
+ 105, 48, 57, 65, 90, 97, 122, 46,
110
+ 61, 95, 111, 48, 57, 65, 90, 97,
111
+ 122, 46, 61, 95, 110, 48, 57, 65,
112
+ 90, 97, 122, 46, 61, 95, 115, 48,
113
+ 57, 65, 90, 97, 122, 46, 61, 95,
114
+ 48, 57, 65, 90, 97, 122, 34, 39,
115
+ 44, 60, 62, 34, 92, 34, 92, 39,
116
+ 92, 39, 92, 46, 61, 95, 97, 48,
117
+ 57, 65, 90, 98, 122, 46, 61, 95,
118
+ 116, 48, 57, 65, 90, 97, 122, 46,
119
+ 61, 95, 101, 48, 57, 65, 90, 97,
120
+ 122, 46, 61, 95, 48, 57, 65, 90,
121
+ 97, 122, 34, 39, 44, 60, 62, 34,
122
+ 92, 34, 92, 39, 92, 39, 92, 46,
123
+ 61, 68, 95, 48, 57, 65, 90, 97,
124
+ 122, 46, 61, 95, 48, 57, 65, 90,
125
+ 97, 122, 48, 57, 65, 90, 97, 122,
126
+ 44, 46, 60, 62, 95, 48, 57, 65,
127
+ 90, 97, 122, 46, 61, 95, 117, 48,
128
+ 57, 65, 90, 97, 122, 46, 61, 95,
129
+ 109, 48, 57, 65, 90, 97, 122, 46,
130
+ 61, 95, 98, 48, 57, 65, 90, 97,
131
+ 122, 46, 61, 95, 101, 48, 57, 65,
132
+ 90, 97, 122, 46, 61, 95, 114, 48,
133
+ 57, 65, 90, 97, 122, 46, 61, 95,
134
+ 48, 57, 65, 90, 97, 122, 43, 45,
135
+ 46, 65, 71, 82, 48, 57, 48, 57,
136
+ 44, 60, 62, 48, 57, 44, 60, 62,
137
+ 46, 61, 95, 101, 48, 57, 65, 90,
138
+ 97, 122, 46, 61, 95, 114, 48, 57,
139
+ 65, 90, 97, 122, 46, 61, 95, 115,
140
+ 48, 57, 65, 90, 97, 122, 46, 61,
141
+ 95, 105, 48, 57, 65, 90, 97, 122,
142
+ 46, 61, 95, 111, 48, 57, 65, 90,
143
+ 97, 122, 46, 61, 95, 110, 48, 57,
144
+ 65, 90, 97, 122, 46, 61, 95, 48,
145
+ 57, 65, 90, 97, 122, 34, 39, 44,
146
+ 60, 62, 48, 57, 34, 92, 34, 92,
147
+ 34, 39, 44, 60, 62, 39, 92, 39,
148
+ 92, 44, 60, 62, 95, 45, 46, 48,
149
+ 57, 65, 90, 97, 122, 46, 61, 95,
150
+ 115, 48, 57, 65, 90, 97, 122, 46,
151
+ 61, 95, 115, 48, 57, 65, 90, 97,
152
+ 122, 46, 61, 95, 101, 48, 57, 65,
153
+ 90, 97, 122, 46, 61, 95, 109, 48,
154
+ 57, 65, 90, 97, 122, 46, 61, 95,
155
+ 98, 48, 57, 65, 90, 97, 122, 46,
156
+ 61, 95, 108, 48, 57, 65, 90, 97,
157
+ 122, 46, 61, 95, 121, 48, 57, 65,
158
+ 90, 97, 122, 46, 61, 95, 101, 48,
159
+ 57, 65, 90, 97, 122, 46, 61, 95,
160
+ 110, 48, 57, 65, 90, 97, 122, 46,
161
+ 61, 95, 103, 48, 57, 65, 90, 97,
162
+ 122, 46, 61, 95, 116, 48, 57, 65,
163
+ 90, 97, 122, 46, 61, 95, 104, 48,
164
+ 57, 65, 90, 97, 122, 73, 79, 76,
165
+ 84, 69, 82, 82, 77, 65, 65, 84,
166
+ 75, 67, 111, 109, 109, 97, 110, 100,
167
+ 76, 105, 110, 101, 78, 70, 79, 111,
168
+ 110, 116, 105, 103, 0
169
+ ]
170
+
171
+ class << self
172
+ attr_accessor :_simple_lexer_single_lengths
173
+ private :_simple_lexer_single_lengths, :_simple_lexer_single_lengths=
174
+ end
175
+ self._simple_lexer_single_lengths = [
176
+ 0, 1, 1, 5, 1, 1, 1, 3,
177
+ 7, 3, 6, 2, 2, 5, 2, 2,
178
+ 0, 0, 0, 4, 0, 3, 3, 5,
179
+ 4, 4, 4, 4, 4, 4, 4, 4,
180
+ 4, 4, 4, 4, 4, 4, 4, 4,
181
+ 4, 3, 5, 2, 2, 0, 2, 2,
182
+ 0, 4, 4, 4, 3, 5, 2, 2,
183
+ 0, 2, 2, 0, 4, 3, 0, 5,
184
+ 4, 4, 4, 4, 4, 3, 6, 0,
185
+ 3, 3, 4, 4, 4, 4, 4, 4,
186
+ 3, 5, 2, 2, 5, 2, 2, 0,
187
+ 0, 4, 4, 4, 4, 4, 4, 4,
188
+ 4, 4, 4, 4, 4, 4, 2, 1,
189
+ 1, 1, 1, 1, 1, 1, 1, 1,
190
+ 1, 1, 1, 1, 1, 1, 1, 1,
191
+ 1, 1, 1, 1, 1, 1, 1, 1,
192
+ 1, 1, 1, 1, 0
193
+ ]
194
+
195
+ class << self
196
+ attr_accessor :_simple_lexer_range_lengths
197
+ private :_simple_lexer_range_lengths, :_simple_lexer_range_lengths=
198
+ end
199
+ self._simple_lexer_range_lengths = [
200
+ 0, 0, 0, 0, 0, 0, 0, 0,
201
+ 3, 3, 4, 0, 0, 0, 0, 0,
202
+ 0, 0, 1, 1, 1, 1, 0, 3,
203
+ 3, 3, 3, 3, 3, 3, 3, 3,
204
+ 3, 3, 3, 3, 3, 3, 3, 3,
205
+ 3, 3, 0, 0, 0, 0, 0, 0,
206
+ 0, 3, 3, 3, 3, 0, 0, 0,
207
+ 0, 0, 0, 0, 3, 3, 3, 3,
208
+ 3, 3, 3, 3, 3, 3, 1, 1,
209
+ 1, 0, 3, 3, 3, 3, 3, 3,
210
+ 3, 1, 0, 0, 0, 0, 0, 0,
211
+ 0, 4, 3, 3, 3, 3, 3, 3,
212
+ 3, 3, 3, 3, 3, 3, 0, 0,
213
+ 0, 0, 0, 0, 0, 0, 0, 0,
214
+ 0, 0, 0, 0, 0, 0, 0, 0,
215
+ 0, 0, 0, 0, 0, 0, 0, 0,
216
+ 0, 0, 0, 0, 0
217
+ ]
218
+
219
+ class << self
220
+ attr_accessor :_simple_lexer_index_offsets
221
+ private :_simple_lexer_index_offsets, :_simple_lexer_index_offsets=
222
+ end
223
+ self._simple_lexer_index_offsets = [
224
+ 0, 0, 2, 4, 10, 12, 14, 16,
225
+ 20, 31, 38, 49, 52, 55, 61, 64,
226
+ 67, 68, 69, 71, 77, 79, 84, 88,
227
+ 97, 105, 113, 121, 129, 137, 145, 153,
228
+ 161, 169, 177, 185, 193, 201, 209, 217,
229
+ 225, 233, 240, 246, 249, 252, 253, 256,
230
+ 259, 260, 268, 276, 284, 291, 297, 300,
231
+ 303, 304, 307, 310, 311, 319, 326, 330,
232
+ 339, 347, 355, 363, 371, 379, 386, 394,
233
+ 396, 401, 405, 413, 421, 429, 437, 445,
234
+ 453, 460, 467, 470, 473, 479, 482, 485,
235
+ 486, 487, 496, 504, 512, 520, 528, 536,
236
+ 544, 552, 560, 568, 576, 584, 592, 595,
237
+ 597, 599, 601, 603, 605, 607, 609, 611,
238
+ 613, 615, 617, 619, 621, 623, 625, 627,
239
+ 629, 631, 633, 635, 637, 639, 641, 643,
240
+ 645, 647, 649, 651, 653
241
+ ]
242
+
243
+ class << self
244
+ attr_accessor :_simple_lexer_trans_targs
245
+ private :_simple_lexer_trans_targs, :_simple_lexer_trans_targs=
246
+ end
247
+ self._simple_lexer_trans_targs = [
248
+ 2, 0, 3, 0, 4, 102, 110, 124,
249
+ 127, 0, 5, 0, 6, 0, 7, 0,
250
+ 8, 8, 132, 0, 24, 49, 60, 64,
251
+ 74, 90, 97, 9, 9, 9, 0, 9,
252
+ 10, 9, 9, 9, 9, 0, 11, 14,
253
+ 8, 22, 8, 132, 18, 23, 23, 23,
254
+ 0, 13, 17, 12, 13, 17, 12, 11,
255
+ 14, 8, 8, 132, 0, 13, 16, 15,
256
+ 13, 16, 15, 15, 12, 19, 0, 8,
257
+ 20, 8, 132, 19, 0, 21, 0, 8,
258
+ 8, 132, 21, 0, 8, 8, 132, 0,
259
+ 8, 23, 8, 132, 23, 23, 23, 23,
260
+ 0, 9, 10, 9, 25, 9, 9, 9,
261
+ 0, 9, 10, 9, 26, 9, 9, 9,
262
+ 0, 9, 10, 9, 27, 9, 9, 9,
263
+ 0, 9, 10, 9, 28, 9, 9, 9,
264
+ 0, 9, 10, 9, 29, 9, 9, 9,
265
+ 0, 9, 10, 9, 30, 9, 9, 9,
266
+ 0, 9, 10, 31, 9, 9, 9, 9,
267
+ 0, 9, 10, 9, 32, 9, 9, 9,
268
+ 0, 9, 10, 9, 33, 9, 9, 9,
269
+ 0, 9, 10, 9, 34, 9, 9, 9,
270
+ 0, 9, 10, 35, 9, 9, 9, 9,
271
+ 0, 9, 10, 9, 36, 9, 9, 9,
272
+ 0, 9, 10, 9, 37, 9, 9, 9,
273
+ 0, 9, 10, 9, 38, 9, 9, 9,
274
+ 0, 9, 10, 9, 39, 9, 9, 9,
275
+ 0, 9, 10, 9, 40, 9, 9, 9,
276
+ 0, 9, 10, 9, 41, 9, 9, 9,
277
+ 0, 9, 42, 9, 9, 9, 9, 0,
278
+ 43, 46, 8, 8, 132, 0, 42, 45,
279
+ 44, 42, 45, 44, 44, 42, 48, 47,
280
+ 42, 48, 47, 47, 9, 10, 9, 50,
281
+ 9, 9, 9, 0, 9, 10, 9, 51,
282
+ 9, 9, 9, 0, 9, 10, 9, 52,
283
+ 9, 9, 9, 0, 9, 53, 9, 9,
284
+ 9, 9, 0, 54, 57, 8, 8, 132,
285
+ 0, 53, 56, 55, 53, 56, 55, 55,
286
+ 53, 59, 58, 53, 59, 58, 58, 9,
287
+ 10, 61, 9, 9, 9, 9, 0, 9,
288
+ 62, 9, 9, 9, 9, 0, 63, 63,
289
+ 63, 0, 8, 63, 8, 132, 63, 63,
290
+ 63, 63, 0, 9, 10, 9, 65, 9,
291
+ 9, 9, 0, 9, 10, 9, 66, 9,
292
+ 9, 9, 0, 9, 10, 9, 67, 9,
293
+ 9, 9, 0, 9, 10, 9, 68, 9,
294
+ 9, 9, 0, 9, 10, 9, 69, 9,
295
+ 9, 9, 0, 9, 70, 9, 9, 9,
296
+ 9, 0, 71, 71, 73, 73, 73, 73,
297
+ 72, 0, 72, 0, 8, 8, 132, 72,
298
+ 0, 8, 8, 132, 0, 9, 10, 9,
299
+ 75, 9, 9, 9, 0, 9, 10, 9,
300
+ 76, 9, 9, 9, 0, 9, 10, 9,
301
+ 77, 9, 9, 9, 0, 9, 10, 9,
302
+ 78, 9, 9, 9, 0, 9, 10, 9,
303
+ 79, 9, 9, 9, 0, 9, 10, 9,
304
+ 80, 9, 9, 9, 0, 9, 81, 9,
305
+ 9, 9, 9, 0, 82, 85, 8, 8,
306
+ 132, 89, 0, 84, 88, 83, 84, 88,
307
+ 83, 82, 85, 8, 8, 132, 0, 84,
308
+ 87, 86, 84, 87, 86, 86, 83, 8,
309
+ 8, 132, 89, 89, 89, 89, 89, 0,
310
+ 9, 10, 9, 91, 9, 9, 9, 0,
311
+ 9, 10, 9, 92, 9, 9, 9, 0,
312
+ 9, 10, 9, 93, 9, 9, 9, 0,
313
+ 9, 10, 9, 94, 9, 9, 9, 0,
314
+ 9, 10, 9, 95, 9, 9, 9, 0,
315
+ 9, 10, 9, 96, 9, 9, 9, 0,
316
+ 9, 10, 9, 61, 9, 9, 9, 0,
317
+ 9, 10, 9, 98, 9, 9, 9, 0,
318
+ 9, 10, 9, 99, 9, 9, 9, 0,
319
+ 9, 10, 9, 100, 9, 9, 9, 0,
320
+ 9, 10, 9, 101, 9, 9, 9, 0,
321
+ 9, 10, 9, 69, 9, 9, 9, 0,
322
+ 103, 107, 0, 104, 0, 105, 0, 106,
323
+ 0, 6, 0, 108, 0, 109, 0, 5,
324
+ 0, 111, 0, 112, 0, 113, 0, 114,
325
+ 0, 115, 0, 116, 0, 117, 0, 118,
326
+ 0, 119, 0, 120, 0, 121, 0, 122,
327
+ 0, 123, 0, 6, 0, 125, 0, 126,
328
+ 0, 6, 0, 128, 0, 129, 0, 130,
329
+ 0, 131, 0, 6, 0, 0, 0
330
+ ]
331
+
332
+ class << self
333
+ attr_accessor :_simple_lexer_trans_actions
334
+ private :_simple_lexer_trans_actions, :_simple_lexer_trans_actions=
335
+ end
336
+ self._simple_lexer_trans_actions = [
337
+ 0, 0, 0, 0, 0, 0, 0, 0,
338
+ 0, 0, 0, 0, 0, 0, 0, 0,
339
+ 0, 0, 0, 0, 1, 1, 1, 1,
340
+ 1, 1, 1, 1, 1, 1, 47, 0,
341
+ 7, 0, 0, 0, 0, 11, 0, 0,
342
+ 9, 0, 9, 9, 0, 1, 1, 1,
343
+ 0, 13, 1, 1, 3, 0, 0, 0,
344
+ 0, 9, 9, 9, 0, 13, 1, 1,
345
+ 3, 0, 0, 0, 0, 0, 11, 9,
346
+ 0, 9, 9, 0, 0, 0, 11, 9,
347
+ 9, 9, 0, 0, 9, 9, 9, 0,
348
+ 16, 0, 16, 16, 0, 0, 0, 0,
349
+ 0, 0, 7, 0, 0, 0, 0, 0,
350
+ 40, 0, 7, 0, 0, 0, 0, 0,
351
+ 40, 0, 7, 0, 0, 0, 0, 0,
352
+ 40, 0, 7, 0, 0, 0, 0, 0,
353
+ 40, 0, 7, 0, 0, 0, 0, 0,
354
+ 40, 0, 7, 0, 0, 0, 0, 0,
355
+ 40, 0, 7, 0, 0, 0, 0, 0,
356
+ 40, 0, 7, 0, 0, 0, 0, 0,
357
+ 40, 0, 7, 0, 0, 0, 0, 0,
358
+ 40, 0, 7, 0, 0, 0, 0, 0,
359
+ 40, 0, 7, 0, 0, 0, 0, 0,
360
+ 40, 0, 7, 0, 0, 0, 0, 0,
361
+ 40, 0, 7, 0, 0, 0, 0, 0,
362
+ 40, 0, 7, 0, 0, 0, 0, 0,
363
+ 40, 0, 7, 0, 0, 0, 0, 0,
364
+ 40, 0, 7, 0, 0, 0, 0, 0,
365
+ 40, 0, 7, 0, 0, 0, 0, 0,
366
+ 40, 0, 5, 0, 0, 0, 0, 40,
367
+ 0, 0, 9, 9, 9, 0, 13, 1,
368
+ 1, 3, 0, 0, 0, 13, 1, 1,
369
+ 3, 0, 0, 0, 0, 7, 0, 0,
370
+ 0, 0, 0, 37, 0, 7, 0, 0,
371
+ 0, 0, 0, 37, 0, 7, 0, 0,
372
+ 0, 0, 0, 37, 0, 5, 0, 0,
373
+ 0, 0, 37, 0, 0, 34, 34, 34,
374
+ 0, 13, 1, 1, 3, 0, 0, 0,
375
+ 13, 1, 1, 3, 0, 0, 0, 0,
376
+ 7, 0, 0, 0, 0, 0, 25, 0,
377
+ 5, 0, 0, 0, 0, 25, 1, 1,
378
+ 1, 25, 43, 0, 43, 43, 0, 0,
379
+ 0, 0, 0, 0, 7, 0, 0, 0,
380
+ 0, 0, 31, 0, 7, 0, 0, 0,
381
+ 0, 0, 31, 0, 7, 0, 0, 0,
382
+ 0, 0, 31, 0, 7, 0, 0, 0,
383
+ 0, 0, 31, 0, 7, 0, 0, 0,
384
+ 0, 0, 31, 0, 5, 0, 0, 0,
385
+ 0, 31, 1, 1, 1, 1, 1, 1,
386
+ 1, 31, 0, 31, 22, 22, 22, 0,
387
+ 0, 22, 22, 22, 0, 0, 7, 0,
388
+ 0, 0, 0, 0, 28, 0, 7, 0,
389
+ 0, 0, 0, 0, 28, 0, 7, 0,
390
+ 0, 0, 0, 0, 28, 0, 7, 0,
391
+ 0, 0, 0, 0, 28, 0, 7, 0,
392
+ 0, 0, 0, 0, 28, 0, 7, 0,
393
+ 0, 0, 0, 0, 28, 0, 5, 0,
394
+ 0, 0, 0, 28, 0, 0, 9, 9,
395
+ 9, 1, 0, 13, 1, 1, 3, 0,
396
+ 0, 0, 0, 9, 9, 9, 0, 13,
397
+ 1, 1, 3, 0, 0, 0, 0, 19,
398
+ 19, 19, 0, 0, 0, 0, 0, 0,
399
+ 0, 7, 0, 0, 0, 0, 0, 25,
400
+ 0, 7, 0, 0, 0, 0, 0, 25,
401
+ 0, 7, 0, 0, 0, 0, 0, 25,
402
+ 0, 7, 0, 0, 0, 0, 0, 25,
403
+ 0, 7, 0, 0, 0, 0, 0, 25,
404
+ 0, 7, 0, 0, 0, 0, 0, 25,
405
+ 0, 7, 0, 0, 0, 0, 0, 25,
406
+ 0, 7, 0, 0, 0, 0, 0, 31,
407
+ 0, 7, 0, 0, 0, 0, 0, 31,
408
+ 0, 7, 0, 0, 0, 0, 0, 31,
409
+ 0, 7, 0, 0, 0, 0, 0, 31,
410
+ 0, 7, 0, 0, 0, 0, 0, 31,
411
+ 0, 0, 0, 0, 0, 0, 0, 0,
412
+ 0, 0, 0, 0, 0, 0, 0, 0,
413
+ 0, 0, 0, 0, 0, 0, 0, 0,
414
+ 0, 0, 0, 0, 0, 0, 0, 0,
415
+ 0, 0, 0, 0, 0, 0, 0, 0,
416
+ 0, 0, 0, 0, 0, 0, 0, 0,
417
+ 0, 0, 0, 0, 0, 0, 0, 0,
418
+ 0, 0, 0, 0, 0, 0, 0
419
+ ]
420
+
421
+ class << self
422
+ attr_accessor :_simple_lexer_eof_actions
423
+ private :_simple_lexer_eof_actions, :_simple_lexer_eof_actions=
424
+ end
425
+ self._simple_lexer_eof_actions = [
426
+ 0, 0, 0, 0, 0, 0, 0, 0,
427
+ 47, 11, 0, 11, 11, 0, 11, 11,
428
+ 11, 11, 11, 0, 11, 0, 0, 0,
429
+ 40, 40, 40, 40, 40, 40, 40, 40,
430
+ 40, 40, 40, 40, 40, 40, 40, 40,
431
+ 40, 40, 0, 40, 40, 40, 40, 40,
432
+ 40, 37, 37, 37, 37, 0, 37, 37,
433
+ 37, 37, 37, 37, 25, 25, 25, 0,
434
+ 31, 31, 31, 31, 31, 31, 31, 31,
435
+ 0, 0, 28, 28, 28, 28, 28, 28,
436
+ 28, 0, 28, 28, 0, 28, 28, 28,
437
+ 28, 0, 25, 25, 25, 25, 25, 25,
438
+ 25, 31, 31, 31, 31, 31, 0, 0,
439
+ 0, 0, 0, 0, 0, 0, 0, 0,
440
+ 0, 0, 0, 0, 0, 0, 0, 0,
441
+ 0, 0, 0, 0, 0, 0, 0, 0,
442
+ 0, 0, 0, 0, 0
443
+ ]
444
+
445
+ class << self
446
+ attr_accessor :simple_lexer_start
447
+ end
448
+ self.simple_lexer_start = 1;
449
+ class << self
450
+ attr_accessor :simple_lexer_first_final
451
+ end
452
+ self.simple_lexer_first_final = 132;
453
+ class << self
454
+ attr_accessor :simple_lexer_error
455
+ end
456
+ self.simple_lexer_error = 0;
457
+
458
+ class << self
459
+ attr_accessor :simple_lexer_en_main
460
+ end
461
+ self.simple_lexer_en_main = 1;
462
+
463
+
464
+ # line 79 "gen_vcfheaderline_parser.rl"
465
+ # %% this just fixes syntax highlighting...
466
+
467
+ def self.run_lexer(buf, options = {})
468
+ do_debug = (options[:debug] == true)
469
+ $stderr.print "---> ",buf,"\n" if do_debug
470
+ data = buf.unpack("c*") if(buf.is_a?(String))
471
+ eof = data.length
472
+ values = []
473
+ stack = []
474
+
475
+ emit = lambda { |type, data, ts, p|
476
+ # Print the type and text of the last read token
477
+ # p ts,p
478
+ $stderr.print "EMITTED: #{type}: #{data[ts...p].pack('c*')}\n" if do_debug
479
+ values << [type,data[ts...p].pack('c*')]
480
+ }
481
+
482
+ error_code = nil
483
+
484
+
485
+ # line 486 "gen_vcfheaderline_parser.rb"
486
+ begin
487
+ p ||= 0
488
+ pe ||= data.length
489
+ cs = simple_lexer_start
490
+ end
491
+
492
+ # line 99 "gen_vcfheaderline_parser.rl"
493
+
494
+ # line 495 "gen_vcfheaderline_parser.rb"
495
+ begin
496
+ _klen, _trans, _keys, _acts, _nacts = nil
497
+ _goto_level = 0
498
+ _resume = 10
499
+ _eof_trans = 15
500
+ _again = 20
501
+ _test_eof = 30
502
+ _out = 40
503
+ while true
504
+ _trigger_goto = false
505
+ if _goto_level <= 0
506
+ if p == pe
507
+ _goto_level = _test_eof
508
+ next
509
+ end
510
+ if cs == 0
511
+ _goto_level = _out
512
+ next
513
+ end
514
+ end
515
+ if _goto_level <= _resume
516
+ _keys = _simple_lexer_key_offsets[cs]
517
+ _trans = _simple_lexer_index_offsets[cs]
518
+ _klen = _simple_lexer_single_lengths[cs]
519
+ _break_match = false
520
+
521
+ begin
522
+ if _klen > 0
523
+ _lower = _keys
524
+ _upper = _keys + _klen - 1
525
+
526
+ loop do
527
+ break if _upper < _lower
528
+ _mid = _lower + ( (_upper - _lower) >> 1 )
529
+
530
+ if data[p].ord < _simple_lexer_trans_keys[_mid]
531
+ _upper = _mid - 1
532
+ elsif data[p].ord > _simple_lexer_trans_keys[_mid]
533
+ _lower = _mid + 1
534
+ else
535
+ _trans += (_mid - _keys)
536
+ _break_match = true
537
+ break
538
+ end
539
+ end # loop
540
+ break if _break_match
541
+ _keys += _klen
542
+ _trans += _klen
543
+ end
544
+ _klen = _simple_lexer_range_lengths[cs]
545
+ if _klen > 0
546
+ _lower = _keys
547
+ _upper = _keys + (_klen << 1) - 2
548
+ loop do
549
+ break if _upper < _lower
550
+ _mid = _lower + (((_upper-_lower) >> 1) & ~1)
551
+ if data[p].ord < _simple_lexer_trans_keys[_mid]
552
+ _upper = _mid - 2
553
+ elsif data[p].ord > _simple_lexer_trans_keys[_mid+1]
554
+ _lower = _mid + 2
555
+ else
556
+ _trans += ((_mid - _keys) >> 1)
557
+ _break_match = true
558
+ break
559
+ end
560
+ end # loop
561
+ break if _break_match
562
+ _trans += _klen
563
+ end
564
+ end while false
565
+ cs = _simple_lexer_trans_targs[_trans]
566
+ if _simple_lexer_trans_actions[_trans] != 0
567
+ _acts = _simple_lexer_trans_actions[_trans]
568
+ _nacts = _simple_lexer_actions[_acts]
569
+ _acts += 1
570
+ while _nacts > 0
571
+ _nacts -= 1
572
+ _acts += 1
573
+ case _simple_lexer_actions[_acts - 1]
574
+ when 0 then
575
+ # line 33 "gen_vcfheaderline_parser.rl"
576
+ begin
577
+ ts=p end
578
+ when 1 then
579
+ # line 34 "gen_vcfheaderline_parser.rl"
580
+ begin
581
+
582
+ emit.call(:value,data,ts,p)
583
+ end
584
+ when 2 then
585
+ # line 38 "gen_vcfheaderline_parser.rl"
586
+ begin
587
+
588
+ emit.call(:kw,data,ts,p)
589
+ end
590
+ when 3 then
591
+ # line 58 "gen_vcfheaderline_parser.rl"
592
+ begin
593
+ emit.call(:key_word,data,ts,p) end
594
+ when 4 then
595
+ # line 59 "gen_vcfheaderline_parser.rl"
596
+ begin
597
+ emit.call(:value,data,ts,p) end
598
+ when 5 then
599
+ # line 60 "gen_vcfheaderline_parser.rl"
600
+ begin
601
+ emit.call(:value,data,ts,p) end
602
+ when 6 then
603
+ # line 62 "gen_vcfheaderline_parser.rl"
604
+ begin
605
+ emit.call(:value,data,ts,p) end
606
+ when 7 then
607
+ # line 65 "gen_vcfheaderline_parser.rl"
608
+ begin
609
+ emit.call(:value,data,ts,p) end
610
+ when 8 then
611
+ # line 67 "gen_vcfheaderline_parser.rl"
612
+ begin
613
+ debug("ID FOUND") end
614
+ when 9 then
615
+ # line 67 "gen_vcfheaderline_parser.rl"
616
+ begin
617
+ error_code="Malformed ID" end
618
+ when 10 then
619
+ # line 68 "gen_vcfheaderline_parser.rl"
620
+ begin
621
+ error_code="Version" end
622
+ when 11 then
623
+ # line 69 "gen_vcfheaderline_parser.rl"
624
+ begin
625
+ error_code="Number" end
626
+ when 12 then
627
+ # line 70 "gen_vcfheaderline_parser.rl"
628
+ begin
629
+ debug("DATE FOUND") end
630
+ when 13 then
631
+ # line 70 "gen_vcfheaderline_parser.rl"
632
+ begin
633
+ error_code="Date" end
634
+ when 14 then
635
+ # line 71 "gen_vcfheaderline_parser.rl"
636
+ begin
637
+ error_code="GATK" end
638
+ when 15 then
639
+ # line 72 "gen_vcfheaderline_parser.rl"
640
+ begin
641
+ debug("KEY_VALUE found") end
642
+ when 16 then
643
+ # line 72 "gen_vcfheaderline_parser.rl"
644
+ begin
645
+ error_code="unknown key-value " end
646
+ # line 647 "gen_vcfheaderline_parser.rb"
647
+ end # action switch
648
+ end
649
+ end
650
+ if _trigger_goto
651
+ next
652
+ end
653
+ end
654
+ if _goto_level <= _again
655
+ if cs == 0
656
+ _goto_level = _out
657
+ next
658
+ end
659
+ p += 1
660
+ if p != pe
661
+ _goto_level = _resume
662
+ next
663
+ end
664
+ end
665
+ if _goto_level <= _test_eof
666
+ if p == eof
667
+ __acts = _simple_lexer_eof_actions[cs]
668
+ __nacts = _simple_lexer_actions[__acts]
669
+ __acts += 1
670
+ while __nacts > 0
671
+ __nacts -= 1
672
+ __acts += 1
673
+ case _simple_lexer_actions[__acts - 1]
674
+ when 9 then
675
+ # line 67 "gen_vcfheaderline_parser.rl"
676
+ begin
677
+ error_code="Malformed ID" end
678
+ when 10 then
679
+ # line 68 "gen_vcfheaderline_parser.rl"
680
+ begin
681
+ error_code="Version" end
682
+ when 11 then
683
+ # line 69 "gen_vcfheaderline_parser.rl"
684
+ begin
685
+ error_code="Number" end
686
+ when 13 then
687
+ # line 70 "gen_vcfheaderline_parser.rl"
688
+ begin
689
+ error_code="Date" end
690
+ when 14 then
691
+ # line 71 "gen_vcfheaderline_parser.rl"
692
+ begin
693
+ error_code="GATK" end
694
+ when 16 then
695
+ # line 72 "gen_vcfheaderline_parser.rl"
696
+ begin
697
+ error_code="unknown key-value " end
698
+ # line 699 "gen_vcfheaderline_parser.rb"
699
+ end # eof action switch
700
+ end
701
+ if _trigger_goto
702
+ next
703
+ end
704
+ end
705
+ end
706
+ if _goto_level <= _out
707
+ break
708
+ end
709
+ end
710
+ end
711
+
712
+ # line 100 "gen_vcfheaderline_parser.rl"
713
+
714
+ raise "ERROR: "+error_code+" in "+buf if error_code
715
+
716
+ begin
717
+ res = {}
718
+ # p values
719
+ values.each_slice(2) do | a,b |
720
+ $stderr.print '*',a,b if do_debug
721
+ keyword = a[1]
722
+ value = b[1]
723
+ value = value.to_i if ['length','Epoch'].index(keyword)
724
+ res[keyword] = value
725
+ # p h[:value] if h[:name]==:identifier or h[:name]==:value or h[:name]==:string
726
+ end
727
+ rescue
728
+ print "ERROR: "
729
+ p values
730
+ raise
731
+ end
732
+ $stderr.print(res,"\n") if do_debug
733
+ res
734
+ end
735
+ end
736
+ end
737
+ end
738
+
739
+ if __FILE__ == $0
740
+
741
+ gatkcommandline = <<LINE1
742
+ ##GATKCommandLine=<ID=CombineVariants,Version=3.2-2-gec30cee,Date="Thu Oct 30 13:41:59 CET 2014",Epoch=1414672919266,CommandLineOptions="analysis_type=CombineVariants input_file=[] showFullBamList=false read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/hpc/cog_bioinf/GENOMES/Homo_sapiens.GRCh37.GATK.illumina/Homo_sapiens.GRCh37.GATK.illumina.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 refactor_NDN_cigar_string=false fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false variant_index_type=DYNAMIC_SEEK variant_index_parameter=-1 logging_level=INFO log_to_file=null help=false version=false variant=[(RodBindingCollection [(RodBinding name=variant source=/hpc/cog_bioinf/data/robert/testIAP/testSubsetExome/tmp/testSubsetExome.filtered_snps.vcf)]), (RodBindingCollection [(RodBinding name=variant2 source=/hpc/cog_bioinf/data/robert/testIAP/testSubsetExome/tmp/testSubsetExome.filtered_indels.vcf)])] out=org.broadinstitute.gatk.engine.io.stubs.VariantContextWriterStub 
no_cmdline_in_header=org.broadinstitute.gatk.engine.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.gatk.engine.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.gatk.engine.io.stubs.VariantContextWriterStub genotypemergeoption=UNSORTED filteredrecordsmergetype=KEEP_IF_ANY_UNFILTERED multipleallelesmergetype=BY_TYPE rod_priority_list=null printComplexMerges=false filteredAreUncalled=false minimalVCF=false excludeNonVariants=false setKey=set assumeIdenticalSamples=false minimumN=1 suppressCommandLineHeader=false mergeInfoWithMaxAC=false filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false">
743
+ LINE1
744
+
745
+ h = {}
746
+ s = gatkcommandline.strip
747
+ # print s,"\n"
748
+ result = BioVcf::VcfHeaderParser::RagelKeyValues.run_lexer(s, debug: true)
749
+ # h[result['ID']] = result
750
+ # p result
751
+
752
+ lines = <<LINES
753
+ ##FILTER=<ID=HaplotypeScoreHigh,Description="HaplotypeScore > 13.0">
754
+ ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
755
+ ##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Total read depth",Extra="Yes?">
756
+ ##FORMAT=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
757
+ ##INFO=<ID=PM,Number=0,Type=Flag,Description="Variant is Precious(Clinical,Pubmed Cited)">
758
+ ##INFO=<ID=VP,Number=1,Type=String,Description="Variation Property. Documentation is at ftp://ftp.ncbi.nlm.nih.gov/snp/specs/dbSNP_BitField_latest.pdf",Source="dbsnp",Version="138">
759
+ ##INFO=<ID=GENEINFO,Number=1,Type=String,Description="Pairs each of gene symbol:gene id. The gene symbol and id are delimited by a colon (:), and each pair is delimited by a vertical bar (|)">
760
+ ##INFO=<ID=CLNHGVS,Number=.,Type=String,Description="Variant names from HGVS. The order of these variants corresponds to the order of the info in the other clinical INFO tags.">
761
+ ##INFO=<ID=CLNHGVS1,Number=.,Type=String,Description="Variant names from \\"HGVS\\". The order of these 'variants' corresponds to the order of the info in the other clinical INFO tags.">
762
+ ##contig=<ID=XXXY12>
763
+ ##contig=<ID=Y,length=59373566>
764
+ LINES
765
+
766
+ h = {}
767
+ lines.strip.split("\n").each { |s|
768
+ # print s,"\n"
769
+ result = BioVcf::VcfHeaderParser::RagelKeyValues.run_lexer(s, debug: true)
770
+ h[result['ID']] = result
771
+ p result
772
+ }
773
+ p h
774
+
775
+ raise "ERROR" if h != {"HaplotypeScoreHigh"=>{"ID"=>"HaplotypeScoreHigh", "Description"=>"HaplotypeScore > 13.0"}, "GT"=>{"ID"=>"GT", "Number"=>"1", "Type"=>"String", "Description"=>"Genotype"}, "DP"=>{"ID"=>"DP", "Number"=>"1", "Type"=>"Integer", "Description"=>"Total read depth", "Extra"=>"Yes?"}, "DP4"=>{"ID"=>"DP4", "Number"=>"4", "Type"=>"Integer", "Description"=>"# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases"}, "PM"=>{"ID"=>"PM", "Number"=>"0", "Type"=>"Flag", "Description"=>"Variant is Precious(Clinical,Pubmed Cited)"}, "VP"=>{"ID"=>"VP", "Number"=>"1", "Type"=>"String", "Description"=>"Variation Property. Documentation is at ftp://ftp.ncbi.nlm.nih.gov/snp/specs/dbSNP_BitField_latest.pdf", "Source"=>"dbsnp", "Version"=>"138"}, "GENEINFO"=>{"ID"=>"GENEINFO", "Number"=>"1", "Type"=>"String", "Description"=>"Pairs each of gene symbol:gene id. The gene symbol and id are delimited by a colon (:), and each pair is delimited by a vertical bar (|)"}, "CLNHGVS"=>{"ID"=>"CLNHGVS", "Number"=>".", "Type"=>"String", "Description"=>"Variant names from HGVS. The order of these variants corresponds to the order of the info in the other clinical INFO tags."}, "CLNHGVS1"=>{"ID"=>"CLNHGVS1", "Number"=>".", "Type"=>"String", "Description"=>"Variant names from \\\"HGVS\\\". The order of these 'variants' corresponds to the order of the info in the other clinical INFO tags."}, "XXXY12"=>{"ID"=>"XXXY12"}, "Y"=>{"ID"=>"Y", "length"=>59373566}}
776
+
777
+
778
+ end # test