bio-vcf 0.8.1 → 0.9.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (84) hide show
  1. checksums.yaml +5 -5
  2. data/.travis.yml +1 -11
  3. data/Gemfile +2 -8
  4. data/LICENSE.txt +1 -1
  5. data/README.md +467 -129
  6. data/RELEASE_NOTES.md +27 -0
  7. data/RELEASE_NOTES.md~ +11 -0
  8. data/Rakefile +9 -42
  9. data/TAGS +115 -0
  10. data/VERSION +1 -1
  11. data/bin/bio-vcf +156 -108
  12. data/bio-vcf.gemspec +13 -75
  13. data/features/cli.feature +22 -4
  14. data/features/diff_count.feature +0 -1
  15. data/features/filter.feature +12 -0
  16. data/features/multisample.feature +12 -0
  17. data/features/somaticsniper.feature +2 -0
  18. data/features/step_definitions/cli-feature.rb +15 -6
  19. data/features/step_definitions/diff_count.rb +1 -1
  20. data/features/step_definitions/multisample.rb +19 -0
  21. data/features/step_definitions/somaticsniper.rb +9 -1
  22. data/features/step_definitions/vcf_header.rb +48 -0
  23. data/features/support/env.rb +1 -11
  24. data/features/vcf_header.feature +35 -0
  25. data/lib/bio-vcf.rb +1 -0
  26. data/lib/bio-vcf/pcows.rb +303 -0
  27. data/lib/bio-vcf/vcffile.rb +46 -0
  28. data/lib/bio-vcf/vcfgenotypefield.rb +19 -19
  29. data/lib/bio-vcf/vcfheader.rb +137 -5
  30. data/lib/bio-vcf/vcfheader_line.rb +778 -0
  31. data/lib/bio-vcf/vcfrecord.rb +56 -18
  32. data/lib/bio-vcf/vcfsample.rb +26 -2
  33. data/lib/regressiontest.rb +11 -0
  34. data/lib/regressiontest/cli_exec.rb +101 -0
  35. data/ragel/gen_vcfheaderline_parser.rl +165 -0
  36. data/ragel/generate.sh +8 -0
  37. data/template/vcf2json.erb +16 -16
  38. data/template/vcf2json_full_header.erb +22 -0
  39. data/template/vcf2json_use_meta.erb +41 -0
  40. data/test/data/input/empty.vcf +2 -0
  41. data/test/data/input/gatk_exome.vcf +237 -0
  42. data/test/data/input/gatk_wgs.vcf +1000 -0
  43. data/test/data/input/test.bed +632 -0
  44. data/test/data/regression/empty-stderr.new +12 -0
  45. data/test/data/regression/empty.new +2 -0
  46. data/test/data/regression/empty.ref +2 -0
  47. data/test/data/regression/eval_once-stderr.new +2 -0
  48. data/test/data/regression/eval_once.new +1 -0
  49. data/test/data/regression/eval_once.ref +1 -0
  50. data/test/data/regression/eval_r.info.dp-stderr.new +10 -0
  51. data/test/data/regression/eval_r.info.dp.new +150 -0
  52. data/test/data/regression/ifilter_s.dp-stderr.new +34 -0
  53. data/test/data/regression/ifilter_s.dp.new +31 -0
  54. data/test/data/regression/pass1-stderr.new +10 -0
  55. data/test/data/regression/pass1.new +88 -0
  56. data/test/data/regression/pass1.ref +88 -0
  57. data/test/data/regression/r.info.dp-stderr.new +4 -0
  58. data/test/data/regression/r.info.dp.new +114 -0
  59. data/test/data/regression/rewrite.info.sample-stderr.new +10 -0
  60. data/test/data/regression/rewrite.info.sample.new +150 -0
  61. data/test/data/regression/s.dp-stderr.new +18 -0
  62. data/test/data/regression/s.dp.new +145 -0
  63. data/test/data/regression/seval_s.dp-stderr.new +10 -0
  64. data/test/data/regression/seval_s.dp.new +36 -0
  65. data/test/data/regression/sfilter_seval_s.dp-stderr.new +18 -0
  66. data/test/data/regression/sfilter_seval_s.dp.new +31 -0
  67. data/test/data/regression/thread4-stderr.new +10 -0
  68. data/test/data/regression/thread4.new +150 -0
  69. data/test/data/regression/thread4_4-stderr.new +25 -0
  70. data/test/data/regression/thread4_4.new +130 -0
  71. data/test/data/regression/thread4_4_failed_filter-stderr.new +5 -0
  72. data/test/data/regression/thread4_4_failed_filter-stderr.ref +5 -2
  73. data/test/data/regression/thread4_4_failed_filter.new +110 -0
  74. data/test/data/regression/vcf2json_full_header-stderr.new +10 -0
  75. data/test/data/regression/vcf2json_full_header.new +225 -0
  76. data/test/data/regression/vcf2json_full_header.ref +225 -0
  77. data/test/data/regression/vcf2json_use_meta-stderr.new +10 -0
  78. data/test/data/regression/vcf2json_use_meta.new +4697 -0
  79. data/test/data/regression/vcf2json_use_meta.ref +4697 -0
  80. data/test/performance/metrics.md +18 -1
  81. data/test/stress/stress_test.sh +15 -0
  82. data/test/tmp/test.vcf +12469 -0
  83. metadata +63 -64
  84. data/Gemfile.lock +0 -81
@@ -0,0 +1,46 @@
1
+ module BioVcf
2
+ # This class abstracts a VCF file that can be iterated.
3
+ # The VCF can be plain text or compressed with gzip
4
+ # Note that files compressed with bgzip will not work, as the Ruby implementation of Zlib doesn't allow concatenated files
5
+ class VCFfile
6
+
7
+ def initialize(file: "", is_gz: true)
8
+ @file = file
9
+ @is_gz = is_gz
10
+ end
11
+
12
+ def parseVCFheader(head_line="")
13
+ m=/##INFO=<ID=(.+),Number=(.+),Type=(.+),Description="(.+)">/.match(head_line)
14
+ {:id=>m[1],:number=>m[2],:type=>m[3],:desc=>m[4]}
15
+ end
16
+
17
+
18
+ # Returns an enumerator when called without a block; otherwise yields each VcfRecord.
19
+ def each
20
+ return enum_for(:each) unless block_given?
21
+ io = nil
22
+ if @is_gz
23
+ infile = open(@file)
24
+ io = Zlib::GzipReader.new(infile)
25
+ else
26
+ io = File.open(@file)
27
+ end
28
+
29
+ header = BioVcf::VcfHeader.new
30
+ io.each_line do |line|
31
+ line.chomp!
32
+ if line =~ /^##fileformat=/
33
+ header.add(line)
34
+ next
35
+ end
36
+ if line =~ /^#/
37
+ header.add(line)
38
+ next
39
+ end
40
+ fields = BioVcf::VcfLine.parse(line)
41
+ rec = BioVcf::VcfRecord.new(fields,header)
42
+ yield rec
43
+ end
44
+ end
45
+ end
46
+ end
@@ -11,7 +11,7 @@ module BioVcf
11
11
  end
12
12
  end
13
13
 
14
- # Helper class for a list of (variant) values, such as A,G.
14
+ # Helper class for a list of (variant) values, such as A,G.
15
15
  # The [] function does the hard work. You can pass in an index (integer)
16
16
  # or nucleotide which translates to an index.
17
17
  # (see ./features for examples)
@@ -20,7 +20,7 @@ module BioVcf
20
20
  @alt = alt
21
21
  @list = list.split(/,/).map{|i| i.to_i}
22
22
  end
23
-
23
+
24
24
  def [] idx
25
25
  if idx.kind_of?(Integer)
26
26
  # return a value
@@ -67,7 +67,7 @@ module BioVcf
67
67
  @alt = alt
68
68
  @list = list.split(/,/).map{|i| i.to_i}
69
69
  end
70
-
70
+
71
71
  def [] idx
72
72
  if idx.kind_of?(Integer)
73
73
  @list[idx].to_i
@@ -87,15 +87,15 @@ module BioVcf
87
87
  end
88
88
 
89
89
  # Return the max value on the nucleotides in the list (typically rec.alt)
90
- def max
90
+ def max
91
91
  @list.reduce(0){ |memo,v| (v>memo ? v : memo) }
92
92
  end
93
93
 
94
- def min
94
+ def min
95
95
  @list.reduce(MAXINT){ |memo,v| (v<memo ? v : memo) }
96
96
  end
97
97
 
98
- def sum
98
+ def sum
99
99
  @list.reduce(0){ |memo,v| v+memo }
100
100
  end
101
101
  end
@@ -129,14 +129,14 @@ module BioVcf
129
129
  !empty?
130
130
  end
131
131
 
132
- def dp4
133
- ilist('DP4')
132
+ def dp4
133
+ ilist('DP4')
134
134
  end
135
- def ad
136
- ilist('AD')
135
+ def ad
136
+ ilist('AD')
137
137
  end
138
- def pl
139
- ilist('PL')
138
+ def pl
139
+ ilist('PL')
140
140
  end
141
141
 
142
142
  def bcount
@@ -178,11 +178,11 @@ module BioVcf
178
178
  else
179
179
  v = values[fetch(m.to_s.upcase)]
180
180
  return nil if VcfValue::empty?(v)
181
- v = v.to_i if v =~ /^\d+$/
182
- v = v.to_f if v =~ /^\d+\.\d+$/
181
+ return v.to_i if v =~ /^\d+$/
182
+ return v.to_f if v =~ /^\d+\.\d+$/
183
183
  v
184
184
  end
185
- end
185
+ end
186
186
 
187
187
  private
188
188
 
@@ -200,7 +200,7 @@ module BioVcf
200
200
  def ilist name
201
201
  v = fetch_value(name)
202
202
  return nil if not v
203
- v.split(',').map{|i| i.to_i}
203
+ v.split(',').map{|i| i.to_i}
204
204
  end
205
205
 
206
206
  end
@@ -222,11 +222,11 @@ module BioVcf
222
222
  @samples[name] ||= VcfGenotypeField.new(@fields[@sample_index[name]],@format,@header,@ref,@alt)
223
223
  rescue TypeError
224
224
  $stderr.print "Unknown field name <#{name}> in record, did you mean r.info.#{name}?\n"
225
- raise
225
+ raise
226
226
  end
227
227
  end
228
228
 
229
- def method_missing(m, *args, &block)
229
+ def method_missing(m, *args, &block)
230
230
  name = m.to_s
231
231
  if name =~ /\?$/
232
232
  # test for valid sample
@@ -234,7 +234,7 @@ module BioVcf
234
234
  else
235
235
  @samples[name] ||= VcfGenotypeField.new(@fields[@sample_index[name]],@format,@header,@ref,@alt)
236
236
  end
237
- end
237
+ end
238
238
 
239
239
  end
240
240
  end
@@ -1,3 +1,14 @@
1
+ # This module parses the VCF header. A header consists of lines
2
+ # containing fields. Most fields are of 'key=value' type and appear
3
+ # only once. These can be retrieved with the find_field method.
4
+ #
5
+ # INFO, FORMAT and contig fields are special as they appear multiple times
6
+ # and contain multiple key values (identified by an ID field).
7
+ # To retrieve these call the 'info', 'format' and 'contig' functions respectively,
8
+ # which return a hash on the contained ID.
9
+ #
10
+ # For the INFO and FORMAT fields a Ragel parser is used, mostly to
11
+ # deal with embedded quoted fields.
1
12
 
2
13
  module BioVcf
3
14
 
@@ -13,21 +24,30 @@ module BioVcf
13
24
  end
14
25
  nil
15
26
  end
27
+
28
+ def VcfHeaderParser.parse_field(line, debug)
29
+ BioVcf::VcfHeaderParser::RagelKeyValues.run_lexer(line, debug: debug)
30
+ end
16
31
  end
17
32
 
18
33
  class VcfHeader
19
34
 
20
- attr_reader :lines
35
+ attr_reader :lines, :field
21
36
 
22
- def initialize
37
+ def initialize(debug = false)
38
+ @debug = debug
23
39
  @lines = []
40
+ @field = {}
41
+ @meta = nil
42
+ @cached_filter_index = {}
24
43
  end
25
44
 
45
+ # Add a new field to the header
26
46
  def add line
27
- @lines << line.strip
47
+ @lines += line.split(/\n/)
28
48
  end
29
49
 
30
- # Add a key value list to the header
50
+ # Push a special key value list to the header
31
51
  def tag h
32
52
  h2 = h.dup
33
53
  [:show_help,:skip_header,:verbose,:quiet,:debug].each { |key| h2.delete(key) }
@@ -82,6 +102,118 @@ module BioVcf
82
102
  @sample_index = index
83
103
  index
84
104
  end
85
- end
86
105
 
106
+ # Given a list of samples (by index and/or name), return 0-based index values.
107
+ # The cache has to be able to handle multiple lists - that is why it is a hash.
108
+ def sample_subset_index list
109
+ cached = @cached_filter_index[list]
110
+ if cached
111
+ l = cached
112
+ else
113
+ l = []
114
+ list = samples_index_array() if not list
115
+ list.each { |i|
116
+ value =
117
+ begin
118
+ Integer(i)
119
+ rescue
120
+ idx = samples.index(i)
121
+ if idx != nil
122
+ idx
123
+ else
124
+ raise "Unknown sample name '#{i}'"
125
+ end
126
+ end
127
+ l << value
128
+ }
129
+ @cached_filter_index[list] = l
130
+ end
131
+ l
132
+ end
133
+
134
+ # Look for a line in the header with the field name and return the
135
+ # value, otherwise return nil
136
+ def find_field name
137
+ return field[name] if field[name]
138
+ @lines.each do | line |
139
+ value = line.scan(/###{name}=(.*)/)
140
+ if value[0]
141
+ v = value[0][0]
142
+ field[name] = v
143
+ return v
144
+ end
145
+ end
146
+ nil
147
+ end
148
+
149
+ # Look for all the lines that match the field name and return
150
+ # a hash of hashes. An empty hash is returned when there are
151
+ # no matches.
152
+ def find_fields name
153
+ res = {}
154
+ @lines.each do | line |
155
+ value = line.scan(/###{name}=<(.*)>/)
156
+ if value[0]
157
+ str = value[0][0]
158
+ # p str
159
+ v = VcfHeaderParser.parse_field(line,@debug)
160
+ id = v['ID']
161
+ res[id] = v
162
+ end
163
+ end
164
+ # p res
165
+ res
166
+ end
167
+
168
+ def format
169
+ find_fields('FORMAT')
170
+ end
171
+
172
+ def filter
173
+ find_fields('FILTER')
174
+ end
175
+
176
+ def contig
177
+ find_fields('contig')
178
+ end
179
+
180
+ def info
181
+ find_fields('INFO')
182
+ end
183
+
184
+ def gatkcommandline
185
+ find_fields('GATKCommandLine')
186
+ end
187
+
188
+ def meta
189
+ return @meta if @meta
190
+ res = { 'INFO' => {}, 'FORMAT' => {}, 'FILTER' => {}, 'contig' => {}, 'GATKCommandLine' => {} }
191
+ @lines.each do | line |
192
+ value = line.scan(/##(.*?)=(.*)/)
193
+ if value[0]
194
+ k,v = value[0]
195
+ if k != 'FORMAT' and k != 'INFO' and k != 'FILTER' and k != 'contig' and k != 'GATKCommandLine'
196
+ # p [k,v]
197
+ res[k] = v
198
+ end
199
+ end
200
+ end
201
+ res['INFO'] = info()
202
+ res['FORMAT'] = format()
203
+ res['FILTER'] = filter()
204
+ res['contig'] = contig()
205
+ res['GATKCommandLine'] = gatkcommandline()
206
+ # p [:res, res]
207
+ @meta = res # cache values
208
+ res
209
+ end
210
+
211
+ def method_missing(m, *args, &block)
212
+ name = m.to_s
213
+ value = find_field(name)
214
+ return value if value
215
+ raise "Unknown VCF header query '#{name}'"
216
+ end
217
+
218
+ end
87
219
  end
@@ -0,0 +1,778 @@
1
+
2
+ # line 1 "gen_vcfheaderline_parser.rl"
3
+ # Ragel lexer for VCF-header
4
+ #
5
+ # This is a compact parser/lexer for the VCF header format. Bio-vcf
6
+ # uses the parser to generate meta information that can be output to
7
+ # (for example) JSON format. The advantage of using ragel as a state
8
+ # engine is that it allows for easy parsing of key-value pairs with
9
+ # syntax checking and, for example, escaped quotes in quoted string
10
+ # values. This ragel parser/lexer generates valid Ruby; it should be
11
+ # fairly trivial to generate python/C/JAVA instead. Note that this
12
+ # edition validates ID and Number fields only. Other fields are
13
+ # dumped 'AS IS'.
14
+ #
15
+ # Note the .rb version is generated from ./ragel/gen_vcfheaderline_parser.rl
16
+ #
17
+ # by Pjotr Prins (c) 2014/2015
18
+
19
+ module BioVcf
20
+
21
+ module VcfHeaderParser
22
+
23
+ module RagelKeyValues
24
+
25
+ def self.debug msg
26
+ # nothing
27
+ # $stderr.print "DEBUG: ",msg,"\n"
28
+ end
29
+
30
+ =begin
31
+
32
+ # line 75 "gen_vcfheaderline_parser.rl"
33
+
34
+ =end
35
+
36
+
37
+ # line 38 "gen_vcfheaderline_parser.rb"
38
+ class << self
39
+ attr_accessor :_simple_lexer_actions
40
+ private :_simple_lexer_actions, :_simple_lexer_actions=
41
+ end
42
+ self._simple_lexer_actions = [
43
+ 0, 1, 0, 1, 1, 1, 2, 1,
44
+ 3, 1, 15, 1, 16, 2, 0, 1,
45
+ 2, 4, 15, 2, 6, 15, 2, 7,
46
+ 15, 2, 9, 16, 2, 10, 16, 2,
47
+ 11, 16, 2, 12, 15, 2, 13, 16,
48
+ 2, 14, 16, 3, 5, 8, 15, 6,
49
+ 9, 10, 13, 11, 14, 16
50
+ ]
51
+
52
+ class << self
53
+ attr_accessor :_simple_lexer_key_offsets
54
+ private :_simple_lexer_key_offsets, :_simple_lexer_key_offsets=
55
+ end
56
+ self._simple_lexer_key_offsets = [
57
+ 0, 0, 1, 2, 7, 8, 9, 10,
58
+ 13, 26, 35, 49, 51, 53, 58, 60,
59
+ 62, 62, 62, 64, 70, 72, 77, 80,
60
+ 91, 101, 111, 121, 131, 141, 151, 161,
61
+ 171, 181, 191, 201, 211, 221, 231, 241,
62
+ 251, 261, 270, 275, 277, 279, 279, 281,
63
+ 283, 283, 293, 303, 313, 322, 327, 329,
64
+ 331, 331, 333, 335, 335, 345, 354, 360,
65
+ 371, 381, 391, 401, 411, 421, 430, 438,
66
+ 440, 445, 448, 458, 468, 478, 488, 498,
67
+ 508, 517, 524, 526, 528, 533, 535, 537,
68
+ 537, 537, 549, 559, 569, 579, 589, 599,
69
+ 609, 619, 629, 639, 649, 659, 669, 671,
70
+ 672, 673, 674, 675, 676, 677, 678, 679,
71
+ 680, 681, 682, 683, 684, 685, 686, 687,
72
+ 688, 689, 690, 691, 692, 693, 694, 695,
73
+ 696, 697, 698, 699, 700
74
+ ]
75
+
76
+ class << self
77
+ attr_accessor :_simple_lexer_trans_keys
78
+ private :_simple_lexer_trans_keys, :_simple_lexer_trans_keys=
79
+ end
80
+ self._simple_lexer_trans_keys = [
81
+ 35, 35, 65, 70, 71, 73, 99, 76,
82
+ 84, 61, 44, 60, 62, 67, 68, 73,
83
+ 78, 86, 97, 108, 48, 57, 65, 90,
84
+ 98, 122, 46, 61, 95, 48, 57, 65,
85
+ 90, 97, 122, 34, 39, 44, 46, 60,
86
+ 62, 43, 45, 48, 57, 65, 90, 97,
87
+ 122, 34, 92, 34, 92, 34, 39, 44,
88
+ 60, 62, 39, 92, 39, 92, 48, 57,
89
+ 44, 46, 60, 62, 48, 57, 48, 57,
90
+ 44, 60, 62, 48, 57, 44, 60, 62,
91
+ 44, 46, 60, 62, 95, 48, 57, 65,
92
+ 90, 97, 122, 46, 61, 95, 111, 48,
93
+ 57, 65, 90, 97, 122, 46, 61, 95,
94
+ 109, 48, 57, 65, 90, 97, 122, 46,
95
+ 61, 95, 109, 48, 57, 65, 90, 97,
96
+ 122, 46, 61, 95, 97, 48, 57, 65,
97
+ 90, 98, 122, 46, 61, 95, 110, 48,
98
+ 57, 65, 90, 97, 122, 46, 61, 95,
99
+ 100, 48, 57, 65, 90, 97, 122, 46,
100
+ 61, 76, 95, 48, 57, 65, 90, 97,
101
+ 122, 46, 61, 95, 105, 48, 57, 65,
102
+ 90, 97, 122, 46, 61, 95, 110, 48,
103
+ 57, 65, 90, 97, 122, 46, 61, 95,
104
+ 101, 48, 57, 65, 90, 97, 122, 46,
105
+ 61, 79, 95, 48, 57, 65, 90, 97,
106
+ 122, 46, 61, 95, 112, 48, 57, 65,
107
+ 90, 97, 122, 46, 61, 95, 116, 48,
108
+ 57, 65, 90, 97, 122, 46, 61, 95,
109
+ 105, 48, 57, 65, 90, 97, 122, 46,
110
+ 61, 95, 111, 48, 57, 65, 90, 97,
111
+ 122, 46, 61, 95, 110, 48, 57, 65,
112
+ 90, 97, 122, 46, 61, 95, 115, 48,
113
+ 57, 65, 90, 97, 122, 46, 61, 95,
114
+ 48, 57, 65, 90, 97, 122, 34, 39,
115
+ 44, 60, 62, 34, 92, 34, 92, 39,
116
+ 92, 39, 92, 46, 61, 95, 97, 48,
117
+ 57, 65, 90, 98, 122, 46, 61, 95,
118
+ 116, 48, 57, 65, 90, 97, 122, 46,
119
+ 61, 95, 101, 48, 57, 65, 90, 97,
120
+ 122, 46, 61, 95, 48, 57, 65, 90,
121
+ 97, 122, 34, 39, 44, 60, 62, 34,
122
+ 92, 34, 92, 39, 92, 39, 92, 46,
123
+ 61, 68, 95, 48, 57, 65, 90, 97,
124
+ 122, 46, 61, 95, 48, 57, 65, 90,
125
+ 97, 122, 48, 57, 65, 90, 97, 122,
126
+ 44, 46, 60, 62, 95, 48, 57, 65,
127
+ 90, 97, 122, 46, 61, 95, 117, 48,
128
+ 57, 65, 90, 97, 122, 46, 61, 95,
129
+ 109, 48, 57, 65, 90, 97, 122, 46,
130
+ 61, 95, 98, 48, 57, 65, 90, 97,
131
+ 122, 46, 61, 95, 101, 48, 57, 65,
132
+ 90, 97, 122, 46, 61, 95, 114, 48,
133
+ 57, 65, 90, 97, 122, 46, 61, 95,
134
+ 48, 57, 65, 90, 97, 122, 43, 45,
135
+ 46, 65, 71, 82, 48, 57, 48, 57,
136
+ 44, 60, 62, 48, 57, 44, 60, 62,
137
+ 46, 61, 95, 101, 48, 57, 65, 90,
138
+ 97, 122, 46, 61, 95, 114, 48, 57,
139
+ 65, 90, 97, 122, 46, 61, 95, 115,
140
+ 48, 57, 65, 90, 97, 122, 46, 61,
141
+ 95, 105, 48, 57, 65, 90, 97, 122,
142
+ 46, 61, 95, 111, 48, 57, 65, 90,
143
+ 97, 122, 46, 61, 95, 110, 48, 57,
144
+ 65, 90, 97, 122, 46, 61, 95, 48,
145
+ 57, 65, 90, 97, 122, 34, 39, 44,
146
+ 60, 62, 48, 57, 34, 92, 34, 92,
147
+ 34, 39, 44, 60, 62, 39, 92, 39,
148
+ 92, 44, 60, 62, 95, 45, 46, 48,
149
+ 57, 65, 90, 97, 122, 46, 61, 95,
150
+ 115, 48, 57, 65, 90, 97, 122, 46,
151
+ 61, 95, 115, 48, 57, 65, 90, 97,
152
+ 122, 46, 61, 95, 101, 48, 57, 65,
153
+ 90, 97, 122, 46, 61, 95, 109, 48,
154
+ 57, 65, 90, 97, 122, 46, 61, 95,
155
+ 98, 48, 57, 65, 90, 97, 122, 46,
156
+ 61, 95, 108, 48, 57, 65, 90, 97,
157
+ 122, 46, 61, 95, 121, 48, 57, 65,
158
+ 90, 97, 122, 46, 61, 95, 101, 48,
159
+ 57, 65, 90, 97, 122, 46, 61, 95,
160
+ 110, 48, 57, 65, 90, 97, 122, 46,
161
+ 61, 95, 103, 48, 57, 65, 90, 97,
162
+ 122, 46, 61, 95, 116, 48, 57, 65,
163
+ 90, 97, 122, 46, 61, 95, 104, 48,
164
+ 57, 65, 90, 97, 122, 73, 79, 76,
165
+ 84, 69, 82, 82, 77, 65, 65, 84,
166
+ 75, 67, 111, 109, 109, 97, 110, 100,
167
+ 76, 105, 110, 101, 78, 70, 79, 111,
168
+ 110, 116, 105, 103, 0
169
+ ]
170
+
171
+ class << self
172
+ attr_accessor :_simple_lexer_single_lengths
173
+ private :_simple_lexer_single_lengths, :_simple_lexer_single_lengths=
174
+ end
175
+ self._simple_lexer_single_lengths = [
176
+ 0, 1, 1, 5, 1, 1, 1, 3,
177
+ 7, 3, 6, 2, 2, 5, 2, 2,
178
+ 0, 0, 0, 4, 0, 3, 3, 5,
179
+ 4, 4, 4, 4, 4, 4, 4, 4,
180
+ 4, 4, 4, 4, 4, 4, 4, 4,
181
+ 4, 3, 5, 2, 2, 0, 2, 2,
182
+ 0, 4, 4, 4, 3, 5, 2, 2,
183
+ 0, 2, 2, 0, 4, 3, 0, 5,
184
+ 4, 4, 4, 4, 4, 3, 6, 0,
185
+ 3, 3, 4, 4, 4, 4, 4, 4,
186
+ 3, 5, 2, 2, 5, 2, 2, 0,
187
+ 0, 4, 4, 4, 4, 4, 4, 4,
188
+ 4, 4, 4, 4, 4, 4, 2, 1,
189
+ 1, 1, 1, 1, 1, 1, 1, 1,
190
+ 1, 1, 1, 1, 1, 1, 1, 1,
191
+ 1, 1, 1, 1, 1, 1, 1, 1,
192
+ 1, 1, 1, 1, 0
193
+ ]
194
+
195
+ class << self
196
+ attr_accessor :_simple_lexer_range_lengths
197
+ private :_simple_lexer_range_lengths, :_simple_lexer_range_lengths=
198
+ end
199
+ self._simple_lexer_range_lengths = [
200
+ 0, 0, 0, 0, 0, 0, 0, 0,
201
+ 3, 3, 4, 0, 0, 0, 0, 0,
202
+ 0, 0, 1, 1, 1, 1, 0, 3,
203
+ 3, 3, 3, 3, 3, 3, 3, 3,
204
+ 3, 3, 3, 3, 3, 3, 3, 3,
205
+ 3, 3, 0, 0, 0, 0, 0, 0,
206
+ 0, 3, 3, 3, 3, 0, 0, 0,
207
+ 0, 0, 0, 0, 3, 3, 3, 3,
208
+ 3, 3, 3, 3, 3, 3, 1, 1,
209
+ 1, 0, 3, 3, 3, 3, 3, 3,
210
+ 3, 1, 0, 0, 0, 0, 0, 0,
211
+ 0, 4, 3, 3, 3, 3, 3, 3,
212
+ 3, 3, 3, 3, 3, 3, 0, 0,
213
+ 0, 0, 0, 0, 0, 0, 0, 0,
214
+ 0, 0, 0, 0, 0, 0, 0, 0,
215
+ 0, 0, 0, 0, 0, 0, 0, 0,
216
+ 0, 0, 0, 0, 0
217
+ ]
218
+
219
+ class << self
220
+ attr_accessor :_simple_lexer_index_offsets
221
+ private :_simple_lexer_index_offsets, :_simple_lexer_index_offsets=
222
+ end
223
+ self._simple_lexer_index_offsets = [
224
+ 0, 0, 2, 4, 10, 12, 14, 16,
225
+ 20, 31, 38, 49, 52, 55, 61, 64,
226
+ 67, 68, 69, 71, 77, 79, 84, 88,
227
+ 97, 105, 113, 121, 129, 137, 145, 153,
228
+ 161, 169, 177, 185, 193, 201, 209, 217,
229
+ 225, 233, 240, 246, 249, 252, 253, 256,
230
+ 259, 260, 268, 276, 284, 291, 297, 300,
231
+ 303, 304, 307, 310, 311, 319, 326, 330,
232
+ 339, 347, 355, 363, 371, 379, 386, 394,
233
+ 396, 401, 405, 413, 421, 429, 437, 445,
234
+ 453, 460, 467, 470, 473, 479, 482, 485,
235
+ 486, 487, 496, 504, 512, 520, 528, 536,
236
+ 544, 552, 560, 568, 576, 584, 592, 595,
237
+ 597, 599, 601, 603, 605, 607, 609, 611,
238
+ 613, 615, 617, 619, 621, 623, 625, 627,
239
+ 629, 631, 633, 635, 637, 639, 641, 643,
240
+ 645, 647, 649, 651, 653
241
+ ]
242
+
243
+ class << self
244
+ attr_accessor :_simple_lexer_trans_targs
245
+ private :_simple_lexer_trans_targs, :_simple_lexer_trans_targs=
246
+ end
247
+ self._simple_lexer_trans_targs = [
248
+ 2, 0, 3, 0, 4, 102, 110, 124,
249
+ 127, 0, 5, 0, 6, 0, 7, 0,
250
+ 8, 8, 132, 0, 24, 49, 60, 64,
251
+ 74, 90, 97, 9, 9, 9, 0, 9,
252
+ 10, 9, 9, 9, 9, 0, 11, 14,
253
+ 8, 22, 8, 132, 18, 23, 23, 23,
254
+ 0, 13, 17, 12, 13, 17, 12, 11,
255
+ 14, 8, 8, 132, 0, 13, 16, 15,
256
+ 13, 16, 15, 15, 12, 19, 0, 8,
257
+ 20, 8, 132, 19, 0, 21, 0, 8,
258
+ 8, 132, 21, 0, 8, 8, 132, 0,
259
+ 8, 23, 8, 132, 23, 23, 23, 23,
260
+ 0, 9, 10, 9, 25, 9, 9, 9,
261
+ 0, 9, 10, 9, 26, 9, 9, 9,
262
+ 0, 9, 10, 9, 27, 9, 9, 9,
263
+ 0, 9, 10, 9, 28, 9, 9, 9,
264
+ 0, 9, 10, 9, 29, 9, 9, 9,
265
+ 0, 9, 10, 9, 30, 9, 9, 9,
266
+ 0, 9, 10, 31, 9, 9, 9, 9,
267
+ 0, 9, 10, 9, 32, 9, 9, 9,
268
+ 0, 9, 10, 9, 33, 9, 9, 9,
269
+ 0, 9, 10, 9, 34, 9, 9, 9,
270
+ 0, 9, 10, 35, 9, 9, 9, 9,
271
+ 0, 9, 10, 9, 36, 9, 9, 9,
272
+ 0, 9, 10, 9, 37, 9, 9, 9,
273
+ 0, 9, 10, 9, 38, 9, 9, 9,
274
+ 0, 9, 10, 9, 39, 9, 9, 9,
275
+ 0, 9, 10, 9, 40, 9, 9, 9,
276
+ 0, 9, 10, 9, 41, 9, 9, 9,
277
+ 0, 9, 42, 9, 9, 9, 9, 0,
278
+ 43, 46, 8, 8, 132, 0, 42, 45,
279
+ 44, 42, 45, 44, 44, 42, 48, 47,
280
+ 42, 48, 47, 47, 9, 10, 9, 50,
281
+ 9, 9, 9, 0, 9, 10, 9, 51,
282
+ 9, 9, 9, 0, 9, 10, 9, 52,
283
+ 9, 9, 9, 0, 9, 53, 9, 9,
284
+ 9, 9, 0, 54, 57, 8, 8, 132,
285
+ 0, 53, 56, 55, 53, 56, 55, 55,
286
+ 53, 59, 58, 53, 59, 58, 58, 9,
287
+ 10, 61, 9, 9, 9, 9, 0, 9,
288
+ 62, 9, 9, 9, 9, 0, 63, 63,
289
+ 63, 0, 8, 63, 8, 132, 63, 63,
290
+ 63, 63, 0, 9, 10, 9, 65, 9,
291
+ 9, 9, 0, 9, 10, 9, 66, 9,
292
+ 9, 9, 0, 9, 10, 9, 67, 9,
293
+ 9, 9, 0, 9, 10, 9, 68, 9,
294
+ 9, 9, 0, 9, 10, 9, 69, 9,
295
+ 9, 9, 0, 9, 70, 9, 9, 9,
296
+ 9, 0, 71, 71, 73, 73, 73, 73,
297
+ 72, 0, 72, 0, 8, 8, 132, 72,
298
+ 0, 8, 8, 132, 0, 9, 10, 9,
299
+ 75, 9, 9, 9, 0, 9, 10, 9,
300
+ 76, 9, 9, 9, 0, 9, 10, 9,
301
+ 77, 9, 9, 9, 0, 9, 10, 9,
302
+ 78, 9, 9, 9, 0, 9, 10, 9,
303
+ 79, 9, 9, 9, 0, 9, 10, 9,
304
+ 80, 9, 9, 9, 0, 9, 81, 9,
305
+ 9, 9, 9, 0, 82, 85, 8, 8,
306
+ 132, 89, 0, 84, 88, 83, 84, 88,
307
+ 83, 82, 85, 8, 8, 132, 0, 84,
308
+ 87, 86, 84, 87, 86, 86, 83, 8,
309
+ 8, 132, 89, 89, 89, 89, 89, 0,
310
+ 9, 10, 9, 91, 9, 9, 9, 0,
311
+ 9, 10, 9, 92, 9, 9, 9, 0,
312
+ 9, 10, 9, 93, 9, 9, 9, 0,
313
+ 9, 10, 9, 94, 9, 9, 9, 0,
314
+ 9, 10, 9, 95, 9, 9, 9, 0,
315
+ 9, 10, 9, 96, 9, 9, 9, 0,
316
+ 9, 10, 9, 61, 9, 9, 9, 0,
317
+ 9, 10, 9, 98, 9, 9, 9, 0,
318
+ 9, 10, 9, 99, 9, 9, 9, 0,
319
+ 9, 10, 9, 100, 9, 9, 9, 0,
320
+ 9, 10, 9, 101, 9, 9, 9, 0,
321
+ 9, 10, 9, 69, 9, 9, 9, 0,
322
+ 103, 107, 0, 104, 0, 105, 0, 106,
323
+ 0, 6, 0, 108, 0, 109, 0, 5,
324
+ 0, 111, 0, 112, 0, 113, 0, 114,
325
+ 0, 115, 0, 116, 0, 117, 0, 118,
326
+ 0, 119, 0, 120, 0, 121, 0, 122,
327
+ 0, 123, 0, 6, 0, 125, 0, 126,
328
+ 0, 6, 0, 128, 0, 129, 0, 130,
329
+ 0, 131, 0, 6, 0, 0, 0
330
+ ]
331
+
332
+ class << self
333
+ attr_accessor :_simple_lexer_trans_actions
334
+ private :_simple_lexer_trans_actions, :_simple_lexer_trans_actions=
335
+ end
336
+ self._simple_lexer_trans_actions = [
337
+ 0, 0, 0, 0, 0, 0, 0, 0,
338
+ 0, 0, 0, 0, 0, 0, 0, 0,
339
+ 0, 0, 0, 0, 1, 1, 1, 1,
340
+ 1, 1, 1, 1, 1, 1, 47, 0,
341
+ 7, 0, 0, 0, 0, 11, 0, 0,
342
+ 9, 0, 9, 9, 0, 1, 1, 1,
343
+ 0, 13, 1, 1, 3, 0, 0, 0,
344
+ 0, 9, 9, 9, 0, 13, 1, 1,
345
+ 3, 0, 0, 0, 0, 0, 11, 9,
346
+ 0, 9, 9, 0, 0, 0, 11, 9,
347
+ 9, 9, 0, 0, 9, 9, 9, 0,
348
+ 16, 0, 16, 16, 0, 0, 0, 0,
349
+ 0, 0, 7, 0, 0, 0, 0, 0,
350
+ 40, 0, 7, 0, 0, 0, 0, 0,
351
+ 40, 0, 7, 0, 0, 0, 0, 0,
352
+ 40, 0, 7, 0, 0, 0, 0, 0,
353
+ 40, 0, 7, 0, 0, 0, 0, 0,
354
+ 40, 0, 7, 0, 0, 0, 0, 0,
355
+ 40, 0, 7, 0, 0, 0, 0, 0,
356
+ 40, 0, 7, 0, 0, 0, 0, 0,
357
+ 40, 0, 7, 0, 0, 0, 0, 0,
358
+ 40, 0, 7, 0, 0, 0, 0, 0,
359
+ 40, 0, 7, 0, 0, 0, 0, 0,
360
+ 40, 0, 7, 0, 0, 0, 0, 0,
361
+ 40, 0, 7, 0, 0, 0, 0, 0,
362
+ 40, 0, 7, 0, 0, 0, 0, 0,
363
+ 40, 0, 7, 0, 0, 0, 0, 0,
364
+ 40, 0, 7, 0, 0, 0, 0, 0,
365
+ 40, 0, 7, 0, 0, 0, 0, 0,
366
+ 40, 0, 5, 0, 0, 0, 0, 40,
367
+ 0, 0, 9, 9, 9, 0, 13, 1,
368
+ 1, 3, 0, 0, 0, 13, 1, 1,
369
+ 3, 0, 0, 0, 0, 7, 0, 0,
370
+ 0, 0, 0, 37, 0, 7, 0, 0,
371
+ 0, 0, 0, 37, 0, 7, 0, 0,
372
+ 0, 0, 0, 37, 0, 5, 0, 0,
373
+ 0, 0, 37, 0, 0, 34, 34, 34,
374
+ 0, 13, 1, 1, 3, 0, 0, 0,
375
+ 13, 1, 1, 3, 0, 0, 0, 0,
376
+ 7, 0, 0, 0, 0, 0, 25, 0,
377
+ 5, 0, 0, 0, 0, 25, 1, 1,
378
+ 1, 25, 43, 0, 43, 43, 0, 0,
379
+ 0, 0, 0, 0, 7, 0, 0, 0,
380
+ 0, 0, 31, 0, 7, 0, 0, 0,
381
+ 0, 0, 31, 0, 7, 0, 0, 0,
382
+ 0, 0, 31, 0, 7, 0, 0, 0,
383
+ 0, 0, 31, 0, 7, 0, 0, 0,
384
+ 0, 0, 31, 0, 5, 0, 0, 0,
385
+ 0, 31, 1, 1, 1, 1, 1, 1,
386
+ 1, 31, 0, 31, 22, 22, 22, 0,
387
+ 0, 22, 22, 22, 0, 0, 7, 0,
388
+ 0, 0, 0, 0, 28, 0, 7, 0,
389
+ 0, 0, 0, 0, 28, 0, 7, 0,
390
+ 0, 0, 0, 0, 28, 0, 7, 0,
391
+ 0, 0, 0, 0, 28, 0, 7, 0,
392
+ 0, 0, 0, 0, 28, 0, 7, 0,
393
+ 0, 0, 0, 0, 28, 0, 5, 0,
394
+ 0, 0, 0, 28, 0, 0, 9, 9,
395
+ 9, 1, 0, 13, 1, 1, 3, 0,
396
+ 0, 0, 0, 9, 9, 9, 0, 13,
397
+ 1, 1, 3, 0, 0, 0, 0, 19,
398
+ 19, 19, 0, 0, 0, 0, 0, 0,
399
+ 0, 7, 0, 0, 0, 0, 0, 25,
400
+ 0, 7, 0, 0, 0, 0, 0, 25,
401
+ 0, 7, 0, 0, 0, 0, 0, 25,
402
+ 0, 7, 0, 0, 0, 0, 0, 25,
403
+ 0, 7, 0, 0, 0, 0, 0, 25,
404
+ 0, 7, 0, 0, 0, 0, 0, 25,
405
+ 0, 7, 0, 0, 0, 0, 0, 25,
406
+ 0, 7, 0, 0, 0, 0, 0, 31,
407
+ 0, 7, 0, 0, 0, 0, 0, 31,
408
+ 0, 7, 0, 0, 0, 0, 0, 31,
409
+ 0, 7, 0, 0, 0, 0, 0, 31,
410
+ 0, 7, 0, 0, 0, 0, 0, 31,
411
+ 0, 0, 0, 0, 0, 0, 0, 0,
412
+ 0, 0, 0, 0, 0, 0, 0, 0,
413
+ 0, 0, 0, 0, 0, 0, 0, 0,
414
+ 0, 0, 0, 0, 0, 0, 0, 0,
415
+ 0, 0, 0, 0, 0, 0, 0, 0,
416
+ 0, 0, 0, 0, 0, 0, 0, 0,
417
+ 0, 0, 0, 0, 0, 0, 0, 0,
418
+ 0, 0, 0, 0, 0, 0, 0
419
+ ]
420
+
421
+ class << self
422
+ attr_accessor :_simple_lexer_eof_actions
423
+ private :_simple_lexer_eof_actions, :_simple_lexer_eof_actions=
424
+ end
425
+ self._simple_lexer_eof_actions = [
426
+ 0, 0, 0, 0, 0, 0, 0, 0,
427
+ 47, 11, 0, 11, 11, 0, 11, 11,
428
+ 11, 11, 11, 0, 11, 0, 0, 0,
429
+ 40, 40, 40, 40, 40, 40, 40, 40,
430
+ 40, 40, 40, 40, 40, 40, 40, 40,
431
+ 40, 40, 0, 40, 40, 40, 40, 40,
432
+ 40, 37, 37, 37, 37, 0, 37, 37,
433
+ 37, 37, 37, 37, 25, 25, 25, 0,
434
+ 31, 31, 31, 31, 31, 31, 31, 31,
435
+ 0, 0, 28, 28, 28, 28, 28, 28,
436
+ 28, 0, 28, 28, 0, 28, 28, 28,
437
+ 28, 0, 25, 25, 25, 25, 25, 25,
438
+ 25, 31, 31, 31, 31, 31, 0, 0,
439
+ 0, 0, 0, 0, 0, 0, 0, 0,
440
+ 0, 0, 0, 0, 0, 0, 0, 0,
441
+ 0, 0, 0, 0, 0, 0, 0, 0,
442
+ 0, 0, 0, 0, 0
443
+ ]
444
+
445
+ class << self
446
+ attr_accessor :simple_lexer_start
447
+ end
448
+ self.simple_lexer_start = 1;
449
+ class << self
450
+ attr_accessor :simple_lexer_first_final
451
+ end
452
+ self.simple_lexer_first_final = 132;
453
+ class << self
454
+ attr_accessor :simple_lexer_error
455
+ end
456
+ self.simple_lexer_error = 0;
457
+
458
+ class << self
459
+ attr_accessor :simple_lexer_en_main
460
+ end
461
+ self.simple_lexer_en_main = 1;
462
+
463
+
464
+ # line 79 "gen_vcfheaderline_parser.rl"
465
+ # %% this just fixes syntax highlighting...
466
+
467
+ def self.run_lexer(buf, options = {})
468
+ do_debug = (options[:debug] == true)
469
+ $stderr.print "---> ",buf,"\n" if do_debug
470
+ data = buf.unpack("c*") if(buf.is_a?(String))
471
+ eof = data.length
472
+ values = []
473
+ stack = []
474
+
475
+ emit = lambda { |type, data, ts, p|
476
+ # Print the type and text of the last read token
477
+ # p ts,p
478
+ $stderr.print "EMITTED: #{type}: #{data[ts...p].pack('c*')}\n" if do_debug
479
+ values << [type,data[ts...p].pack('c*')]
480
+ }
481
+
482
+ error_code = nil
483
+
484
+
485
+ # line 486 "gen_vcfheaderline_parser.rb"
486
+ begin
487
+ p ||= 0
488
+ pe ||= data.length
489
+ cs = simple_lexer_start
490
+ end
491
+
492
+ # line 99 "gen_vcfheaderline_parser.rl"
493
+
494
+ # line 495 "gen_vcfheaderline_parser.rb"
495
+ begin
496
+ _klen, _trans, _keys, _acts, _nacts = nil
497
+ _goto_level = 0
498
+ _resume = 10
499
+ _eof_trans = 15
500
+ _again = 20
501
+ _test_eof = 30
502
+ _out = 40
503
+ while true
504
+ _trigger_goto = false
505
+ if _goto_level <= 0
506
+ if p == pe
507
+ _goto_level = _test_eof
508
+ next
509
+ end
510
+ if cs == 0
511
+ _goto_level = _out
512
+ next
513
+ end
514
+ end
515
+ if _goto_level <= _resume
516
+ _keys = _simple_lexer_key_offsets[cs]
517
+ _trans = _simple_lexer_index_offsets[cs]
518
+ _klen = _simple_lexer_single_lengths[cs]
519
+ _break_match = false
520
+
521
+ begin
522
+ if _klen > 0
523
+ _lower = _keys
524
+ _upper = _keys + _klen - 1
525
+
526
+ loop do
527
+ break if _upper < _lower
528
+ _mid = _lower + ( (_upper - _lower) >> 1 )
529
+
530
+ if data[p].ord < _simple_lexer_trans_keys[_mid]
531
+ _upper = _mid - 1
532
+ elsif data[p].ord > _simple_lexer_trans_keys[_mid]
533
+ _lower = _mid + 1
534
+ else
535
+ _trans += (_mid - _keys)
536
+ _break_match = true
537
+ break
538
+ end
539
+ end # loop
540
+ break if _break_match
541
+ _keys += _klen
542
+ _trans += _klen
543
+ end
544
+ _klen = _simple_lexer_range_lengths[cs]
545
+ if _klen > 0
546
+ _lower = _keys
547
+ _upper = _keys + (_klen << 1) - 2
548
+ loop do
549
+ break if _upper < _lower
550
+ _mid = _lower + (((_upper-_lower) >> 1) & ~1)
551
+ if data[p].ord < _simple_lexer_trans_keys[_mid]
552
+ _upper = _mid - 2
553
+ elsif data[p].ord > _simple_lexer_trans_keys[_mid+1]
554
+ _lower = _mid + 2
555
+ else
556
+ _trans += ((_mid - _keys) >> 1)
557
+ _break_match = true
558
+ break
559
+ end
560
+ end # loop
561
+ break if _break_match
562
+ _trans += _klen
563
+ end
564
+ end while false
565
+ cs = _simple_lexer_trans_targs[_trans]
566
+ if _simple_lexer_trans_actions[_trans] != 0
567
+ _acts = _simple_lexer_trans_actions[_trans]
568
+ _nacts = _simple_lexer_actions[_acts]
569
+ _acts += 1
570
+ while _nacts > 0
571
+ _nacts -= 1
572
+ _acts += 1
573
+ case _simple_lexer_actions[_acts - 1]
574
+ when 0 then
575
+ # line 33 "gen_vcfheaderline_parser.rl"
576
+ begin
577
+ ts=p end
578
+ when 1 then
579
+ # line 34 "gen_vcfheaderline_parser.rl"
580
+ begin
581
+
582
+ emit.call(:value,data,ts,p)
583
+ end
584
+ when 2 then
585
+ # line 38 "gen_vcfheaderline_parser.rl"
586
+ begin
587
+
588
+ emit.call(:kw,data,ts,p)
589
+ end
590
+ when 3 then
591
+ # line 58 "gen_vcfheaderline_parser.rl"
592
+ begin
593
+ emit.call(:key_word,data,ts,p) end
594
+ when 4 then
595
+ # line 59 "gen_vcfheaderline_parser.rl"
596
+ begin
597
+ emit.call(:value,data,ts,p) end
598
+ when 5 then
599
+ # line 60 "gen_vcfheaderline_parser.rl"
600
+ begin
601
+ emit.call(:value,data,ts,p) end
602
+ when 6 then
603
+ # line 62 "gen_vcfheaderline_parser.rl"
604
+ begin
605
+ emit.call(:value,data,ts,p) end
606
+ when 7 then
607
+ # line 65 "gen_vcfheaderline_parser.rl"
608
+ begin
609
+ emit.call(:value,data,ts,p) end
610
+ when 8 then
611
+ # line 67 "gen_vcfheaderline_parser.rl"
612
+ begin
613
+ debug("ID FOUND") end
614
+ when 9 then
615
+ # line 67 "gen_vcfheaderline_parser.rl"
616
+ begin
617
+ error_code="Malformed ID" end
618
+ when 10 then
619
+ # line 68 "gen_vcfheaderline_parser.rl"
620
+ begin
621
+ error_code="Version" end
622
+ when 11 then
623
+ # line 69 "gen_vcfheaderline_parser.rl"
624
+ begin
625
+ error_code="Number" end
626
+ when 12 then
627
+ # line 70 "gen_vcfheaderline_parser.rl"
628
+ begin
629
+ debug("DATE FOUND") end
630
+ when 13 then
631
+ # line 70 "gen_vcfheaderline_parser.rl"
632
+ begin
633
+ error_code="Date" end
634
+ when 14 then
635
+ # line 71 "gen_vcfheaderline_parser.rl"
636
+ begin
637
+ error_code="GATK" end
638
+ when 15 then
639
+ # line 72 "gen_vcfheaderline_parser.rl"
640
+ begin
641
+ debug("KEY_VALUE found") end
642
+ when 16 then
643
+ # line 72 "gen_vcfheaderline_parser.rl"
644
+ begin
645
+ error_code="unknown key-value " end
646
+ # line 647 "gen_vcfheaderline_parser.rb"
647
+ end # action switch
648
+ end
649
+ end
650
+ if _trigger_goto
651
+ next
652
+ end
653
+ end
654
+ if _goto_level <= _again
655
+ if cs == 0
656
+ _goto_level = _out
657
+ next
658
+ end
659
+ p += 1
660
+ if p != pe
661
+ _goto_level = _resume
662
+ next
663
+ end
664
+ end
665
+ if _goto_level <= _test_eof
666
+ if p == eof
667
+ __acts = _simple_lexer_eof_actions[cs]
668
+ __nacts = _simple_lexer_actions[__acts]
669
+ __acts += 1
670
+ while __nacts > 0
671
+ __nacts -= 1
672
+ __acts += 1
673
+ case _simple_lexer_actions[__acts - 1]
674
+ when 9 then
675
+ # line 67 "gen_vcfheaderline_parser.rl"
676
+ begin
677
+ error_code="Malformed ID" end
678
+ when 10 then
679
+ # line 68 "gen_vcfheaderline_parser.rl"
680
+ begin
681
+ error_code="Version" end
682
+ when 11 then
683
+ # line 69 "gen_vcfheaderline_parser.rl"
684
+ begin
685
+ error_code="Number" end
686
+ when 13 then
687
+ # line 70 "gen_vcfheaderline_parser.rl"
688
+ begin
689
+ error_code="Date" end
690
+ when 14 then
691
+ # line 71 "gen_vcfheaderline_parser.rl"
692
+ begin
693
+ error_code="GATK" end
694
+ when 16 then
695
+ # line 72 "gen_vcfheaderline_parser.rl"
696
+ begin
697
+ error_code="unknown key-value " end
698
+ # line 699 "gen_vcfheaderline_parser.rb"
699
+ end # eof action switch
700
+ end
701
+ if _trigger_goto
702
+ next
703
+ end
704
+ end
705
+ end
706
+ if _goto_level <= _out
707
+ break
708
+ end
709
+ end
710
+ end
711
+
712
+ # line 100 "gen_vcfheaderline_parser.rl"
713
+
714
+ raise "ERROR: "+error_code+" in "+buf if error_code
715
+
716
+ begin
717
+ res = {}
718
+ # p values
719
+ values.each_slice(2) do | a,b |
720
+ $stderr.print '*',a,b if do_debug
721
+ keyword = a[1]
722
+ value = b[1]
723
+ value = value.to_i if ['length','Epoch'].index(keyword)
724
+ res[keyword] = value
725
+ # p h[:value] if h[:name]==:identifier or h[:name]==:value or h[:name]==:string
726
+ end
727
+ rescue
728
+ print "ERROR: "
729
+ p values
730
+ raise
731
+ end
732
+ $stderr.print(res,"\n") if do_debug
733
+ res
734
+ end
735
+ end
736
+ end
737
+ end
738
+
739
# Self-test: runs only when this file is executed directly, not when it is
# loaded as a library.
if $PROGRAM_NAME == __FILE__

  # A GATK header line with a very long quoted CommandLineOptions value.
  # Plain (non-squiggly) heredoc: its text and terminator must stay at
  # column 0, and the header is kept as a single physical line.
  gatkcommandline = <<LINE1
##GATKCommandLine=<ID=CombineVariants,Version=3.2-2-gec30cee,Date="Thu Oct 30 13:41:59 CET 2014",Epoch=1414672919266,CommandLineOptions="analysis_type=CombineVariants input_file=[] showFullBamList=false read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/hpc/cog_bioinf/GENOMES/Homo_sapiens.GRCh37.GATK.illumina/Homo_sapiens.GRCh37.GATK.illumina.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 refactor_NDN_cigar_string=false fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false variant_index_type=DYNAMIC_SEEK variant_index_parameter=-1 logging_level=INFO log_to_file=null help=false version=false variant=[(RodBindingCollection [(RodBinding name=variant source=/hpc/cog_bioinf/data/robert/testIAP/testSubsetExome/tmp/testSubsetExome.filtered_snps.vcf)]), (RodBindingCollection [(RodBinding name=variant2 source=/hpc/cog_bioinf/data/robert/testIAP/testSubsetExome/tmp/testSubsetExome.filtered_indels.vcf)])] out=org.broadinstitute.gatk.engine.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.gatk.engine.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.gatk.engine.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.gatk.engine.io.stubs.VariantContextWriterStub genotypemergeoption=UNSORTED filteredrecordsmergetype=KEEP_IF_ANY_UNFILTERED multipleallelesmergetype=BY_TYPE rod_priority_list=null printComplexMerges=false filteredAreUncalled=false minimalVCF=false excludeNonVariants=false setKey=set assumeIdenticalSamples=false minimumN=1 suppressCommandLineHeader=false mergeInfoWithMaxAC=false filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false">
LINE1

  # Smoke test only: run_lexer raises when it has recorded an error code, so
  # getting past this call means the GATK line was accepted. The returned hash
  # is deliberately not inspected here.
  BioVcf::VcfHeaderParser::RagelKeyValues.run_lexer(gatkcommandline.strip, debug: true)

  # Representative FILTER/FORMAT/INFO/contig header lines, one per row.
  # The heredoc processes backslash escapes, so \\" reaches the lexer as a
  # backslash followed by a double quote.
  lines = <<LINES
##FILTER=<ID=HaplotypeScoreHigh,Description="HaplotypeScore > 13.0">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Total read depth",Extra="Yes?">
##FORMAT=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
##INFO=<ID=PM,Number=0,Type=Flag,Description="Variant is Precious(Clinical,Pubmed Cited)">
##INFO=<ID=VP,Number=1,Type=String,Description="Variation Property. Documentation is at ftp://ftp.ncbi.nlm.nih.gov/snp/specs/dbSNP_BitField_latest.pdf",Source="dbsnp",Version="138">
##INFO=<ID=GENEINFO,Number=1,Type=String,Description="Pairs each of gene symbol:gene id. The gene symbol and id are delimited by a colon (:), and each pair is delimited by a vertical bar (|)">
##INFO=<ID=CLNHGVS,Number=.,Type=String,Description="Variant names from HGVS. The order of these variants corresponds to the order of the info in the other clinical INFO tags.">
##INFO=<ID=CLNHGVS1,Number=.,Type=String,Description="Variant names from \\"HGVS\\". The order of these 'variants' corresponds to the order of the info in the other clinical INFO tags.">
##contig=<ID=XXXY12>
##contig=<ID=Y,length=59373566>
LINES

  # Expected parse result, keyed by each line's ID. Only 'length' comes back
  # as an Integer; every other value stays a String.
  expected = {
    "HaplotypeScoreHigh" => { "ID" => "HaplotypeScoreHigh", "Description" => "HaplotypeScore > 13.0" },
    "GT" => { "ID" => "GT", "Number" => "1", "Type" => "String", "Description" => "Genotype" },
    "DP" => { "ID" => "DP", "Number" => "1", "Type" => "Integer", "Description" => "Total read depth",
              "Extra" => "Yes?" },
    "DP4" => { "ID" => "DP4", "Number" => "4", "Type" => "Integer",
               "Description" => "# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases" },
    "PM" => { "ID" => "PM", "Number" => "0", "Type" => "Flag",
              "Description" => "Variant is Precious(Clinical,Pubmed Cited)" },
    "VP" => { "ID" => "VP", "Number" => "1", "Type" => "String",
              "Description" => "Variation Property. Documentation is at ftp://ftp.ncbi.nlm.nih.gov/snp/specs/dbSNP_BitField_latest.pdf",
              "Source" => "dbsnp", "Version" => "138" },
    "GENEINFO" => { "ID" => "GENEINFO", "Number" => "1", "Type" => "String",
                    "Description" => "Pairs each of gene symbol:gene id. The gene symbol and id are delimited by a colon (:), and each pair is delimited by a vertical bar (|)" },
    "CLNHGVS" => { "ID" => "CLNHGVS", "Number" => ".", "Type" => "String",
                   "Description" => "Variant names from HGVS. The order of these variants corresponds to the order of the info in the other clinical INFO tags." },
    "CLNHGVS1" => { "ID" => "CLNHGVS1", "Number" => ".", "Type" => "String",
                    "Description" => "Variant names from \\\"HGVS\\\". The order of these 'variants' corresponds to the order of the info in the other clinical INFO tags." },
    "XXXY12" => { "ID" => "XXXY12" },
    "Y" => { "ID" => "Y", "length" => 59373566 }
  }

  # Parse every line and index the resulting key/value hash by its ID.
  h = {}
  lines.strip.split("\n").each do |line|
    result = BioVcf::VcfHeaderParser::RagelKeyValues.run_lexer(line, debug: true)
    h[result['ID']] = result
    p result
  end
  p h

  raise "ERROR: parsed header lines differ from the expected key/value hashes" if h != expected

end # test