bio-vcf 0.8.1 → 0.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. checksums.yaml +5 -5
  2. data/.travis.yml +1 -11
  3. data/Gemfile +2 -8
  4. data/LICENSE.txt +1 -1
  5. data/README.md +467 -129
  6. data/RELEASE_NOTES.md +27 -0
  7. data/RELEASE_NOTES.md~ +11 -0
  8. data/Rakefile +9 -42
  9. data/TAGS +115 -0
  10. data/VERSION +1 -1
  11. data/bin/bio-vcf +156 -108
  12. data/bio-vcf.gemspec +13 -75
  13. data/features/cli.feature +22 -4
  14. data/features/diff_count.feature +0 -1
  15. data/features/filter.feature +12 -0
  16. data/features/multisample.feature +12 -0
  17. data/features/somaticsniper.feature +2 -0
  18. data/features/step_definitions/cli-feature.rb +15 -6
  19. data/features/step_definitions/diff_count.rb +1 -1
  20. data/features/step_definitions/multisample.rb +19 -0
  21. data/features/step_definitions/somaticsniper.rb +9 -1
  22. data/features/step_definitions/vcf_header.rb +48 -0
  23. data/features/support/env.rb +1 -11
  24. data/features/vcf_header.feature +35 -0
  25. data/lib/bio-vcf.rb +1 -0
  26. data/lib/bio-vcf/pcows.rb +303 -0
  27. data/lib/bio-vcf/vcffile.rb +46 -0
  28. data/lib/bio-vcf/vcfgenotypefield.rb +19 -19
  29. data/lib/bio-vcf/vcfheader.rb +137 -5
  30. data/lib/bio-vcf/vcfheader_line.rb +778 -0
  31. data/lib/bio-vcf/vcfrecord.rb +56 -18
  32. data/lib/bio-vcf/vcfsample.rb +26 -2
  33. data/lib/regressiontest.rb +11 -0
  34. data/lib/regressiontest/cli_exec.rb +101 -0
  35. data/ragel/gen_vcfheaderline_parser.rl +165 -0
  36. data/ragel/generate.sh +8 -0
  37. data/template/vcf2json.erb +16 -16
  38. data/template/vcf2json_full_header.erb +22 -0
  39. data/template/vcf2json_use_meta.erb +41 -0
  40. data/test/data/input/empty.vcf +2 -0
  41. data/test/data/input/gatk_exome.vcf +237 -0
  42. data/test/data/input/gatk_wgs.vcf +1000 -0
  43. data/test/data/input/test.bed +632 -0
  44. data/test/data/regression/empty-stderr.new +12 -0
  45. data/test/data/regression/empty.new +2 -0
  46. data/test/data/regression/empty.ref +2 -0
  47. data/test/data/regression/eval_once-stderr.new +2 -0
  48. data/test/data/regression/eval_once.new +1 -0
  49. data/test/data/regression/eval_once.ref +1 -0
  50. data/test/data/regression/eval_r.info.dp-stderr.new +10 -0
  51. data/test/data/regression/eval_r.info.dp.new +150 -0
  52. data/test/data/regression/ifilter_s.dp-stderr.new +34 -0
  53. data/test/data/regression/ifilter_s.dp.new +31 -0
  54. data/test/data/regression/pass1-stderr.new +10 -0
  55. data/test/data/regression/pass1.new +88 -0
  56. data/test/data/regression/pass1.ref +88 -0
  57. data/test/data/regression/r.info.dp-stderr.new +4 -0
  58. data/test/data/regression/r.info.dp.new +114 -0
  59. data/test/data/regression/rewrite.info.sample-stderr.new +10 -0
  60. data/test/data/regression/rewrite.info.sample.new +150 -0
  61. data/test/data/regression/s.dp-stderr.new +18 -0
  62. data/test/data/regression/s.dp.new +145 -0
  63. data/test/data/regression/seval_s.dp-stderr.new +10 -0
  64. data/test/data/regression/seval_s.dp.new +36 -0
  65. data/test/data/regression/sfilter_seval_s.dp-stderr.new +18 -0
  66. data/test/data/regression/sfilter_seval_s.dp.new +31 -0
  67. data/test/data/regression/thread4-stderr.new +10 -0
  68. data/test/data/regression/thread4.new +150 -0
  69. data/test/data/regression/thread4_4-stderr.new +25 -0
  70. data/test/data/regression/thread4_4.new +130 -0
  71. data/test/data/regression/thread4_4_failed_filter-stderr.new +5 -0
  72. data/test/data/regression/thread4_4_failed_filter-stderr.ref +5 -2
  73. data/test/data/regression/thread4_4_failed_filter.new +110 -0
  74. data/test/data/regression/vcf2json_full_header-stderr.new +10 -0
  75. data/test/data/regression/vcf2json_full_header.new +225 -0
  76. data/test/data/regression/vcf2json_full_header.ref +225 -0
  77. data/test/data/regression/vcf2json_use_meta-stderr.new +10 -0
  78. data/test/data/regression/vcf2json_use_meta.new +4697 -0
  79. data/test/data/regression/vcf2json_use_meta.ref +4697 -0
  80. data/test/performance/metrics.md +18 -1
  81. data/test/stress/stress_test.sh +15 -0
  82. data/test/tmp/test.vcf +12469 -0
  83. metadata +63 -64
  84. data/Gemfile.lock +0 -81
@@ -0,0 +1,46 @@
1
module BioVcf
  # Iterable abstraction over a VCF file.
  #
  # The file can be plain text or compressed with gzip. Files compressed
  # with bgzip will not work, as this Ruby implementation of Zlib does
  # not allow concatenated gzip members.
  class VCFfile

    # file::  path of the VCF file to read
    # is_gz:: true when the file is gzip-compressed (the default)
    def initialize(file: "", is_gz: true)
      @file = file
      @is_gz = is_gz
    end

    # Split a single '##INFO=<ID=..,Number=..,Type=..,Description="..">'
    # header line into its parts.
    #
    # Returns a hash with the keys :id, :number, :type and :desc, or nil
    # when head_line is not such an INFO line.
    def parseVCFheader(head_line="")
      m = /##INFO=<ID=(.+),Number=(.+),Type=(.+),Description="(.+)">/.match(head_line)
      return nil unless m
      {:id=>m[1],:number=>m[2],:type=>m[3],:desc=>m[4]}
    end

    # Yields one BioVcf::VcfRecord per data line. Every line starting
    # with '#' is added to a BioVcf::VcfHeader that is shared by all
    # yielded records. Without a block an Enumerator is returned.
    def each
      return enum_for(:each) unless block_given?
      # File.open instead of Kernel#open: Kernel#open runs a command when
      # the path starts with '|'. Compressed data is read in binary mode.
      raw = File.open(@file, @is_gz ? 'rb' : 'r')
      io = nil
      begin
        io = @is_gz ? Zlib::GzipReader.new(raw) : raw
        header = BioVcf::VcfHeader.new
        io.each_line do |line|
          line = line.chomp
          if line.start_with?('#')
            header.add(line)
            next
          end
          fields = BioVcf::VcfLine.parse(line)
          yield BioVcf::VcfRecord.new(fields, header)
        end
      ensure
        # Closing the GzipReader also closes the file underneath it; fall
        # back to the raw file when the reader could not be created.
        (io || raw).close
      end
    end
  end
end
@@ -11,7 +11,7 @@ module BioVcf
11
11
  end
12
12
  end
13
13
 
14
- # Helper class for a list of (variant) values, such as A,G.
14
+ # Helper class for a list of (variant) values, such as A,G.
15
15
  # The [] function does the hard work. You can pass in an index (integer)
16
16
  # or nucleotide which translates to an index.
17
17
  # (see ./features for examples)
@@ -20,7 +20,7 @@ module BioVcf
20
20
  @alt = alt
21
21
  @list = list.split(/,/).map{|i| i.to_i}
22
22
  end
23
-
23
+
24
24
  def [] idx
25
25
  if idx.kind_of?(Integer)
26
26
  # return a value
@@ -67,7 +67,7 @@ module BioVcf
67
67
  @alt = alt
68
68
  @list = list.split(/,/).map{|i| i.to_i}
69
69
  end
70
-
70
+
71
71
  def [] idx
72
72
  if idx.kind_of?(Integer)
73
73
  @list[idx].to_i
@@ -87,15 +87,15 @@ module BioVcf
87
87
  end
88
88
 
89
89
  # Return the max value on the nucleotides in the list (typically rec.alt)
90
- def max
90
+ def max
91
91
  @list.reduce(0){ |memo,v| (v>memo ? v : memo) }
92
92
  end
93
93
 
94
- def min
94
+ def min
95
95
  @list.reduce(MAXINT){ |memo,v| (v<memo ? v : memo) }
96
96
  end
97
97
 
98
- def sum
98
+ def sum
99
99
  @list.reduce(0){ |memo,v| v+memo }
100
100
  end
101
101
  end
@@ -129,14 +129,14 @@ module BioVcf
129
129
  !empty?
130
130
  end
131
131
 
132
- def dp4
133
- ilist('DP4')
132
+ def dp4
133
+ ilist('DP4')
134
134
  end
135
- def ad
136
- ilist('AD')
135
+ def ad
136
+ ilist('AD')
137
137
  end
138
- def pl
139
- ilist('PL')
138
+ def pl
139
+ ilist('PL')
140
140
  end
141
141
 
142
142
  def bcount
@@ -178,11 +178,11 @@ module BioVcf
178
178
  else
179
179
  v = values[fetch(m.to_s.upcase)]
180
180
  return nil if VcfValue::empty?(v)
181
- v = v.to_i if v =~ /^\d+$/
182
- v = v.to_f if v =~ /^\d+\.\d+$/
181
+ return v.to_i if v =~ /^\d+$/
182
+ return v.to_f if v =~ /^\d+\.\d+$/
183
183
  v
184
184
  end
185
- end
185
+ end
186
186
 
187
187
  private
188
188
 
@@ -200,7 +200,7 @@ module BioVcf
200
200
  def ilist name
201
201
  v = fetch_value(name)
202
202
  return nil if not v
203
- v.split(',').map{|i| i.to_i}
203
+ v.split(',').map{|i| i.to_i}
204
204
  end
205
205
 
206
206
  end
@@ -222,11 +222,11 @@ module BioVcf
222
222
  @samples[name] ||= VcfGenotypeField.new(@fields[@sample_index[name]],@format,@header,@ref,@alt)
223
223
  rescue TypeError
224
224
  $stderr.print "Unknown field name <#{name}> in record, did you mean r.info.#{name}?\n"
225
- raise
225
+ raise
226
226
  end
227
227
  end
228
228
 
229
- def method_missing(m, *args, &block)
229
+ def method_missing(m, *args, &block)
230
230
  name = m.to_s
231
231
  if name =~ /\?$/
232
232
  # test for valid sample
@@ -234,7 +234,7 @@ module BioVcf
234
234
  else
235
235
  @samples[name] ||= VcfGenotypeField.new(@fields[@sample_index[name]],@format,@header,@ref,@alt)
236
236
  end
237
- end
237
+ end
238
238
 
239
239
  end
240
240
  end
@@ -1,3 +1,14 @@
1
+ # This module parses the VCF header. A header consists of lines
2
+ # containing fields. Most fields are of 'key=value' type and appear
3
+ # only once. These can be retrieved with the find_field method.
4
+ #
5
+ # INFO, FORMAT and contig fields are special as they appear multiple times
6
+ # and contain multiple key values (identified by an ID field).
7
+ # To retrieve these call the 'info', 'format' and 'contig' functions respectively,
8
+ # which return a hash on the contained ID.
9
+ #
10
+ # For the INFO and FORMAT fields a Ragel parser is used, mostly to
11
+ # deal with embedded quoted fields.
1
12
 
2
13
  module BioVcf
3
14
 
@@ -13,21 +24,30 @@ module BioVcf
13
24
  end
14
25
  nil
15
26
  end
27
+
28
# Lex one structured header line (e.g. an INFO or FORMAT line) with the
# Ragel key-value lexer and return whatever run_lexer returns.
# The debug argument is forwarded as the lexer's :debug option.
def VcfHeaderParser.parse_field(line, debug)
  lexer = BioVcf::VcfHeaderParser::RagelKeyValues
  lexer.run_lexer(line, debug: debug)
end
16
31
  end
17
32
 
18
33
  class VcfHeader
19
34
 
20
- attr_reader :lines
35
+ attr_reader :lines, :field
21
36
 
22
- def initialize
37
+ def initialize(debug = false)
38
+ @debug = debug
23
39
  @lines = []
40
+ @field = {}
41
+ @meta = nil
42
+ @cached_filter_index = {}
24
43
  end
25
44
 
45
+ # Add a new field to the header
26
46
  def add line
27
- @lines << line.strip
47
+ @lines += line.split(/\n/)
28
48
  end
29
49
 
30
- # Add a key value list to the header
50
+ # Push a special key value list to the header
31
51
  def tag h
32
52
  h2 = h.dup
33
53
  [:show_help,:skip_header,:verbose,:quiet,:debug].each { |key| h2.delete(key) }
@@ -82,6 +102,118 @@ module BioVcf
82
102
  @sample_index = index
83
103
  index
84
104
  end
85
- end
86
105
 
106
# Give a list of samples (by index and/or name) and return 0-based index
# values. A nil list selects whatever samples_index_array returns.
#
# The cache has to be able to handle multiple lists - that is why it is a
# hash, keyed on the list exactly as the caller passed it (nil included).
#
# Raises a RuntimeError for a sample name that is not found in samples.
def sample_subset_index list
  cached = @cached_filter_index[list]
  return cached if cached

  # Resolve the default into a separate variable, so the result is stored
  # under the same key that is used for the lookup above.
  selection = list || samples_index_array()
  indices = selection.map do |i|
    begin
      Integer(i)
    rescue StandardError
      # Not a number: treat it as a sample name.
      idx = samples.index(i)
      raise "Unknown sample name '#{i}'" if idx.nil?
      idx
    end
  end
  @cached_filter_index[list] = indices
end
133
+
134
# Look for a line in the header with the field name ('##name=value') and
# return the value, otherwise return nil. A value that was found is
# remembered in the field hash, so later lookups skip the scan.
def find_field name
  cached = field[name]
  return cached if cached

  # Escape the name so that it is matched literally; characters such as
  # '.', '?' or '(' would otherwise be read as regex syntax.
  pattern = Regexp.new('##' + Regexp.escape(name.to_s) + '=(.*)')
  @lines.each do |line|
    m = pattern.match(line)
    next unless m
    return field[name] = m[1]
  end
  nil
end
148
+
149
# Look for all the lines that match the field name ('##name=<...>') and
# return a hash of hashes, keyed on the 'ID' of each parsed line. An empty
# hash is returned when there are no matches.
def find_fields name
  # Escape the name so that it is matched literally.
  pattern = Regexp.new('##' + Regexp.escape(name.to_s) + '=<(.*)>')
  res = {}
  @lines.each do |line|
    next unless line =~ pattern
    # The parser receives the full line, not just the bracketed part.
    parsed = VcfHeaderParser.parse_field(line, @debug)
    res[parsed['ID']] = parsed
  end
  res
end
167
+
168
# All '##FORMAT=<...>' header lines, as returned by find_fields.
def format
  find_fields 'FORMAT'
end
171
+
172
# All '##FILTER=<...>' header lines, as returned by find_fields.
def filter
  find_fields 'FILTER'
end
175
+
176
# All '##contig=<...>' header lines, as returned by find_fields.
def contig
  find_fields 'contig'
end
179
+
180
# All '##INFO=<...>' header lines, as returned by find_fields.
def info
  find_fields 'INFO'
end
183
+
184
# All '##GATKCommandLine=<...>' header lines, as returned by find_fields.
def gatkcommandline
  find_fields 'GATKCommandLine'
end
187
+
188
# Build (once) and return a hash describing the whole header.
#
# Plain '##key=value' lines are stored as key => value. The INFO, FORMAT,
# FILTER, contig and GATKCommandLine entries instead hold the hashes
# returned by the accessor methods of the same (lower-cased) name.
# The result is kept in @meta and returned as-is on later calls.
def meta
  return @meta if @meta

  structured = %w[INFO FORMAT FILTER contig GATKCommandLine]
  collected = {}
  # Reserve the structured keys first so they lead the hash.
  structured.each { |section| collected[section] = {} }

  @lines.each do |line|
    key, value = line.scan(/##(.*?)=(.*)/).first
    next if key.nil? || structured.include?(key)
    collected[key] = value
  end

  collected['INFO'] = info()
  collected['FORMAT'] = format()
  collected['FILTER'] = filter()
  collected['contig'] = contig()
  collected['GATKCommandLine'] = gatkcommandline()
  @meta = collected # cache values
end
210
+
211
# Treat an unknown method call as a header query: header.fileformat
# returns the value of the '##fileformat=' line.
#
# Raises a RuntimeError when no header line carries that name.
def method_missing(m, *args, &block)
  name = m.to_s
  value = find_field(name)
  return value if value
  raise "Unknown VCF header query '#{name}'"
end

# Keep respond_to? in line with method_missing. This also lets Ruby's
# implicit conversion probes (to_ary, to_str, ...) be answered here
# instead of reaching method_missing and raising.
def respond_to_missing?(m, include_private = false)
  find_field(m.to_s) ? true : super
end
217
+
218
+ end
87
219
  end
@@ -0,0 +1,778 @@
1
+
2
+ # line 1 "gen_vcfheaderline_parser.rl"
3
+ # Ragel lexer for VCF-header
4
+ #
5
+ # This is a compact parser/lexer for the VCF header format. Bio-vcf
6
+ # uses the parser to generate meta information that can be output to
7
+ # (for example) JSON format. The advantage of using ragel as a state
8
+ # engine is that it allows for easy parsing of key-value pairs with
9
+ # syntax checking and, for example, escaped quotes in quoted string
10
+ # values. This ragel parser/lexer generates valid Ruby; it should be
11
+ # fairly trivial to generate python/C/JAVA instead. Note that this
12
+ # edition validates ID and Number fields only. Other fields are
13
+ # dumped 'AS IS'.
14
+ #
15
+ # Note the .rb version is generated from ./ragel/gen_vcfheaderline_parser.rl
16
+ #
17
+ # by Pjotr Prins (c) 2014/2015
18
+
19
+ module BioVcf
20
+
21
+ module VcfHeaderParser
22
+
23
+ module RagelKeyValues
24
+
25
# Debug hook called from the lexer actions. Tracing is switched off, so
# this does nothing and returns nil. To trace, print the message:
#   $stderr.print "DEBUG: ",msg,"\n"
def self.debug(msg)
end
29
+
30
+ =begin
31
+
32
+ # line 75 "gen_vcfheaderline_parser.rl"
33
+
34
+ =end
35
+
36
+
37
+ # line 38 "gen_vcfheaderline_parser.rb"
38
+ class << self
39
+ attr_accessor :_simple_lexer_actions
40
+ private :_simple_lexer_actions, :_simple_lexer_actions=
41
+ end
42
+ self._simple_lexer_actions = [
43
+ 0, 1, 0, 1, 1, 1, 2, 1,
44
+ 3, 1, 15, 1, 16, 2, 0, 1,
45
+ 2, 4, 15, 2, 6, 15, 2, 7,
46
+ 15, 2, 9, 16, 2, 10, 16, 2,
47
+ 11, 16, 2, 12, 15, 2, 13, 16,
48
+ 2, 14, 16, 3, 5, 8, 15, 6,
49
+ 9, 10, 13, 11, 14, 16
50
+ ]
51
+
52
+ class << self
53
+ attr_accessor :_simple_lexer_key_offsets
54
+ private :_simple_lexer_key_offsets, :_simple_lexer_key_offsets=
55
+ end
56
+ self._simple_lexer_key_offsets = [
57
+ 0, 0, 1, 2, 7, 8, 9, 10,
58
+ 13, 26, 35, 49, 51, 53, 58, 60,
59
+ 62, 62, 62, 64, 70, 72, 77, 80,
60
+ 91, 101, 111, 121, 131, 141, 151, 161,
61
+ 171, 181, 191, 201, 211, 221, 231, 241,
62
+ 251, 261, 270, 275, 277, 279, 279, 281,
63
+ 283, 283, 293, 303, 313, 322, 327, 329,
64
+ 331, 331, 333, 335, 335, 345, 354, 360,
65
+ 371, 381, 391, 401, 411, 421, 430, 438,
66
+ 440, 445, 448, 458, 468, 478, 488, 498,
67
+ 508, 517, 524, 526, 528, 533, 535, 537,
68
+ 537, 537, 549, 559, 569, 579, 589, 599,
69
+ 609, 619, 629, 639, 649, 659, 669, 671,
70
+ 672, 673, 674, 675, 676, 677, 678, 679,
71
+ 680, 681, 682, 683, 684, 685, 686, 687,
72
+ 688, 689, 690, 691, 692, 693, 694, 695,
73
+ 696, 697, 698, 699, 700
74
+ ]
75
+
76
+ class << self
77
+ attr_accessor :_simple_lexer_trans_keys
78
+ private :_simple_lexer_trans_keys, :_simple_lexer_trans_keys=
79
+ end
80
+ self._simple_lexer_trans_keys = [
81
+ 35, 35, 65, 70, 71, 73, 99, 76,
82
+ 84, 61, 44, 60, 62, 67, 68, 73,
83
+ 78, 86, 97, 108, 48, 57, 65, 90,
84
+ 98, 122, 46, 61, 95, 48, 57, 65,
85
+ 90, 97, 122, 34, 39, 44, 46, 60,
86
+ 62, 43, 45, 48, 57, 65, 90, 97,
87
+ 122, 34, 92, 34, 92, 34, 39, 44,
88
+ 60, 62, 39, 92, 39, 92, 48, 57,
89
+ 44, 46, 60, 62, 48, 57, 48, 57,
90
+ 44, 60, 62, 48, 57, 44, 60, 62,
91
+ 44, 46, 60, 62, 95, 48, 57, 65,
92
+ 90, 97, 122, 46, 61, 95, 111, 48,
93
+ 57, 65, 90, 97, 122, 46, 61, 95,
94
+ 109, 48, 57, 65, 90, 97, 122, 46,
95
+ 61, 95, 109, 48, 57, 65, 90, 97,
96
+ 122, 46, 61, 95, 97, 48, 57, 65,
97
+ 90, 98, 122, 46, 61, 95, 110, 48,
98
+ 57, 65, 90, 97, 122, 46, 61, 95,
99
+ 100, 48, 57, 65, 90, 97, 122, 46,
100
+ 61, 76, 95, 48, 57, 65, 90, 97,
101
+ 122, 46, 61, 95, 105, 48, 57, 65,
102
+ 90, 97, 122, 46, 61, 95, 110, 48,
103
+ 57, 65, 90, 97, 122, 46, 61, 95,
104
+ 101, 48, 57, 65, 90, 97, 122, 46,
105
+ 61, 79, 95, 48, 57, 65, 90, 97,
106
+ 122, 46, 61, 95, 112, 48, 57, 65,
107
+ 90, 97, 122, 46, 61, 95, 116, 48,
108
+ 57, 65, 90, 97, 122, 46, 61, 95,
109
+ 105, 48, 57, 65, 90, 97, 122, 46,
110
+ 61, 95, 111, 48, 57, 65, 90, 97,
111
+ 122, 46, 61, 95, 110, 48, 57, 65,
112
+ 90, 97, 122, 46, 61, 95, 115, 48,
113
+ 57, 65, 90, 97, 122, 46, 61, 95,
114
+ 48, 57, 65, 90, 97, 122, 34, 39,
115
+ 44, 60, 62, 34, 92, 34, 92, 39,
116
+ 92, 39, 92, 46, 61, 95, 97, 48,
117
+ 57, 65, 90, 98, 122, 46, 61, 95,
118
+ 116, 48, 57, 65, 90, 97, 122, 46,
119
+ 61, 95, 101, 48, 57, 65, 90, 97,
120
+ 122, 46, 61, 95, 48, 57, 65, 90,
121
+ 97, 122, 34, 39, 44, 60, 62, 34,
122
+ 92, 34, 92, 39, 92, 39, 92, 46,
123
+ 61, 68, 95, 48, 57, 65, 90, 97,
124
+ 122, 46, 61, 95, 48, 57, 65, 90,
125
+ 97, 122, 48, 57, 65, 90, 97, 122,
126
+ 44, 46, 60, 62, 95, 48, 57, 65,
127
+ 90, 97, 122, 46, 61, 95, 117, 48,
128
+ 57, 65, 90, 97, 122, 46, 61, 95,
129
+ 109, 48, 57, 65, 90, 97, 122, 46,
130
+ 61, 95, 98, 48, 57, 65, 90, 97,
131
+ 122, 46, 61, 95, 101, 48, 57, 65,
132
+ 90, 97, 122, 46, 61, 95, 114, 48,
133
+ 57, 65, 90, 97, 122, 46, 61, 95,
134
+ 48, 57, 65, 90, 97, 122, 43, 45,
135
+ 46, 65, 71, 82, 48, 57, 48, 57,
136
+ 44, 60, 62, 48, 57, 44, 60, 62,
137
+ 46, 61, 95, 101, 48, 57, 65, 90,
138
+ 97, 122, 46, 61, 95, 114, 48, 57,
139
+ 65, 90, 97, 122, 46, 61, 95, 115,
140
+ 48, 57, 65, 90, 97, 122, 46, 61,
141
+ 95, 105, 48, 57, 65, 90, 97, 122,
142
+ 46, 61, 95, 111, 48, 57, 65, 90,
143
+ 97, 122, 46, 61, 95, 110, 48, 57,
144
+ 65, 90, 97, 122, 46, 61, 95, 48,
145
+ 57, 65, 90, 97, 122, 34, 39, 44,
146
+ 60, 62, 48, 57, 34, 92, 34, 92,
147
+ 34, 39, 44, 60, 62, 39, 92, 39,
148
+ 92, 44, 60, 62, 95, 45, 46, 48,
149
+ 57, 65, 90, 97, 122, 46, 61, 95,
150
+ 115, 48, 57, 65, 90, 97, 122, 46,
151
+ 61, 95, 115, 48, 57, 65, 90, 97,
152
+ 122, 46, 61, 95, 101, 48, 57, 65,
153
+ 90, 97, 122, 46, 61, 95, 109, 48,
154
+ 57, 65, 90, 97, 122, 46, 61, 95,
155
+ 98, 48, 57, 65, 90, 97, 122, 46,
156
+ 61, 95, 108, 48, 57, 65, 90, 97,
157
+ 122, 46, 61, 95, 121, 48, 57, 65,
158
+ 90, 97, 122, 46, 61, 95, 101, 48,
159
+ 57, 65, 90, 97, 122, 46, 61, 95,
160
+ 110, 48, 57, 65, 90, 97, 122, 46,
161
+ 61, 95, 103, 48, 57, 65, 90, 97,
162
+ 122, 46, 61, 95, 116, 48, 57, 65,
163
+ 90, 97, 122, 46, 61, 95, 104, 48,
164
+ 57, 65, 90, 97, 122, 73, 79, 76,
165
+ 84, 69, 82, 82, 77, 65, 65, 84,
166
+ 75, 67, 111, 109, 109, 97, 110, 100,
167
+ 76, 105, 110, 101, 78, 70, 79, 111,
168
+ 110, 116, 105, 103, 0
169
+ ]
170
+
171
+ class << self
172
+ attr_accessor :_simple_lexer_single_lengths
173
+ private :_simple_lexer_single_lengths, :_simple_lexer_single_lengths=
174
+ end
175
+ self._simple_lexer_single_lengths = [
176
+ 0, 1, 1, 5, 1, 1, 1, 3,
177
+ 7, 3, 6, 2, 2, 5, 2, 2,
178
+ 0, 0, 0, 4, 0, 3, 3, 5,
179
+ 4, 4, 4, 4, 4, 4, 4, 4,
180
+ 4, 4, 4, 4, 4, 4, 4, 4,
181
+ 4, 3, 5, 2, 2, 0, 2, 2,
182
+ 0, 4, 4, 4, 3, 5, 2, 2,
183
+ 0, 2, 2, 0, 4, 3, 0, 5,
184
+ 4, 4, 4, 4, 4, 3, 6, 0,
185
+ 3, 3, 4, 4, 4, 4, 4, 4,
186
+ 3, 5, 2, 2, 5, 2, 2, 0,
187
+ 0, 4, 4, 4, 4, 4, 4, 4,
188
+ 4, 4, 4, 4, 4, 4, 2, 1,
189
+ 1, 1, 1, 1, 1, 1, 1, 1,
190
+ 1, 1, 1, 1, 1, 1, 1, 1,
191
+ 1, 1, 1, 1, 1, 1, 1, 1,
192
+ 1, 1, 1, 1, 0
193
+ ]
194
+
195
+ class << self
196
+ attr_accessor :_simple_lexer_range_lengths
197
+ private :_simple_lexer_range_lengths, :_simple_lexer_range_lengths=
198
+ end
199
+ self._simple_lexer_range_lengths = [
200
+ 0, 0, 0, 0, 0, 0, 0, 0,
201
+ 3, 3, 4, 0, 0, 0, 0, 0,
202
+ 0, 0, 1, 1, 1, 1, 0, 3,
203
+ 3, 3, 3, 3, 3, 3, 3, 3,
204
+ 3, 3, 3, 3, 3, 3, 3, 3,
205
+ 3, 3, 0, 0, 0, 0, 0, 0,
206
+ 0, 3, 3, 3, 3, 0, 0, 0,
207
+ 0, 0, 0, 0, 3, 3, 3, 3,
208
+ 3, 3, 3, 3, 3, 3, 1, 1,
209
+ 1, 0, 3, 3, 3, 3, 3, 3,
210
+ 3, 1, 0, 0, 0, 0, 0, 0,
211
+ 0, 4, 3, 3, 3, 3, 3, 3,
212
+ 3, 3, 3, 3, 3, 3, 0, 0,
213
+ 0, 0, 0, 0, 0, 0, 0, 0,
214
+ 0, 0, 0, 0, 0, 0, 0, 0,
215
+ 0, 0, 0, 0, 0, 0, 0, 0,
216
+ 0, 0, 0, 0, 0
217
+ ]
218
+
219
+ class << self
220
+ attr_accessor :_simple_lexer_index_offsets
221
+ private :_simple_lexer_index_offsets, :_simple_lexer_index_offsets=
222
+ end
223
+ self._simple_lexer_index_offsets = [
224
+ 0, 0, 2, 4, 10, 12, 14, 16,
225
+ 20, 31, 38, 49, 52, 55, 61, 64,
226
+ 67, 68, 69, 71, 77, 79, 84, 88,
227
+ 97, 105, 113, 121, 129, 137, 145, 153,
228
+ 161, 169, 177, 185, 193, 201, 209, 217,
229
+ 225, 233, 240, 246, 249, 252, 253, 256,
230
+ 259, 260, 268, 276, 284, 291, 297, 300,
231
+ 303, 304, 307, 310, 311, 319, 326, 330,
232
+ 339, 347, 355, 363, 371, 379, 386, 394,
233
+ 396, 401, 405, 413, 421, 429, 437, 445,
234
+ 453, 460, 467, 470, 473, 479, 482, 485,
235
+ 486, 487, 496, 504, 512, 520, 528, 536,
236
+ 544, 552, 560, 568, 576, 584, 592, 595,
237
+ 597, 599, 601, 603, 605, 607, 609, 611,
238
+ 613, 615, 617, 619, 621, 623, 625, 627,
239
+ 629, 631, 633, 635, 637, 639, 641, 643,
240
+ 645, 647, 649, 651, 653
241
+ ]
242
+
243
+ class << self
244
+ attr_accessor :_simple_lexer_trans_targs
245
+ private :_simple_lexer_trans_targs, :_simple_lexer_trans_targs=
246
+ end
247
+ self._simple_lexer_trans_targs = [
248
+ 2, 0, 3, 0, 4, 102, 110, 124,
249
+ 127, 0, 5, 0, 6, 0, 7, 0,
250
+ 8, 8, 132, 0, 24, 49, 60, 64,
251
+ 74, 90, 97, 9, 9, 9, 0, 9,
252
+ 10, 9, 9, 9, 9, 0, 11, 14,
253
+ 8, 22, 8, 132, 18, 23, 23, 23,
254
+ 0, 13, 17, 12, 13, 17, 12, 11,
255
+ 14, 8, 8, 132, 0, 13, 16, 15,
256
+ 13, 16, 15, 15, 12, 19, 0, 8,
257
+ 20, 8, 132, 19, 0, 21, 0, 8,
258
+ 8, 132, 21, 0, 8, 8, 132, 0,
259
+ 8, 23, 8, 132, 23, 23, 23, 23,
260
+ 0, 9, 10, 9, 25, 9, 9, 9,
261
+ 0, 9, 10, 9, 26, 9, 9, 9,
262
+ 0, 9, 10, 9, 27, 9, 9, 9,
263
+ 0, 9, 10, 9, 28, 9, 9, 9,
264
+ 0, 9, 10, 9, 29, 9, 9, 9,
265
+ 0, 9, 10, 9, 30, 9, 9, 9,
266
+ 0, 9, 10, 31, 9, 9, 9, 9,
267
+ 0, 9, 10, 9, 32, 9, 9, 9,
268
+ 0, 9, 10, 9, 33, 9, 9, 9,
269
+ 0, 9, 10, 9, 34, 9, 9, 9,
270
+ 0, 9, 10, 35, 9, 9, 9, 9,
271
+ 0, 9, 10, 9, 36, 9, 9, 9,
272
+ 0, 9, 10, 9, 37, 9, 9, 9,
273
+ 0, 9, 10, 9, 38, 9, 9, 9,
274
+ 0, 9, 10, 9, 39, 9, 9, 9,
275
+ 0, 9, 10, 9, 40, 9, 9, 9,
276
+ 0, 9, 10, 9, 41, 9, 9, 9,
277
+ 0, 9, 42, 9, 9, 9, 9, 0,
278
+ 43, 46, 8, 8, 132, 0, 42, 45,
279
+ 44, 42, 45, 44, 44, 42, 48, 47,
280
+ 42, 48, 47, 47, 9, 10, 9, 50,
281
+ 9, 9, 9, 0, 9, 10, 9, 51,
282
+ 9, 9, 9, 0, 9, 10, 9, 52,
283
+ 9, 9, 9, 0, 9, 53, 9, 9,
284
+ 9, 9, 0, 54, 57, 8, 8, 132,
285
+ 0, 53, 56, 55, 53, 56, 55, 55,
286
+ 53, 59, 58, 53, 59, 58, 58, 9,
287
+ 10, 61, 9, 9, 9, 9, 0, 9,
288
+ 62, 9, 9, 9, 9, 0, 63, 63,
289
+ 63, 0, 8, 63, 8, 132, 63, 63,
290
+ 63, 63, 0, 9, 10, 9, 65, 9,
291
+ 9, 9, 0, 9, 10, 9, 66, 9,
292
+ 9, 9, 0, 9, 10, 9, 67, 9,
293
+ 9, 9, 0, 9, 10, 9, 68, 9,
294
+ 9, 9, 0, 9, 10, 9, 69, 9,
295
+ 9, 9, 0, 9, 70, 9, 9, 9,
296
+ 9, 0, 71, 71, 73, 73, 73, 73,
297
+ 72, 0, 72, 0, 8, 8, 132, 72,
298
+ 0, 8, 8, 132, 0, 9, 10, 9,
299
+ 75, 9, 9, 9, 0, 9, 10, 9,
300
+ 76, 9, 9, 9, 0, 9, 10, 9,
301
+ 77, 9, 9, 9, 0, 9, 10, 9,
302
+ 78, 9, 9, 9, 0, 9, 10, 9,
303
+ 79, 9, 9, 9, 0, 9, 10, 9,
304
+ 80, 9, 9, 9, 0, 9, 81, 9,
305
+ 9, 9, 9, 0, 82, 85, 8, 8,
306
+ 132, 89, 0, 84, 88, 83, 84, 88,
307
+ 83, 82, 85, 8, 8, 132, 0, 84,
308
+ 87, 86, 84, 87, 86, 86, 83, 8,
309
+ 8, 132, 89, 89, 89, 89, 89, 0,
310
+ 9, 10, 9, 91, 9, 9, 9, 0,
311
+ 9, 10, 9, 92, 9, 9, 9, 0,
312
+ 9, 10, 9, 93, 9, 9, 9, 0,
313
+ 9, 10, 9, 94, 9, 9, 9, 0,
314
+ 9, 10, 9, 95, 9, 9, 9, 0,
315
+ 9, 10, 9, 96, 9, 9, 9, 0,
316
+ 9, 10, 9, 61, 9, 9, 9, 0,
317
+ 9, 10, 9, 98, 9, 9, 9, 0,
318
+ 9, 10, 9, 99, 9, 9, 9, 0,
319
+ 9, 10, 9, 100, 9, 9, 9, 0,
320
+ 9, 10, 9, 101, 9, 9, 9, 0,
321
+ 9, 10, 9, 69, 9, 9, 9, 0,
322
+ 103, 107, 0, 104, 0, 105, 0, 106,
323
+ 0, 6, 0, 108, 0, 109, 0, 5,
324
+ 0, 111, 0, 112, 0, 113, 0, 114,
325
+ 0, 115, 0, 116, 0, 117, 0, 118,
326
+ 0, 119, 0, 120, 0, 121, 0, 122,
327
+ 0, 123, 0, 6, 0, 125, 0, 126,
328
+ 0, 6, 0, 128, 0, 129, 0, 130,
329
+ 0, 131, 0, 6, 0, 0, 0
330
+ ]
331
+
332
+ class << self
333
+ attr_accessor :_simple_lexer_trans_actions
334
+ private :_simple_lexer_trans_actions, :_simple_lexer_trans_actions=
335
+ end
336
+ self._simple_lexer_trans_actions = [
337
+ 0, 0, 0, 0, 0, 0, 0, 0,
338
+ 0, 0, 0, 0, 0, 0, 0, 0,
339
+ 0, 0, 0, 0, 1, 1, 1, 1,
340
+ 1, 1, 1, 1, 1, 1, 47, 0,
341
+ 7, 0, 0, 0, 0, 11, 0, 0,
342
+ 9, 0, 9, 9, 0, 1, 1, 1,
343
+ 0, 13, 1, 1, 3, 0, 0, 0,
344
+ 0, 9, 9, 9, 0, 13, 1, 1,
345
+ 3, 0, 0, 0, 0, 0, 11, 9,
346
+ 0, 9, 9, 0, 0, 0, 11, 9,
347
+ 9, 9, 0, 0, 9, 9, 9, 0,
348
+ 16, 0, 16, 16, 0, 0, 0, 0,
349
+ 0, 0, 7, 0, 0, 0, 0, 0,
350
+ 40, 0, 7, 0, 0, 0, 0, 0,
351
+ 40, 0, 7, 0, 0, 0, 0, 0,
352
+ 40, 0, 7, 0, 0, 0, 0, 0,
353
+ 40, 0, 7, 0, 0, 0, 0, 0,
354
+ 40, 0, 7, 0, 0, 0, 0, 0,
355
+ 40, 0, 7, 0, 0, 0, 0, 0,
356
+ 40, 0, 7, 0, 0, 0, 0, 0,
357
+ 40, 0, 7, 0, 0, 0, 0, 0,
358
+ 40, 0, 7, 0, 0, 0, 0, 0,
359
+ 40, 0, 7, 0, 0, 0, 0, 0,
360
+ 40, 0, 7, 0, 0, 0, 0, 0,
361
+ 40, 0, 7, 0, 0, 0, 0, 0,
362
+ 40, 0, 7, 0, 0, 0, 0, 0,
363
+ 40, 0, 7, 0, 0, 0, 0, 0,
364
+ 40, 0, 7, 0, 0, 0, 0, 0,
365
+ 40, 0, 7, 0, 0, 0, 0, 0,
366
+ 40, 0, 5, 0, 0, 0, 0, 40,
367
+ 0, 0, 9, 9, 9, 0, 13, 1,
368
+ 1, 3, 0, 0, 0, 13, 1, 1,
369
+ 3, 0, 0, 0, 0, 7, 0, 0,
370
+ 0, 0, 0, 37, 0, 7, 0, 0,
371
+ 0, 0, 0, 37, 0, 7, 0, 0,
372
+ 0, 0, 0, 37, 0, 5, 0, 0,
373
+ 0, 0, 37, 0, 0, 34, 34, 34,
374
+ 0, 13, 1, 1, 3, 0, 0, 0,
375
+ 13, 1, 1, 3, 0, 0, 0, 0,
376
+ 7, 0, 0, 0, 0, 0, 25, 0,
377
+ 5, 0, 0, 0, 0, 25, 1, 1,
378
+ 1, 25, 43, 0, 43, 43, 0, 0,
379
+ 0, 0, 0, 0, 7, 0, 0, 0,
380
+ 0, 0, 31, 0, 7, 0, 0, 0,
381
+ 0, 0, 31, 0, 7, 0, 0, 0,
382
+ 0, 0, 31, 0, 7, 0, 0, 0,
383
+ 0, 0, 31, 0, 7, 0, 0, 0,
384
+ 0, 0, 31, 0, 5, 0, 0, 0,
385
+ 0, 31, 1, 1, 1, 1, 1, 1,
386
+ 1, 31, 0, 31, 22, 22, 22, 0,
387
+ 0, 22, 22, 22, 0, 0, 7, 0,
388
+ 0, 0, 0, 0, 28, 0, 7, 0,
389
+ 0, 0, 0, 0, 28, 0, 7, 0,
390
+ 0, 0, 0, 0, 28, 0, 7, 0,
391
+ 0, 0, 0, 0, 28, 0, 7, 0,
392
+ 0, 0, 0, 0, 28, 0, 7, 0,
393
+ 0, 0, 0, 0, 28, 0, 5, 0,
394
+ 0, 0, 0, 28, 0, 0, 9, 9,
395
+ 9, 1, 0, 13, 1, 1, 3, 0,
396
+ 0, 0, 0, 9, 9, 9, 0, 13,
397
+ 1, 1, 3, 0, 0, 0, 0, 19,
398
+ 19, 19, 0, 0, 0, 0, 0, 0,
399
+ 0, 7, 0, 0, 0, 0, 0, 25,
400
+ 0, 7, 0, 0, 0, 0, 0, 25,
401
+ 0, 7, 0, 0, 0, 0, 0, 25,
402
+ 0, 7, 0, 0, 0, 0, 0, 25,
403
+ 0, 7, 0, 0, 0, 0, 0, 25,
404
+ 0, 7, 0, 0, 0, 0, 0, 25,
405
+ 0, 7, 0, 0, 0, 0, 0, 25,
406
+ 0, 7, 0, 0, 0, 0, 0, 31,
407
+ 0, 7, 0, 0, 0, 0, 0, 31,
408
+ 0, 7, 0, 0, 0, 0, 0, 31,
409
+ 0, 7, 0, 0, 0, 0, 0, 31,
410
+ 0, 7, 0, 0, 0, 0, 0, 31,
411
+ 0, 0, 0, 0, 0, 0, 0, 0,
412
+ 0, 0, 0, 0, 0, 0, 0, 0,
413
+ 0, 0, 0, 0, 0, 0, 0, 0,
414
+ 0, 0, 0, 0, 0, 0, 0, 0,
415
+ 0, 0, 0, 0, 0, 0, 0, 0,
416
+ 0, 0, 0, 0, 0, 0, 0, 0,
417
+ 0, 0, 0, 0, 0, 0, 0, 0,
418
+ 0, 0, 0, 0, 0, 0, 0
419
+ ]
420
+
421
+ class << self
422
+ attr_accessor :_simple_lexer_eof_actions
423
+ private :_simple_lexer_eof_actions, :_simple_lexer_eof_actions=
424
+ end
425
+ self._simple_lexer_eof_actions = [
426
+ 0, 0, 0, 0, 0, 0, 0, 0,
427
+ 47, 11, 0, 11, 11, 0, 11, 11,
428
+ 11, 11, 11, 0, 11, 0, 0, 0,
429
+ 40, 40, 40, 40, 40, 40, 40, 40,
430
+ 40, 40, 40, 40, 40, 40, 40, 40,
431
+ 40, 40, 0, 40, 40, 40, 40, 40,
432
+ 40, 37, 37, 37, 37, 0, 37, 37,
433
+ 37, 37, 37, 37, 25, 25, 25, 0,
434
+ 31, 31, 31, 31, 31, 31, 31, 31,
435
+ 0, 0, 28, 28, 28, 28, 28, 28,
436
+ 28, 0, 28, 28, 0, 28, 28, 28,
437
+ 28, 0, 25, 25, 25, 25, 25, 25,
438
+ 25, 31, 31, 31, 31, 31, 0, 0,
439
+ 0, 0, 0, 0, 0, 0, 0, 0,
440
+ 0, 0, 0, 0, 0, 0, 0, 0,
441
+ 0, 0, 0, 0, 0, 0, 0, 0,
442
+ 0, 0, 0, 0, 0
443
+ ]
444
+
445
+ class << self
446
+ attr_accessor :simple_lexer_start
447
+ end
448
+ self.simple_lexer_start = 1;
449
+ class << self
450
+ attr_accessor :simple_lexer_first_final
451
+ end
452
+ self.simple_lexer_first_final = 132;
453
+ class << self
454
+ attr_accessor :simple_lexer_error
455
+ end
456
+ self.simple_lexer_error = 0;
457
+
458
+ class << self
459
+ attr_accessor :simple_lexer_en_main
460
+ end
461
+ self.simple_lexer_en_main = 1;
462
+
463
+
464
+ # line 79 "gen_vcfheaderline_parser.rl"
465
+ # %% this just fixes syntax highlighting...
466
+
467
+ def self.run_lexer(buf, options = {})
468
+ do_debug = (options[:debug] == true)
469
+ $stderr.print "---> ",buf,"\n" if do_debug
470
+ data = buf.unpack("c*") if(buf.is_a?(String))
471
+ eof = data.length
472
+ values = []
473
+ stack = []
474
+
475
+ emit = lambda { |type, data, ts, p|
476
+ # Print the type and text of the last read token
477
+ # p ts,p
478
+ $stderr.print "EMITTED: #{type}: #{data[ts...p].pack('c*')}\n" if do_debug
479
+ values << [type,data[ts...p].pack('c*')]
480
+ }
481
+
482
+ error_code = nil
483
+
484
+
485
+ # line 486 "gen_vcfheaderline_parser.rb"
486
+ begin
487
+ p ||= 0
488
+ pe ||= data.length
489
+ cs = simple_lexer_start
490
+ end
491
+
492
+ # line 99 "gen_vcfheaderline_parser.rl"
493
+
494
+ # line 495 "gen_vcfheaderline_parser.rb"
495
+ begin
496
+ _klen, _trans, _keys, _acts, _nacts = nil
497
+ _goto_level = 0
498
+ _resume = 10
499
+ _eof_trans = 15
500
+ _again = 20
501
+ _test_eof = 30
502
+ _out = 40
503
+ while true
504
+ _trigger_goto = false
505
+ if _goto_level <= 0
506
+ if p == pe
507
+ _goto_level = _test_eof
508
+ next
509
+ end
510
+ if cs == 0
511
+ _goto_level = _out
512
+ next
513
+ end
514
+ end
515
+ if _goto_level <= _resume
516
+ _keys = _simple_lexer_key_offsets[cs]
517
+ _trans = _simple_lexer_index_offsets[cs]
518
+ _klen = _simple_lexer_single_lengths[cs]
519
+ _break_match = false
520
+
521
+ begin
522
+ if _klen > 0
523
+ _lower = _keys
524
+ _upper = _keys + _klen - 1
525
+
526
+ loop do
527
+ break if _upper < _lower
528
+ _mid = _lower + ( (_upper - _lower) >> 1 )
529
+
530
+ if data[p].ord < _simple_lexer_trans_keys[_mid]
531
+ _upper = _mid - 1
532
+ elsif data[p].ord > _simple_lexer_trans_keys[_mid]
533
+ _lower = _mid + 1
534
+ else
535
+ _trans += (_mid - _keys)
536
+ _break_match = true
537
+ break
538
+ end
539
+ end # loop
540
+ break if _break_match
541
+ _keys += _klen
542
+ _trans += _klen
543
+ end
544
+ _klen = _simple_lexer_range_lengths[cs]
545
+ if _klen > 0
546
+ _lower = _keys
547
+ _upper = _keys + (_klen << 1) - 2
548
+ loop do
549
+ break if _upper < _lower
550
+ _mid = _lower + (((_upper-_lower) >> 1) & ~1)
551
+ if data[p].ord < _simple_lexer_trans_keys[_mid]
552
+ _upper = _mid - 2
553
+ elsif data[p].ord > _simple_lexer_trans_keys[_mid+1]
554
+ _lower = _mid + 2
555
+ else
556
+ _trans += ((_mid - _keys) >> 1)
557
+ _break_match = true
558
+ break
559
+ end
560
+ end # loop
561
+ break if _break_match
562
+ _trans += _klen
563
+ end
564
+ end while false
565
+ cs = _simple_lexer_trans_targs[_trans]
566
+ if _simple_lexer_trans_actions[_trans] != 0
567
+ _acts = _simple_lexer_trans_actions[_trans]
568
+ _nacts = _simple_lexer_actions[_acts]
569
+ _acts += 1
570
+ while _nacts > 0
571
+ _nacts -= 1
572
+ _acts += 1
573
+ case _simple_lexer_actions[_acts - 1]
574
+ when 0 then
575
+ # line 33 "gen_vcfheaderline_parser.rl"
576
+ begin
577
+ ts=p end
578
+ when 1 then
579
+ # line 34 "gen_vcfheaderline_parser.rl"
580
+ begin
581
+
582
+ emit.call(:value,data,ts,p)
583
+ end
584
+ when 2 then
585
+ # line 38 "gen_vcfheaderline_parser.rl"
586
+ begin
587
+
588
+ emit.call(:kw,data,ts,p)
589
+ end
590
+ when 3 then
591
+ # line 58 "gen_vcfheaderline_parser.rl"
592
+ begin
593
+ emit.call(:key_word,data,ts,p) end
594
+ when 4 then
595
+ # line 59 "gen_vcfheaderline_parser.rl"
596
+ begin
597
+ emit.call(:value,data,ts,p) end
598
+ when 5 then
599
+ # line 60 "gen_vcfheaderline_parser.rl"
600
+ begin
601
+ emit.call(:value,data,ts,p) end
602
+ when 6 then
603
+ # line 62 "gen_vcfheaderline_parser.rl"
604
+ begin
605
+ emit.call(:value,data,ts,p) end
606
+ when 7 then
607
+ # line 65 "gen_vcfheaderline_parser.rl"
608
+ begin
609
+ emit.call(:value,data,ts,p) end
610
+ when 8 then
611
+ # line 67 "gen_vcfheaderline_parser.rl"
612
+ begin
613
+ debug("ID FOUND") end
614
+ when 9 then
615
+ # line 67 "gen_vcfheaderline_parser.rl"
616
+ begin
617
+ error_code="Malformed ID" end
618
+ when 10 then
619
+ # line 68 "gen_vcfheaderline_parser.rl"
620
+ begin
621
+ error_code="Version" end
622
+ when 11 then
623
+ # line 69 "gen_vcfheaderline_parser.rl"
624
+ begin
625
+ error_code="Number" end
626
+ when 12 then
627
+ # line 70 "gen_vcfheaderline_parser.rl"
628
+ begin
629
+ debug("DATE FOUND") end
630
+ when 13 then
631
+ # line 70 "gen_vcfheaderline_parser.rl"
632
+ begin
633
+ error_code="Date" end
634
+ when 14 then
635
+ # line 71 "gen_vcfheaderline_parser.rl"
636
+ begin
637
+ error_code="GATK" end
638
+ when 15 then
639
+ # line 72 "gen_vcfheaderline_parser.rl"
640
+ begin
641
+ debug("KEY_VALUE found") end
642
+ when 16 then
643
+ # line 72 "gen_vcfheaderline_parser.rl"
644
+ begin
645
+ error_code="unknown key-value " end
646
+ # line 647 "gen_vcfheaderline_parser.rb"
647
+ end # action switch
648
+ end
649
+ end
650
+ if _trigger_goto
651
+ next
652
+ end
653
+ end
654
+ if _goto_level <= _again
655
+ if cs == 0
656
+ _goto_level = _out
657
+ next
658
+ end
659
+ p += 1
660
+ if p != pe
661
+ _goto_level = _resume
662
+ next
663
+ end
664
+ end
665
+ if _goto_level <= _test_eof
666
+ if p == eof
667
+ __acts = _simple_lexer_eof_actions[cs]
668
+ __nacts = _simple_lexer_actions[__acts]
669
+ __acts += 1
670
+ while __nacts > 0
671
+ __nacts -= 1
672
+ __acts += 1
673
+ case _simple_lexer_actions[__acts - 1]
674
+ when 9 then
675
+ # line 67 "gen_vcfheaderline_parser.rl"
676
+ begin
677
+ error_code="Malformed ID" end
678
+ when 10 then
679
+ # line 68 "gen_vcfheaderline_parser.rl"
680
+ begin
681
+ error_code="Version" end
682
+ when 11 then
683
+ # line 69 "gen_vcfheaderline_parser.rl"
684
+ begin
685
+ error_code="Number" end
686
+ when 13 then
687
+ # line 70 "gen_vcfheaderline_parser.rl"
688
+ begin
689
+ error_code="Date" end
690
+ when 14 then
691
+ # line 71 "gen_vcfheaderline_parser.rl"
692
+ begin
693
+ error_code="GATK" end
694
+ when 16 then
695
+ # line 72 "gen_vcfheaderline_parser.rl"
696
+ begin
697
+ error_code="unknown key-value " end
698
+ # line 699 "gen_vcfheaderline_parser.rb"
699
+ end # eof action switch
700
+ end
701
+ if _trigger_goto
702
+ next
703
+ end
704
+ end
705
+ end
706
+ if _goto_level <= _out
707
+ break
708
+ end
709
+ end
710
+ end
711
+
712
+ # line 100 "gen_vcfheaderline_parser.rl"
713
+
714
+ raise "ERROR: "+error_code+" in "+buf if error_code
715
+
716
+ begin
717
+ res = {}
718
+ # p values
719
+ values.each_slice(2) do | a,b |
720
+ $stderr.print '*',a,b if do_debug
721
+ keyword = a[1]
722
+ value = b[1]
723
+ value = value.to_i if ['length','Epoch'].index(keyword)
724
+ res[keyword] = value
725
+ # p h[:value] if h[:name]==:identifier or h[:name]==:value or h[:name]==:string
726
+ end
727
+ rescue
728
+ print "ERROR: "
729
+ p values
730
+ raise
731
+ end
732
+ $stderr.print(res,"\n") if do_debug
733
+ res
734
+ end
735
+ end
736
+ end
737
+ end
738
+
739
# Self-test: this block only runs when the file is executed directly as a
# script (__FILE__ == $0); it is skipped when the file is loaded as a library.
+ if __FILE__ == $0
739
+
740
# Sample ##GATKCommandLine header line whose quoted CommandLineOptions value is
# very long and itself contains many '=', ',' and bracket characters.
+ gatkcommandline = <<LINE1
741
+ ##GATKCommandLine=<ID=CombineVariants,Version=3.2-2-gec30cee,Date="Thu Oct 30 13:41:59 CET 2014",Epoch=1414672919266,CommandLineOptions="analysis_type=CombineVariants input_file=[] showFullBamList=false read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/hpc/cog_bioinf/GENOMES/Homo_sapiens.GRCh37.GATK.illumina/Homo_sapiens.GRCh37.GATK.illumina.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 refactor_NDN_cigar_string=false fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false variant_index_type=DYNAMIC_SEEK variant_index_parameter=-1 logging_level=INFO log_to_file=null help=false version=false variant=[(RodBindingCollection [(RodBinding name=variant source=/hpc/cog_bioinf/data/robert/testIAP/testSubsetExome/tmp/testSubsetExome.filtered_snps.vcf)]), (RodBindingCollection [(RodBinding name=variant2 source=/hpc/cog_bioinf/data/robert/testIAP/testSubsetExome/tmp/testSubsetExome.filtered_indels.vcf)])] out=org.broadinstitute.gatk.engine.io.stubs.VariantContextWriterStub 
no_cmdline_in_header=org.broadinstitute.gatk.engine.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.gatk.engine.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.gatk.engine.io.stubs.VariantContextWriterStub genotypemergeoption=UNSORTED filteredrecordsmergetype=KEEP_IF_ANY_UNFILTERED multipleallelesmergetype=BY_TYPE rod_priority_list=null printComplexMerges=false filteredAreUncalled=false minimalVCF=false excludeNonVariants=false setKey=set assumeIdenticalSamples=false minimumN=1 suppressCommandLineHeader=false mergeInfoWithMaxAC=false filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false">
742
+ LINE1
743
+
744
# NOTE(review): this hash is never read before `h` is re-assigned further down;
# the only statement that would have used it is commented out below.
+ h = {}
745
# Remove the surrounding whitespace/newline that the heredoc adds.
+ s = gatkcommandline.strip
746
+ # print s,"\n"
747
# Lex the GATK line, passing debug: true. The returned hash is only stored in
# `result`; nothing inspects it here (the follow-up lines are commented out).
+ result = BioVcf::VcfHeaderParser::RagelKeyValues.run_lexer(s, debug: true)
748
+ # h[result['ID']] = result
749
+ # p result
750
+
751
# One header line per row: FILTER, FORMAT, INFO and contig examples, including
# quoted values that contain commas, parentheses, escaped double quotes and
# single quotes, plus a contig line with a numeric `length` field.
+ lines = <<LINES
752
+ ##FILTER=<ID=HaplotypeScoreHigh,Description="HaplotypeScore > 13.0">
753
+ ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
754
+ ##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Total read depth",Extra="Yes?">
755
+ ##FORMAT=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
756
+ ##INFO=<ID=PM,Number=0,Type=Flag,Description="Variant is Precious(Clinical,Pubmed Cited)">
757
+ ##INFO=<ID=VP,Number=1,Type=String,Description="Variation Property. Documentation is at ftp://ftp.ncbi.nlm.nih.gov/snp/specs/dbSNP_BitField_latest.pdf",Source="dbsnp",Version="138">
758
+ ##INFO=<ID=GENEINFO,Number=1,Type=String,Description="Pairs each of gene symbol:gene id. The gene symbol and id are delimited by a colon (:), and each pair is delimited by a vertical bar (|)">
759
+ ##INFO=<ID=CLNHGVS,Number=.,Type=String,Description="Variant names from HGVS. The order of these variants corresponds to the order of the info in the other clinical INFO tags.">
760
+ ##INFO=<ID=CLNHGVS1,Number=.,Type=String,Description="Variant names from \\"HGVS\\". The order of these 'variants' corresponds to the order of the info in the other clinical INFO tags.">
761
+ ##contig=<ID=XXXY12>
762
+ ##contig=<ID=Y,length=59373566>
763
+ LINES
764
+
765
# Collect the parsed key/value hash of every sample line, keyed by its 'ID'.
+ h = {}
766
# Split the heredoc into individual header lines and lex each one.
+ lines.strip.split("\n").each { |s|
767
+ # print s,"\n"
768
+ result = BioVcf::VcfHeaderParser::RagelKeyValues.run_lexer(s, debug: true)
769
+ h[result['ID']] = result
770
# Echo each parsed line so a failing run shows where the output diverged.
+ p result
771
+ }
772
# Dump the complete collection before comparing it with the expected value.
+ p h
773
+
774
# Regression check: the collected hashes must equal this literal exactly.
# All values are expected as Strings except 'length', which is expected as an
# Integer (59373566).
+ raise "ERROR" if h != {"HaplotypeScoreHigh"=>{"ID"=>"HaplotypeScoreHigh", "Description"=>"HaplotypeScore > 13.0"}, "GT"=>{"ID"=>"GT", "Number"=>"1", "Type"=>"String", "Description"=>"Genotype"}, "DP"=>{"ID"=>"DP", "Number"=>"1", "Type"=>"Integer", "Description"=>"Total read depth", "Extra"=>"Yes?"}, "DP4"=>{"ID"=>"DP4", "Number"=>"4", "Type"=>"Integer", "Description"=>"# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases"}, "PM"=>{"ID"=>"PM", "Number"=>"0", "Type"=>"Flag", "Description"=>"Variant is Precious(Clinical,Pubmed Cited)"}, "VP"=>{"ID"=>"VP", "Number"=>"1", "Type"=>"String", "Description"=>"Variation Property. Documentation is at ftp://ftp.ncbi.nlm.nih.gov/snp/specs/dbSNP_BitField_latest.pdf", "Source"=>"dbsnp", "Version"=>"138"}, "GENEINFO"=>{"ID"=>"GENEINFO", "Number"=>"1", "Type"=>"String", "Description"=>"Pairs each of gene symbol:gene id. The gene symbol and id are delimited by a colon (:), and each pair is delimited by a vertical bar (|)"}, "CLNHGVS"=>{"ID"=>"CLNHGVS", "Number"=>".", "Type"=>"String", "Description"=>"Variant names from HGVS. The order of these variants corresponds to the order of the info in the other clinical INFO tags."}, "CLNHGVS1"=>{"ID"=>"CLNHGVS1", "Number"=>".", "Type"=>"String", "Description"=>"Variant names from \\\"HGVS\\\". The order of these 'variants' corresponds to the order of the info in the other clinical INFO tags."}, "XXXY12"=>{"ID"=>"XXXY12"}, "Y"=>{"ID"=>"Y", "length"=>59373566}}
775
+
776
+
777
+ end # test