bio-vcf 0.8.0 → 0.9.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.travis.yml +1 -11
- data/Gemfile +4 -5
- data/Gemfile.lock +28 -65
- data/LICENSE.txt +1 -1
- data/README.md +387 -107
- data/RELEASE_NOTES.md +20 -0
- data/RELEASE_NOTES.md~ +11 -0
- data/Rakefile +3 -40
- data/TAGS +115 -0
- data/VERSION +1 -1
- data/bin/bio-vcf +176 -109
- data/bio-vcf.gemspec +14 -70
- data/features/cli.feature +22 -4
- data/features/diff_count.feature +0 -1
- data/features/filter.feature +12 -0
- data/features/multisample.feature +25 -0
- data/features/somaticsniper.feature +2 -0
- data/features/step_definitions/cli-feature.rb +15 -6
- data/features/step_definitions/diff_count.rb +1 -1
- data/features/step_definitions/multisample.rb +19 -0
- data/features/step_definitions/somaticsniper.rb +9 -1
- data/features/step_definitions/vcf_header.rb +48 -0
- data/features/support/env.rb +0 -9
- data/features/vcf_header.feature +35 -0
- data/lib/bio-vcf.rb +2 -0
- data/lib/bio-vcf/bedfilter.rb +43 -0
- data/lib/bio-vcf/pcows.rb +303 -0
- data/lib/bio-vcf/template.rb +75 -0
- data/lib/bio-vcf/vcffile.rb +46 -0
- data/lib/bio-vcf/vcfgenotypefield.rb +25 -20
- data/lib/bio-vcf/vcfheader.rb +146 -6
- data/lib/bio-vcf/vcfheader_line.rb +778 -0
- data/lib/bio-vcf/vcfrecord.rb +56 -18
- data/lib/bio-vcf/vcfsample.rb +27 -3
- data/ragel/gen_vcfheaderline_parser.rl +165 -0
- data/ragel/generate.sh +8 -0
- data/template/vcf2json.erb +19 -7
- data/template/vcf2json_full_header.erb +22 -0
- data/template/vcf2json_use_meta.erb +41 -0
- data/template/vcf2rdf_header.erb +24 -0
- data/test/data/input/empty.vcf +2 -0
- data/test/data/input/gatk_exome.vcf +237 -0
- data/test/data/input/gatk_wgs.vcf +1000 -0
- data/test/data/input/test.bed +632 -0
- data/test/data/regression/empty-stderr.new +12 -0
- data/test/data/regression/empty.new +2 -0
- data/test/data/regression/empty.ref +2 -0
- data/test/data/regression/eval_once-stderr.new +2 -0
- data/test/data/regression/eval_once.new +1 -0
- data/test/data/regression/eval_once.ref +1 -0
- data/test/data/regression/eval_r.info.dp-stderr.new +10 -0
- data/test/data/regression/eval_r.info.dp.new +150 -0
- data/test/data/regression/ifilter_s.dp-stderr.new +34 -0
- data/test/data/regression/ifilter_s.dp.new +31 -0
- data/test/data/regression/pass1-stderr.new +10 -0
- data/test/data/regression/pass1.new +88 -0
- data/test/data/regression/pass1.ref +88 -0
- data/test/data/regression/r.info.dp-stderr.new +4 -0
- data/test/data/regression/r.info.dp.new +114 -0
- data/test/data/regression/rewrite.info.sample-stderr.new +10 -0
- data/test/data/regression/rewrite.info.sample.new +150 -0
- data/test/data/regression/s.dp-stderr.new +18 -0
- data/test/data/regression/s.dp.new +145 -0
- data/test/data/regression/seval_s.dp-stderr.new +10 -0
- data/test/data/regression/seval_s.dp.new +36 -0
- data/test/data/regression/sfilter_seval_s.dp-stderr.new +18 -0
- data/test/data/regression/sfilter_seval_s.dp.new +31 -0
- data/test/data/regression/thread4-stderr.new +10 -0
- data/test/data/regression/thread4.new +150 -0
- data/test/data/regression/thread4_4-stderr.new +25 -0
- data/test/data/regression/thread4_4.new +130 -0
- data/test/data/regression/thread4_4_failed_filter-stderr.new +5 -0
- data/test/data/regression/thread4_4_failed_filter-stderr.ref +5 -1
- data/test/data/regression/thread4_4_failed_filter.new +110 -0
- data/test/data/regression/vcf2json_full_header-stderr.new +10 -0
- data/test/data/regression/vcf2json_full_header.new +225 -0
- data/test/data/regression/vcf2json_full_header.ref +225 -0
- data/test/data/regression/vcf2json_use_meta-stderr.new +10 -0
- data/test/data/regression/vcf2json_use_meta.new +4697 -0
- data/test/data/regression/vcf2json_use_meta.ref +4697 -0
- data/test/performance/metrics.md +18 -1
- data/test/stress/stress_test.sh +15 -0
- data/test/tmp/test.vcf +12469 -0
- metadata +65 -64
data/lib/bio-vcf/vcfheader.rb
CHANGED
@@ -1,3 +1,14 @@
|
|
1
|
+
# This module parses the VCF header. A header consists of lines
|
2
|
+
# containing fields. Most fields are of 'key=value' type and appear
|
3
|
+
# only once. These can be retrieved with the find_field method.
|
4
|
+
#
|
5
|
+
# INFO, FORMAT and contig fields are special as they appear multiple times
|
6
|
+
# and contain multiple key values (identified by an ID field).
|
7
|
+
# To retrieve these call 'info' and 'format' functions respectively,
|
8
|
+
# which return a hash on the contained ID.
|
9
|
+
#
|
10
|
+
# For the INFO and FORMAT fields a Ragel parser is used, mostly to
|
11
|
+
# deal with embedded quoted fields.
|
1
12
|
|
2
13
|
module BioVcf
|
3
14
|
|
@@ -13,21 +24,30 @@ module BioVcf
|
|
13
24
|
end
|
14
25
|
nil
|
15
26
|
end
|
27
|
+
|
28
|
+
def VcfHeaderParser.parse_field(line, debug)
|
29
|
+
BioVcf::VcfHeaderParser::RagelKeyValues.run_lexer(line, debug: debug)
|
30
|
+
end
|
16
31
|
end
|
17
32
|
|
18
33
|
class VcfHeader
|
19
34
|
|
20
|
-
attr_reader :lines
|
35
|
+
attr_reader :lines, :field
|
21
36
|
|
22
|
-
def initialize
|
37
|
+
def initialize(debug = false)
|
38
|
+
@debug = debug
|
23
39
|
@lines = []
|
40
|
+
@field = {}
|
41
|
+
@meta = nil
|
42
|
+
@cached_filter_index = {}
|
24
43
|
end
|
25
44
|
|
45
|
+
# Add a new field to the header
|
26
46
|
def add line
|
27
|
-
@lines
|
47
|
+
@lines += line.split(/\n/)
|
28
48
|
end
|
29
49
|
|
30
|
-
#
|
50
|
+
# Push a special key value list to the header
|
31
51
|
def tag h
|
32
52
|
h2 = h.dup
|
33
53
|
[:show_help,:skip_header,:verbose,:quiet,:debug].each { |key| h2.delete(key) }
|
@@ -60,13 +80,21 @@ module BioVcf
|
|
60
80
|
end
|
61
81
|
|
62
82
|
# Return the sample names: the entries of column_names from index 9 onward,
# or an empty list when column_names has eight entries or fewer. The result
# is memoized in @samples.
def samples
  return @samples if @samples
  @samples = column_names.size > 8 ? column_names[9..-1] : []
end
|
65
89
|
|
66
90
|
# Return the 0-based positions [0, 1, ..., n-1] of the n entries that
# column_names holds from index 9 onward (the same slice `samples` uses).
# The result is memoized in @all_samples_index.
#
# column_names[9..-1] is nil when there are fewer than nine columns, so an
# empty list is returned in that case instead of raising NoMethodError.
def samples_index_array
  @all_samples_index ||= (column_names[9..-1] || []).each_index.to_a
end
|
69
93
|
|
94
|
+
# Return the number of samples (0 when `samples` is nil). Memoized in
# @num_samples.
def num_samples
  return @num_samples if @num_samples
  names = samples
  @num_samples = names == nil ? 0 : names.size
end
|
97
|
+
|
70
98
|
def sample_index
|
71
99
|
return @sample_index if @sample_index
|
72
100
|
index = {}
|
@@ -74,6 +102,118 @@ module BioVcf
|
|
74
102
|
@sample_index = index
|
75
103
|
index
|
76
104
|
end
|
77
|
-
end
|
78
105
|
|
106
|
+
# Give a list of samples (by index and/or name) and return 0-based index values
|
107
|
+
# The cache has to be able to handle multiple lists - that is why it is a hash.
|
108
|
+
def sample_subset_index list
  # Each entry of +list+ is either something Integer() accepts (used as the
  # index directly) or a sample name that is looked up in `samples`.
  # A nil/false +list+ means "all samples" (samples_index_array).
  # Raises RuntimeError for a name that is not in `samples`.
  #
  # The cache is read and written under the list exactly as the caller
  # passed it. Previously a nil list was replaced before the write, so the
  # nil lookup never hit the cache and the list was rebuilt on every call.
  key = list
  cached = @cached_filter_index[key]
  return cached if cached

  wanted = list || samples_index_array
  resolved = wanted.map do |item|
    begin
      Integer(item)
    rescue
      # Not convertible to an integer: treat the entry as a sample name.
      idx = samples.index(item)
      raise "Unknown sample name '#{item}'" if idx.nil?
      idx
    end
  end
  @cached_filter_index[key] = resolved
end
|
133
|
+
|
134
|
+
# Look for a line in the header with the field name and return the
|
135
|
+
# value, otherwise return nil
|
136
|
+
def find_field name
  # A value found earlier is served from the `field` hash.
  cached = field[name]
  return cached if cached

  # The name is escaped so that characters such as '.', '+' or '[' are
  # matched literally instead of being interpreted as regex syntax.
  pattern = Regexp.new('##' + Regexp.escape(name.to_s) + '=(.*)')
  @lines.each do |line|
    value = line[pattern, 1]
    if value
      field[name] = value # remember for the next lookup
      return value
    end
  end
  nil
end
|
148
|
+
|
149
|
+
# Look for all the lines that match the field name and return
|
150
|
+
# a hash of hashes. An empty hash is returned when there are
|
151
|
+
# no matches.
|
152
|
+
def find_fields name
  # Every header line of the form ##name=<...> is handed to
  # VcfHeaderParser.parse_field and stored under the 'ID' value of the
  # parsed result. The name is escaped so it is matched literally.
  pattern = Regexp.new('##' + Regexp.escape(name.to_s) + '=<(.*)>')
  res = {}
  @lines.each do |line|
    next unless line =~ pattern
    v = VcfHeaderParser.parse_field(line, @debug)
    res[v['ID']] = v
  end
  res
end
|
167
|
+
|
168
|
+
def format
|
169
|
+
find_fields('FORMAT')
|
170
|
+
end
|
171
|
+
|
172
|
+
def filter
|
173
|
+
find_fields('FILTER')
|
174
|
+
end
|
175
|
+
|
176
|
+
def contig
|
177
|
+
find_fields('contig')
|
178
|
+
end
|
179
|
+
|
180
|
+
def info
|
181
|
+
find_fields('INFO')
|
182
|
+
end
|
183
|
+
|
184
|
+
def gatkcommandline
|
185
|
+
find_fields('GATKCommandLine')
|
186
|
+
end
|
187
|
+
|
188
|
+
# Build (once) a hash describing the whole header and cache it in @meta.
# Lines of the form ##key=value are stored as key => value, except for the
# structured keys INFO, FORMAT, FILTER, contig and GATKCommandLine, whose
# entries come from the corresponding accessor methods.
def meta
  return @meta if @meta

  structured = ['INFO', 'FORMAT', 'FILTER', 'contig', 'GATKCommandLine']
  res = {}
  structured.each { |key| res[key] = {} }
  @lines.each do |line|
    pair = line.scan(/##(.*?)=(.*)/)[0]
    next unless pair
    key, text = pair
    res[key] = text unless structured.include?(key)
  end
  res['INFO'] = info
  res['FORMAT'] = format
  res['FILTER'] = filter
  res['contig'] = contig
  res['GATKCommandLine'] = gatkcommandline
  @meta = res # cache values
end
|
210
|
+
|
211
|
+
# Treat an unknown method name as a header field query: header.fileformat
# returns the value found by find_field('fileformat'). Raises RuntimeError
# when no such field is found.
def method_missing(m, *args, &block)
  name = m.to_s
  value = find_field(name)
  return value if value
  raise "Unknown VCF header query '#{name}'"
end

# Keep respond_to? consistent with method_missing. Without this, Ruby's
# implicit conversion probes (for example to_ary from puts or
# Array#flatten) are routed into method_missing and hit the raise above.
# Only plain word-character names are looked up, so operator-like names
# never reach the pattern match in find_field.
def respond_to_missing?(m, include_private = false)
  name = m.to_s
  return true if name =~ /\A\w+\z/ && !find_field(name).nil?
  super
end
|
217
|
+
|
218
|
+
end
|
79
219
|
end
|
@@ -0,0 +1,778 @@
|
|
1
|
+
|
2
|
+
# line 1 "gen_vcfheaderline_parser.rl"
|
3
|
+
# Ragel lexer for VCF-header
|
4
|
+
#
|
5
|
+
# This is a compact parser/lexer for the VCF header format. Bio-vcf
|
6
|
+
# uses the parser to generate meta information that can be output to
|
7
|
+
# (for example) JSON format. The advantage of using ragel as a state
|
8
|
+
# engine is that it allows for easy parsing of key-value pairs with
|
9
|
+
# syntax checking and, for example, escaped quotes in quoted string
|
10
|
+
# values. This ragel parser/lexer generates valid Ruby; it should be
|
11
|
+
# fairly trivial to generate python/C/JAVA instead. Note that this
|
12
|
+
# edition validates ID and Number fields only. Other fields are
|
13
|
+
# dumped 'AS IS'.
|
14
|
+
#
|
15
|
+
# Note the .rb version is generated from ./ragel/gen_vcfheaderline_parser.rl
|
16
|
+
#
|
17
|
+
# by Pjotr Prins (c) 2014/2015
|
18
|
+
|
19
|
+
module BioVcf
|
20
|
+
|
21
|
+
module VcfHeaderParser
|
22
|
+
|
23
|
+
module RagelKeyValues
|
24
|
+
|
25
|
+
def self.debug msg
|
26
|
+
# nothing
|
27
|
+
# $stderr.print "DEBUG: ",msg,"\n"
|
28
|
+
end
|
29
|
+
|
30
|
+
=begin
|
31
|
+
|
32
|
+
# line 75 "gen_vcfheaderline_parser.rl"
|
33
|
+
|
34
|
+
=end
|
35
|
+
|
36
|
+
|
37
|
+
# line 38 "gen_vcfheaderline_parser.rb"
|
38
|
+
class << self
|
39
|
+
attr_accessor :_simple_lexer_actions
|
40
|
+
private :_simple_lexer_actions, :_simple_lexer_actions=
|
41
|
+
end
|
42
|
+
self._simple_lexer_actions = [
|
43
|
+
0, 1, 0, 1, 1, 1, 2, 1,
|
44
|
+
3, 1, 15, 1, 16, 2, 0, 1,
|
45
|
+
2, 4, 15, 2, 6, 15, 2, 7,
|
46
|
+
15, 2, 9, 16, 2, 10, 16, 2,
|
47
|
+
11, 16, 2, 12, 15, 2, 13, 16,
|
48
|
+
2, 14, 16, 3, 5, 8, 15, 6,
|
49
|
+
9, 10, 13, 11, 14, 16
|
50
|
+
]
|
51
|
+
|
52
|
+
class << self
|
53
|
+
attr_accessor :_simple_lexer_key_offsets
|
54
|
+
private :_simple_lexer_key_offsets, :_simple_lexer_key_offsets=
|
55
|
+
end
|
56
|
+
self._simple_lexer_key_offsets = [
|
57
|
+
0, 0, 1, 2, 7, 8, 9, 10,
|
58
|
+
13, 26, 35, 49, 51, 53, 58, 60,
|
59
|
+
62, 62, 62, 64, 70, 72, 77, 80,
|
60
|
+
91, 101, 111, 121, 131, 141, 151, 161,
|
61
|
+
171, 181, 191, 201, 211, 221, 231, 241,
|
62
|
+
251, 261, 270, 275, 277, 279, 279, 281,
|
63
|
+
283, 283, 293, 303, 313, 322, 327, 329,
|
64
|
+
331, 331, 333, 335, 335, 345, 354, 360,
|
65
|
+
371, 381, 391, 401, 411, 421, 430, 438,
|
66
|
+
440, 445, 448, 458, 468, 478, 488, 498,
|
67
|
+
508, 517, 524, 526, 528, 533, 535, 537,
|
68
|
+
537, 537, 549, 559, 569, 579, 589, 599,
|
69
|
+
609, 619, 629, 639, 649, 659, 669, 671,
|
70
|
+
672, 673, 674, 675, 676, 677, 678, 679,
|
71
|
+
680, 681, 682, 683, 684, 685, 686, 687,
|
72
|
+
688, 689, 690, 691, 692, 693, 694, 695,
|
73
|
+
696, 697, 698, 699, 700
|
74
|
+
]
|
75
|
+
|
76
|
+
class << self
|
77
|
+
attr_accessor :_simple_lexer_trans_keys
|
78
|
+
private :_simple_lexer_trans_keys, :_simple_lexer_trans_keys=
|
79
|
+
end
|
80
|
+
self._simple_lexer_trans_keys = [
|
81
|
+
35, 35, 65, 70, 71, 73, 99, 76,
|
82
|
+
84, 61, 44, 60, 62, 67, 68, 73,
|
83
|
+
78, 86, 97, 108, 48, 57, 65, 90,
|
84
|
+
98, 122, 46, 61, 95, 48, 57, 65,
|
85
|
+
90, 97, 122, 34, 39, 44, 46, 60,
|
86
|
+
62, 43, 45, 48, 57, 65, 90, 97,
|
87
|
+
122, 34, 92, 34, 92, 34, 39, 44,
|
88
|
+
60, 62, 39, 92, 39, 92, 48, 57,
|
89
|
+
44, 46, 60, 62, 48, 57, 48, 57,
|
90
|
+
44, 60, 62, 48, 57, 44, 60, 62,
|
91
|
+
44, 46, 60, 62, 95, 48, 57, 65,
|
92
|
+
90, 97, 122, 46, 61, 95, 111, 48,
|
93
|
+
57, 65, 90, 97, 122, 46, 61, 95,
|
94
|
+
109, 48, 57, 65, 90, 97, 122, 46,
|
95
|
+
61, 95, 109, 48, 57, 65, 90, 97,
|
96
|
+
122, 46, 61, 95, 97, 48, 57, 65,
|
97
|
+
90, 98, 122, 46, 61, 95, 110, 48,
|
98
|
+
57, 65, 90, 97, 122, 46, 61, 95,
|
99
|
+
100, 48, 57, 65, 90, 97, 122, 46,
|
100
|
+
61, 76, 95, 48, 57, 65, 90, 97,
|
101
|
+
122, 46, 61, 95, 105, 48, 57, 65,
|
102
|
+
90, 97, 122, 46, 61, 95, 110, 48,
|
103
|
+
57, 65, 90, 97, 122, 46, 61, 95,
|
104
|
+
101, 48, 57, 65, 90, 97, 122, 46,
|
105
|
+
61, 79, 95, 48, 57, 65, 90, 97,
|
106
|
+
122, 46, 61, 95, 112, 48, 57, 65,
|
107
|
+
90, 97, 122, 46, 61, 95, 116, 48,
|
108
|
+
57, 65, 90, 97, 122, 46, 61, 95,
|
109
|
+
105, 48, 57, 65, 90, 97, 122, 46,
|
110
|
+
61, 95, 111, 48, 57, 65, 90, 97,
|
111
|
+
122, 46, 61, 95, 110, 48, 57, 65,
|
112
|
+
90, 97, 122, 46, 61, 95, 115, 48,
|
113
|
+
57, 65, 90, 97, 122, 46, 61, 95,
|
114
|
+
48, 57, 65, 90, 97, 122, 34, 39,
|
115
|
+
44, 60, 62, 34, 92, 34, 92, 39,
|
116
|
+
92, 39, 92, 46, 61, 95, 97, 48,
|
117
|
+
57, 65, 90, 98, 122, 46, 61, 95,
|
118
|
+
116, 48, 57, 65, 90, 97, 122, 46,
|
119
|
+
61, 95, 101, 48, 57, 65, 90, 97,
|
120
|
+
122, 46, 61, 95, 48, 57, 65, 90,
|
121
|
+
97, 122, 34, 39, 44, 60, 62, 34,
|
122
|
+
92, 34, 92, 39, 92, 39, 92, 46,
|
123
|
+
61, 68, 95, 48, 57, 65, 90, 97,
|
124
|
+
122, 46, 61, 95, 48, 57, 65, 90,
|
125
|
+
97, 122, 48, 57, 65, 90, 97, 122,
|
126
|
+
44, 46, 60, 62, 95, 48, 57, 65,
|
127
|
+
90, 97, 122, 46, 61, 95, 117, 48,
|
128
|
+
57, 65, 90, 97, 122, 46, 61, 95,
|
129
|
+
109, 48, 57, 65, 90, 97, 122, 46,
|
130
|
+
61, 95, 98, 48, 57, 65, 90, 97,
|
131
|
+
122, 46, 61, 95, 101, 48, 57, 65,
|
132
|
+
90, 97, 122, 46, 61, 95, 114, 48,
|
133
|
+
57, 65, 90, 97, 122, 46, 61, 95,
|
134
|
+
48, 57, 65, 90, 97, 122, 43, 45,
|
135
|
+
46, 65, 71, 82, 48, 57, 48, 57,
|
136
|
+
44, 60, 62, 48, 57, 44, 60, 62,
|
137
|
+
46, 61, 95, 101, 48, 57, 65, 90,
|
138
|
+
97, 122, 46, 61, 95, 114, 48, 57,
|
139
|
+
65, 90, 97, 122, 46, 61, 95, 115,
|
140
|
+
48, 57, 65, 90, 97, 122, 46, 61,
|
141
|
+
95, 105, 48, 57, 65, 90, 97, 122,
|
142
|
+
46, 61, 95, 111, 48, 57, 65, 90,
|
143
|
+
97, 122, 46, 61, 95, 110, 48, 57,
|
144
|
+
65, 90, 97, 122, 46, 61, 95, 48,
|
145
|
+
57, 65, 90, 97, 122, 34, 39, 44,
|
146
|
+
60, 62, 48, 57, 34, 92, 34, 92,
|
147
|
+
34, 39, 44, 60, 62, 39, 92, 39,
|
148
|
+
92, 44, 60, 62, 95, 45, 46, 48,
|
149
|
+
57, 65, 90, 97, 122, 46, 61, 95,
|
150
|
+
115, 48, 57, 65, 90, 97, 122, 46,
|
151
|
+
61, 95, 115, 48, 57, 65, 90, 97,
|
152
|
+
122, 46, 61, 95, 101, 48, 57, 65,
|
153
|
+
90, 97, 122, 46, 61, 95, 109, 48,
|
154
|
+
57, 65, 90, 97, 122, 46, 61, 95,
|
155
|
+
98, 48, 57, 65, 90, 97, 122, 46,
|
156
|
+
61, 95, 108, 48, 57, 65, 90, 97,
|
157
|
+
122, 46, 61, 95, 121, 48, 57, 65,
|
158
|
+
90, 97, 122, 46, 61, 95, 101, 48,
|
159
|
+
57, 65, 90, 97, 122, 46, 61, 95,
|
160
|
+
110, 48, 57, 65, 90, 97, 122, 46,
|
161
|
+
61, 95, 103, 48, 57, 65, 90, 97,
|
162
|
+
122, 46, 61, 95, 116, 48, 57, 65,
|
163
|
+
90, 97, 122, 46, 61, 95, 104, 48,
|
164
|
+
57, 65, 90, 97, 122, 73, 79, 76,
|
165
|
+
84, 69, 82, 82, 77, 65, 65, 84,
|
166
|
+
75, 67, 111, 109, 109, 97, 110, 100,
|
167
|
+
76, 105, 110, 101, 78, 70, 79, 111,
|
168
|
+
110, 116, 105, 103, 0
|
169
|
+
]
|
170
|
+
|
171
|
+
class << self
|
172
|
+
attr_accessor :_simple_lexer_single_lengths
|
173
|
+
private :_simple_lexer_single_lengths, :_simple_lexer_single_lengths=
|
174
|
+
end
|
175
|
+
self._simple_lexer_single_lengths = [
|
176
|
+
0, 1, 1, 5, 1, 1, 1, 3,
|
177
|
+
7, 3, 6, 2, 2, 5, 2, 2,
|
178
|
+
0, 0, 0, 4, 0, 3, 3, 5,
|
179
|
+
4, 4, 4, 4, 4, 4, 4, 4,
|
180
|
+
4, 4, 4, 4, 4, 4, 4, 4,
|
181
|
+
4, 3, 5, 2, 2, 0, 2, 2,
|
182
|
+
0, 4, 4, 4, 3, 5, 2, 2,
|
183
|
+
0, 2, 2, 0, 4, 3, 0, 5,
|
184
|
+
4, 4, 4, 4, 4, 3, 6, 0,
|
185
|
+
3, 3, 4, 4, 4, 4, 4, 4,
|
186
|
+
3, 5, 2, 2, 5, 2, 2, 0,
|
187
|
+
0, 4, 4, 4, 4, 4, 4, 4,
|
188
|
+
4, 4, 4, 4, 4, 4, 2, 1,
|
189
|
+
1, 1, 1, 1, 1, 1, 1, 1,
|
190
|
+
1, 1, 1, 1, 1, 1, 1, 1,
|
191
|
+
1, 1, 1, 1, 1, 1, 1, 1,
|
192
|
+
1, 1, 1, 1, 0
|
193
|
+
]
|
194
|
+
|
195
|
+
class << self
|
196
|
+
attr_accessor :_simple_lexer_range_lengths
|
197
|
+
private :_simple_lexer_range_lengths, :_simple_lexer_range_lengths=
|
198
|
+
end
|
199
|
+
self._simple_lexer_range_lengths = [
|
200
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
201
|
+
3, 3, 4, 0, 0, 0, 0, 0,
|
202
|
+
0, 0, 1, 1, 1, 1, 0, 3,
|
203
|
+
3, 3, 3, 3, 3, 3, 3, 3,
|
204
|
+
3, 3, 3, 3, 3, 3, 3, 3,
|
205
|
+
3, 3, 0, 0, 0, 0, 0, 0,
|
206
|
+
0, 3, 3, 3, 3, 0, 0, 0,
|
207
|
+
0, 0, 0, 0, 3, 3, 3, 3,
|
208
|
+
3, 3, 3, 3, 3, 3, 1, 1,
|
209
|
+
1, 0, 3, 3, 3, 3, 3, 3,
|
210
|
+
3, 1, 0, 0, 0, 0, 0, 0,
|
211
|
+
0, 4, 3, 3, 3, 3, 3, 3,
|
212
|
+
3, 3, 3, 3, 3, 3, 0, 0,
|
213
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
214
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
215
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
216
|
+
0, 0, 0, 0, 0
|
217
|
+
]
|
218
|
+
|
219
|
+
class << self
|
220
|
+
attr_accessor :_simple_lexer_index_offsets
|
221
|
+
private :_simple_lexer_index_offsets, :_simple_lexer_index_offsets=
|
222
|
+
end
|
223
|
+
self._simple_lexer_index_offsets = [
|
224
|
+
0, 0, 2, 4, 10, 12, 14, 16,
|
225
|
+
20, 31, 38, 49, 52, 55, 61, 64,
|
226
|
+
67, 68, 69, 71, 77, 79, 84, 88,
|
227
|
+
97, 105, 113, 121, 129, 137, 145, 153,
|
228
|
+
161, 169, 177, 185, 193, 201, 209, 217,
|
229
|
+
225, 233, 240, 246, 249, 252, 253, 256,
|
230
|
+
259, 260, 268, 276, 284, 291, 297, 300,
|
231
|
+
303, 304, 307, 310, 311, 319, 326, 330,
|
232
|
+
339, 347, 355, 363, 371, 379, 386, 394,
|
233
|
+
396, 401, 405, 413, 421, 429, 437, 445,
|
234
|
+
453, 460, 467, 470, 473, 479, 482, 485,
|
235
|
+
486, 487, 496, 504, 512, 520, 528, 536,
|
236
|
+
544, 552, 560, 568, 576, 584, 592, 595,
|
237
|
+
597, 599, 601, 603, 605, 607, 609, 611,
|
238
|
+
613, 615, 617, 619, 621, 623, 625, 627,
|
239
|
+
629, 631, 633, 635, 637, 639, 641, 643,
|
240
|
+
645, 647, 649, 651, 653
|
241
|
+
]
|
242
|
+
|
243
|
+
class << self
|
244
|
+
attr_accessor :_simple_lexer_trans_targs
|
245
|
+
private :_simple_lexer_trans_targs, :_simple_lexer_trans_targs=
|
246
|
+
end
|
247
|
+
self._simple_lexer_trans_targs = [
|
248
|
+
2, 0, 3, 0, 4, 102, 110, 124,
|
249
|
+
127, 0, 5, 0, 6, 0, 7, 0,
|
250
|
+
8, 8, 132, 0, 24, 49, 60, 64,
|
251
|
+
74, 90, 97, 9, 9, 9, 0, 9,
|
252
|
+
10, 9, 9, 9, 9, 0, 11, 14,
|
253
|
+
8, 22, 8, 132, 18, 23, 23, 23,
|
254
|
+
0, 13, 17, 12, 13, 17, 12, 11,
|
255
|
+
14, 8, 8, 132, 0, 13, 16, 15,
|
256
|
+
13, 16, 15, 15, 12, 19, 0, 8,
|
257
|
+
20, 8, 132, 19, 0, 21, 0, 8,
|
258
|
+
8, 132, 21, 0, 8, 8, 132, 0,
|
259
|
+
8, 23, 8, 132, 23, 23, 23, 23,
|
260
|
+
0, 9, 10, 9, 25, 9, 9, 9,
|
261
|
+
0, 9, 10, 9, 26, 9, 9, 9,
|
262
|
+
0, 9, 10, 9, 27, 9, 9, 9,
|
263
|
+
0, 9, 10, 9, 28, 9, 9, 9,
|
264
|
+
0, 9, 10, 9, 29, 9, 9, 9,
|
265
|
+
0, 9, 10, 9, 30, 9, 9, 9,
|
266
|
+
0, 9, 10, 31, 9, 9, 9, 9,
|
267
|
+
0, 9, 10, 9, 32, 9, 9, 9,
|
268
|
+
0, 9, 10, 9, 33, 9, 9, 9,
|
269
|
+
0, 9, 10, 9, 34, 9, 9, 9,
|
270
|
+
0, 9, 10, 35, 9, 9, 9, 9,
|
271
|
+
0, 9, 10, 9, 36, 9, 9, 9,
|
272
|
+
0, 9, 10, 9, 37, 9, 9, 9,
|
273
|
+
0, 9, 10, 9, 38, 9, 9, 9,
|
274
|
+
0, 9, 10, 9, 39, 9, 9, 9,
|
275
|
+
0, 9, 10, 9, 40, 9, 9, 9,
|
276
|
+
0, 9, 10, 9, 41, 9, 9, 9,
|
277
|
+
0, 9, 42, 9, 9, 9, 9, 0,
|
278
|
+
43, 46, 8, 8, 132, 0, 42, 45,
|
279
|
+
44, 42, 45, 44, 44, 42, 48, 47,
|
280
|
+
42, 48, 47, 47, 9, 10, 9, 50,
|
281
|
+
9, 9, 9, 0, 9, 10, 9, 51,
|
282
|
+
9, 9, 9, 0, 9, 10, 9, 52,
|
283
|
+
9, 9, 9, 0, 9, 53, 9, 9,
|
284
|
+
9, 9, 0, 54, 57, 8, 8, 132,
|
285
|
+
0, 53, 56, 55, 53, 56, 55, 55,
|
286
|
+
53, 59, 58, 53, 59, 58, 58, 9,
|
287
|
+
10, 61, 9, 9, 9, 9, 0, 9,
|
288
|
+
62, 9, 9, 9, 9, 0, 63, 63,
|
289
|
+
63, 0, 8, 63, 8, 132, 63, 63,
|
290
|
+
63, 63, 0, 9, 10, 9, 65, 9,
|
291
|
+
9, 9, 0, 9, 10, 9, 66, 9,
|
292
|
+
9, 9, 0, 9, 10, 9, 67, 9,
|
293
|
+
9, 9, 0, 9, 10, 9, 68, 9,
|
294
|
+
9, 9, 0, 9, 10, 9, 69, 9,
|
295
|
+
9, 9, 0, 9, 70, 9, 9, 9,
|
296
|
+
9, 0, 71, 71, 73, 73, 73, 73,
|
297
|
+
72, 0, 72, 0, 8, 8, 132, 72,
|
298
|
+
0, 8, 8, 132, 0, 9, 10, 9,
|
299
|
+
75, 9, 9, 9, 0, 9, 10, 9,
|
300
|
+
76, 9, 9, 9, 0, 9, 10, 9,
|
301
|
+
77, 9, 9, 9, 0, 9, 10, 9,
|
302
|
+
78, 9, 9, 9, 0, 9, 10, 9,
|
303
|
+
79, 9, 9, 9, 0, 9, 10, 9,
|
304
|
+
80, 9, 9, 9, 0, 9, 81, 9,
|
305
|
+
9, 9, 9, 0, 82, 85, 8, 8,
|
306
|
+
132, 89, 0, 84, 88, 83, 84, 88,
|
307
|
+
83, 82, 85, 8, 8, 132, 0, 84,
|
308
|
+
87, 86, 84, 87, 86, 86, 83, 8,
|
309
|
+
8, 132, 89, 89, 89, 89, 89, 0,
|
310
|
+
9, 10, 9, 91, 9, 9, 9, 0,
|
311
|
+
9, 10, 9, 92, 9, 9, 9, 0,
|
312
|
+
9, 10, 9, 93, 9, 9, 9, 0,
|
313
|
+
9, 10, 9, 94, 9, 9, 9, 0,
|
314
|
+
9, 10, 9, 95, 9, 9, 9, 0,
|
315
|
+
9, 10, 9, 96, 9, 9, 9, 0,
|
316
|
+
9, 10, 9, 61, 9, 9, 9, 0,
|
317
|
+
9, 10, 9, 98, 9, 9, 9, 0,
|
318
|
+
9, 10, 9, 99, 9, 9, 9, 0,
|
319
|
+
9, 10, 9, 100, 9, 9, 9, 0,
|
320
|
+
9, 10, 9, 101, 9, 9, 9, 0,
|
321
|
+
9, 10, 9, 69, 9, 9, 9, 0,
|
322
|
+
103, 107, 0, 104, 0, 105, 0, 106,
|
323
|
+
0, 6, 0, 108, 0, 109, 0, 5,
|
324
|
+
0, 111, 0, 112, 0, 113, 0, 114,
|
325
|
+
0, 115, 0, 116, 0, 117, 0, 118,
|
326
|
+
0, 119, 0, 120, 0, 121, 0, 122,
|
327
|
+
0, 123, 0, 6, 0, 125, 0, 126,
|
328
|
+
0, 6, 0, 128, 0, 129, 0, 130,
|
329
|
+
0, 131, 0, 6, 0, 0, 0
|
330
|
+
]
|
331
|
+
|
332
|
+
class << self
|
333
|
+
attr_accessor :_simple_lexer_trans_actions
|
334
|
+
private :_simple_lexer_trans_actions, :_simple_lexer_trans_actions=
|
335
|
+
end
|
336
|
+
self._simple_lexer_trans_actions = [
|
337
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
338
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
339
|
+
0, 0, 0, 0, 1, 1, 1, 1,
|
340
|
+
1, 1, 1, 1, 1, 1, 47, 0,
|
341
|
+
7, 0, 0, 0, 0, 11, 0, 0,
|
342
|
+
9, 0, 9, 9, 0, 1, 1, 1,
|
343
|
+
0, 13, 1, 1, 3, 0, 0, 0,
|
344
|
+
0, 9, 9, 9, 0, 13, 1, 1,
|
345
|
+
3, 0, 0, 0, 0, 0, 11, 9,
|
346
|
+
0, 9, 9, 0, 0, 0, 11, 9,
|
347
|
+
9, 9, 0, 0, 9, 9, 9, 0,
|
348
|
+
16, 0, 16, 16, 0, 0, 0, 0,
|
349
|
+
0, 0, 7, 0, 0, 0, 0, 0,
|
350
|
+
40, 0, 7, 0, 0, 0, 0, 0,
|
351
|
+
40, 0, 7, 0, 0, 0, 0, 0,
|
352
|
+
40, 0, 7, 0, 0, 0, 0, 0,
|
353
|
+
40, 0, 7, 0, 0, 0, 0, 0,
|
354
|
+
40, 0, 7, 0, 0, 0, 0, 0,
|
355
|
+
40, 0, 7, 0, 0, 0, 0, 0,
|
356
|
+
40, 0, 7, 0, 0, 0, 0, 0,
|
357
|
+
40, 0, 7, 0, 0, 0, 0, 0,
|
358
|
+
40, 0, 7, 0, 0, 0, 0, 0,
|
359
|
+
40, 0, 7, 0, 0, 0, 0, 0,
|
360
|
+
40, 0, 7, 0, 0, 0, 0, 0,
|
361
|
+
40, 0, 7, 0, 0, 0, 0, 0,
|
362
|
+
40, 0, 7, 0, 0, 0, 0, 0,
|
363
|
+
40, 0, 7, 0, 0, 0, 0, 0,
|
364
|
+
40, 0, 7, 0, 0, 0, 0, 0,
|
365
|
+
40, 0, 7, 0, 0, 0, 0, 0,
|
366
|
+
40, 0, 5, 0, 0, 0, 0, 40,
|
367
|
+
0, 0, 9, 9, 9, 0, 13, 1,
|
368
|
+
1, 3, 0, 0, 0, 13, 1, 1,
|
369
|
+
3, 0, 0, 0, 0, 7, 0, 0,
|
370
|
+
0, 0, 0, 37, 0, 7, 0, 0,
|
371
|
+
0, 0, 0, 37, 0, 7, 0, 0,
|
372
|
+
0, 0, 0, 37, 0, 5, 0, 0,
|
373
|
+
0, 0, 37, 0, 0, 34, 34, 34,
|
374
|
+
0, 13, 1, 1, 3, 0, 0, 0,
|
375
|
+
13, 1, 1, 3, 0, 0, 0, 0,
|
376
|
+
7, 0, 0, 0, 0, 0, 25, 0,
|
377
|
+
5, 0, 0, 0, 0, 25, 1, 1,
|
378
|
+
1, 25, 43, 0, 43, 43, 0, 0,
|
379
|
+
0, 0, 0, 0, 7, 0, 0, 0,
|
380
|
+
0, 0, 31, 0, 7, 0, 0, 0,
|
381
|
+
0, 0, 31, 0, 7, 0, 0, 0,
|
382
|
+
0, 0, 31, 0, 7, 0, 0, 0,
|
383
|
+
0, 0, 31, 0, 7, 0, 0, 0,
|
384
|
+
0, 0, 31, 0, 5, 0, 0, 0,
|
385
|
+
0, 31, 1, 1, 1, 1, 1, 1,
|
386
|
+
1, 31, 0, 31, 22, 22, 22, 0,
|
387
|
+
0, 22, 22, 22, 0, 0, 7, 0,
|
388
|
+
0, 0, 0, 0, 28, 0, 7, 0,
|
389
|
+
0, 0, 0, 0, 28, 0, 7, 0,
|
390
|
+
0, 0, 0, 0, 28, 0, 7, 0,
|
391
|
+
0, 0, 0, 0, 28, 0, 7, 0,
|
392
|
+
0, 0, 0, 0, 28, 0, 7, 0,
|
393
|
+
0, 0, 0, 0, 28, 0, 5, 0,
|
394
|
+
0, 0, 0, 28, 0, 0, 9, 9,
|
395
|
+
9, 1, 0, 13, 1, 1, 3, 0,
|
396
|
+
0, 0, 0, 9, 9, 9, 0, 13,
|
397
|
+
1, 1, 3, 0, 0, 0, 0, 19,
|
398
|
+
19, 19, 0, 0, 0, 0, 0, 0,
|
399
|
+
0, 7, 0, 0, 0, 0, 0, 25,
|
400
|
+
0, 7, 0, 0, 0, 0, 0, 25,
|
401
|
+
0, 7, 0, 0, 0, 0, 0, 25,
|
402
|
+
0, 7, 0, 0, 0, 0, 0, 25,
|
403
|
+
0, 7, 0, 0, 0, 0, 0, 25,
|
404
|
+
0, 7, 0, 0, 0, 0, 0, 25,
|
405
|
+
0, 7, 0, 0, 0, 0, 0, 25,
|
406
|
+
0, 7, 0, 0, 0, 0, 0, 31,
|
407
|
+
0, 7, 0, 0, 0, 0, 0, 31,
|
408
|
+
0, 7, 0, 0, 0, 0, 0, 31,
|
409
|
+
0, 7, 0, 0, 0, 0, 0, 31,
|
410
|
+
0, 7, 0, 0, 0, 0, 0, 31,
|
411
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
412
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
413
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
414
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
415
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
416
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
417
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
418
|
+
0, 0, 0, 0, 0, 0, 0
|
419
|
+
]
|
420
|
+
|
421
|
+
class << self
|
422
|
+
attr_accessor :_simple_lexer_eof_actions
|
423
|
+
private :_simple_lexer_eof_actions, :_simple_lexer_eof_actions=
|
424
|
+
end
|
425
|
+
self._simple_lexer_eof_actions = [
|
426
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
427
|
+
47, 11, 0, 11, 11, 0, 11, 11,
|
428
|
+
11, 11, 11, 0, 11, 0, 0, 0,
|
429
|
+
40, 40, 40, 40, 40, 40, 40, 40,
|
430
|
+
40, 40, 40, 40, 40, 40, 40, 40,
|
431
|
+
40, 40, 0, 40, 40, 40, 40, 40,
|
432
|
+
40, 37, 37, 37, 37, 0, 37, 37,
|
433
|
+
37, 37, 37, 37, 25, 25, 25, 0,
|
434
|
+
31, 31, 31, 31, 31, 31, 31, 31,
|
435
|
+
0, 0, 28, 28, 28, 28, 28, 28,
|
436
|
+
28, 0, 28, 28, 0, 28, 28, 28,
|
437
|
+
28, 0, 25, 25, 25, 25, 25, 25,
|
438
|
+
25, 31, 31, 31, 31, 31, 0, 0,
|
439
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
440
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
441
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
442
|
+
0, 0, 0, 0, 0
|
443
|
+
]
|
444
|
+
|
445
|
+
class << self
|
446
|
+
attr_accessor :simple_lexer_start
|
447
|
+
end
|
448
|
+
self.simple_lexer_start = 1;
|
449
|
+
class << self
|
450
|
+
attr_accessor :simple_lexer_first_final
|
451
|
+
end
|
452
|
+
self.simple_lexer_first_final = 132;
|
453
|
+
class << self
|
454
|
+
attr_accessor :simple_lexer_error
|
455
|
+
end
|
456
|
+
self.simple_lexer_error = 0;
|
457
|
+
|
458
|
+
class << self
|
459
|
+
attr_accessor :simple_lexer_en_main
|
460
|
+
end
|
461
|
+
self.simple_lexer_en_main = 1;
|
462
|
+
|
463
|
+
|
464
|
+
# line 79 "gen_vcfheaderline_parser.rl"
|
465
|
+
# %% this just fixes syntax highlighting...
|
466
|
+
|
467
|
+
def self.run_lexer(buf, options = {})
|
468
|
+
do_debug = (options[:debug] == true)
|
469
|
+
$stderr.print "---> ",buf,"\n" if do_debug
|
470
|
+
data = buf.unpack("c*") if(buf.is_a?(String))
|
471
|
+
eof = data.length
|
472
|
+
values = []
|
473
|
+
stack = []
|
474
|
+
|
475
|
+
emit = lambda { |type, data, ts, p|
|
476
|
+
# Print the type and text of the last read token
|
477
|
+
# p ts,p
|
478
|
+
$stderr.print "EMITTED: #{type}: #{data[ts...p].pack('c*')}\n" if do_debug
|
479
|
+
values << [type,data[ts...p].pack('c*')]
|
480
|
+
}
|
481
|
+
|
482
|
+
error_code = nil
|
483
|
+
|
484
|
+
|
485
|
+
# line 486 "gen_vcfheaderline_parser.rb"
|
486
|
+
begin
|
487
|
+
p ||= 0
|
488
|
+
pe ||= data.length
|
489
|
+
cs = simple_lexer_start
|
490
|
+
end
|
491
|
+
|
492
|
+
# line 99 "gen_vcfheaderline_parser.rl"
|
493
|
+
|
494
|
+
# line 495 "gen_vcfheaderline_parser.rb"
|
495
|
+
begin
|
496
|
+
_klen, _trans, _keys, _acts, _nacts = nil
|
497
|
+
_goto_level = 0
|
498
|
+
_resume = 10
|
499
|
+
_eof_trans = 15
|
500
|
+
_again = 20
|
501
|
+
_test_eof = 30
|
502
|
+
_out = 40
|
503
|
+
while true
|
504
|
+
_trigger_goto = false
|
505
|
+
if _goto_level <= 0
|
506
|
+
if p == pe
|
507
|
+
_goto_level = _test_eof
|
508
|
+
next
|
509
|
+
end
|
510
|
+
if cs == 0
|
511
|
+
_goto_level = _out
|
512
|
+
next
|
513
|
+
end
|
514
|
+
end
|
515
|
+
if _goto_level <= _resume
|
516
|
+
_keys = _simple_lexer_key_offsets[cs]
|
517
|
+
_trans = _simple_lexer_index_offsets[cs]
|
518
|
+
_klen = _simple_lexer_single_lengths[cs]
|
519
|
+
_break_match = false
|
520
|
+
|
521
|
+
begin
|
522
|
+
if _klen > 0
|
523
|
+
_lower = _keys
|
524
|
+
_upper = _keys + _klen - 1
|
525
|
+
|
526
|
+
loop do
|
527
|
+
break if _upper < _lower
|
528
|
+
_mid = _lower + ( (_upper - _lower) >> 1 )
|
529
|
+
|
530
|
+
if data[p].ord < _simple_lexer_trans_keys[_mid]
|
531
|
+
_upper = _mid - 1
|
532
|
+
elsif data[p].ord > _simple_lexer_trans_keys[_mid]
|
533
|
+
_lower = _mid + 1
|
534
|
+
else
|
535
|
+
_trans += (_mid - _keys)
|
536
|
+
_break_match = true
|
537
|
+
break
|
538
|
+
end
|
539
|
+
end # loop
|
540
|
+
break if _break_match
|
541
|
+
_keys += _klen
|
542
|
+
_trans += _klen
|
543
|
+
end
|
544
|
+
_klen = _simple_lexer_range_lengths[cs]
|
545
|
+
if _klen > 0
|
546
|
+
_lower = _keys
|
547
|
+
_upper = _keys + (_klen << 1) - 2
|
548
|
+
loop do
|
549
|
+
break if _upper < _lower
|
550
|
+
_mid = _lower + (((_upper-_lower) >> 1) & ~1)
|
551
|
+
if data[p].ord < _simple_lexer_trans_keys[_mid]
|
552
|
+
_upper = _mid - 2
|
553
|
+
elsif data[p].ord > _simple_lexer_trans_keys[_mid+1]
|
554
|
+
_lower = _mid + 2
|
555
|
+
else
|
556
|
+
_trans += ((_mid - _keys) >> 1)
|
557
|
+
_break_match = true
|
558
|
+
break
|
559
|
+
end
|
560
|
+
end # loop
|
561
|
+
break if _break_match
|
562
|
+
_trans += _klen
|
563
|
+
end
|
564
|
+
end while false
|
565
|
+
cs = _simple_lexer_trans_targs[_trans]
|
566
|
+
if _simple_lexer_trans_actions[_trans] != 0
|
567
|
+
_acts = _simple_lexer_trans_actions[_trans]
|
568
|
+
_nacts = _simple_lexer_actions[_acts]
|
569
|
+
_acts += 1
|
570
|
+
while _nacts > 0
|
571
|
+
_nacts -= 1
|
572
|
+
_acts += 1
|
573
|
+
case _simple_lexer_actions[_acts - 1]
|
574
|
+
when 0 then
|
575
|
+
# line 33 "gen_vcfheaderline_parser.rl"
|
576
|
+
begin
|
577
|
+
ts=p end
|
578
|
+
when 1 then
|
579
|
+
# line 34 "gen_vcfheaderline_parser.rl"
|
580
|
+
begin
|
581
|
+
|
582
|
+
emit.call(:value,data,ts,p)
|
583
|
+
end
|
584
|
+
when 2 then
|
585
|
+
# line 38 "gen_vcfheaderline_parser.rl"
|
586
|
+
begin
|
587
|
+
|
588
|
+
emit.call(:kw,data,ts,p)
|
589
|
+
end
|
590
|
+
when 3 then
|
591
|
+
# line 58 "gen_vcfheaderline_parser.rl"
|
592
|
+
begin
|
593
|
+
emit.call(:key_word,data,ts,p) end
|
594
|
+
when 4 then
|
595
|
+
# line 59 "gen_vcfheaderline_parser.rl"
|
596
|
+
begin
|
597
|
+
emit.call(:value,data,ts,p) end
|
598
|
+
when 5 then
|
599
|
+
# line 60 "gen_vcfheaderline_parser.rl"
|
600
|
+
begin
|
601
|
+
emit.call(:value,data,ts,p) end
|
602
|
+
when 6 then
|
603
|
+
# line 62 "gen_vcfheaderline_parser.rl"
|
604
|
+
begin
|
605
|
+
emit.call(:value,data,ts,p) end
|
606
|
+
when 7 then
|
607
|
+
# line 65 "gen_vcfheaderline_parser.rl"
|
608
|
+
begin
|
609
|
+
emit.call(:value,data,ts,p) end
|
610
|
+
when 8 then
|
611
|
+
# line 67 "gen_vcfheaderline_parser.rl"
|
612
|
+
begin
|
613
|
+
debug("ID FOUND") end
|
614
|
+
when 9 then
|
615
|
+
# line 67 "gen_vcfheaderline_parser.rl"
|
616
|
+
begin
|
617
|
+
error_code="Malformed ID" end
|
618
|
+
when 10 then
|
619
|
+
# line 68 "gen_vcfheaderline_parser.rl"
|
620
|
+
begin
|
621
|
+
error_code="Version" end
|
622
|
+
when 11 then
|
623
|
+
# line 69 "gen_vcfheaderline_parser.rl"
|
624
|
+
begin
|
625
|
+
error_code="Number" end
|
626
|
+
when 12 then
|
627
|
+
# line 70 "gen_vcfheaderline_parser.rl"
|
628
|
+
begin
|
629
|
+
debug("DATE FOUND") end
|
630
|
+
when 13 then
|
631
|
+
# line 70 "gen_vcfheaderline_parser.rl"
|
632
|
+
begin
|
633
|
+
error_code="Date" end
|
634
|
+
when 14 then
|
635
|
+
# line 71 "gen_vcfheaderline_parser.rl"
|
636
|
+
begin
|
637
|
+
error_code="GATK" end
|
638
|
+
when 15 then
|
639
|
+
# line 72 "gen_vcfheaderline_parser.rl"
|
640
|
+
begin
|
641
|
+
debug("KEY_VALUE found") end
|
642
|
+
when 16 then
|
643
|
+
# line 72 "gen_vcfheaderline_parser.rl"
|
644
|
+
begin
|
645
|
+
error_code="unknown key-value " end
|
646
|
+
# line 647 "gen_vcfheaderline_parser.rb"
|
647
|
+
end # action switch
|
648
|
+
end
|
649
|
+
end
|
650
|
+
if _trigger_goto
|
651
|
+
next
|
652
|
+
end
|
653
|
+
end
|
654
|
+
if _goto_level <= _again
|
655
|
+
if cs == 0
|
656
|
+
_goto_level = _out
|
657
|
+
next
|
658
|
+
end
|
659
|
+
p += 1
|
660
|
+
if p != pe
|
661
|
+
_goto_level = _resume
|
662
|
+
next
|
663
|
+
end
|
664
|
+
end
|
665
|
+
if _goto_level <= _test_eof
|
666
|
+
if p == eof
|
667
|
+
__acts = _simple_lexer_eof_actions[cs]
|
668
|
+
__nacts = _simple_lexer_actions[__acts]
|
669
|
+
__acts += 1
|
670
|
+
while __nacts > 0
|
671
|
+
__nacts -= 1
|
672
|
+
__acts += 1
|
673
|
+
case _simple_lexer_actions[__acts - 1]
|
674
|
+
when 9 then
|
675
|
+
# line 67 "gen_vcfheaderline_parser.rl"
|
676
|
+
begin
|
677
|
+
error_code="Malformed ID" end
|
678
|
+
when 10 then
|
679
|
+
# line 68 "gen_vcfheaderline_parser.rl"
|
680
|
+
begin
|
681
|
+
error_code="Version" end
|
682
|
+
when 11 then
|
683
|
+
# line 69 "gen_vcfheaderline_parser.rl"
|
684
|
+
begin
|
685
|
+
error_code="Number" end
|
686
|
+
when 13 then
|
687
|
+
# line 70 "gen_vcfheaderline_parser.rl"
|
688
|
+
begin
|
689
|
+
error_code="Date" end
|
690
|
+
when 14 then
|
691
|
+
# line 71 "gen_vcfheaderline_parser.rl"
|
692
|
+
begin
|
693
|
+
error_code="GATK" end
|
694
|
+
when 16 then
|
695
|
+
# line 72 "gen_vcfheaderline_parser.rl"
|
696
|
+
begin
|
697
|
+
error_code="unknown key-value " end
|
698
|
+
# line 699 "gen_vcfheaderline_parser.rb"
|
699
|
+
end # eof action switch
|
700
|
+
end
|
701
|
+
if _trigger_goto
|
702
|
+
next
|
703
|
+
end
|
704
|
+
end
|
705
|
+
end
|
706
|
+
if _goto_level <= _out
|
707
|
+
break
|
708
|
+
end
|
709
|
+
end
|
710
|
+
end
|
711
|
+
|
712
|
+
# line 100 "gen_vcfheaderline_parser.rl"
|
713
|
+
|
714
|
+
raise "ERROR: "+error_code+" in "+buf if error_code
|
715
|
+
|
716
|
+
begin
|
717
|
+
res = {}
|
718
|
+
# p values
|
719
|
+
values.each_slice(2) do | a,b |
|
720
|
+
$stderr.print '*',a,b if do_debug
|
721
|
+
keyword = a[1]
|
722
|
+
value = b[1]
|
723
|
+
value = value.to_i if ['length','Epoch'].index(keyword)
|
724
|
+
res[keyword] = value
|
725
|
+
# p h[:value] if h[:name]==:identifier or h[:name]==:value or h[:name]==:string
|
726
|
+
end
|
727
|
+
rescue
|
728
|
+
print "ERROR: "
|
729
|
+
p values
|
730
|
+
raise
|
731
|
+
end
|
732
|
+
$stderr.print(res,"\n") if do_debug
|
733
|
+
res
|
734
|
+
end
|
735
|
+
end
|
736
|
+
end
|
737
|
+
end
|
738
|
+
|
739
|
+
if __FILE__ == $0
  # Self-test: runs only when this file is executed directly as a script.
  # Sample VCF header lines are passed to RagelKeyValues.run_lexer with
  # debug output switched on; the hashes it returns are collected by their
  # 'ID' entry and compared against a hand-written expectation.
  parser = BioVcf::VcfHeaderParser::RagelKeyValues

  # Smoke test with a very long GATKCommandLine header. Only the call
  # itself matters here; the returned hash is not inspected.
  # NOTE(review): the sample below is reproduced exactly as it appears in
  # this view (two physical lines) -- confirm against the upstream file
  # whether it is meant to be a single line.
  gatk_header = <<LINE1
##GATKCommandLine=<ID=CombineVariants,Version=3.2-2-gec30cee,Date="Thu Oct 30 13:41:59 CET 2014",Epoch=1414672919266,CommandLineOptions="analysis_type=CombineVariants input_file=[] showFullBamList=false read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/hpc/cog_bioinf/GENOMES/Homo_sapiens.GRCh37.GATK.illumina/Homo_sapiens.GRCh37.GATK.illumina.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 refactor_NDN_cigar_string=false fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false variant_index_type=DYNAMIC_SEEK variant_index_parameter=-1 logging_level=INFO log_to_file=null help=false version=false variant=[(RodBindingCollection [(RodBinding name=variant source=/hpc/cog_bioinf/data/robert/testIAP/testSubsetExome/tmp/testSubsetExome.filtered_snps.vcf)]), (RodBindingCollection [(RodBinding name=variant2 source=/hpc/cog_bioinf/data/robert/testIAP/testSubsetExome/tmp/testSubsetExome.filtered_indels.vcf)])] out=org.broadinstitute.gatk.engine.io.stubs.VariantContextWriterStub 
no_cmdline_in_header=org.broadinstitute.gatk.engine.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.gatk.engine.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.gatk.engine.io.stubs.VariantContextWriterStub genotypemergeoption=UNSORTED filteredrecordsmergetype=KEEP_IF_ANY_UNFILTERED multipleallelesmergetype=BY_TYPE rod_priority_list=null printComplexMerges=false filteredAreUncalled=false minimalVCF=false excludeNonVariants=false setKey=set assumeIdenticalSamples=false minimumN=1 suppressCommandLineHeader=false mergeInfoWithMaxAC=false filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false">
LINE1
  parser.run_lexer(gatk_header.strip, debug: true)

  # FILTER / FORMAT / INFO / contig samples, one header per line. They
  # include commas, parentheses and escaped quotes inside quoted values.
  header_lines = <<LINES
##FILTER=<ID=HaplotypeScoreHigh,Description="HaplotypeScore > 13.0">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Total read depth",Extra="Yes?">
##FORMAT=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
##INFO=<ID=PM,Number=0,Type=Flag,Description="Variant is Precious(Clinical,Pubmed Cited)">
##INFO=<ID=VP,Number=1,Type=String,Description="Variation Property. Documentation is at ftp://ftp.ncbi.nlm.nih.gov/snp/specs/dbSNP_BitField_latest.pdf",Source="dbsnp",Version="138">
##INFO=<ID=GENEINFO,Number=1,Type=String,Description="Pairs each of gene symbol:gene id. The gene symbol and id are delimited by a colon (:), and each pair is delimited by a vertical bar (|)">
##INFO=<ID=CLNHGVS,Number=.,Type=String,Description="Variant names from HGVS. The order of these variants corresponds to the order of the info in the other clinical INFO tags.">
##INFO=<ID=CLNHGVS1,Number=.,Type=String,Description="Variant names from \\"HGVS\\". The order of these 'variants' corresponds to the order of the info in the other clinical INFO tags.">
##contig=<ID=XXXY12>
##contig=<ID=Y,length=59373566>
LINES

  # Parse every sample line, echo each parsed hash, and index it by ID.
  by_id = {}
  header_lines.strip.split("\n").each do |line|
    fields = parser.run_lexer(line, debug: true)
    by_id[fields['ID']] = fields
    p fields
  end
  p by_id

  # Expected table; note that the contig 'length' is an Integer while all
  # other values stay strings.
  expected = {
    "HaplotypeScoreHigh" => {
      "ID" => "HaplotypeScoreHigh",
      "Description" => "HaplotypeScore > 13.0"
    },
    "GT" => {
      "ID" => "GT", "Number" => "1", "Type" => "String",
      "Description" => "Genotype"
    },
    "DP" => {
      "ID" => "DP", "Number" => "1", "Type" => "Integer",
      "Description" => "Total read depth",
      "Extra" => "Yes?"
    },
    "DP4" => {
      "ID" => "DP4", "Number" => "4", "Type" => "Integer",
      "Description" => "# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases"
    },
    "PM" => {
      "ID" => "PM", "Number" => "0", "Type" => "Flag",
      "Description" => "Variant is Precious(Clinical,Pubmed Cited)"
    },
    "VP" => {
      "ID" => "VP", "Number" => "1", "Type" => "String",
      "Description" => "Variation Property. Documentation is at ftp://ftp.ncbi.nlm.nih.gov/snp/specs/dbSNP_BitField_latest.pdf",
      "Source" => "dbsnp",
      "Version" => "138"
    },
    "GENEINFO" => {
      "ID" => "GENEINFO", "Number" => "1", "Type" => "String",
      "Description" => "Pairs each of gene symbol:gene id. The gene symbol and id are delimited by a colon (:), and each pair is delimited by a vertical bar (|)"
    },
    "CLNHGVS" => {
      "ID" => "CLNHGVS", "Number" => ".", "Type" => "String",
      "Description" => "Variant names from HGVS. The order of these variants corresponds to the order of the info in the other clinical INFO tags."
    },
    "CLNHGVS1" => {
      "ID" => "CLNHGVS1", "Number" => ".", "Type" => "String",
      "Description" => "Variant names from \\\"HGVS\\\". The order of these 'variants' corresponds to the order of the info in the other clinical INFO tags."
    },
    "XXXY12" => { "ID" => "XXXY12" },
    "Y" => { "ID" => "Y", "length" => 59373566 }
  }

  raise "ERROR" unless by_id == expected
end # test
|