bio-vcf 0.8.1 → 0.8.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,483 @@
1
+
2
+ # line 1 "gen_vcfheaderline_parser.rl"
3
+ # Ragel lexer for VCF-header
4
+ #
5
+ # This is a partial lexer for the VCF header format. Bio-vcf uses this
6
+ # to generate meta information in (for example) JSON format. The
7
+ # advantage of using a full state engine is that it allows for easy
8
+ # parsing of key-value pairs with syntax checking and, for example,
9
+ # escaped quotes in quoted string values. This edition validates ID and
10
+ # Number fields only.
11
+ #
12
+ # Note the .rb version is generated from ./ragel/gen_vcfheaderline_parser.rl
13
+
14
module BioVcf

  module VcfHeaderParser

    # Key/value lexer for VCF meta-information header lines such as
    #
    #   ##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth">
    #
    # The state tables below are Ragel output. They are data, not hand-written
    # code: do not edit them by hand. Any change made in this file (including
    # the hand-written prologue/epilogue of run_lexer) has to be mirrored in
    # ./ragel/gen_vcfheaderline_parser.rl, otherwise it is lost when the parser
    # is regenerated.
    module RagelKeyValues

      # --- Ragel state tables (generated) -----------------------------------

      # Flattened action lists: each entry point holds a count followed by
      # that many action ids (see the action switch in run_lexer).
      class << self
        attr_accessor :_simple_lexer_actions
        private :_simple_lexer_actions, :_simple_lexer_actions=
      end
      self._simple_lexer_actions = [
        0, 1, 0, 1, 1, 1, 2, 1,
        3, 1, 4, 1, 5, 1, 6, 1,
        9, 2, 0, 1, 2, 7, 9, 2,
        8, 9, 3, 7, 8, 9
      ]

      # Per state: offset of the state's first key in _simple_lexer_trans_keys.
      class << self
        attr_accessor :_simple_lexer_key_offsets
        private :_simple_lexer_key_offsets, :_simple_lexer_key_offsets=
      end
      self._simple_lexer_key_offsets = [
        0, 0, 1, 2, 5, 6, 7, 8,
        14, 20, 27, 32, 34, 36, 38, 40,
        40, 40, 42, 44, 50, 57, 64, 68,
        74, 81, 89, 97, 105, 113, 120, 128,
        130, 132, 133, 134, 135, 136, 137, 138,
        139, 140, 141, 142, 144, 160, 167, 172,
        176, 184, 192, 196
      ]

      # Character codes: per state first the single keys, then (low, high)
      # pairs for the range keys.
      class << self
        attr_accessor :_simple_lexer_trans_keys
        private :_simple_lexer_trans_keys, :_simple_lexer_trans_keys=
      end
      self._simple_lexer_trans_keys = [
        35, 35, 65, 70, 73, 76, 84, 61,
        73, 78, 65, 90, 97, 122, 48, 57,
        65, 90, 97, 122, 61, 48, 57, 65,
        90, 97, 122, 32, 34, 39, 9, 13,
        34, 92, 34, 92, 39, 92, 39, 92,
        48, 57, 48, 57, 48, 57, 65, 90,
        97, 122, 68, 48, 57, 65, 90, 97,
        122, 61, 48, 57, 65, 90, 97, 122,
        65, 90, 97, 122, 48, 57, 65, 90,
        97, 122, 117, 48, 57, 65, 90, 97,
        122, 61, 109, 48, 57, 65, 90, 97,
        122, 61, 98, 48, 57, 65, 90, 97,
        122, 61, 101, 48, 57, 65, 90, 97,
        122, 61, 114, 48, 57, 65, 90, 97,
        122, 61, 48, 57, 65, 90, 97, 122,
        43, 45, 46, 65, 71, 82, 48, 57,
        48, 57, 73, 79, 76, 84, 69, 82,
        82, 77, 65, 78, 70, 79, 44, 60,
        32, 34, 39, 44, 46, 60, 9, 13,
        43, 45, 48, 57, 65, 90, 97, 122,
        32, 34, 39, 44, 60, 9, 13, 44,
        46, 60, 48, 57, 44, 60, 48, 57,
        44, 60, 48, 57, 65, 90, 97, 122,
        44, 60, 48, 57, 65, 90, 97, 122,
        44, 60, 48, 57, 44, 60, 0
      ]

      # Per state: number of single keys.
      class << self
        attr_accessor :_simple_lexer_single_lengths
        private :_simple_lexer_single_lengths, :_simple_lexer_single_lengths=
      end
      self._simple_lexer_single_lengths = [
        0, 1, 1, 3, 1, 1, 1, 2,
        0, 1, 3, 2, 2, 2, 2, 0,
        0, 0, 0, 0, 1, 1, 0, 0,
        1, 2, 2, 2, 2, 1, 6, 0,
        2, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 2, 6, 5, 3, 2,
        2, 2, 2, 2
      ]

      # Per state: number of (low, high) range keys.
      class << self
        attr_accessor :_simple_lexer_range_lengths
        private :_simple_lexer_range_lengths, :_simple_lexer_range_lengths=
      end
      self._simple_lexer_range_lengths = [
        0, 0, 0, 0, 0, 0, 0, 2,
        3, 3, 1, 0, 0, 0, 0, 0,
        0, 1, 1, 3, 3, 3, 2, 3,
        3, 3, 3, 3, 3, 3, 1, 1,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 5, 1, 1, 1,
        3, 3, 1, 0
      ]

      # Per state: offset of the state's first entry in _simple_lexer_indicies.
      class << self
        attr_accessor :_simple_lexer_index_offsets
        private :_simple_lexer_index_offsets, :_simple_lexer_index_offsets=
      end
      self._simple_lexer_index_offsets = [
        0, 0, 2, 4, 8, 10, 12, 14,
        19, 23, 28, 33, 36, 39, 42, 45,
        46, 47, 49, 51, 55, 60, 65, 68,
        72, 77, 83, 89, 95, 101, 106, 114,
        116, 119, 121, 123, 125, 127, 129, 131,
        133, 135, 137, 139, 142, 154, 161, 166,
        170, 176, 182, 186
      ]

      # Maps a matched key slot (or the per-state default slot) to a
      # transition number.
      class << self
        attr_accessor :_simple_lexer_indicies
        private :_simple_lexer_indicies, :_simple_lexer_indicies=
      end
      self._simple_lexer_indicies = [
        0, 1, 2, 1, 3, 4, 5, 1,
        6, 1, 7, 1, 8, 1, 11, 12,
        10, 10, 9, 14, 14, 14, 13, 15,
        14, 14, 14, 13, 16, 17, 18, 16,
        13, 20, 21, 19, 23, 24, 22, 20,
        26, 25, 23, 28, 27, 27, 22, 29,
        13, 30, 13, 31, 31, 31, 13, 33,
        14, 14, 14, 32, 34, 14, 14, 14,
        32, 35, 35, 32, 36, 36, 36, 32,
        38, 14, 14, 14, 37, 15, 39, 14,
        14, 14, 37, 15, 40, 14, 14, 14,
        37, 15, 41, 14, 14, 14, 37, 15,
        42, 14, 14, 14, 37, 43, 14, 14,
        14, 37, 44, 44, 45, 45, 45, 45,
        46, 37, 47, 37, 48, 49, 1, 50,
        1, 51, 1, 52, 1, 7, 1, 53,
        1, 54, 1, 6, 1, 55, 1, 56,
        1, 7, 1, 57, 57, 1, 16, 17,
        18, 57, 8, 57, 16, 58, 29, 59,
        59, 1, 16, 17, 18, 57, 57, 16,
        1, 57, 60, 57, 29, 1, 57, 57,
        30, 1, 61, 61, 31, 31, 31, 1,
        62, 62, 36, 36, 36, 1, 63, 63,
        47, 1, 63, 63, 1, 0
      ]

      # Per transition: target state (0 is the error state).
      class << self
        attr_accessor :_simple_lexer_trans_targs
        private :_simple_lexer_trans_targs, :_simple_lexer_trans_targs=
      end
      self._simple_lexer_trans_targs = [
        2, 0, 3, 4, 32, 40, 5, 6,
        43, 0, 8, 20, 24, 0, 9, 44,
        10, 11, 13, 12, 45, 16, 12, 45,
        16, 14, 15, 14, 15, 46, 47, 48,
        0, 21, 22, 23, 49, 0, 25, 26,
        27, 28, 29, 30, 31, 51, 50, 50,
        33, 37, 34, 35, 36, 38, 39, 41,
        42, 7, 17, 19, 18, 7, 7, 7
      ]

      # Per transition: entry point into _simple_lexer_actions (0 = none).
      class << self
        attr_accessor :_simple_lexer_trans_actions
        private :_simple_lexer_trans_actions, :_simple_lexer_trans_actions=
      end
      self._simple_lexer_trans_actions = [
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 26, 1, 1, 1, 15, 0, 7,
        0, 0, 0, 1, 17, 1, 0, 3,
        0, 1, 1, 0, 0, 0, 0, 0,
        20, 0, 5, 1, 0, 23, 0, 0,
        0, 0, 0, 5, 1, 1, 1, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 1, 0, 9, 11, 13
      ]

      # Per state: entry point into _simple_lexer_actions run when the input
      # ends in that state (0 = none).
      class << self
        attr_accessor :_simple_lexer_eof_actions
        private :_simple_lexer_eof_actions, :_simple_lexer_eof_actions=
      end
      self._simple_lexer_eof_actions = [
        0, 0, 0, 0, 0, 0, 0, 26,
        15, 15, 15, 15, 15, 15, 15, 15,
        15, 15, 15, 15, 20, 20, 20, 20,
        23, 23, 23, 23, 23, 23, 23, 23,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        9, 11, 13, 13
      ]

      class << self
        attr_accessor :simple_lexer_start
      end
      self.simple_lexer_start = 1;
      class << self
        attr_accessor :simple_lexer_first_final
      end
      self.simple_lexer_first_final = 43;
      class << self
        attr_accessor :simple_lexer_error
      end
      self.simple_lexer_error = 0;

      class << self
        attr_accessor :simple_lexer_en_main
      end
      self.simple_lexer_en_main = 1;


      # line 61 "gen_vcfheaderline_parser.rl"
      # %% this just fixes our syntax highlighting...

      # Lex one VCF meta-information line into a Hash of its key/value pairs.
      #
      # The state tables accept lines starting with "##" followed by ALT,
      # FILTER, FORMAT or INFO and "=". Tokens are emitted as [type, text]
      # pairs and consecutive tokens are then combined into key => value
      # entries, e.g.
      #
      #   run_lexer('##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">')
      #   # => {"ID"=>"GT", "Number"=>"1", "Type"=>"String", "Description"=>"Genotype"}
      #
      # Quoted values are returned without the surrounding quotes. Input that
      # drives the machine into its error state without firing an error
      # action (for example an empty string) is not reported; the pairs
      # collected up to that point are returned.
      #
      # @param buf [String] the header line to lex
      # @param options [Hash] +:debug => true+ prints every token and the
      #   resulting Hash to stdout
      # @return [Hash{String=>String}] key/value pairs in order of appearance;
      #   the strings carry the encoding of +buf+
      # @raise [ArgumentError] if +buf+ is not a String
      # @raise [RuntimeError] "ERROR: <code> in <buf>" when an error action of
      #   the state machine fired
      def self.run_lexer(buf, options = {})
        # Fail early and explicitly: previously a non-String left `data` nil
        # and surfaced as an unrelated NoMethodError further down.
        unless buf.is_a?(String)
          raise ArgumentError, "run_lexer expects a String, got #{buf.class}"
        end

        do_debug = (options[:debug] == true)
        data = buf.unpack("c*")
        eof = data.length
        values = []
        ts = nil # token start, set by action 0 before any emit

        emit = lambda { |type, bytes, from, to|
          # pack returns a binary (ASCII-8BIT) string; restore the encoding of
          # the input so non-ASCII values compare equal to the source text.
          text = bytes[from...to].pack('c*').force_encoding(buf.encoding)
          # Print the type and text of the last read token
          puts "#{type}: #{text}" if do_debug
          values << [type, text]
        }

        error_code = nil

        # ---- generated Ragel driver: keep statement order as is ----
        begin
          p ||= 0
          pe ||= data.length
          cs = simple_lexer_start
        end

        # line 80 "gen_vcfheaderline_parser.rl"

        begin
          _klen, _trans, _keys, _acts, _nacts = nil
          _goto_level = 0
          _resume = 10
          _eof_trans = 15
          _again = 20
          _test_eof = 30
          _out = 40
          while true
            _trigger_goto = false
            if _goto_level <= 0
              if p == pe
                _goto_level = _test_eof
                next
              end
              if cs == 0
                _goto_level = _out
                next
              end
            end
            if _goto_level <= _resume
              _keys = _simple_lexer_key_offsets[cs]
              _trans = _simple_lexer_index_offsets[cs]
              _klen = _simple_lexer_single_lengths[cs]
              _break_match = false

              begin
                if _klen > 0
                  _lower = _keys
                  _upper = _keys + _klen - 1

                  loop do
                    break if _upper < _lower
                    _mid = _lower + ( (_upper - _lower) >> 1 )

                    if data[p].ord < _simple_lexer_trans_keys[_mid]
                      _upper = _mid - 1
                    elsif data[p].ord > _simple_lexer_trans_keys[_mid]
                      _lower = _mid + 1
                    else
                      _trans += (_mid - _keys)
                      _break_match = true
                      break
                    end
                  end # loop
                  break if _break_match
                  _keys += _klen
                  _trans += _klen
                end
                _klen = _simple_lexer_range_lengths[cs]
                if _klen > 0
                  _lower = _keys
                  _upper = _keys + (_klen << 1) - 2
                  loop do
                    break if _upper < _lower
                    _mid = _lower + (((_upper-_lower) >> 1) & ~1)
                    if data[p].ord < _simple_lexer_trans_keys[_mid]
                      _upper = _mid - 2
                    elsif data[p].ord > _simple_lexer_trans_keys[_mid+1]
                      _lower = _mid + 2
                    else
                      _trans += ((_mid - _keys) >> 1)
                      _break_match = true
                      break
                    end
                  end # loop
                  break if _break_match
                  _trans += _klen
                end
              end while false
              _trans = _simple_lexer_indicies[_trans]
              cs = _simple_lexer_trans_targs[_trans]
              if _simple_lexer_trans_actions[_trans] != 0
                _acts = _simple_lexer_trans_actions[_trans]
                _nacts = _simple_lexer_actions[_acts]
                _acts += 1
                while _nacts > 0
                  _nacts -= 1
                  _acts += 1
                  case _simple_lexer_actions[_acts - 1]
                  when 0 then
                    # line 23 "gen_vcfheaderline_parser.rl"
                    begin
                      ts=p
                    end
                  when 1 then
                    # line 24 "gen_vcfheaderline_parser.rl"
                    begin
                      emit.call(:value,data,ts,p)
                    end
                  when 2 then
                    # line 28 "gen_vcfheaderline_parser.rl"
                    begin
                      emit.call(:kw,data,ts,p)
                    end
                  when 3 then
                    # line 46 "gen_vcfheaderline_parser.rl"
                    begin
                      emit.call(:key_word,data,ts,p)
                    end
                  when 4 then
                    # line 47 "gen_vcfheaderline_parser.rl"
                    begin
                      emit.call(:value,data,ts,p)
                    end
                  when 5 then
                    # line 48 "gen_vcfheaderline_parser.rl"
                    begin
                      emit.call(:value,data,ts,p)
                    end
                  when 6 then
                    # line 50 "gen_vcfheaderline_parser.rl"
                    begin
                      emit.call(:value,data,ts,p)
                    end
                  when 7 then
                    # line 52 "gen_vcfheaderline_parser.rl"
                    begin
                      error_code="ID"
                    end
                  when 8 then
                    # line 53 "gen_vcfheaderline_parser.rl"
                    begin
                      error_code="Number"
                    end
                  when 9 then
                    # line 54 "gen_vcfheaderline_parser.rl"
                    begin
                      error_code="key-value"
                    end
                  end # action switch
                end
              end
              if _trigger_goto
                next
              end
            end
            if _goto_level <= _again
              if cs == 0
                _goto_level = _out
                next
              end
              p += 1
              if p != pe
                _goto_level = _resume
                next
              end
            end
            if _goto_level <= _test_eof
              if p == eof
                __acts = _simple_lexer_eof_actions[cs]
                __nacts = _simple_lexer_actions[__acts]
                __acts += 1
                while __nacts > 0
                  __nacts -= 1
                  __acts += 1
                  case _simple_lexer_actions[__acts - 1]
                  when 4 then
                    # line 47 "gen_vcfheaderline_parser.rl"
                    begin
                      emit.call(:value,data,ts,p)
                    end
                  when 5 then
                    # line 48 "gen_vcfheaderline_parser.rl"
                    begin
                      emit.call(:value,data,ts,p)
                    end
                  when 6 then
                    # line 50 "gen_vcfheaderline_parser.rl"
                    begin
                      emit.call(:value,data,ts,p)
                    end
                  when 7 then
                    # line 52 "gen_vcfheaderline_parser.rl"
                    begin
                      error_code="ID"
                    end
                  when 8 then
                    # line 53 "gen_vcfheaderline_parser.rl"
                    begin
                      error_code="Number"
                    end
                  when 9 then
                    # line 54 "gen_vcfheaderline_parser.rl"
                    begin
                      error_code="key-value"
                    end
                  end # eof action switch
                end
                if _trigger_goto
                  next
                end
              end
            end
            if _goto_level <= _out
              break
            end
          end
        end
        # ---- end of generated Ragel driver ----

        # line 81 "gen_vcfheaderline_parser.rl"

        raise "ERROR: #{error_code} in #{buf}" if error_code

        res = {}
        begin
          # Tokens arrive as key, value, key, value, ...; each is [type, text].
          values.each_slice(2) do |key, value|
            res[key[1]] = value[1]
          end
        rescue
          # A key without a value leaves `value` nil above. Dump the token
          # list for diagnosis on stderr, so it does not mix with regular
          # program output on stdout, and re-raise the original exception.
          $stderr.print "ERROR: "
          $stderr.puts values.inspect
          raise
        end
        # `p` is the Ragel position variable in this method, so spell out the
        # debug print instead of calling Kernel#p.
        puts res.inspect if do_debug
        res
      end
    end
  end
end
464
+
465
if __FILE__ == $0

  # Smoke test, only run when this file is executed directly: echo each
  # sample header line followed by the Hash the lexer returns for it.
  sample_headers = <<LINES
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Total read depth",Extra="Yes?">
##FORMAT=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
##INFO=<ID=PM,Number=0,Type=Flag,Description="Variant is Precious(Clinical,Pubmed Cited)">
##INFO=<ID=VP,Number=1,Type=String,Description="Variation Property.  Documentation is at ftp://ftp.ncbi.nlm.nih.gov/snp/specs/dbSNP_BitField_latest.pdf",Source="dbsnp",Version="138">
##INFO=<ID=GENEINFO,Number=1,Type=String,Description="Pairs each of gene symbol:gene id.  The gene symbol and id are delimited by a colon (:), and each pair is delimited by a vertical bar (|)">
##INFO=<ID=CLNHGVS,Number=.,Type=String,Description="Variant names from HGVS.    The order of these variants corresponds to the order of the info in the other clinical  INFO tags.">
##INFO=<ID=CLNHGVS1,Number=.,Type=String,Description="Variant names from \\"HGVS\\".    The order of these 'variants' corresponds to the order of the info in the other clinical  INFO tags.">
LINES

  sample_headers.strip.split("\n").each do |header_line|
    puts header_line
    p BioVcf::VcfHeaderParser::RagelKeyValues.run_lexer(header_line, debug: false)
  end

end # test