bio-vcf 0.8.1 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,483 @@
1
+
2
+ # line 1 "gen_vcfheaderline_parser.rl"
3
+ # Ragel lexer for VCF-header
4
+ #
5
+ # This is a partial lexer for the VCF header format. Bio-vcf uses this
6
+ # to generate meta information in (for example) JSON format. The
7
+ # advantage of using a full state engine is that it allows for easy
8
+ # parsing of key-value pairs with syntax checking and, for example,
9
+ # escaped quotes in quoted string values. This edition validates ID and
10
+ # Number fields only.
11
+ #
12
+ # Note the .rb version is generated from ./ragel/gen_vcfheaderline_parser.rl
13
+
14
module BioVcf

  module VcfHeaderParser

    # Table-driven lexer (generated by Ragel from
    # ./ragel/gen_vcfheaderline_parser.rl) that splits one VCF header line
    # such as
    #
    #   ##INFO=<ID=PM,Number=0,Type=Flag,Description="...">
    #
    # into key/value pairs.  The integer tables below encode the state
    # machine and are consumed only by run_lexer; do not edit them by hand,
    # regenerate them from the .rl file instead.
    module RagelKeyValues

      # line 57 "gen_vcfheaderline_parser.rl"

      # line 28 "gen_vcfheaderline_parser.rb"

      # Action lists: each list is stored as a count followed by that many
      # action ids.  Transition/EOF action tables hold offsets into this
      # array (offset 0 is the empty list).
      class << self
        attr_accessor :_simple_lexer_actions
        private :_simple_lexer_actions, :_simple_lexer_actions=
      end
      self._simple_lexer_actions = [
        0, 1, 0, 1, 1, 1, 2, 1,
        3, 1, 4, 1, 5, 1, 6, 1,
        9, 2, 0, 1, 2, 7, 9, 2,
        8, 9, 3, 7, 8, 9
      ]

      # Per state: offset of the state's first key in _simple_lexer_trans_keys.
      class << self
        attr_accessor :_simple_lexer_key_offsets
        private :_simple_lexer_key_offsets, :_simple_lexer_key_offsets=
      end
      self._simple_lexer_key_offsets = [
        0, 0, 1, 2, 5, 6, 7, 8,
        14, 20, 27, 32, 34, 36, 38, 40,
        40, 40, 42, 44, 50, 57, 64, 68,
        74, 81, 89, 97, 105, 113, 120, 128,
        130, 132, 133, 134, 135, 136, 137, 138,
        139, 140, 141, 142, 144, 160, 167, 172,
        176, 184, 192, 196
      ]

      # Per state: sorted single-character keys followed by (low, high)
      # range pairs, all as byte values.
      class << self
        attr_accessor :_simple_lexer_trans_keys
        private :_simple_lexer_trans_keys, :_simple_lexer_trans_keys=
      end
      self._simple_lexer_trans_keys = [
        35, 35, 65, 70, 73, 76, 84, 61,
        73, 78, 65, 90, 97, 122, 48, 57,
        65, 90, 97, 122, 61, 48, 57, 65,
        90, 97, 122, 32, 34, 39, 9, 13,
        34, 92, 34, 92, 39, 92, 39, 92,
        48, 57, 48, 57, 48, 57, 65, 90,
        97, 122, 68, 48, 57, 65, 90, 97,
        122, 61, 48, 57, 65, 90, 97, 122,
        65, 90, 97, 122, 48, 57, 65, 90,
        97, 122, 117, 48, 57, 65, 90, 97,
        122, 61, 109, 48, 57, 65, 90, 97,
        122, 61, 98, 48, 57, 65, 90, 97,
        122, 61, 101, 48, 57, 65, 90, 97,
        122, 61, 114, 48, 57, 65, 90, 97,
        122, 61, 48, 57, 65, 90, 97, 122,
        43, 45, 46, 65, 71, 82, 48, 57,
        48, 57, 73, 79, 76, 84, 69, 82,
        82, 77, 65, 78, 70, 79, 44, 60,
        32, 34, 39, 44, 46, 60, 9, 13,
        43, 45, 48, 57, 65, 90, 97, 122,
        32, 34, 39, 44, 60, 9, 13, 44,
        46, 60, 48, 57, 44, 60, 48, 57,
        44, 60, 48, 57, 65, 90, 97, 122,
        44, 60, 48, 57, 65, 90, 97, 122,
        44, 60, 48, 57, 44, 60, 0
      ]

      # Per state: number of single-character keys.
      class << self
        attr_accessor :_simple_lexer_single_lengths
        private :_simple_lexer_single_lengths, :_simple_lexer_single_lengths=
      end
      self._simple_lexer_single_lengths = [
        0, 1, 1, 3, 1, 1, 1, 2,
        0, 1, 3, 2, 2, 2, 2, 0,
        0, 0, 0, 0, 1, 1, 0, 0,
        1, 2, 2, 2, 2, 1, 6, 0,
        2, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 2, 6, 5, 3, 2,
        2, 2, 2, 2
      ]

      # Per state: number of (low, high) range pairs.
      class << self
        attr_accessor :_simple_lexer_range_lengths
        private :_simple_lexer_range_lengths, :_simple_lexer_range_lengths=
      end
      self._simple_lexer_range_lengths = [
        0, 0, 0, 0, 0, 0, 0, 2,
        3, 3, 1, 0, 0, 0, 0, 0,
        0, 1, 1, 3, 3, 3, 2, 3,
        3, 3, 3, 3, 3, 3, 1, 1,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 5, 1, 1, 1,
        3, 3, 1, 0
      ]

      # Per state: offset of the state's first entry in _simple_lexer_indicies.
      class << self
        attr_accessor :_simple_lexer_index_offsets
        private :_simple_lexer_index_offsets, :_simple_lexer_index_offsets=
      end
      self._simple_lexer_index_offsets = [
        0, 0, 2, 4, 8, 10, 12, 14,
        19, 23, 28, 33, 36, 39, 42, 45,
        46, 47, 49, 51, 55, 60, 65, 68,
        72, 77, 83, 89, 95, 101, 106, 114,
        116, 119, 121, 123, 125, 127, 129, 131,
        133, 135, 137, 139, 142, 154, 161, 166,
        170, 176, 182, 186
      ]

      # Maps a (state, matched key) slot to a transition id.
      class << self
        attr_accessor :_simple_lexer_indicies
        private :_simple_lexer_indicies, :_simple_lexer_indicies=
      end
      self._simple_lexer_indicies = [
        0, 1, 2, 1, 3, 4, 5, 1,
        6, 1, 7, 1, 8, 1, 11, 12,
        10, 10, 9, 14, 14, 14, 13, 15,
        14, 14, 14, 13, 16, 17, 18, 16,
        13, 20, 21, 19, 23, 24, 22, 20,
        26, 25, 23, 28, 27, 27, 22, 29,
        13, 30, 13, 31, 31, 31, 13, 33,
        14, 14, 14, 32, 34, 14, 14, 14,
        32, 35, 35, 32, 36, 36, 36, 32,
        38, 14, 14, 14, 37, 15, 39, 14,
        14, 14, 37, 15, 40, 14, 14, 14,
        37, 15, 41, 14, 14, 14, 37, 15,
        42, 14, 14, 14, 37, 43, 14, 14,
        14, 37, 44, 44, 45, 45, 45, 45,
        46, 37, 47, 37, 48, 49, 1, 50,
        1, 51, 1, 52, 1, 7, 1, 53,
        1, 54, 1, 6, 1, 55, 1, 56,
        1, 7, 1, 57, 57, 1, 16, 17,
        18, 57, 8, 57, 16, 58, 29, 59,
        59, 1, 16, 17, 18, 57, 57, 16,
        1, 57, 60, 57, 29, 1, 57, 57,
        30, 1, 61, 61, 31, 31, 31, 1,
        62, 62, 36, 36, 36, 1, 63, 63,
        47, 1, 63, 63, 1, 0
      ]

      # Per transition id: the target state.
      class << self
        attr_accessor :_simple_lexer_trans_targs
        private :_simple_lexer_trans_targs, :_simple_lexer_trans_targs=
      end
      self._simple_lexer_trans_targs = [
        2, 0, 3, 4, 32, 40, 5, 6,
        43, 0, 8, 20, 24, 0, 9, 44,
        10, 11, 13, 12, 45, 16, 12, 45,
        16, 14, 15, 14, 15, 46, 47, 48,
        0, 21, 22, 23, 49, 0, 25, 26,
        27, 28, 29, 30, 31, 51, 50, 50,
        33, 37, 34, 35, 36, 38, 39, 41,
        42, 7, 17, 19, 18, 7, 7, 7
      ]

      # Per transition id: offset of its action list in _simple_lexer_actions.
      class << self
        attr_accessor :_simple_lexer_trans_actions
        private :_simple_lexer_trans_actions, :_simple_lexer_trans_actions=
      end
      self._simple_lexer_trans_actions = [
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 26, 1, 1, 1, 15, 0, 7,
        0, 0, 0, 1, 17, 1, 0, 3,
        0, 1, 1, 0, 0, 0, 0, 0,
        20, 0, 5, 1, 0, 23, 0, 0,
        0, 0, 0, 5, 1, 1, 1, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 1, 0, 9, 11, 13
      ]

      # Per state: offset of the action list to run when input ends in
      # that state.
      class << self
        attr_accessor :_simple_lexer_eof_actions
        private :_simple_lexer_eof_actions, :_simple_lexer_eof_actions=
      end
      self._simple_lexer_eof_actions = [
        0, 0, 0, 0, 0, 0, 0, 26,
        15, 15, 15, 15, 15, 15, 15, 15,
        15, 15, 15, 15, 20, 20, 20, 20,
        23, 23, 23, 23, 23, 23, 23, 23,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        9, 11, 13, 13
      ]

      class << self
        attr_accessor :simple_lexer_start
      end
      self.simple_lexer_start = 1;
      class << self
        attr_accessor :simple_lexer_first_final
      end
      self.simple_lexer_first_final = 43;
      class << self
        attr_accessor :simple_lexer_error
      end
      self.simple_lexer_error = 0;

      class << self
        attr_accessor :simple_lexer_en_main
      end
      self.simple_lexer_en_main = 1;


      # line 61 "gen_vcfheaderline_parser.rl"
      # %% this just fixes our syntax highlighting...

      # Lex a single VCF header line into a Hash of its key/value pairs.
      #
      # Scanning stops at the first byte for which the current state has no
      # valid transition (state 0); an error is only reported when that
      # transition, or the end of input, carries one of the error actions.
      #
      # @param buf [String] the header line, e.g.
      #   '##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">'
      # @param options [Hash] pass +debug: true+ to print every token and the
      #   final result to stdout
      # @return [Hash{String=>String}] keys and values in order of appearance;
      #   quoted values are returned without their surrounding quotes.  The
      #   strings carry the encoding of +buf+.
      # @raise [ArgumentError] when +buf+ is not a String
      # @raise [RuntimeError] "ERROR: <code> in <buf>" when the lexer flags a
      #   malformed line
      def self.run_lexer(buf, options = {})
        # Previously a non-String input left +data+ nil and surfaced as an
        # unrelated NoMethodError further down; fail early and clearly.
        unless buf.is_a?(String)
          raise ArgumentError, "run_lexer expects a String, got #{buf.class}"
        end

        do_debug = (options[:debug] == true)
        data = buf.unpack("c*")
        eof = data.length
        values = []
        ts = nil  # start offset of the token being read; set by action 0

        emit = lambda { |type, bytes, from, to|
          # pack returns a binary (ASCII-8BIT) string; re-tag it with the
          # input's encoding so non-ASCII text compares equal to the source.
          token = bytes[from...to].pack('c*').force_encoding(buf.encoding)
          # Print the type and text of the last read token
          puts "#{type}: #{token}" if do_debug
          values << [type, token]
        }

        error_code = nil


        # line 242 "gen_vcfheaderline_parser.rb"
        begin
          p ||= 0
          pe ||= data.length
          cs = simple_lexer_start
        end

        # line 80 "gen_vcfheaderline_parser.rl"

        # line 251 "gen_vcfheaderline_parser.rb"
        # Generated driver.  Ragel emulates goto with _goto_level: each
        # section below runs only when _goto_level is at or below its label.
        begin
          _klen, _trans, _keys, _acts, _nacts = nil
          _goto_level = 0
          _resume = 10
          _eof_trans = 15
          _again = 20
          _test_eof = 30
          _out = 40
          while true
            _trigger_goto = false
            if _goto_level <= 0
              if p == pe
                _goto_level = _test_eof
                next
              end
              if cs == 0
                _goto_level = _out
                next
              end
            end
            if _goto_level <= _resume
              _keys = _simple_lexer_key_offsets[cs]
              _trans = _simple_lexer_index_offsets[cs]
              _klen = _simple_lexer_single_lengths[cs]
              _break_match = false

              begin
                # Binary search over the state's single-character keys.
                if _klen > 0
                  _lower = _keys
                  _upper = _keys + _klen - 1

                  loop do
                    break if _upper < _lower
                    _mid = _lower + ( (_upper - _lower) >> 1 )

                    if data[p].ord < _simple_lexer_trans_keys[_mid]
                      _upper = _mid - 1
                    elsif data[p].ord > _simple_lexer_trans_keys[_mid]
                      _lower = _mid + 1
                    else
                      _trans += (_mid - _keys)
                      _break_match = true
                      break
                    end
                  end # loop
                  break if _break_match
                  _keys += _klen
                  _trans += _klen
                end
                # Binary search over the state's (low, high) range pairs.
                _klen = _simple_lexer_range_lengths[cs]
                if _klen > 0
                  _lower = _keys
                  _upper = _keys + (_klen << 1) - 2
                  loop do
                    break if _upper < _lower
                    _mid = _lower + (((_upper-_lower) >> 1) & ~1)
                    if data[p].ord < _simple_lexer_trans_keys[_mid]
                      _upper = _mid - 2
                    elsif data[p].ord > _simple_lexer_trans_keys[_mid+1]
                      _lower = _mid + 2
                    else
                      _trans += ((_mid - _keys) >> 1)
                      _break_match = true
                      break
                    end
                  end # loop
                  break if _break_match
                  # No key matched: fall through to the default transition.
                  _trans += _klen
                end
              end while false
              _trans = _simple_lexer_indicies[_trans]
              cs = _simple_lexer_trans_targs[_trans]
              if _simple_lexer_trans_actions[_trans] != 0
                _acts = _simple_lexer_trans_actions[_trans]
                _nacts = _simple_lexer_actions[_acts]
                _acts += 1
                while _nacts > 0
                  _nacts -= 1
                  _acts += 1
                  case _simple_lexer_actions[_acts - 1]
                  when 0 then
                    # line 23 "gen_vcfheaderline_parser.rl"
                    begin
                      ts=p
                    end
                  when 1 then
                    # line 24 "gen_vcfheaderline_parser.rl"
                    begin
                      emit.call(:value,data,ts,p)
                    end
                  when 2 then
                    # line 28 "gen_vcfheaderline_parser.rl"
                    begin
                      emit.call(:kw,data,ts,p)
                    end
                  when 3 then
                    # line 46 "gen_vcfheaderline_parser.rl"
                    begin
                      emit.call(:key_word,data,ts,p)
                    end
                  when 4 then
                    # line 47 "gen_vcfheaderline_parser.rl"
                    begin
                      emit.call(:value,data,ts,p)
                    end
                  when 5 then
                    # line 48 "gen_vcfheaderline_parser.rl"
                    begin
                      emit.call(:value,data,ts,p)
                    end
                  when 6 then
                    # line 50 "gen_vcfheaderline_parser.rl"
                    begin
                      emit.call(:value,data,ts,p)
                    end
                  when 7 then
                    # line 52 "gen_vcfheaderline_parser.rl"
                    begin
                      error_code="ID"
                    end
                  when 8 then
                    # line 53 "gen_vcfheaderline_parser.rl"
                    begin
                      error_code="Number"
                    end
                  when 9 then
                    # line 54 "gen_vcfheaderline_parser.rl"
                    begin
                      error_code="key-value"
                    end
                    # line 376 "gen_vcfheaderline_parser.rb"
                  end # action switch
                end
              end
              if _trigger_goto
                next
              end
            end
            if _goto_level <= _again
              if cs == 0
                _goto_level = _out
                next
              end
              p += 1
              if p != pe
                _goto_level = _resume
                next
              end
            end
            if _goto_level <= _test_eof
              if p == eof
                __acts = _simple_lexer_eof_actions[cs]
                __nacts = _simple_lexer_actions[__acts]
                __acts += 1
                while __nacts > 0
                  __nacts -= 1
                  __acts += 1
                  case _simple_lexer_actions[__acts - 1]
                  when 4 then
                    # line 47 "gen_vcfheaderline_parser.rl"
                    begin
                      emit.call(:value,data,ts,p)
                    end
                  when 5 then
                    # line 48 "gen_vcfheaderline_parser.rl"
                    begin
                      emit.call(:value,data,ts,p)
                    end
                  when 6 then
                    # line 50 "gen_vcfheaderline_parser.rl"
                    begin
                      emit.call(:value,data,ts,p)
                    end
                  when 7 then
                    # line 52 "gen_vcfheaderline_parser.rl"
                    begin
                      error_code="ID"
                    end
                  when 8 then
                    # line 53 "gen_vcfheaderline_parser.rl"
                    begin
                      error_code="Number"
                    end
                  when 9 then
                    # line 54 "gen_vcfheaderline_parser.rl"
                    begin
                      error_code="key-value"
                    end
                    # line 428 "gen_vcfheaderline_parser.rb"
                  end # eof action switch
                end
                if _trigger_goto
                  next
                end
              end
            end
            if _goto_level <= _out
              break
            end
          end
        end

        # line 81 "gen_vcfheaderline_parser.rl"

        # NOTE(review): every error action list in _simple_lexer_actions ends
        # with action 9, so the reported code is always "key-value" even when
        # the "ID"/"Number" actions fired first.  Changing that belongs in the
        # .rl grammar, not in this generated file.
        raise "ERROR: "+error_code+" in "+buf if error_code

        begin
          res = {}
          # Tokens arrive as alternating [type, text] entries: key, value, ...
          values.each_slice(2) do | a,b |
            res[a[1]] = b[1]
          end
        rescue
          # An unpaired token leaves b nil; dump the token list before
          # re-raising so the malformed line can be diagnosed.
          print "ERROR: "
          puts values.inspect
          raise
        end
        puts res.inspect if do_debug
        res
      end
    end
  end
end
464
+
465
# Self-test: when this file is executed directly, lex a handful of sample
# header lines and print each line followed by the parsed key/value Hash.
if $PROGRAM_NAME == __FILE__

  sample_headers = <<LINES
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Total read depth",Extra="Yes?">
##FORMAT=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
##INFO=<ID=PM,Number=0,Type=Flag,Description="Variant is Precious(Clinical,Pubmed Cited)">
##INFO=<ID=VP,Number=1,Type=String,Description="Variation Property. Documentation is at ftp://ftp.ncbi.nlm.nih.gov/snp/specs/dbSNP_BitField_latest.pdf",Source="dbsnp",Version="138">
##INFO=<ID=GENEINFO,Number=1,Type=String,Description="Pairs each of gene symbol:gene id. The gene symbol and id are delimited by a colon (:), and each pair is delimited by a vertical bar (|)">
##INFO=<ID=CLNHGVS,Number=.,Type=String,Description="Variant names from HGVS. The order of these variants corresponds to the order of the info in the other clinical INFO tags.">
##INFO=<ID=CLNHGVS1,Number=.,Type=String,Description="Variant names from \\"HGVS\\". The order of these 'variants' corresponds to the order of the info in the other clinical INFO tags.">
LINES

  sample_headers.strip.split("\n").each do |header_line|
    puts header_line
    p BioVcf::VcfHeaderParser::RagelKeyValues.run_lexer(header_line, debug: false)
  end

end # test