bio-vcf 0.8.2 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +8 -2
  3. data/Gemfile +4 -6
  4. data/README.md +92 -57
  5. data/Rakefile +3 -41
  6. data/TAGS +115 -0
  7. data/VERSION +1 -1
  8. data/bin/bio-vcf +58 -70
  9. data/bio-vcf.gemspec +23 -75
  10. data/features/cli.feature +6 -1
  11. data/features/multisample.feature +12 -0
  12. data/features/step_definitions/cli-feature.rb +2 -2
  13. data/features/step_definitions/multisample.rb +19 -0
  14. data/features/step_definitions/vcf_header.rb +1 -1
  15. data/features/support/env.rb +0 -9
  16. data/lib/bio-vcf/pcows.rb +210 -0
  17. data/lib/bio-vcf/vcfheader.rb +28 -9
  18. data/lib/bio-vcf/vcfheader_line.rb +455 -160
  19. data/lib/bio-vcf/vcfrecord.rb +30 -15
  20. data/ragel/gen_vcfheaderline_parser.rl +68 -25
  21. data/ragel/generate.sh +4 -1
  22. data/template/vcf2json.erb +16 -16
  23. data/template/vcf2json_full_header.erb +16 -17
  24. data/template/vcf2json_use_meta.erb +35 -35
  25. data/test/data/input/gatk_exome.vcf +237 -0
  26. data/test/data/input/gatk_wgs.vcf +1000 -0
  27. data/test/data/input/test.bed +632 -0
  28. data/test/data/regression/eval_once-stderr.new +1 -0
  29. data/test/data/regression/eval_once.new +1 -0
  30. data/test/data/regression/eval_once.ref +1 -0
  31. data/test/data/regression/eval_r.info.dp-stderr.new +4 -0
  32. data/test/data/regression/eval_r.info.dp.new +150 -0
  33. data/test/data/regression/ifilter_s.dp-stderr.new +28 -0
  34. data/test/data/regression/ifilter_s.dp.new +31 -0
  35. data/test/data/regression/r.info.dp-stderr.new +4 -0
  36. data/test/data/regression/r.info.dp.new +147 -0
  37. data/test/data/regression/rewrite.info.sample-stderr.new +4 -0
  38. data/test/data/regression/rewrite.info.sample.new +150 -0
  39. data/test/data/regression/s.dp-stderr.new +12 -0
  40. data/test/data/regression/s.dp.new +145 -0
  41. data/test/data/regression/seval_s.dp-stderr.new +4 -0
  42. data/test/data/regression/seval_s.dp.new +36 -0
  43. data/test/data/regression/sfilter_seval_s.dp-stderr.new +12 -0
  44. data/test/data/regression/sfilter_seval_s.dp.new +31 -0
  45. data/test/data/regression/thread4-stderr.new +4 -0
  46. data/test/data/regression/thread4.new +150 -0
  47. data/test/data/regression/thread4_4-stderr.new +15 -0
  48. data/test/data/regression/thread4_4.new +150 -0
  49. data/test/data/regression/thread4_4_failed_filter-stderr.new +5 -0
  50. data/test/data/regression/thread4_4_failed_filter-stderr.ref +5 -2
  51. data/test/data/regression/thread4_4_failed_filter.new +110 -0
  52. data/test/data/regression/vcf2json_full_header-stderr.new +4 -0
  53. data/test/data/regression/vcf2json_full_header.new +225 -0
  54. data/test/data/regression/vcf2json_full_header.ref +222 -258
  55. data/test/data/regression/vcf2json_use_meta-stderr.new +4 -0
  56. data/test/data/regression/vcf2json_use_meta.new +4697 -0
  57. data/test/data/regression/vcf2json_use_meta.ref +4697 -0
  58. data/test/performance/metrics.md +18 -1
  59. data/test/tmp/test.vcf +12469 -0
  60. metadata +38 -62
  61. data/Gemfile.lock +0 -81
  62. data/ragel/gen_vcfheaderline_parser.rb +0 -483
@@ -37,6 +37,18 @@ Then(/^I expect rec\.info\.readposranksum to be (\d+)\.(\d+)$/) do |arg1, arg2|
37
37
  expect(@rec1.info.readposranksum).to eq 0.815
38
38
  end
39
39
 
40
+ Then(/^I expect rec\.info\['dp'\] to be (\d+)$/) do |arg1|
41
+ expect(@rec1.info['dp']).to eq 1537
42
+ end
43
+
44
+ Then(/^I expect rec\.info\['ReadPosRankSum'\] to be (\d+)\.(\d+)$/) do |arg1, arg2|
45
+ expect(@rec1.info['ReadPosRankSum']).to eq 0.815
46
+ end
47
+
48
+ Then(/^I expect rec\.info\.fields to contain \["(.*?)", "(.*?)", "(.*?)", "(.*?)", "(.*?)", "(.*?)", "(.*?)", "(.*?)", "(.*?)", "(.*?)", "(.*?)", "(.*?)", "(.*?)", "(.*?)", "(.*?)"\]$/) do |arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, arg11, arg12, arg13, arg14, arg15|
49
+ expect(@rec1.info.fields).to eq ["AC", "AF", "AN", "BASEQRANKSUM", "DP", "DELS", "FS", "HAPLOTYPESCORE", "MLEAC", "MLEAF", "MQ", "MQ0", "MQRANKSUM", "QD", "READPOSRANKSUM"]
50
+ end
51
+
40
52
  Then(/^I expect rec\.sample\.original\.gt to be "(.*?)"$/) do |arg1|
41
53
  expect(@rec1.sample['Original'].gt).to eq "0/1"
42
54
  end
@@ -161,3 +173,10 @@ Then(/^I expect r\.original\.gts\[(\d+)\] to be "(.*?)"$/) do |arg1, arg2|
161
173
  expect(@rec1.original.gts[arg1.to_i]).to eq arg2
162
174
  end
163
175
 
176
+ Then(/^I expect r\.info\.end to be (\d+)$/) do |arg1|
177
+ expect(@rec1.info.end).to eq arg1.to_i
178
+ end
179
+
180
+ Then(/^I expect r\.info\.ciend to be (\d+)$/) do |arg1|
181
+ expect(@rec1.info.ciend).to eq arg1.to_i
182
+ end
@@ -43,6 +43,6 @@ Then(/^I expect vcf\.meta to contain all header meta information$/) do
43
43
  m = @vcf.meta
44
44
  expect(m['fileformat']).to eq "VCFv4.1"
45
45
  expect(m['FORMAT']['DP']['Number']).to eq "1"
46
- expect(m.size).to be 6
46
+ expect(m.size).to be 9
47
47
  end
48
48
 
@@ -1,12 +1,3 @@
1
- require 'bundler'
2
- begin
3
- Bundler.setup(:default, :development)
4
- rescue Bundler::BundlerError => e
5
- $stderr.puts e.message
6
- $stderr.puts "Run `bundle install` to install missing gems"
7
- exit e.status_code
8
- end
9
-
10
1
  # require 'mini/test'
11
2
 
12
3
  $LOAD_PATH.unshift(File.dirname(__FILE__) + '/../../lib')
@@ -0,0 +1,210 @@
1
+ # Parallel copy-on-write streaming (PCOWS)
2
+
3
+ require 'tempfile'
4
+
5
+ class PCOWS
6
+
7
+ RUNNINGEXT = 'part'
8
+
9
+ def initialize(num_threads,name=File.basename(__FILE__),timeout=180)
10
+ num_threads = cpu_count() if not num_threads # FIXME: set to cpu_num by default
11
+ # $stderr.print "Using ",num_threads,"threads \n"
12
+ @num_threads = num_threads
13
+ @pid_list = []
14
+ @name = name
15
+ @timeout = timeout
16
+ if multi_threaded
17
+ @tmpdir = Dir::mktmpdir(@name+'_')
18
+ end
19
+ @last_output = 0 # counter
20
+ @output_locked = nil
21
+ end
22
+
23
+ # Feed the worker func and state to PCOWS. Note that func is a lambda
24
+ # closure so it can pick up surrounding scope at invocation in
25
+ # addition to the data captured in 'state'.
26
+
27
+ def submit_worker(func,state)
28
+ pid = nil
29
+ if multi_threaded
30
+ count = @pid_list.size+1
31
+ fn = mktmpfilename(count)
32
+ pid = fork do
33
+ # ---- This is running a new copy-on-write process
34
+ tempfn = fn+'.'+RUNNINGEXT
35
+ STDOUT.reopen(File.open(tempfn, 'w+'))
36
+ func.call(state).each { | line | print line }
37
+ STDOUT.flush
38
+ STDOUT.close
39
+ FileUtils::mv(tempfn,fn)
40
+ exit 0
41
+ end
42
+ else
43
+ # ---- Call in main process and output immediately
44
+ func.call(state).each { | line | print line }
45
+ end
46
+ @pid_list << [ pid,count,fn ]
47
+ return true
48
+ end
49
+
50
+ # Make sure no more than num_threads are running at the same time -
51
+ # this is achieved by checking the PID table and the running files
52
+ # in the tmpdir
53
+
54
+ def wait_for_worker_slot()
55
+ return if single_threaded
56
+ Timeout.timeout(@timeout) do
57
+
58
+ while true
59
+ # ---- count running pids
60
+ running = @pid_list.reduce(0) do | sum, info |
61
+ (pid,count,fn) = info
62
+ if pid_or_file_running?(pid,fn)
63
+ sum+1
64
+ else
65
+ sum
66
+ end
67
+ end
68
+ return if running < @num_threads
69
+ $stderr.print "Waiting for slot (timeout=#{@timeout})\n"
70
+ sleep 0.1
71
+
72
+ end
73
+ end
74
+ end
75
+
76
+ # ---- In this section the output gets collected and passed on to a
77
+ # printer thread. This function makes sure the printing is
78
+ # ordered and that no printers are running at the same
79
+ # time. The printer thread should be doing as little processing
80
+ # as possible.
81
+ #
82
+ # In this implementation type==:by_line prints the output file
83
+ # line by line. Otherwise func is called once with the filename.
84
+
85
+ def process_output(func=nil,type = :by_line, blocking=false)
86
+ return if single_threaded
87
+ output = lambda { |fn|
88
+ if type == :by_line
89
+ File.new(fn).each_line { |buf|
90
+ print buf
91
+ }
92
+ else
93
+ func.call(fn)
94
+ end
95
+ File.unlink(fn)
96
+ }
97
+ if @output_locked
98
+ (pid,count,fn) = @output_locked
99
+ return if File.exist?(fn) # still processing
100
+ # on to the next one
101
+ @last_output += 1
102
+ @output_locked = nil
103
+ end
104
+ if info = @pid_list[@last_output]
105
+ (pid,count,fn) = info
106
+ $stderr.print "Processing #{fn}\n"
107
+ if File.exist?(fn)
108
+ # Yes! We have the next output, create outputter
109
+ if not blocking
110
+ pid = fork do
111
+ output.call(fn)
112
+ exit(0)
113
+ end
114
+ @output_locked = info
115
+ else
116
+ output.call(fn)
117
+ end
118
+ end
119
+ end
120
+ end
121
+
122
+ def wait_for_worker(info)
123
+ (pid,count,fn) = info
124
+ if pid_or_file_running?(pid,fn)
125
+ $stderr.print "Waiting up to #{@timeout} seconds for pid=#{pid} to complete\n"
126
+ begin
127
+ Timeout.timeout(@timeout) do
128
+ while not File.exist?(fn) # wait for the result to appear
129
+ sleep 0.2
130
+ end
131
+ end
132
+ # Thread file should have gone:
133
+ raise "FATAL: child process appears to have crashed #{fn}" if not File.exist?(fn)
134
+ $stderr.print "OK pid=#{pid}, processing #{fn}\n"
135
+ rescue Timeout::Error
136
+ if pid_running?(pid)
137
+ # Kill it to speed up exit
138
+ Process.kill 9, pid
139
+ Process.wait pid
140
+ end
141
+ $stderr.print "FATAL: child process killed because it stopped responding, pid = #{pid}\n"
142
+ end
143
+ end
144
+ end
145
+
146
+ # This is the final cleanup after the reader thread is done. All workers
147
+ # need to complete.
148
+
149
+ def wait_for_workers()
150
+ return if single_threaded
151
+ @pid_list.each do |info|
152
+ wait_for_worker(info)
153
+ end
154
+ end
155
+
156
+ def process_remaining_output()
157
+ return if single_threaded
158
+ while @output_locked
159
+ sleep 0.2
160
+ process_output()
161
+ end
162
+ @pid_list.each do |info|
163
+ process_output(nil,:by_line,true)
164
+ end
165
+ # final cleanup
166
+ Dir.unlink(@tmpdir) if @tmpdir
167
+ end
168
+
169
+ private
170
+
171
+ def mktmpfilename(num,ext=nil)
172
+ @tmpdir+sprintf("/%0.6d-",num)+@name+(ext ? '.'+ext : '')
173
+ end
174
+
175
+ def pid_or_file_running?(pid,fn)
176
+ (pid && pid_running?(pid)) or File.exist?(fn+'.'+RUNNINGEXT)
177
+ end
178
+
179
+ def pid_running?(pid)
180
+ begin
181
+ fpid,status=Process.waitpid2(pid,Process::WNOHANG)
182
+ rescue Errno::ECHILD, Errno::ESRCH
183
+ return false
184
+ end
185
+ return true if nil == fpid && nil == status
186
+ return ! (status.exited? || status.signaled?)
187
+ end
188
+
189
+ def single_threaded
190
+ @num_threads == 1
191
+ end
192
+
193
+ def multi_threaded
194
+ @num_threads > 1
195
+ end
196
+
197
+ def cpu_count
198
+ begin
199
+ return File.read('/proc/cpuinfo').scan(/^processor\s*:/).size if File.exist? '/proc/cpuinfo'
200
+ # Actually, the JVM does not allow fork...
201
+ return Java::Java.lang.Runtime.getRuntime.availableProcessors if defined? Java::Java
202
+ rescue LoadError
203
+ # Count CPUs on macOS
204
+ return Integer `sysctl -n hw.ncpu 2>/dev/null`
205
+ end
206
+ $stderr.print "Could not determine number of CPUs"
207
+ 1
208
+ end
209
+
210
+ end
@@ -2,7 +2,7 @@
2
2
  # containing fields. Most fields are of 'key=value' type and appear
3
3
  # only once. These can be retrieved with the find_field method.
4
4
  #
5
- # INFO and FORMAT fields are special as they appear multiple times
5
+ # INFO, FORMAT and contig fields are special as they appear multiple times
6
6
  # and contain multiple key values (identified by an ID field).
7
7
  # To retrieve these call 'info' and 'format' functions respectively,
8
8
  # which return a hash on the contained ID.
@@ -25,8 +25,8 @@ module BioVcf
25
25
  nil
26
26
  end
27
27
 
28
- def VcfHeaderParser.parse_field(line)
29
- BioVcf::VcfHeaderParser::RagelKeyValues.run_lexer(line, debug: false)
28
+ def VcfHeaderParser.parse_field(line, debug)
29
+ BioVcf::VcfHeaderParser::RagelKeyValues.run_lexer(line, debug: debug)
30
30
  end
31
31
  end
32
32
 
@@ -34,9 +34,11 @@ module BioVcf
34
34
 
35
35
  attr_reader :lines, :field
36
36
 
37
- def initialize
37
+ def initialize(debug = false)
38
+ @debug = debug
38
39
  @lines = []
39
40
  @field = {}
41
+ @meta = nil
40
42
  end
41
43
 
42
44
  # Add a new field to the header
@@ -125,7 +127,7 @@ module BioVcf
125
127
  if value[0]
126
128
  str = value[0][0]
127
129
  # p str
128
- v = VcfHeaderParser.parse_field(line)
130
+ v = VcfHeaderParser.parse_field(line,@debug)
129
131
  id = v['ID']
130
132
  res[id] = v
131
133
  end
@@ -138,25 +140,42 @@ module BioVcf
138
140
  find_fields('FORMAT')
139
141
  end
140
142
 
143
+ def filter
144
+ find_fields('FILTER')
145
+ end
146
+
147
+ def contig
148
+ find_fields('contig')
149
+ end
150
+
141
151
  def info
142
152
  find_fields('INFO')
143
153
  end
144
154
 
155
+ def gatkcommandline
156
+ find_fields('GATKCommandLine')
157
+ end
158
+
145
159
  def meta
146
- res = { 'INFO' => {}, 'FORMAT' => {} }
160
+ return @meta if @meta
161
+ res = { 'INFO' => {}, 'FORMAT' => {}, 'FILTER' => {}, 'contig' => {}, 'GATKCommandLine' => {} }
147
162
  @lines.each do | line |
148
163
  value = line.scan(/##(.*?)=(.*)/)
149
164
  if value[0]
150
165
  k,v = value[0]
151
- if k != 'FORMAT' and k != 'INFO'
166
+ if k != 'FORMAT' and k != 'INFO' and k != 'FILTER' and k != 'contig' and k != 'GATKCommandLine'
152
167
  # p [k,v]
153
168
  res[k] = v
154
169
  end
155
170
  end
156
171
  end
157
- res['INFO'] = info
158
- res['FORMAT'] = format
172
+ res['INFO'] = info()
173
+ res['FORMAT'] = format()
174
+ res['FILTER'] = filter()
175
+ res['contig'] = contig()
176
+ res['GATKCommandLine'] = gatkcommandline()
159
177
  # p [:res, res]
178
+ @meta = res # cache values
160
179
  res
161
180
  end
162
181
 
@@ -2,38 +2,51 @@
2
2
  # line 1 "gen_vcfheaderline_parser.rl"
3
3
  # Ragel lexer for VCF-header
4
4
  #
5
- # This is a partial lexer for the VCF header format. Bio-vcf uses this
6
- # to generate meta information in (for example) JSON format. The
7
- # advantage of using a full state engine is that it allows for easy
8
- # parsing of key-value pairs with syntax checking and, for example,
9
- # escaped quotes in quoted string values. This edition validates ID and
10
- # Number fields only.
5
+ # This is a compact parser/lexer for the VCF header format. Bio-vcf
6
+ # uses the parser to generate meta information that can be output to
7
+ # (for example) JSON format. The advantage of using ragel as a state
8
+ # engine is that it allows for easy parsing of key-value pairs with
9
+ # syntax checking and, for example, escaped quotes in quoted string
10
+ # values. This ragel parser/lexer generates valid Ruby; it should be
11
+ # fairly trivial to generate Python/C/Java instead. Note that this
12
+ # edition validates ID and Number fields only. Other fields are
13
+ # dumped 'AS IS'.
11
14
  #
12
15
  # Note the .rb version is generated from ./ragel/gen_vcfheaderline_parser.rl
16
+ #
17
+ # by Pjotr Prins (c) 2014/2015
13
18
 
14
19
  module BioVcf
15
20
 
16
21
  module VcfHeaderParser
17
22
 
18
23
  module RagelKeyValues
19
-
24
+
25
+ def self.debug msg
26
+ # nothing
27
+ # $stderr.print "DEBUG: ",msg,"\n"
28
+ end
29
+
20
30
  =begin
21
31
 
22
- # line 57 "gen_vcfheaderline_parser.rl"
32
+ # line 75 "gen_vcfheaderline_parser.rl"
23
33
 
24
34
  =end
25
35
 
26
36
 
27
- # line 28 "gen_vcfheaderline_parser.rb"
37
+ # line 38 "gen_vcfheaderline_parser.rb"
28
38
  class << self
29
39
  attr_accessor :_simple_lexer_actions
30
40
  private :_simple_lexer_actions, :_simple_lexer_actions=
31
41
  end
32
42
  self._simple_lexer_actions = [
33
43
  0, 1, 0, 1, 1, 1, 2, 1,
34
- 3, 1, 4, 1, 5, 1, 6, 1,
35
- 9, 2, 0, 1, 2, 7, 9, 2,
36
- 8, 9, 3, 7, 8, 9
44
+ 3, 1, 15, 1, 16, 2, 0, 1,
45
+ 2, 4, 15, 2, 6, 15, 2, 7,
46
+ 15, 2, 9, 16, 2, 10, 16, 2,
47
+ 11, 16, 2, 12, 15, 2, 13, 16,
48
+ 2, 14, 16, 3, 5, 8, 15, 6,
49
+ 9, 10, 13, 11, 14, 16
37
50
  ]
38
51
 
39
52
  class << self
@@ -41,13 +54,23 @@ class << self
41
54
  private :_simple_lexer_key_offsets, :_simple_lexer_key_offsets=
42
55
  end
43
56
  self._simple_lexer_key_offsets = [
44
- 0, 0, 1, 2, 5, 6, 7, 8,
45
- 14, 20, 27, 32, 34, 36, 38, 40,
46
- 40, 40, 42, 44, 50, 57, 64, 68,
47
- 74, 81, 89, 97, 105, 113, 120, 128,
48
- 130, 132, 133, 134, 135, 136, 137, 138,
49
- 139, 140, 141, 142, 144, 160, 167, 172,
50
- 176, 184, 192, 196
57
+ 0, 0, 1, 2, 7, 8, 9, 10,
58
+ 13, 26, 35, 49, 51, 53, 58, 60,
59
+ 62, 62, 62, 64, 70, 72, 77, 80,
60
+ 91, 101, 111, 121, 131, 141, 151, 161,
61
+ 171, 181, 191, 201, 211, 221, 231, 241,
62
+ 251, 261, 270, 275, 277, 279, 279, 281,
63
+ 283, 283, 293, 303, 313, 322, 327, 329,
64
+ 331, 331, 333, 335, 335, 345, 354, 360,
65
+ 371, 381, 391, 401, 411, 421, 430, 438,
66
+ 440, 445, 448, 458, 468, 478, 488, 498,
67
+ 508, 517, 524, 526, 528, 533, 535, 537,
68
+ 537, 537, 549, 559, 569, 579, 589, 599,
69
+ 609, 619, 629, 639, 649, 659, 669, 671,
70
+ 672, 673, 674, 675, 676, 677, 678, 679,
71
+ 680, 681, 682, 683, 684, 685, 686, 687,
72
+ 688, 689, 690, 691, 692, 693, 694, 695,
73
+ 696, 697, 698, 699, 700
51
74
  ]
52
75
 
53
76
  class << self
@@ -55,31 +78,94 @@ class << self
55
78
  private :_simple_lexer_trans_keys, :_simple_lexer_trans_keys=
56
79
  end
57
80
  self._simple_lexer_trans_keys = [
58
- 35, 35, 65, 70, 73, 76, 84, 61,
59
- 73, 78, 65, 90, 97, 122, 48, 57,
60
- 65, 90, 97, 122, 61, 48, 57, 65,
61
- 90, 97, 122, 32, 34, 39, 9, 13,
62
- 34, 92, 34, 92, 39, 92, 39, 92,
63
- 48, 57, 48, 57, 48, 57, 65, 90,
64
- 97, 122, 68, 48, 57, 65, 90, 97,
65
- 122, 61, 48, 57, 65, 90, 97, 122,
66
- 65, 90, 97, 122, 48, 57, 65, 90,
67
- 97, 122, 117, 48, 57, 65, 90, 97,
68
- 122, 61, 109, 48, 57, 65, 90, 97,
69
- 122, 61, 98, 48, 57, 65, 90, 97,
70
- 122, 61, 101, 48, 57, 65, 90, 97,
71
- 122, 61, 114, 48, 57, 65, 90, 97,
72
- 122, 61, 48, 57, 65, 90, 97, 122,
73
- 43, 45, 46, 65, 71, 82, 48, 57,
74
- 48, 57, 73, 79, 76, 84, 69, 82,
75
- 82, 77, 65, 78, 70, 79, 44, 60,
76
- 32, 34, 39, 44, 46, 60, 9, 13,
77
- 43, 45, 48, 57, 65, 90, 97, 122,
78
- 32, 34, 39, 44, 60, 9, 13, 44,
79
- 46, 60, 48, 57, 44, 60, 48, 57,
80
- 44, 60, 48, 57, 65, 90, 97, 122,
81
- 44, 60, 48, 57, 65, 90, 97, 122,
82
- 44, 60, 48, 57, 44, 60, 0
81
+ 35, 35, 65, 70, 71, 73, 99, 76,
82
+ 84, 61, 44, 60, 62, 67, 68, 73,
83
+ 78, 86, 97, 108, 48, 57, 65, 90,
84
+ 98, 122, 46, 61, 95, 48, 57, 65,
85
+ 90, 97, 122, 34, 39, 44, 46, 60,
86
+ 62, 43, 45, 48, 57, 65, 90, 97,
87
+ 122, 34, 92, 34, 92, 34, 39, 44,
88
+ 60, 62, 39, 92, 39, 92, 48, 57,
89
+ 44, 46, 60, 62, 48, 57, 48, 57,
90
+ 44, 60, 62, 48, 57, 44, 60, 62,
91
+ 44, 46, 60, 62, 95, 48, 57, 65,
92
+ 90, 97, 122, 46, 61, 95, 111, 48,
93
+ 57, 65, 90, 97, 122, 46, 61, 95,
94
+ 109, 48, 57, 65, 90, 97, 122, 46,
95
+ 61, 95, 109, 48, 57, 65, 90, 97,
96
+ 122, 46, 61, 95, 97, 48, 57, 65,
97
+ 90, 98, 122, 46, 61, 95, 110, 48,
98
+ 57, 65, 90, 97, 122, 46, 61, 95,
99
+ 100, 48, 57, 65, 90, 97, 122, 46,
100
+ 61, 76, 95, 48, 57, 65, 90, 97,
101
+ 122, 46, 61, 95, 105, 48, 57, 65,
102
+ 90, 97, 122, 46, 61, 95, 110, 48,
103
+ 57, 65, 90, 97, 122, 46, 61, 95,
104
+ 101, 48, 57, 65, 90, 97, 122, 46,
105
+ 61, 79, 95, 48, 57, 65, 90, 97,
106
+ 122, 46, 61, 95, 112, 48, 57, 65,
107
+ 90, 97, 122, 46, 61, 95, 116, 48,
108
+ 57, 65, 90, 97, 122, 46, 61, 95,
109
+ 105, 48, 57, 65, 90, 97, 122, 46,
110
+ 61, 95, 111, 48, 57, 65, 90, 97,
111
+ 122, 46, 61, 95, 110, 48, 57, 65,
112
+ 90, 97, 122, 46, 61, 95, 115, 48,
113
+ 57, 65, 90, 97, 122, 46, 61, 95,
114
+ 48, 57, 65, 90, 97, 122, 34, 39,
115
+ 44, 60, 62, 34, 92, 34, 92, 39,
116
+ 92, 39, 92, 46, 61, 95, 97, 48,
117
+ 57, 65, 90, 98, 122, 46, 61, 95,
118
+ 116, 48, 57, 65, 90, 97, 122, 46,
119
+ 61, 95, 101, 48, 57, 65, 90, 97,
120
+ 122, 46, 61, 95, 48, 57, 65, 90,
121
+ 97, 122, 34, 39, 44, 60, 62, 34,
122
+ 92, 34, 92, 39, 92, 39, 92, 46,
123
+ 61, 68, 95, 48, 57, 65, 90, 97,
124
+ 122, 46, 61, 95, 48, 57, 65, 90,
125
+ 97, 122, 48, 57, 65, 90, 97, 122,
126
+ 44, 46, 60, 62, 95, 48, 57, 65,
127
+ 90, 97, 122, 46, 61, 95, 117, 48,
128
+ 57, 65, 90, 97, 122, 46, 61, 95,
129
+ 109, 48, 57, 65, 90, 97, 122, 46,
130
+ 61, 95, 98, 48, 57, 65, 90, 97,
131
+ 122, 46, 61, 95, 101, 48, 57, 65,
132
+ 90, 97, 122, 46, 61, 95, 114, 48,
133
+ 57, 65, 90, 97, 122, 46, 61, 95,
134
+ 48, 57, 65, 90, 97, 122, 43, 45,
135
+ 46, 65, 71, 82, 48, 57, 48, 57,
136
+ 44, 60, 62, 48, 57, 44, 60, 62,
137
+ 46, 61, 95, 101, 48, 57, 65, 90,
138
+ 97, 122, 46, 61, 95, 114, 48, 57,
139
+ 65, 90, 97, 122, 46, 61, 95, 115,
140
+ 48, 57, 65, 90, 97, 122, 46, 61,
141
+ 95, 105, 48, 57, 65, 90, 97, 122,
142
+ 46, 61, 95, 111, 48, 57, 65, 90,
143
+ 97, 122, 46, 61, 95, 110, 48, 57,
144
+ 65, 90, 97, 122, 46, 61, 95, 48,
145
+ 57, 65, 90, 97, 122, 34, 39, 44,
146
+ 60, 62, 48, 57, 34, 92, 34, 92,
147
+ 34, 39, 44, 60, 62, 39, 92, 39,
148
+ 92, 44, 60, 62, 95, 45, 46, 48,
149
+ 57, 65, 90, 97, 122, 46, 61, 95,
150
+ 115, 48, 57, 65, 90, 97, 122, 46,
151
+ 61, 95, 115, 48, 57, 65, 90, 97,
152
+ 122, 46, 61, 95, 101, 48, 57, 65,
153
+ 90, 97, 122, 46, 61, 95, 109, 48,
154
+ 57, 65, 90, 97, 122, 46, 61, 95,
155
+ 98, 48, 57, 65, 90, 97, 122, 46,
156
+ 61, 95, 108, 48, 57, 65, 90, 97,
157
+ 122, 46, 61, 95, 121, 48, 57, 65,
158
+ 90, 97, 122, 46, 61, 95, 101, 48,
159
+ 57, 65, 90, 97, 122, 46, 61, 95,
160
+ 110, 48, 57, 65, 90, 97, 122, 46,
161
+ 61, 95, 103, 48, 57, 65, 90, 97,
162
+ 122, 46, 61, 95, 116, 48, 57, 65,
163
+ 90, 97, 122, 46, 61, 95, 104, 48,
164
+ 57, 65, 90, 97, 122, 73, 79, 76,
165
+ 84, 69, 82, 82, 77, 65, 65, 84,
166
+ 75, 67, 111, 109, 109, 97, 110, 100,
167
+ 76, 105, 110, 101, 78, 70, 79, 111,
168
+ 110, 116, 105, 103, 0
83
169
  ]
84
170
 
85
171
  class << self
@@ -87,13 +173,23 @@ class << self
87
173
  private :_simple_lexer_single_lengths, :_simple_lexer_single_lengths=
88
174
  end
89
175
  self._simple_lexer_single_lengths = [
90
- 0, 1, 1, 3, 1, 1, 1, 2,
91
- 0, 1, 3, 2, 2, 2, 2, 0,
92
- 0, 0, 0, 0, 1, 1, 0, 0,
93
- 1, 2, 2, 2, 2, 1, 6, 0,
94
- 2, 1, 1, 1, 1, 1, 1, 1,
95
- 1, 1, 1, 2, 6, 5, 3, 2,
96
- 2, 2, 2, 2
176
+ 0, 1, 1, 5, 1, 1, 1, 3,
177
+ 7, 3, 6, 2, 2, 5, 2, 2,
178
+ 0, 0, 0, 4, 0, 3, 3, 5,
179
+ 4, 4, 4, 4, 4, 4, 4, 4,
180
+ 4, 4, 4, 4, 4, 4, 4, 4,
181
+ 4, 3, 5, 2, 2, 0, 2, 2,
182
+ 0, 4, 4, 4, 3, 5, 2, 2,
183
+ 0, 2, 2, 0, 4, 3, 0, 5,
184
+ 4, 4, 4, 4, 4, 3, 6, 0,
185
+ 3, 3, 4, 4, 4, 4, 4, 4,
186
+ 3, 5, 2, 2, 5, 2, 2, 0,
187
+ 0, 4, 4, 4, 4, 4, 4, 4,
188
+ 4, 4, 4, 4, 4, 4, 2, 1,
189
+ 1, 1, 1, 1, 1, 1, 1, 1,
190
+ 1, 1, 1, 1, 1, 1, 1, 1,
191
+ 1, 1, 1, 1, 1, 1, 1, 1,
192
+ 1, 1, 1, 1, 0
97
193
  ]
98
194
 
99
195
  class << self
@@ -101,13 +197,23 @@ class << self
101
197
  private :_simple_lexer_range_lengths, :_simple_lexer_range_lengths=
102
198
  end
103
199
  self._simple_lexer_range_lengths = [
104
- 0, 0, 0, 0, 0, 0, 0, 2,
105
- 3, 3, 1, 0, 0, 0, 0, 0,
106
- 0, 1, 1, 3, 3, 3, 2, 3,
200
+ 0, 0, 0, 0, 0, 0, 0, 0,
201
+ 3, 3, 4, 0, 0, 0, 0, 0,
202
+ 0, 0, 1, 1, 1, 1, 0, 3,
203
+ 3, 3, 3, 3, 3, 3, 3, 3,
204
+ 3, 3, 3, 3, 3, 3, 3, 3,
205
+ 3, 3, 0, 0, 0, 0, 0, 0,
206
+ 0, 3, 3, 3, 3, 0, 0, 0,
207
+ 0, 0, 0, 0, 3, 3, 3, 3,
107
208
  3, 3, 3, 3, 3, 3, 1, 1,
209
+ 1, 0, 3, 3, 3, 3, 3, 3,
210
+ 3, 1, 0, 0, 0, 0, 0, 0,
211
+ 0, 4, 3, 3, 3, 3, 3, 3,
212
+ 3, 3, 3, 3, 3, 3, 0, 0,
213
+ 0, 0, 0, 0, 0, 0, 0, 0,
214
+ 0, 0, 0, 0, 0, 0, 0, 0,
108
215
  0, 0, 0, 0, 0, 0, 0, 0,
109
- 0, 0, 0, 0, 5, 1, 1, 1,
110
- 3, 3, 1, 0
216
+ 0, 0, 0, 0, 0
111
217
  ]
112
218
 
113
219
  class << self
@@ -115,44 +221,23 @@ class << self
115
221
  private :_simple_lexer_index_offsets, :_simple_lexer_index_offsets=
116
222
  end
117
223
  self._simple_lexer_index_offsets = [
118
- 0, 0, 2, 4, 8, 10, 12, 14,
119
- 19, 23, 28, 33, 36, 39, 42, 45,
120
- 46, 47, 49, 51, 55, 60, 65, 68,
121
- 72, 77, 83, 89, 95, 101, 106, 114,
122
- 116, 119, 121, 123, 125, 127, 129, 131,
123
- 133, 135, 137, 139, 142, 154, 161, 166,
124
- 170, 176, 182, 186
125
- ]
126
-
127
- class << self
128
- attr_accessor :_simple_lexer_indicies
129
- private :_simple_lexer_indicies, :_simple_lexer_indicies=
130
- end
131
- self._simple_lexer_indicies = [
132
- 0, 1, 2, 1, 3, 4, 5, 1,
133
- 6, 1, 7, 1, 8, 1, 11, 12,
134
- 10, 10, 9, 14, 14, 14, 13, 15,
135
- 14, 14, 14, 13, 16, 17, 18, 16,
136
- 13, 20, 21, 19, 23, 24, 22, 20,
137
- 26, 25, 23, 28, 27, 27, 22, 29,
138
- 13, 30, 13, 31, 31, 31, 13, 33,
139
- 14, 14, 14, 32, 34, 14, 14, 14,
140
- 32, 35, 35, 32, 36, 36, 36, 32,
141
- 38, 14, 14, 14, 37, 15, 39, 14,
142
- 14, 14, 37, 15, 40, 14, 14, 14,
143
- 37, 15, 41, 14, 14, 14, 37, 15,
144
- 42, 14, 14, 14, 37, 43, 14, 14,
145
- 14, 37, 44, 44, 45, 45, 45, 45,
146
- 46, 37, 47, 37, 48, 49, 1, 50,
147
- 1, 51, 1, 52, 1, 7, 1, 53,
148
- 1, 54, 1, 6, 1, 55, 1, 56,
149
- 1, 7, 1, 57, 57, 1, 16, 17,
150
- 18, 57, 8, 57, 16, 58, 29, 59,
151
- 59, 1, 16, 17, 18, 57, 57, 16,
152
- 1, 57, 60, 57, 29, 1, 57, 57,
153
- 30, 1, 61, 61, 31, 31, 31, 1,
154
- 62, 62, 36, 36, 36, 1, 63, 63,
155
- 47, 1, 63, 63, 1, 0
224
+ 0, 0, 2, 4, 10, 12, 14, 16,
225
+ 20, 31, 38, 49, 52, 55, 61, 64,
226
+ 67, 68, 69, 71, 77, 79, 84, 88,
227
+ 97, 105, 113, 121, 129, 137, 145, 153,
228
+ 161, 169, 177, 185, 193, 201, 209, 217,
229
+ 225, 233, 240, 246, 249, 252, 253, 256,
230
+ 259, 260, 268, 276, 284, 291, 297, 300,
231
+ 303, 304, 307, 310, 311, 319, 326, 330,
232
+ 339, 347, 355, 363, 371, 379, 386, 394,
233
+ 396, 401, 405, 413, 421, 429, 437, 445,
234
+ 453, 460, 467, 470, 473, 479, 482, 485,
235
+ 486, 487, 496, 504, 512, 520, 528, 536,
236
+ 544, 552, 560, 568, 576, 584, 592, 595,
237
+ 597, 599, 601, 603, 605, 607, 609, 611,
238
+ 613, 615, 617, 619, 621, 623, 625, 627,
239
+ 629, 631, 633, 635, 637, 639, 641, 643,
240
+ 645, 647, 649, 651, 653
156
241
  ]
157
242
 
158
243
  class << self
@@ -160,14 +245,88 @@ class << self
160
245
  private :_simple_lexer_trans_targs, :_simple_lexer_trans_targs=
161
246
  end
162
247
  self._simple_lexer_trans_targs = [
163
- 2, 0, 3, 4, 32, 40, 5, 6,
164
- 43, 0, 8, 20, 24, 0, 9, 44,
165
- 10, 11, 13, 12, 45, 16, 12, 45,
166
- 16, 14, 15, 14, 15, 46, 47, 48,
167
- 0, 21, 22, 23, 49, 0, 25, 26,
168
- 27, 28, 29, 30, 31, 51, 50, 50,
169
- 33, 37, 34, 35, 36, 38, 39, 41,
170
- 42, 7, 17, 19, 18, 7, 7, 7
248
+ 2, 0, 3, 0, 4, 102, 110, 124,
249
+ 127, 0, 5, 0, 6, 0, 7, 0,
250
+ 8, 8, 132, 0, 24, 49, 60, 64,
251
+ 74, 90, 97, 9, 9, 9, 0, 9,
252
+ 10, 9, 9, 9, 9, 0, 11, 14,
253
+ 8, 22, 8, 132, 18, 23, 23, 23,
254
+ 0, 13, 17, 12, 13, 17, 12, 11,
255
+ 14, 8, 8, 132, 0, 13, 16, 15,
256
+ 13, 16, 15, 15, 12, 19, 0, 8,
257
+ 20, 8, 132, 19, 0, 21, 0, 8,
258
+ 8, 132, 21, 0, 8, 8, 132, 0,
259
+ 8, 23, 8, 132, 23, 23, 23, 23,
260
+ 0, 9, 10, 9, 25, 9, 9, 9,
261
+ 0, 9, 10, 9, 26, 9, 9, 9,
262
+ 0, 9, 10, 9, 27, 9, 9, 9,
263
+ 0, 9, 10, 9, 28, 9, 9, 9,
264
+ 0, 9, 10, 9, 29, 9, 9, 9,
265
+ 0, 9, 10, 9, 30, 9, 9, 9,
266
+ 0, 9, 10, 31, 9, 9, 9, 9,
267
+ 0, 9, 10, 9, 32, 9, 9, 9,
268
+ 0, 9, 10, 9, 33, 9, 9, 9,
269
+ 0, 9, 10, 9, 34, 9, 9, 9,
270
+ 0, 9, 10, 35, 9, 9, 9, 9,
271
+ 0, 9, 10, 9, 36, 9, 9, 9,
272
+ 0, 9, 10, 9, 37, 9, 9, 9,
273
+ 0, 9, 10, 9, 38, 9, 9, 9,
274
+ 0, 9, 10, 9, 39, 9, 9, 9,
275
+ 0, 9, 10, 9, 40, 9, 9, 9,
276
+ 0, 9, 10, 9, 41, 9, 9, 9,
277
+ 0, 9, 42, 9, 9, 9, 9, 0,
278
+ 43, 46, 8, 8, 132, 0, 42, 45,
279
+ 44, 42, 45, 44, 44, 42, 48, 47,
280
+ 42, 48, 47, 47, 9, 10, 9, 50,
281
+ 9, 9, 9, 0, 9, 10, 9, 51,
282
+ 9, 9, 9, 0, 9, 10, 9, 52,
283
+ 9, 9, 9, 0, 9, 53, 9, 9,
284
+ 9, 9, 0, 54, 57, 8, 8, 132,
285
+ 0, 53, 56, 55, 53, 56, 55, 55,
286
+ 53, 59, 58, 53, 59, 58, 58, 9,
287
+ 10, 61, 9, 9, 9, 9, 0, 9,
288
+ 62, 9, 9, 9, 9, 0, 63, 63,
289
+ 63, 0, 8, 63, 8, 132, 63, 63,
290
+ 63, 63, 0, 9, 10, 9, 65, 9,
291
+ 9, 9, 0, 9, 10, 9, 66, 9,
292
+ 9, 9, 0, 9, 10, 9, 67, 9,
293
+ 9, 9, 0, 9, 10, 9, 68, 9,
294
+ 9, 9, 0, 9, 10, 9, 69, 9,
295
+ 9, 9, 0, 9, 70, 9, 9, 9,
296
+ 9, 0, 71, 71, 73, 73, 73, 73,
297
+ 72, 0, 72, 0, 8, 8, 132, 72,
298
+ 0, 8, 8, 132, 0, 9, 10, 9,
299
+ 75, 9, 9, 9, 0, 9, 10, 9,
300
+ 76, 9, 9, 9, 0, 9, 10, 9,
301
+ 77, 9, 9, 9, 0, 9, 10, 9,
302
+ 78, 9, 9, 9, 0, 9, 10, 9,
303
+ 79, 9, 9, 9, 0, 9, 10, 9,
304
+ 80, 9, 9, 9, 0, 9, 81, 9,
305
+ 9, 9, 9, 0, 82, 85, 8, 8,
306
+ 132, 89, 0, 84, 88, 83, 84, 88,
307
+ 83, 82, 85, 8, 8, 132, 0, 84,
308
+ 87, 86, 84, 87, 86, 86, 83, 8,
309
+ 8, 132, 89, 89, 89, 89, 89, 0,
310
+ 9, 10, 9, 91, 9, 9, 9, 0,
311
+ 9, 10, 9, 92, 9, 9, 9, 0,
312
+ 9, 10, 9, 93, 9, 9, 9, 0,
313
+ 9, 10, 9, 94, 9, 9, 9, 0,
314
+ 9, 10, 9, 95, 9, 9, 9, 0,
315
+ 9, 10, 9, 96, 9, 9, 9, 0,
316
+ 9, 10, 9, 61, 9, 9, 9, 0,
317
+ 9, 10, 9, 98, 9, 9, 9, 0,
318
+ 9, 10, 9, 99, 9, 9, 9, 0,
319
+ 9, 10, 9, 100, 9, 9, 9, 0,
320
+ 9, 10, 9, 101, 9, 9, 9, 0,
321
+ 9, 10, 9, 69, 9, 9, 9, 0,
322
+ 103, 107, 0, 104, 0, 105, 0, 106,
323
+ 0, 6, 0, 108, 0, 109, 0, 5,
324
+ 0, 111, 0, 112, 0, 113, 0, 114,
325
+ 0, 115, 0, 116, 0, 117, 0, 118,
326
+ 0, 119, 0, 120, 0, 121, 0, 122,
327
+ 0, 123, 0, 6, 0, 125, 0, 126,
328
+ 0, 6, 0, 128, 0, 129, 0, 130,
329
+ 0, 131, 0, 6, 0, 0, 0
171
330
  ]
172
331
 
173
332
  class << self
@@ -176,13 +335,87 @@ class << self
176
335
  end
177
336
  self._simple_lexer_trans_actions = [
178
337
  0, 0, 0, 0, 0, 0, 0, 0,
179
- 0, 26, 1, 1, 1, 15, 0, 7,
180
- 0, 0, 0, 1, 17, 1, 0, 3,
181
- 0, 1, 1, 0, 0, 0, 0, 0,
182
- 20, 0, 5, 1, 0, 23, 0, 0,
183
- 0, 0, 0, 5, 1, 1, 1, 0,
184
338
  0, 0, 0, 0, 0, 0, 0, 0,
185
- 0, 0, 0, 1, 0, 9, 11, 13
339
+ 0, 0, 0, 0, 1, 1, 1, 1,
340
+ 1, 1, 1, 1, 1, 1, 47, 0,
341
+ 7, 0, 0, 0, 0, 11, 0, 0,
342
+ 9, 0, 9, 9, 0, 1, 1, 1,
343
+ 0, 13, 1, 1, 3, 0, 0, 0,
344
+ 0, 9, 9, 9, 0, 13, 1, 1,
345
+ 3, 0, 0, 0, 0, 0, 11, 9,
346
+ 0, 9, 9, 0, 0, 0, 11, 9,
347
+ 9, 9, 0, 0, 9, 9, 9, 0,
348
+ 16, 0, 16, 16, 0, 0, 0, 0,
349
+ 0, 0, 7, 0, 0, 0, 0, 0,
350
+ 40, 0, 7, 0, 0, 0, 0, 0,
351
+ 40, 0, 7, 0, 0, 0, 0, 0,
352
+ 40, 0, 7, 0, 0, 0, 0, 0,
353
+ 40, 0, 7, 0, 0, 0, 0, 0,
354
+ 40, 0, 7, 0, 0, 0, 0, 0,
355
+ 40, 0, 7, 0, 0, 0, 0, 0,
356
+ 40, 0, 7, 0, 0, 0, 0, 0,
357
+ 40, 0, 7, 0, 0, 0, 0, 0,
358
+ 40, 0, 7, 0, 0, 0, 0, 0,
359
+ 40, 0, 7, 0, 0, 0, 0, 0,
360
+ 40, 0, 7, 0, 0, 0, 0, 0,
361
+ 40, 0, 7, 0, 0, 0, 0, 0,
362
+ 40, 0, 7, 0, 0, 0, 0, 0,
363
+ 40, 0, 7, 0, 0, 0, 0, 0,
364
+ 40, 0, 7, 0, 0, 0, 0, 0,
365
+ 40, 0, 7, 0, 0, 0, 0, 0,
366
+ 40, 0, 5, 0, 0, 0, 0, 40,
367
+ 0, 0, 9, 9, 9, 0, 13, 1,
368
+ 1, 3, 0, 0, 0, 13, 1, 1,
369
+ 3, 0, 0, 0, 0, 7, 0, 0,
370
+ 0, 0, 0, 37, 0, 7, 0, 0,
371
+ 0, 0, 0, 37, 0, 7, 0, 0,
372
+ 0, 0, 0, 37, 0, 5, 0, 0,
373
+ 0, 0, 37, 0, 0, 34, 34, 34,
374
+ 0, 13, 1, 1, 3, 0, 0, 0,
375
+ 13, 1, 1, 3, 0, 0, 0, 0,
376
+ 7, 0, 0, 0, 0, 0, 25, 0,
377
+ 5, 0, 0, 0, 0, 25, 1, 1,
378
+ 1, 25, 43, 0, 43, 43, 0, 0,
379
+ 0, 0, 0, 0, 7, 0, 0, 0,
380
+ 0, 0, 31, 0, 7, 0, 0, 0,
381
+ 0, 0, 31, 0, 7, 0, 0, 0,
382
+ 0, 0, 31, 0, 7, 0, 0, 0,
383
+ 0, 0, 31, 0, 7, 0, 0, 0,
384
+ 0, 0, 31, 0, 5, 0, 0, 0,
385
+ 0, 31, 1, 1, 1, 1, 1, 1,
386
+ 1, 31, 0, 31, 22, 22, 22, 0,
387
+ 0, 22, 22, 22, 0, 0, 7, 0,
388
+ 0, 0, 0, 0, 28, 0, 7, 0,
389
+ 0, 0, 0, 0, 28, 0, 7, 0,
390
+ 0, 0, 0, 0, 28, 0, 7, 0,
391
+ 0, 0, 0, 0, 28, 0, 7, 0,
392
+ 0, 0, 0, 0, 28, 0, 7, 0,
393
+ 0, 0, 0, 0, 28, 0, 5, 0,
394
+ 0, 0, 0, 28, 0, 0, 9, 9,
395
+ 9, 1, 0, 13, 1, 1, 3, 0,
396
+ 0, 0, 0, 9, 9, 9, 0, 13,
397
+ 1, 1, 3, 0, 0, 0, 0, 19,
398
+ 19, 19, 0, 0, 0, 0, 0, 0,
399
+ 0, 7, 0, 0, 0, 0, 0, 25,
400
+ 0, 7, 0, 0, 0, 0, 0, 25,
401
+ 0, 7, 0, 0, 0, 0, 0, 25,
402
+ 0, 7, 0, 0, 0, 0, 0, 25,
403
+ 0, 7, 0, 0, 0, 0, 0, 25,
404
+ 0, 7, 0, 0, 0, 0, 0, 25,
405
+ 0, 7, 0, 0, 0, 0, 0, 25,
406
+ 0, 7, 0, 0, 0, 0, 0, 31,
407
+ 0, 7, 0, 0, 0, 0, 0, 31,
408
+ 0, 7, 0, 0, 0, 0, 0, 31,
409
+ 0, 7, 0, 0, 0, 0, 0, 31,
410
+ 0, 7, 0, 0, 0, 0, 0, 31,
411
+ 0, 0, 0, 0, 0, 0, 0, 0,
412
+ 0, 0, 0, 0, 0, 0, 0, 0,
413
+ 0, 0, 0, 0, 0, 0, 0, 0,
414
+ 0, 0, 0, 0, 0, 0, 0, 0,
415
+ 0, 0, 0, 0, 0, 0, 0, 0,
416
+ 0, 0, 0, 0, 0, 0, 0, 0,
417
+ 0, 0, 0, 0, 0, 0, 0, 0,
418
+ 0, 0, 0, 0, 0, 0, 0
186
419
  ]
187
420
 
188
421
  class << self
@@ -190,13 +423,23 @@ class << self
190
423
  private :_simple_lexer_eof_actions, :_simple_lexer_eof_actions=
191
424
  end
192
425
  self._simple_lexer_eof_actions = [
193
- 0, 0, 0, 0, 0, 0, 0, 26,
194
- 15, 15, 15, 15, 15, 15, 15, 15,
195
- 15, 15, 15, 15, 20, 20, 20, 20,
196
- 23, 23, 23, 23, 23, 23, 23, 23,
426
+ 0, 0, 0, 0, 0, 0, 0, 0,
427
+ 47, 11, 0, 11, 11, 0, 11, 11,
428
+ 11, 11, 11, 0, 11, 0, 0, 0,
429
+ 40, 40, 40, 40, 40, 40, 40, 40,
430
+ 40, 40, 40, 40, 40, 40, 40, 40,
431
+ 40, 40, 0, 40, 40, 40, 40, 40,
432
+ 40, 37, 37, 37, 37, 0, 37, 37,
433
+ 37, 37, 37, 37, 25, 25, 25, 0,
434
+ 31, 31, 31, 31, 31, 31, 31, 31,
435
+ 0, 0, 28, 28, 28, 28, 28, 28,
436
+ 28, 0, 28, 28, 0, 28, 28, 28,
437
+ 28, 0, 25, 25, 25, 25, 25, 25,
438
+ 25, 31, 31, 31, 31, 31, 0, 0,
197
439
  0, 0, 0, 0, 0, 0, 0, 0,
198
440
  0, 0, 0, 0, 0, 0, 0, 0,
199
- 9, 11, 13, 13
441
+ 0, 0, 0, 0, 0, 0, 0, 0,
442
+ 0, 0, 0, 0, 0
200
443
  ]
201
444
 
202
445
  class << self
@@ -206,7 +449,7 @@ self.simple_lexer_start = 1;
206
449
  class << self
207
450
  attr_accessor :simple_lexer_first_final
208
451
  end
209
- self.simple_lexer_first_final = 43;
452
+ self.simple_lexer_first_final = 132;
210
453
  class << self
211
454
  attr_accessor :simple_lexer_error
212
455
  end
@@ -218,11 +461,12 @@ end
218
461
  self.simple_lexer_en_main = 1;
219
462
 
220
463
 
221
- # line 61 "gen_vcfheaderline_parser.rl"
222
- # %% this just fixes our syntax highlighting...
464
+ # line 79 "gen_vcfheaderline_parser.rl"
465
+ # %% this just fixes syntax highlighting...
223
466
 
224
467
  def self.run_lexer(buf, options = {})
225
468
  do_debug = (options[:debug] == true)
469
+ $stderr.print "---> ",buf,"\n" if do_debug
226
470
  data = buf.unpack("c*") if(buf.is_a?(String))
227
471
  eof = data.length
228
472
  values = []
@@ -231,23 +475,23 @@ def self.run_lexer(buf, options = {})
231
475
  emit = lambda { |type, data, ts, p|
232
476
  # Print the type and text of the last read token
233
477
  # p ts,p
234
- puts "#{type}: #{data[ts...p].pack('c*')}" if do_debug
478
+ $stderr.print "EMITTED: #{type}: #{data[ts...p].pack('c*')}\n" if do_debug
235
479
  values << [type,data[ts...p].pack('c*')]
236
480
  }
237
481
 
238
482
  error_code = nil
239
483
 
240
484
 
241
- # line 242 "gen_vcfheaderline_parser.rb"
485
+ # line 486 "gen_vcfheaderline_parser.rb"
242
486
  begin
243
487
  p ||= 0
244
488
  pe ||= data.length
245
489
  cs = simple_lexer_start
246
490
  end
247
491
 
248
- # line 80 "gen_vcfheaderline_parser.rl"
492
+ # line 99 "gen_vcfheaderline_parser.rl"
249
493
 
250
- # line 251 "gen_vcfheaderline_parser.rb"
494
+ # line 495 "gen_vcfheaderline_parser.rb"
251
495
  begin
252
496
  _klen, _trans, _keys, _acts, _nacts = nil
253
497
  _goto_level = 0
@@ -318,7 +562,6 @@ begin
318
562
  _trans += _klen
319
563
  end
320
564
  end while false
321
- _trans = _simple_lexer_indicies[_trans]
322
565
  cs = _simple_lexer_trans_targs[_trans]
323
566
  if _simple_lexer_trans_actions[_trans] != 0
324
567
  _acts = _simple_lexer_trans_actions[_trans]
@@ -329,50 +572,78 @@ begin
329
572
  _acts += 1
330
573
  case _simple_lexer_actions[_acts - 1]
331
574
  when 0 then
332
- # line 23 "gen_vcfheaderline_parser.rl"
575
+ # line 33 "gen_vcfheaderline_parser.rl"
333
576
  begin
334
577
  ts=p end
335
578
  when 1 then
336
- # line 24 "gen_vcfheaderline_parser.rl"
579
+ # line 34 "gen_vcfheaderline_parser.rl"
337
580
  begin
338
581
 
339
582
  emit.call(:value,data,ts,p)
340
583
  end
341
584
  when 2 then
342
- # line 28 "gen_vcfheaderline_parser.rl"
585
+ # line 38 "gen_vcfheaderline_parser.rl"
343
586
  begin
344
587
 
345
588
  emit.call(:kw,data,ts,p)
346
589
  end
347
590
  when 3 then
348
- # line 46 "gen_vcfheaderline_parser.rl"
591
+ # line 58 "gen_vcfheaderline_parser.rl"
349
592
  begin
350
593
  emit.call(:key_word,data,ts,p) end
351
594
  when 4 then
352
- # line 47 "gen_vcfheaderline_parser.rl"
595
+ # line 59 "gen_vcfheaderline_parser.rl"
353
596
  begin
354
597
  emit.call(:value,data,ts,p) end
355
598
  when 5 then
356
- # line 48 "gen_vcfheaderline_parser.rl"
599
+ # line 60 "gen_vcfheaderline_parser.rl"
357
600
  begin
358
601
  emit.call(:value,data,ts,p) end
359
602
  when 6 then
360
- # line 50 "gen_vcfheaderline_parser.rl"
603
+ # line 62 "gen_vcfheaderline_parser.rl"
361
604
  begin
362
605
  emit.call(:value,data,ts,p) end
363
606
  when 7 then
364
- # line 52 "gen_vcfheaderline_parser.rl"
607
+ # line 65 "gen_vcfheaderline_parser.rl"
365
608
  begin
366
- error_code="ID" end
609
+ emit.call(:value,data,ts,p) end
367
610
  when 8 then
368
- # line 53 "gen_vcfheaderline_parser.rl"
611
+ # line 67 "gen_vcfheaderline_parser.rl"
369
612
  begin
370
- error_code="Number" end
613
+ debug("ID FOUND") end
371
614
  when 9 then
372
- # line 54 "gen_vcfheaderline_parser.rl"
615
+ # line 67 "gen_vcfheaderline_parser.rl"
616
+ begin
617
+ error_code="Malformed ID" end
618
+ when 10 then
619
+ # line 68 "gen_vcfheaderline_parser.rl"
620
+ begin
621
+ error_code="Version" end
622
+ when 11 then
623
+ # line 69 "gen_vcfheaderline_parser.rl"
373
624
  begin
374
- error_code="key-value" end
375
- # line 376 "gen_vcfheaderline_parser.rb"
625
+ error_code="Number" end
626
+ when 12 then
627
+ # line 70 "gen_vcfheaderline_parser.rl"
628
+ begin
629
+ debug("DATE FOUND") end
630
+ when 13 then
631
+ # line 70 "gen_vcfheaderline_parser.rl"
632
+ begin
633
+ error_code="Date" end
634
+ when 14 then
635
+ # line 71 "gen_vcfheaderline_parser.rl"
636
+ begin
637
+ error_code="GATK" end
638
+ when 15 then
639
+ # line 72 "gen_vcfheaderline_parser.rl"
640
+ begin
641
+ debug("KEY_VALUE found") end
642
+ when 16 then
643
+ # line 72 "gen_vcfheaderline_parser.rl"
644
+ begin
645
+ error_code="unknown key-value " end
646
+ # line 647 "gen_vcfheaderline_parser.rb"
376
647
  end # action switch
377
648
  end
378
649
  end
@@ -400,31 +671,31 @@ when 9 then
400
671
  __nacts -= 1
401
672
  __acts += 1
402
673
  case _simple_lexer_actions[__acts - 1]
403
- when 4 then
404
- # line 47 "gen_vcfheaderline_parser.rl"
674
+ when 9 then
675
+ # line 67 "gen_vcfheaderline_parser.rl"
405
676
  begin
406
- emit.call(:value,data,ts,p) end
407
- when 5 then
408
- # line 48 "gen_vcfheaderline_parser.rl"
677
+ error_code="Malformed ID" end
678
+ when 10 then
679
+ # line 68 "gen_vcfheaderline_parser.rl"
409
680
  begin
410
- emit.call(:value,data,ts,p) end
411
- when 6 then
412
- # line 50 "gen_vcfheaderline_parser.rl"
681
+ error_code="Version" end
682
+ when 11 then
683
+ # line 69 "gen_vcfheaderline_parser.rl"
413
684
  begin
414
- emit.call(:value,data,ts,p) end
415
- when 7 then
416
- # line 52 "gen_vcfheaderline_parser.rl"
685
+ error_code="Number" end
686
+ when 13 then
687
+ # line 70 "gen_vcfheaderline_parser.rl"
417
688
  begin
418
- error_code="ID" end
419
- when 8 then
420
- # line 53 "gen_vcfheaderline_parser.rl"
689
+ error_code="Date" end
690
+ when 14 then
691
+ # line 71 "gen_vcfheaderline_parser.rl"
421
692
  begin
422
- error_code="Number" end
423
- when 9 then
424
- # line 54 "gen_vcfheaderline_parser.rl"
693
+ error_code="GATK" end
694
+ when 16 then
695
+ # line 72 "gen_vcfheaderline_parser.rl"
425
696
  begin
426
- error_code="key-value" end
427
- # line 428 "gen_vcfheaderline_parser.rb"
697
+ error_code="unknown key-value " end
698
+ # line 699 "gen_vcfheaderline_parser.rb"
428
699
  end # eof action switch
429
700
  end
430
701
  if _trigger_goto
@@ -438,7 +709,7 @@ end
438
709
  end
439
710
  end
440
711
 
441
- # line 81 "gen_vcfheaderline_parser.rl"
712
+ # line 100 "gen_vcfheaderline_parser.rl"
442
713
 
443
714
  raise "ERROR: "+error_code+" in "+buf if error_code
444
715
 
@@ -446,8 +717,11 @@ end
446
717
  res = {}
447
718
  # p values
448
719
  values.each_slice(2) do | a,b |
449
- # p '*',a,b
450
- res[a[1]] = b[1]
720
+ $stderr.print '*',a,b if do_debug
721
+ keyword = a[1]
722
+ value = b[1]
723
+ value = value.to_i if ['length','Epoch'].index(keyword)
724
+ res[keyword] = value
451
725
  # p h[:value] if h[:name]==:identifier or h[:name]==:value or h[:name]==:string
452
726
  end
453
727
  rescue
@@ -455,7 +729,7 @@ end
455
729
  p values
456
730
  raise
457
731
  end
458
- p res if do_debug
732
+ $stderr.print(res,"\n") if do_debug
459
733
  res
460
734
  end
461
735
  end
@@ -464,7 +738,19 @@ end
464
738
 
465
739
  if __FILE__ == $0
466
740
 
741
+ gatkcommandline = <<LINE1
742
+ ##GATKCommandLine=<ID=CombineVariants,Version=3.2-2-gec30cee,Date="Thu Oct 30 13:41:59 CET 2014",Epoch=1414672919266,CommandLineOptions="analysis_type=CombineVariants input_file=[] showFullBamList=false read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/hpc/cog_bioinf/GENOMES/Homo_sapiens.GRCh37.GATK.illumina/Homo_sapiens.GRCh37.GATK.illumina.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 refactor_NDN_cigar_string=false fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false variant_index_type=DYNAMIC_SEEK variant_index_parameter=-1 logging_level=INFO log_to_file=null help=false version=false variant=[(RodBindingCollection [(RodBinding name=variant source=/hpc/cog_bioinf/data/robert/testIAP/testSubsetExome/tmp/testSubsetExome.filtered_snps.vcf)]), (RodBindingCollection [(RodBinding name=variant2 source=/hpc/cog_bioinf/data/robert/testIAP/testSubsetExome/tmp/testSubsetExome.filtered_indels.vcf)])] out=org.broadinstitute.gatk.engine.io.stubs.VariantContextWriterStub 
no_cmdline_in_header=org.broadinstitute.gatk.engine.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.gatk.engine.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.gatk.engine.io.stubs.VariantContextWriterStub genotypemergeoption=UNSORTED filteredrecordsmergetype=KEEP_IF_ANY_UNFILTERED multipleallelesmergetype=BY_TYPE rod_priority_list=null printComplexMerges=false filteredAreUncalled=false minimalVCF=false excludeNonVariants=false setKey=set assumeIdenticalSamples=false minimumN=1 suppressCommandLineHeader=false mergeInfoWithMaxAC=false filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false">
743
+ LINE1
744
+
745
+ h = {}
746
+ s = gatkcommandline.strip
747
+ # print s,"\n"
748
+ result = BioVcf::VcfHeaderParser::RagelKeyValues.run_lexer(s, debug: true)
749
+ # h[result['ID']] = result
750
+ # p result
751
+
467
752
  lines = <<LINES
753
+ ##FILTER=<ID=HaplotypeScoreHigh,Description="HaplotypeScore > 13.0">
468
754
  ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
469
755
  ##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Total read depth",Extra="Yes?">
470
756
  ##FORMAT=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
@@ -473,11 +759,20 @@ lines = <<LINES
473
759
  ##INFO=<ID=GENEINFO,Number=1,Type=String,Description="Pairs each of gene symbol:gene id. The gene symbol and id are delimited by a colon (:), and each pair is delimited by a vertical bar (|)">
474
760
  ##INFO=<ID=CLNHGVS,Number=.,Type=String,Description="Variant names from HGVS. The order of these variants corresponds to the order of the info in the other clinical INFO tags.">
475
761
  ##INFO=<ID=CLNHGVS1,Number=.,Type=String,Description="Variant names from \\"HGVS\\". The order of these 'variants' corresponds to the order of the info in the other clinical INFO tags.">
762
+ ##contig=<ID=XXXY12>
763
+ ##contig=<ID=Y,length=59373566>
476
764
  LINES
477
765
 
766
+ h = {}
478
767
  lines.strip.split("\n").each { |s|
479
- print s,"\n"
480
- p BioVcf::VcfHeaderParser::RagelKeyValues.run_lexer(s, debug: false)
768
+ # print s,"\n"
769
+ result = BioVcf::VcfHeaderParser::RagelKeyValues.run_lexer(s, debug: true)
770
+ h[result['ID']] = result
771
+ p result
481
772
  }
773
+ p h
774
+
775
+ raise "ERROR" if h != {"HaplotypeScoreHigh"=>{"ID"=>"HaplotypeScoreHigh", "Description"=>"HaplotypeScore > 13.0"}, "GT"=>{"ID"=>"GT", "Number"=>"1", "Type"=>"String", "Description"=>"Genotype"}, "DP"=>{"ID"=>"DP", "Number"=>"1", "Type"=>"Integer", "Description"=>"Total read depth", "Extra"=>"Yes?"}, "DP4"=>{"ID"=>"DP4", "Number"=>"4", "Type"=>"Integer", "Description"=>"# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases"}, "PM"=>{"ID"=>"PM", "Number"=>"0", "Type"=>"Flag", "Description"=>"Variant is Precious(Clinical,Pubmed Cited)"}, "VP"=>{"ID"=>"VP", "Number"=>"1", "Type"=>"String", "Description"=>"Variation Property. Documentation is at ftp://ftp.ncbi.nlm.nih.gov/snp/specs/dbSNP_BitField_latest.pdf", "Source"=>"dbsnp", "Version"=>"138"}, "GENEINFO"=>{"ID"=>"GENEINFO", "Number"=>"1", "Type"=>"String", "Description"=>"Pairs each of gene symbol:gene id. The gene symbol and id are delimited by a colon (:), and each pair is delimited by a vertical bar (|)"}, "CLNHGVS"=>{"ID"=>"CLNHGVS", "Number"=>".", "Type"=>"String", "Description"=>"Variant names from HGVS. The order of these variants corresponds to the order of the info in the other clinical INFO tags."}, "CLNHGVS1"=>{"ID"=>"CLNHGVS1", "Number"=>".", "Type"=>"String", "Description"=>"Variant names from \\\"HGVS\\\". The order of these 'variants' corresponds to the order of the info in the other clinical INFO tags."}, "XXXY12"=>{"ID"=>"XXXY12"}, "Y"=>{"ID"=>"Y", "length"=>59373566}}
776
+
482
777
 
483
778
  end # test