bio-vcf 0.8.2 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +8 -2
- data/Gemfile +4 -6
- data/README.md +92 -57
- data/Rakefile +3 -41
- data/TAGS +115 -0
- data/VERSION +1 -1
- data/bin/bio-vcf +58 -70
- data/bio-vcf.gemspec +23 -75
- data/features/cli.feature +6 -1
- data/features/multisample.feature +12 -0
- data/features/step_definitions/cli-feature.rb +2 -2
- data/features/step_definitions/multisample.rb +19 -0
- data/features/step_definitions/vcf_header.rb +1 -1
- data/features/support/env.rb +0 -9
- data/lib/bio-vcf/pcows.rb +210 -0
- data/lib/bio-vcf/vcfheader.rb +28 -9
- data/lib/bio-vcf/vcfheader_line.rb +455 -160
- data/lib/bio-vcf/vcfrecord.rb +30 -15
- data/ragel/gen_vcfheaderline_parser.rl +68 -25
- data/ragel/generate.sh +4 -1
- data/template/vcf2json.erb +16 -16
- data/template/vcf2json_full_header.erb +16 -17
- data/template/vcf2json_use_meta.erb +35 -35
- data/test/data/input/gatk_exome.vcf +237 -0
- data/test/data/input/gatk_wgs.vcf +1000 -0
- data/test/data/input/test.bed +632 -0
- data/test/data/regression/eval_once-stderr.new +1 -0
- data/test/data/regression/eval_once.new +1 -0
- data/test/data/regression/eval_once.ref +1 -0
- data/test/data/regression/eval_r.info.dp-stderr.new +4 -0
- data/test/data/regression/eval_r.info.dp.new +150 -0
- data/test/data/regression/ifilter_s.dp-stderr.new +28 -0
- data/test/data/regression/ifilter_s.dp.new +31 -0
- data/test/data/regression/r.info.dp-stderr.new +4 -0
- data/test/data/regression/r.info.dp.new +147 -0
- data/test/data/regression/rewrite.info.sample-stderr.new +4 -0
- data/test/data/regression/rewrite.info.sample.new +150 -0
- data/test/data/regression/s.dp-stderr.new +12 -0
- data/test/data/regression/s.dp.new +145 -0
- data/test/data/regression/seval_s.dp-stderr.new +4 -0
- data/test/data/regression/seval_s.dp.new +36 -0
- data/test/data/regression/sfilter_seval_s.dp-stderr.new +12 -0
- data/test/data/regression/sfilter_seval_s.dp.new +31 -0
- data/test/data/regression/thread4-stderr.new +4 -0
- data/test/data/regression/thread4.new +150 -0
- data/test/data/regression/thread4_4-stderr.new +15 -0
- data/test/data/regression/thread4_4.new +150 -0
- data/test/data/regression/thread4_4_failed_filter-stderr.new +5 -0
- data/test/data/regression/thread4_4_failed_filter-stderr.ref +5 -2
- data/test/data/regression/thread4_4_failed_filter.new +110 -0
- data/test/data/regression/vcf2json_full_header-stderr.new +4 -0
- data/test/data/regression/vcf2json_full_header.new +225 -0
- data/test/data/regression/vcf2json_full_header.ref +222 -258
- data/test/data/regression/vcf2json_use_meta-stderr.new +4 -0
- data/test/data/regression/vcf2json_use_meta.new +4697 -0
- data/test/data/regression/vcf2json_use_meta.ref +4697 -0
- data/test/performance/metrics.md +18 -1
- data/test/tmp/test.vcf +12469 -0
- metadata +38 -62
- data/Gemfile.lock +0 -81
- data/ragel/gen_vcfheaderline_parser.rb +0 -483
metadata
CHANGED
@@ -1,71 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-vcf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.2
|
4
|
+
version: 0.9.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Pjotr Prins
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
12
|
-
dependencies:
|
13
|
-
- !ruby/object:Gem::Dependency
|
14
|
-
name: rspec
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - ">="
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: 2.14.0
|
20
|
-
type: :development
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - ">="
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: 2.14.0
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: cucumber
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - ">="
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: 1.3.11
|
34
|
-
type: :development
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - ">="
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: 1.3.11
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: jeweler
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - ">="
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: 2.0.1
|
48
|
-
type: :development
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - ">="
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: 2.0.1
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: regressiontest
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
58
|
-
requirements:
|
59
|
-
- - ">="
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: 0.0.3
|
62
|
-
type: :development
|
63
|
-
prerelease: false
|
64
|
-
version_requirements: !ruby/object:Gem::Requirement
|
65
|
-
requirements:
|
66
|
-
- - ">="
|
67
|
-
- !ruby/object:Gem::Version
|
68
|
-
version: 0.0.3
|
11
|
+
date: 2015-08-16 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
69
13
|
description: Smart lazy multi-threaded parser for VCF format with useful filtering
|
70
14
|
and output rewriting
|
71
15
|
email: pjotr.public01@thebird.nl
|
@@ -78,10 +22,10 @@ extra_rdoc_files:
|
|
78
22
|
files:
|
79
23
|
- ".travis.yml"
|
80
24
|
- Gemfile
|
81
|
-
- Gemfile.lock
|
82
25
|
- LICENSE.txt
|
83
26
|
- README.md
|
84
27
|
- Rakefile
|
28
|
+
- TAGS
|
85
29
|
- VERSION
|
86
30
|
- bin/bio-vcf
|
87
31
|
- bio-vcf.gemspec
|
@@ -101,6 +45,7 @@ files:
|
|
101
45
|
- features/vcf_header.feature
|
102
46
|
- lib/bio-vcf.rb
|
103
47
|
- lib/bio-vcf/bedfilter.rb
|
48
|
+
- lib/bio-vcf/pcows.rb
|
104
49
|
- lib/bio-vcf/template.rb
|
105
50
|
- lib/bio-vcf/utils.rb
|
106
51
|
- lib/bio-vcf/variant.rb
|
@@ -113,7 +58,6 @@ files:
|
|
113
58
|
- lib/bio-vcf/vcfrecord.rb
|
114
59
|
- lib/bio-vcf/vcfsample.rb
|
115
60
|
- lib/bio-vcf/vcfstatistics.rb
|
116
|
-
- ragel/gen_vcfheaderline_parser.rb
|
117
61
|
- ragel/gen_vcfheaderline_parser.rl
|
118
62
|
- ragel/generate.sh
|
119
63
|
- template/gatk_vcf2rdf.erb
|
@@ -123,20 +67,52 @@ files:
|
|
123
67
|
- template/vcf2rdf.erb
|
124
68
|
- template/vcf2rdf_header.erb
|
125
69
|
- test/data/input/dbsnp.vcf
|
70
|
+
- test/data/input/gatk_exome.vcf
|
71
|
+
- test/data/input/gatk_wgs.vcf
|
126
72
|
- test/data/input/multisample.vcf
|
127
73
|
- test/data/input/somaticsniper.vcf
|
74
|
+
- test/data/input/test.bed
|
75
|
+
- test/data/regression/eval_once-stderr.new
|
76
|
+
- test/data/regression/eval_once.new
|
77
|
+
- test/data/regression/eval_once.ref
|
78
|
+
- test/data/regression/eval_r.info.dp-stderr.new
|
79
|
+
- test/data/regression/eval_r.info.dp.new
|
128
80
|
- test/data/regression/eval_r.info.dp.ref
|
81
|
+
- test/data/regression/ifilter_s.dp-stderr.new
|
82
|
+
- test/data/regression/ifilter_s.dp.new
|
129
83
|
- test/data/regression/ifilter_s.dp.ref
|
84
|
+
- test/data/regression/r.info.dp-stderr.new
|
85
|
+
- test/data/regression/r.info.dp.new
|
130
86
|
- test/data/regression/r.info.dp.ref
|
87
|
+
- test/data/regression/rewrite.info.sample-stderr.new
|
88
|
+
- test/data/regression/rewrite.info.sample.new
|
131
89
|
- test/data/regression/rewrite.info.sample.ref
|
90
|
+
- test/data/regression/s.dp-stderr.new
|
91
|
+
- test/data/regression/s.dp.new
|
132
92
|
- test/data/regression/s.dp.ref
|
93
|
+
- test/data/regression/seval_s.dp-stderr.new
|
94
|
+
- test/data/regression/seval_s.dp.new
|
133
95
|
- test/data/regression/seval_s.dp.ref
|
96
|
+
- test/data/regression/sfilter_seval_s.dp-stderr.new
|
97
|
+
- test/data/regression/sfilter_seval_s.dp.new
|
134
98
|
- test/data/regression/sfilter_seval_s.dp.ref
|
99
|
+
- test/data/regression/thread4-stderr.new
|
100
|
+
- test/data/regression/thread4.new
|
135
101
|
- test/data/regression/thread4.ref
|
102
|
+
- test/data/regression/thread4_4-stderr.new
|
103
|
+
- test/data/regression/thread4_4.new
|
136
104
|
- test/data/regression/thread4_4.ref
|
105
|
+
- test/data/regression/thread4_4_failed_filter-stderr.new
|
137
106
|
- test/data/regression/thread4_4_failed_filter-stderr.ref
|
107
|
+
- test/data/regression/thread4_4_failed_filter.new
|
108
|
+
- test/data/regression/vcf2json_full_header-stderr.new
|
109
|
+
- test/data/regression/vcf2json_full_header.new
|
138
110
|
- test/data/regression/vcf2json_full_header.ref
|
111
|
+
- test/data/regression/vcf2json_use_meta-stderr.new
|
112
|
+
- test/data/regression/vcf2json_use_meta.new
|
113
|
+
- test/data/regression/vcf2json_use_meta.ref
|
139
114
|
- test/performance/metrics.md
|
115
|
+
- test/tmp/test.vcf
|
140
116
|
homepage: http://github.com/pjotrp/bioruby-vcf
|
141
117
|
licenses:
|
142
118
|
- MIT
|
@@ -157,7 +133,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
157
133
|
version: '0'
|
158
134
|
requirements: []
|
159
135
|
rubyforge_project:
|
160
|
-
rubygems_version: 2.
|
136
|
+
rubygems_version: 2.4.5
|
161
137
|
signing_key:
|
162
138
|
specification_version: 4
|
163
139
|
summary: Fast multi-threaded VCF parser
|
data/Gemfile.lock
DELETED
@@ -1,81 +0,0 @@
|
|
1
|
-
GEM
|
2
|
-
remote: http://rubygems.org/
|
3
|
-
specs:
|
4
|
-
addressable (2.3.5)
|
5
|
-
builder (3.2.2)
|
6
|
-
cucumber (1.3.11)
|
7
|
-
builder (>= 2.1.2)
|
8
|
-
diff-lcs (>= 1.1.3)
|
9
|
-
gherkin (~> 2.12)
|
10
|
-
multi_json (>= 1.7.5, < 2.0)
|
11
|
-
multi_test (>= 0.0.2)
|
12
|
-
descendants_tracker (0.0.3)
|
13
|
-
diff-lcs (1.2.5)
|
14
|
-
faraday (0.9.0)
|
15
|
-
multipart-post (>= 1.2, < 3)
|
16
|
-
gherkin (2.12.2)
|
17
|
-
multi_json (~> 1.3)
|
18
|
-
gherkin (2.12.2-java)
|
19
|
-
multi_json (~> 1.3)
|
20
|
-
git (1.2.6)
|
21
|
-
github_api (0.11.3)
|
22
|
-
addressable (~> 2.3)
|
23
|
-
descendants_tracker (~> 0.0.1)
|
24
|
-
faraday (~> 0.8, < 0.10)
|
25
|
-
hashie (>= 1.2)
|
26
|
-
multi_json (>= 1.7.5, < 2.0)
|
27
|
-
nokogiri (~> 1.6.0)
|
28
|
-
oauth2
|
29
|
-
hashie (2.0.5)
|
30
|
-
highline (1.6.21)
|
31
|
-
jeweler (2.0.1)
|
32
|
-
builder
|
33
|
-
bundler (>= 1.0)
|
34
|
-
git (>= 1.2.5)
|
35
|
-
github_api
|
36
|
-
highline (>= 1.6.15)
|
37
|
-
nokogiri (>= 1.5.10)
|
38
|
-
rake
|
39
|
-
rdoc
|
40
|
-
json (1.8.1)
|
41
|
-
json (1.8.1-java)
|
42
|
-
jwt (0.1.11)
|
43
|
-
multi_json (>= 1.5)
|
44
|
-
mini_portile (0.5.2)
|
45
|
-
multi_json (1.9.0)
|
46
|
-
multi_test (0.0.3)
|
47
|
-
multi_xml (0.5.5)
|
48
|
-
multipart-post (2.0.0)
|
49
|
-
nokogiri (1.6.1)
|
50
|
-
mini_portile (~> 0.5.0)
|
51
|
-
nokogiri (1.6.1-java)
|
52
|
-
mini_portile (~> 0.5.0)
|
53
|
-
oauth2 (0.9.3)
|
54
|
-
faraday (>= 0.8, < 0.10)
|
55
|
-
jwt (~> 0.1.8)
|
56
|
-
multi_json (~> 1.3)
|
57
|
-
multi_xml (~> 0.5)
|
58
|
-
rack (~> 1.2)
|
59
|
-
rack (1.5.2)
|
60
|
-
rake (10.1.1)
|
61
|
-
rdoc (4.1.1)
|
62
|
-
json (~> 1.4)
|
63
|
-
regressiontest (0.0.3)
|
64
|
-
rspec (2.14.1)
|
65
|
-
rspec-core (~> 2.14.0)
|
66
|
-
rspec-expectations (~> 2.14.0)
|
67
|
-
rspec-mocks (~> 2.14.0)
|
68
|
-
rspec-core (2.14.8)
|
69
|
-
rspec-expectations (2.14.5)
|
70
|
-
diff-lcs (>= 1.1.3, < 2.0)
|
71
|
-
rspec-mocks (2.14.6)
|
72
|
-
|
73
|
-
PLATFORMS
|
74
|
-
java
|
75
|
-
ruby
|
76
|
-
|
77
|
-
DEPENDENCIES
|
78
|
-
cucumber (>= 1.3.11)
|
79
|
-
jeweler (>= 2.0.1)
|
80
|
-
regressiontest (>= 0.0.3)
|
81
|
-
rspec (>= 2.14.0)
|
data/ragel/gen_vcfheaderline_parser.rb
DELETED
@@ -1,483 +0,0 @@
|
|
1
|
-
|
2
|
-
# line 1 "gen_vcfheaderline_parser.rl"
|
3
|
-
# Ragel lexer for VCF-header
|
4
|
-
#
|
5
|
-
# This is a partial lexer for the VCF header format. Bio-vcf uses this
|
6
|
-
# to generate meta information in (for example) JSON format. The
|
7
|
-
# advantage of using a full state engine is that it allows for easy
|
8
|
-
# parsing of key-value pairs with syntax checking and, for example,
|
9
|
-
# escaped quotes in quoted string values. This edition validates ID and
|
10
|
-
# Number fields only.
|
11
|
-
#
|
12
|
-
# Note the .rb version is generated from ./ragel/gen_vcfheaderline_parser.rl
|
13
|
-
|
14
|
-
module BioVcf
|
15
|
-
|
16
|
-
module VcfHeaderParser
|
17
|
-
|
18
|
-
module RagelKeyValues
|
19
|
-
|
20
|
-
=begin
|
21
|
-
|
22
|
-
# line 57 "gen_vcfheaderline_parser.rl"
|
23
|
-
|
24
|
-
=end
|
25
|
-
|
26
|
-
|
27
|
-
# line 28 "gen_vcfheaderline_parser.rb"
|
28
|
-
class << self
|
29
|
-
attr_accessor :_simple_lexer_actions
|
30
|
-
private :_simple_lexer_actions, :_simple_lexer_actions=
|
31
|
-
end
|
32
|
-
self._simple_lexer_actions = [
|
33
|
-
0, 1, 0, 1, 1, 1, 2, 1,
|
34
|
-
3, 1, 4, 1, 5, 1, 6, 1,
|
35
|
-
9, 2, 0, 1, 2, 7, 9, 2,
|
36
|
-
8, 9, 3, 7, 8, 9
|
37
|
-
]
|
38
|
-
|
39
|
-
class << self
|
40
|
-
attr_accessor :_simple_lexer_key_offsets
|
41
|
-
private :_simple_lexer_key_offsets, :_simple_lexer_key_offsets=
|
42
|
-
end
|
43
|
-
self._simple_lexer_key_offsets = [
|
44
|
-
0, 0, 1, 2, 5, 6, 7, 8,
|
45
|
-
14, 20, 27, 32, 34, 36, 38, 40,
|
46
|
-
40, 40, 42, 44, 50, 57, 64, 68,
|
47
|
-
74, 81, 89, 97, 105, 113, 120, 128,
|
48
|
-
130, 132, 133, 134, 135, 136, 137, 138,
|
49
|
-
139, 140, 141, 142, 144, 160, 167, 172,
|
50
|
-
176, 184, 192, 196
|
51
|
-
]
|
52
|
-
|
53
|
-
class << self
|
54
|
-
attr_accessor :_simple_lexer_trans_keys
|
55
|
-
private :_simple_lexer_trans_keys, :_simple_lexer_trans_keys=
|
56
|
-
end
|
57
|
-
self._simple_lexer_trans_keys = [
|
58
|
-
35, 35, 65, 70, 73, 76, 84, 61,
|
59
|
-
73, 78, 65, 90, 97, 122, 48, 57,
|
60
|
-
65, 90, 97, 122, 61, 48, 57, 65,
|
61
|
-
90, 97, 122, 32, 34, 39, 9, 13,
|
62
|
-
34, 92, 34, 92, 39, 92, 39, 92,
|
63
|
-
48, 57, 48, 57, 48, 57, 65, 90,
|
64
|
-
97, 122, 68, 48, 57, 65, 90, 97,
|
65
|
-
122, 61, 48, 57, 65, 90, 97, 122,
|
66
|
-
65, 90, 97, 122, 48, 57, 65, 90,
|
67
|
-
97, 122, 117, 48, 57, 65, 90, 97,
|
68
|
-
122, 61, 109, 48, 57, 65, 90, 97,
|
69
|
-
122, 61, 98, 48, 57, 65, 90, 97,
|
70
|
-
122, 61, 101, 48, 57, 65, 90, 97,
|
71
|
-
122, 61, 114, 48, 57, 65, 90, 97,
|
72
|
-
122, 61, 48, 57, 65, 90, 97, 122,
|
73
|
-
43, 45, 46, 65, 71, 82, 48, 57,
|
74
|
-
48, 57, 73, 79, 76, 84, 69, 82,
|
75
|
-
82, 77, 65, 78, 70, 79, 44, 60,
|
76
|
-
32, 34, 39, 44, 46, 60, 9, 13,
|
77
|
-
43, 45, 48, 57, 65, 90, 97, 122,
|
78
|
-
32, 34, 39, 44, 60, 9, 13, 44,
|
79
|
-
46, 60, 48, 57, 44, 60, 48, 57,
|
80
|
-
44, 60, 48, 57, 65, 90, 97, 122,
|
81
|
-
44, 60, 48, 57, 65, 90, 97, 122,
|
82
|
-
44, 60, 48, 57, 44, 60, 0
|
83
|
-
]
|
84
|
-
|
85
|
-
class << self
|
86
|
-
attr_accessor :_simple_lexer_single_lengths
|
87
|
-
private :_simple_lexer_single_lengths, :_simple_lexer_single_lengths=
|
88
|
-
end
|
89
|
-
self._simple_lexer_single_lengths = [
|
90
|
-
0, 1, 1, 3, 1, 1, 1, 2,
|
91
|
-
0, 1, 3, 2, 2, 2, 2, 0,
|
92
|
-
0, 0, 0, 0, 1, 1, 0, 0,
|
93
|
-
1, 2, 2, 2, 2, 1, 6, 0,
|
94
|
-
2, 1, 1, 1, 1, 1, 1, 1,
|
95
|
-
1, 1, 1, 2, 6, 5, 3, 2,
|
96
|
-
2, 2, 2, 2
|
97
|
-
]
|
98
|
-
|
99
|
-
class << self
|
100
|
-
attr_accessor :_simple_lexer_range_lengths
|
101
|
-
private :_simple_lexer_range_lengths, :_simple_lexer_range_lengths=
|
102
|
-
end
|
103
|
-
self._simple_lexer_range_lengths = [
|
104
|
-
0, 0, 0, 0, 0, 0, 0, 2,
|
105
|
-
3, 3, 1, 0, 0, 0, 0, 0,
|
106
|
-
0, 1, 1, 3, 3, 3, 2, 3,
|
107
|
-
3, 3, 3, 3, 3, 3, 1, 1,
|
108
|
-
0, 0, 0, 0, 0, 0, 0, 0,
|
109
|
-
0, 0, 0, 0, 5, 1, 1, 1,
|
110
|
-
3, 3, 1, 0
|
111
|
-
]
|
112
|
-
|
113
|
-
class << self
|
114
|
-
attr_accessor :_simple_lexer_index_offsets
|
115
|
-
private :_simple_lexer_index_offsets, :_simple_lexer_index_offsets=
|
116
|
-
end
|
117
|
-
self._simple_lexer_index_offsets = [
|
118
|
-
0, 0, 2, 4, 8, 10, 12, 14,
|
119
|
-
19, 23, 28, 33, 36, 39, 42, 45,
|
120
|
-
46, 47, 49, 51, 55, 60, 65, 68,
|
121
|
-
72, 77, 83, 89, 95, 101, 106, 114,
|
122
|
-
116, 119, 121, 123, 125, 127, 129, 131,
|
123
|
-
133, 135, 137, 139, 142, 154, 161, 166,
|
124
|
-
170, 176, 182, 186
|
125
|
-
]
|
126
|
-
|
127
|
-
class << self
|
128
|
-
attr_accessor :_simple_lexer_indicies
|
129
|
-
private :_simple_lexer_indicies, :_simple_lexer_indicies=
|
130
|
-
end
|
131
|
-
self._simple_lexer_indicies = [
|
132
|
-
0, 1, 2, 1, 3, 4, 5, 1,
|
133
|
-
6, 1, 7, 1, 8, 1, 11, 12,
|
134
|
-
10, 10, 9, 14, 14, 14, 13, 15,
|
135
|
-
14, 14, 14, 13, 16, 17, 18, 16,
|
136
|
-
13, 20, 21, 19, 23, 24, 22, 20,
|
137
|
-
26, 25, 23, 28, 27, 27, 22, 29,
|
138
|
-
13, 30, 13, 31, 31, 31, 13, 33,
|
139
|
-
14, 14, 14, 32, 34, 14, 14, 14,
|
140
|
-
32, 35, 35, 32, 36, 36, 36, 32,
|
141
|
-
38, 14, 14, 14, 37, 15, 39, 14,
|
142
|
-
14, 14, 37, 15, 40, 14, 14, 14,
|
143
|
-
37, 15, 41, 14, 14, 14, 37, 15,
|
144
|
-
42, 14, 14, 14, 37, 43, 14, 14,
|
145
|
-
14, 37, 44, 44, 45, 45, 45, 45,
|
146
|
-
46, 37, 47, 37, 48, 49, 1, 50,
|
147
|
-
1, 51, 1, 52, 1, 7, 1, 53,
|
148
|
-
1, 54, 1, 6, 1, 55, 1, 56,
|
149
|
-
1, 7, 1, 57, 57, 1, 16, 17,
|
150
|
-
18, 57, 8, 57, 16, 58, 29, 59,
|
151
|
-
59, 1, 16, 17, 18, 57, 57, 16,
|
152
|
-
1, 57, 60, 57, 29, 1, 57, 57,
|
153
|
-
30, 1, 61, 61, 31, 31, 31, 1,
|
154
|
-
62, 62, 36, 36, 36, 1, 63, 63,
|
155
|
-
47, 1, 63, 63, 1, 0
|
156
|
-
]
|
157
|
-
|
158
|
-
class << self
|
159
|
-
attr_accessor :_simple_lexer_trans_targs
|
160
|
-
private :_simple_lexer_trans_targs, :_simple_lexer_trans_targs=
|
161
|
-
end
|
162
|
-
self._simple_lexer_trans_targs = [
|
163
|
-
2, 0, 3, 4, 32, 40, 5, 6,
|
164
|
-
43, 0, 8, 20, 24, 0, 9, 44,
|
165
|
-
10, 11, 13, 12, 45, 16, 12, 45,
|
166
|
-
16, 14, 15, 14, 15, 46, 47, 48,
|
167
|
-
0, 21, 22, 23, 49, 0, 25, 26,
|
168
|
-
27, 28, 29, 30, 31, 51, 50, 50,
|
169
|
-
33, 37, 34, 35, 36, 38, 39, 41,
|
170
|
-
42, 7, 17, 19, 18, 7, 7, 7
|
171
|
-
]
|
172
|
-
|
173
|
-
class << self
|
174
|
-
attr_accessor :_simple_lexer_trans_actions
|
175
|
-
private :_simple_lexer_trans_actions, :_simple_lexer_trans_actions=
|
176
|
-
end
|
177
|
-
self._simple_lexer_trans_actions = [
|
178
|
-
0, 0, 0, 0, 0, 0, 0, 0,
|
179
|
-
0, 26, 1, 1, 1, 15, 0, 7,
|
180
|
-
0, 0, 0, 1, 17, 1, 0, 3,
|
181
|
-
0, 1, 1, 0, 0, 0, 0, 0,
|
182
|
-
20, 0, 5, 1, 0, 23, 0, 0,
|
183
|
-
0, 0, 0, 5, 1, 1, 1, 0,
|
184
|
-
0, 0, 0, 0, 0, 0, 0, 0,
|
185
|
-
0, 0, 0, 1, 0, 9, 11, 13
|
186
|
-
]
|
187
|
-
|
188
|
-
class << self
|
189
|
-
attr_accessor :_simple_lexer_eof_actions
|
190
|
-
private :_simple_lexer_eof_actions, :_simple_lexer_eof_actions=
|
191
|
-
end
|
192
|
-
self._simple_lexer_eof_actions = [
|
193
|
-
0, 0, 0, 0, 0, 0, 0, 26,
|
194
|
-
15, 15, 15, 15, 15, 15, 15, 15,
|
195
|
-
15, 15, 15, 15, 20, 20, 20, 20,
|
196
|
-
23, 23, 23, 23, 23, 23, 23, 23,
|
197
|
-
0, 0, 0, 0, 0, 0, 0, 0,
|
198
|
-
0, 0, 0, 0, 0, 0, 0, 0,
|
199
|
-
9, 11, 13, 13
|
200
|
-
]
|
201
|
-
|
202
|
-
class << self
|
203
|
-
attr_accessor :simple_lexer_start
|
204
|
-
end
|
205
|
-
self.simple_lexer_start = 1;
|
206
|
-
class << self
|
207
|
-
attr_accessor :simple_lexer_first_final
|
208
|
-
end
|
209
|
-
self.simple_lexer_first_final = 43;
|
210
|
-
class << self
|
211
|
-
attr_accessor :simple_lexer_error
|
212
|
-
end
|
213
|
-
self.simple_lexer_error = 0;
|
214
|
-
|
215
|
-
class << self
|
216
|
-
attr_accessor :simple_lexer_en_main
|
217
|
-
end
|
218
|
-
self.simple_lexer_en_main = 1;
|
219
|
-
|
220
|
-
|
221
|
-
# line 61 "gen_vcfheaderline_parser.rl"
|
222
|
-
# %% this just fixes our syntax highlighting...
|
223
|
-
|
224
|
-
def self.run_lexer(buf, options = {})
|
225
|
-
do_debug = (options[:debug] == true)
|
226
|
-
data = buf.unpack("c*") if(buf.is_a?(String))
|
227
|
-
eof = data.length
|
228
|
-
values = []
|
229
|
-
stack = []
|
230
|
-
|
231
|
-
emit = lambda { |type, data, ts, p|
|
232
|
-
# Print the type and text of the last read token
|
233
|
-
# p ts,p
|
234
|
-
puts "#{type}: #{data[ts...p].pack('c*')}" if do_debug
|
235
|
-
values << [type,data[ts...p].pack('c*')]
|
236
|
-
}
|
237
|
-
|
238
|
-
error_code = nil
|
239
|
-
|
240
|
-
|
241
|
-
# line 242 "gen_vcfheaderline_parser.rb"
|
242
|
-
begin
|
243
|
-
p ||= 0
|
244
|
-
pe ||= data.length
|
245
|
-
cs = simple_lexer_start
|
246
|
-
end
|
247
|
-
|
248
|
-
# line 80 "gen_vcfheaderline_parser.rl"
|
249
|
-
|
250
|
-
# line 251 "gen_vcfheaderline_parser.rb"
|
251
|
-
begin
|
252
|
-
_klen, _trans, _keys, _acts, _nacts = nil
|
253
|
-
_goto_level = 0
|
254
|
-
_resume = 10
|
255
|
-
_eof_trans = 15
|
256
|
-
_again = 20
|
257
|
-
_test_eof = 30
|
258
|
-
_out = 40
|
259
|
-
while true
|
260
|
-
_trigger_goto = false
|
261
|
-
if _goto_level <= 0
|
262
|
-
if p == pe
|
263
|
-
_goto_level = _test_eof
|
264
|
-
next
|
265
|
-
end
|
266
|
-
if cs == 0
|
267
|
-
_goto_level = _out
|
268
|
-
next
|
269
|
-
end
|
270
|
-
end
|
271
|
-
if _goto_level <= _resume
|
272
|
-
_keys = _simple_lexer_key_offsets[cs]
|
273
|
-
_trans = _simple_lexer_index_offsets[cs]
|
274
|
-
_klen = _simple_lexer_single_lengths[cs]
|
275
|
-
_break_match = false
|
276
|
-
|
277
|
-
begin
|
278
|
-
if _klen > 0
|
279
|
-
_lower = _keys
|
280
|
-
_upper = _keys + _klen - 1
|
281
|
-
|
282
|
-
loop do
|
283
|
-
break if _upper < _lower
|
284
|
-
_mid = _lower + ( (_upper - _lower) >> 1 )
|
285
|
-
|
286
|
-
if data[p].ord < _simple_lexer_trans_keys[_mid]
|
287
|
-
_upper = _mid - 1
|
288
|
-
elsif data[p].ord > _simple_lexer_trans_keys[_mid]
|
289
|
-
_lower = _mid + 1
|
290
|
-
else
|
291
|
-
_trans += (_mid - _keys)
|
292
|
-
_break_match = true
|
293
|
-
break
|
294
|
-
end
|
295
|
-
end # loop
|
296
|
-
break if _break_match
|
297
|
-
_keys += _klen
|
298
|
-
_trans += _klen
|
299
|
-
end
|
300
|
-
_klen = _simple_lexer_range_lengths[cs]
|
301
|
-
if _klen > 0
|
302
|
-
_lower = _keys
|
303
|
-
_upper = _keys + (_klen << 1) - 2
|
304
|
-
loop do
|
305
|
-
break if _upper < _lower
|
306
|
-
_mid = _lower + (((_upper-_lower) >> 1) & ~1)
|
307
|
-
if data[p].ord < _simple_lexer_trans_keys[_mid]
|
308
|
-
_upper = _mid - 2
|
309
|
-
elsif data[p].ord > _simple_lexer_trans_keys[_mid+1]
|
310
|
-
_lower = _mid + 2
|
311
|
-
else
|
312
|
-
_trans += ((_mid - _keys) >> 1)
|
313
|
-
_break_match = true
|
314
|
-
break
|
315
|
-
end
|
316
|
-
end # loop
|
317
|
-
break if _break_match
|
318
|
-
_trans += _klen
|
319
|
-
end
|
320
|
-
end while false
|
321
|
-
_trans = _simple_lexer_indicies[_trans]
|
322
|
-
cs = _simple_lexer_trans_targs[_trans]
|
323
|
-
if _simple_lexer_trans_actions[_trans] != 0
|
324
|
-
_acts = _simple_lexer_trans_actions[_trans]
|
325
|
-
_nacts = _simple_lexer_actions[_acts]
|
326
|
-
_acts += 1
|
327
|
-
while _nacts > 0
|
328
|
-
_nacts -= 1
|
329
|
-
_acts += 1
|
330
|
-
case _simple_lexer_actions[_acts - 1]
|
331
|
-
when 0 then
|
332
|
-
# line 23 "gen_vcfheaderline_parser.rl"
|
333
|
-
begin
|
334
|
-
ts=p end
|
335
|
-
when 1 then
|
336
|
-
# line 24 "gen_vcfheaderline_parser.rl"
|
337
|
-
begin
|
338
|
-
|
339
|
-
emit.call(:value,data,ts,p)
|
340
|
-
end
|
341
|
-
when 2 then
|
342
|
-
# line 28 "gen_vcfheaderline_parser.rl"
|
343
|
-
begin
|
344
|
-
|
345
|
-
emit.call(:kw,data,ts,p)
|
346
|
-
end
|
347
|
-
when 3 then
|
348
|
-
# line 46 "gen_vcfheaderline_parser.rl"
|
349
|
-
begin
|
350
|
-
emit.call(:key_word,data,ts,p) end
|
351
|
-
when 4 then
|
352
|
-
# line 47 "gen_vcfheaderline_parser.rl"
|
353
|
-
begin
|
354
|
-
emit.call(:value,data,ts,p) end
|
355
|
-
when 5 then
|
356
|
-
# line 48 "gen_vcfheaderline_parser.rl"
|
357
|
-
begin
|
358
|
-
emit.call(:value,data,ts,p) end
|
359
|
-
when 6 then
|
360
|
-
# line 50 "gen_vcfheaderline_parser.rl"
|
361
|
-
begin
|
362
|
-
emit.call(:value,data,ts,p) end
|
363
|
-
when 7 then
|
364
|
-
# line 52 "gen_vcfheaderline_parser.rl"
|
365
|
-
begin
|
366
|
-
error_code="ID" end
|
367
|
-
when 8 then
|
368
|
-
# line 53 "gen_vcfheaderline_parser.rl"
|
369
|
-
begin
|
370
|
-
error_code="Number" end
|
371
|
-
when 9 then
|
372
|
-
# line 54 "gen_vcfheaderline_parser.rl"
|
373
|
-
begin
|
374
|
-
error_code="key-value" end
|
375
|
-
# line 376 "gen_vcfheaderline_parser.rb"
|
376
|
-
end # action switch
|
377
|
-
end
|
378
|
-
end
|
379
|
-
if _trigger_goto
|
380
|
-
next
|
381
|
-
end
|
382
|
-
end
|
383
|
-
if _goto_level <= _again
|
384
|
-
if cs == 0
|
385
|
-
_goto_level = _out
|
386
|
-
next
|
387
|
-
end
|
388
|
-
p += 1
|
389
|
-
if p != pe
|
390
|
-
_goto_level = _resume
|
391
|
-
next
|
392
|
-
end
|
393
|
-
end
|
394
|
-
if _goto_level <= _test_eof
|
395
|
-
if p == eof
|
396
|
-
__acts = _simple_lexer_eof_actions[cs]
|
397
|
-
__nacts = _simple_lexer_actions[__acts]
|
398
|
-
__acts += 1
|
399
|
-
while __nacts > 0
|
400
|
-
__nacts -= 1
|
401
|
-
__acts += 1
|
402
|
-
case _simple_lexer_actions[__acts - 1]
|
403
|
-
when 4 then
|
404
|
-
# line 47 "gen_vcfheaderline_parser.rl"
|
405
|
-
begin
|
406
|
-
emit.call(:value,data,ts,p) end
|
407
|
-
when 5 then
|
408
|
-
# line 48 "gen_vcfheaderline_parser.rl"
|
409
|
-
begin
|
410
|
-
emit.call(:value,data,ts,p) end
|
411
|
-
when 6 then
|
412
|
-
# line 50 "gen_vcfheaderline_parser.rl"
|
413
|
-
begin
|
414
|
-
emit.call(:value,data,ts,p) end
|
415
|
-
when 7 then
|
416
|
-
# line 52 "gen_vcfheaderline_parser.rl"
|
417
|
-
begin
|
418
|
-
error_code="ID" end
|
419
|
-
when 8 then
|
420
|
-
# line 53 "gen_vcfheaderline_parser.rl"
|
421
|
-
begin
|
422
|
-
error_code="Number" end
|
423
|
-
when 9 then
|
424
|
-
# line 54 "gen_vcfheaderline_parser.rl"
|
425
|
-
begin
|
426
|
-
error_code="key-value" end
|
427
|
-
# line 428 "gen_vcfheaderline_parser.rb"
|
428
|
-
end # eof action switch
|
429
|
-
end
|
430
|
-
if _trigger_goto
|
431
|
-
next
|
432
|
-
end
|
433
|
-
end
|
434
|
-
end
|
435
|
-
if _goto_level <= _out
|
436
|
-
break
|
437
|
-
end
|
438
|
-
end
|
439
|
-
end
|
440
|
-
|
441
|
-
# line 81 "gen_vcfheaderline_parser.rl"
|
442
|
-
|
443
|
-
raise "ERROR: "+error_code+" in "+buf if error_code
|
444
|
-
|
445
|
-
begin
|
446
|
-
res = {}
|
447
|
-
# p values
|
448
|
-
values.each_slice(2) do | a,b |
|
449
|
-
# p '*',a,b
|
450
|
-
res[a[1]] = b[1]
|
451
|
-
# p h[:value] if h[:name]==:identifier or h[:name]==:value or h[:name]==:string
|
452
|
-
end
|
453
|
-
rescue
|
454
|
-
print "ERROR: "
|
455
|
-
p values
|
456
|
-
raise
|
457
|
-
end
|
458
|
-
p res if do_debug
|
459
|
-
res
|
460
|
-
end
|
461
|
-
end
|
462
|
-
end
|
463
|
-
end
|
464
|
-
|
465
|
-
if __FILE__ == $0
|
466
|
-
|
467
|
-
lines = <<LINES
|
468
|
-
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
|
469
|
-
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Total read depth",Extra="Yes?">
|
470
|
-
##FORMAT=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
|
471
|
-
##INFO=<ID=PM,Number=0,Type=Flag,Description="Variant is Precious(Clinical,Pubmed Cited)">
|
472
|
-
##INFO=<ID=VP,Number=1,Type=String,Description="Variation Property. Documentation is at ftp://ftp.ncbi.nlm.nih.gov/snp/specs/dbSNP_BitField_latest.pdf",Source="dbsnp",Version="138">
|
473
|
-
##INFO=<ID=GENEINFO,Number=1,Type=String,Description="Pairs each of gene symbol:gene id. The gene symbol and id are delimited by a colon (:), and each pair is delimited by a vertical bar (|)">
|
474
|
-
##INFO=<ID=CLNHGVS,Number=.,Type=String,Description="Variant names from HGVS. The order of these variants corresponds to the order of the info in the other clinical INFO tags.">
|
475
|
-
##INFO=<ID=CLNHGVS1,Number=.,Type=String,Description="Variant names from \\"HGVS\\". The order of these 'variants' corresponds to the order of the info in the other clinical INFO tags.">
|
476
|
-
LINES
|
477
|
-
|
478
|
-
lines.strip.split("\n").each { |s|
|
479
|
-
print s,"\n"
|
480
|
-
p BioVcf::VcfHeaderParser::RagelKeyValues.run_lexer(s, debug: false)
|
481
|
-
}
|
482
|
-
|
483
|
-
end # test
|