bio-vcf 0.7.0 → 0.7.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +3 -2
- data/Gemfile +2 -5
- data/Gemfile.lock +3 -3
- data/README.md +101 -23
- data/Rakefile +4 -2
- data/VERSION +1 -1
- data/bin/bio-vcf +133 -73
- data/bio-vcf.gemspec +13 -10
- data/features/cli.feature +9 -1
- data/features/multisample.feature +4 -4
- data/features/sfilter.feature +1 -1
- data/features/step_definitions/cli-feature.rb +4 -0
- data/features/step_definitions/multisample.rb +24 -12
- data/features/step_definitions/sfilter.rb +80 -31
- data/lib/bio-vcf.rb +1 -0
- data/lib/bio-vcf/vcfgenotypefield.rb +45 -9
- data/lib/bio-vcf/vcfheader.rb +1 -1
- data/lib/bio-vcf/vcfrecord.rb +14 -8
- data/lib/bio-vcf/vcfsample.rb +101 -152
- data/lib/bio-vcf/vcfstatistics.rb +28 -0
- data/test/data/regression/ifilter_s.dp.ref +31 -0
- data/test/data/regression/thread4_4_failed_filter-stderr.ref +1 -0
- metadata +16 -12
data/bio-vcf.gemspec
CHANGED
@@ -5,12 +5,12 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "bio-vcf"
|
8
|
-
s.version = "0.7.
|
8
|
+
s.version = "0.7.3"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Pjotr Prins"]
|
12
|
-
s.date = "2014-
|
13
|
-
s.description = "Smart parser for VCF format"
|
12
|
+
s.date = "2014-09-01"
|
13
|
+
s.description = "Smart lazy multi-threaded parser for VCF format with useful filtering and output rewriting"
|
14
14
|
s.email = "pjotr.public01@thebird.nl"
|
15
15
|
s.executables = ["bio-vcf"]
|
16
16
|
s.extra_rdoc_files = [
|
@@ -49,10 +49,12 @@ Gem::Specification.new do |s|
|
|
49
49
|
"lib/bio-vcf/vcfrdf.rb",
|
50
50
|
"lib/bio-vcf/vcfrecord.rb",
|
51
51
|
"lib/bio-vcf/vcfsample.rb",
|
52
|
+
"lib/bio-vcf/vcfstatistics.rb",
|
52
53
|
"test/data/input/dbsnp.vcf",
|
53
54
|
"test/data/input/multisample.vcf",
|
54
55
|
"test/data/input/somaticsniper.vcf",
|
55
56
|
"test/data/regression/eval_r.info.dp.ref",
|
57
|
+
"test/data/regression/ifilter_s.dp.ref",
|
56
58
|
"test/data/regression/r.info.dp.ref",
|
57
59
|
"test/data/regression/rewrite.info.sample.ref",
|
58
60
|
"test/data/regression/s.dp.ref",
|
@@ -60,13 +62,14 @@ Gem::Specification.new do |s|
|
|
60
62
|
"test/data/regression/sfilter_seval_s.dp.ref",
|
61
63
|
"test/data/regression/thread4.ref",
|
62
64
|
"test/data/regression/thread4_4.ref",
|
65
|
+
"test/data/regression/thread4_4_failed_filter-stderr.ref",
|
63
66
|
"test/performance/metrics.md"
|
64
67
|
]
|
65
68
|
s.homepage = "http://github.com/pjotrp/bioruby-vcf"
|
66
69
|
s.licenses = ["MIT"]
|
67
70
|
s.require_paths = ["lib"]
|
68
71
|
s.rubygems_version = "2.0.3"
|
69
|
-
s.summary = "VCF parser"
|
72
|
+
s.summary = "Fast multi-threaded VCF parser"
|
70
73
|
|
71
74
|
if s.respond_to? :specification_version then
|
72
75
|
s.specification_version = 4
|
@@ -74,19 +77,19 @@ Gem::Specification.new do |s|
|
|
74
77
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
75
78
|
s.add_development_dependency(%q<rspec>, [">= 0"])
|
76
79
|
s.add_development_dependency(%q<cucumber>, [">= 0"])
|
77
|
-
s.add_development_dependency(%q<jeweler>, ["
|
78
|
-
s.add_development_dependency(%q<regressiontest>, ["
|
80
|
+
s.add_development_dependency(%q<jeweler>, ["~> 2.0.1"])
|
81
|
+
s.add_development_dependency(%q<regressiontest>, ["~> 0.0.3"])
|
79
82
|
else
|
80
83
|
s.add_dependency(%q<rspec>, [">= 0"])
|
81
84
|
s.add_dependency(%q<cucumber>, [">= 0"])
|
82
|
-
s.add_dependency(%q<jeweler>, ["
|
83
|
-
s.add_dependency(%q<regressiontest>, ["
|
85
|
+
s.add_dependency(%q<jeweler>, ["~> 2.0.1"])
|
86
|
+
s.add_dependency(%q<regressiontest>, ["~> 0.0.3"])
|
84
87
|
end
|
85
88
|
else
|
86
89
|
s.add_dependency(%q<rspec>, [">= 0"])
|
87
90
|
s.add_dependency(%q<cucumber>, [">= 0"])
|
88
|
-
s.add_dependency(%q<jeweler>, ["
|
89
|
-
s.add_dependency(%q<regressiontest>, ["
|
91
|
+
s.add_dependency(%q<jeweler>, ["~> 2.0.1"])
|
92
|
+
s.add_dependency(%q<regressiontest>, ["~> 0.0.3"])
|
90
93
|
end
|
91
94
|
end
|
92
95
|
|
data/features/cli.feature
CHANGED
@@ -23,6 +23,11 @@ Feature: Command-line interface (CLI)
|
|
23
23
|
When I execute "./bin/bio-vcf -i --sfilter 's.dp>20'"
|
24
24
|
Then I expect the named output to match the named output "s.dp"
|
25
25
|
|
26
|
+
Scenario: Test the include sample filter using dp
|
27
|
+
Given I have input file(s) named "test/data/input/multisample.vcf"
|
28
|
+
When I execute "./bin/bio-vcf -i --ifilter 's.dp>100' --seval s.dp"
|
29
|
+
Then I expect the named output to match the named output "ifilter_s.dp"
|
30
|
+
|
26
31
|
Scenario: Test the info eval using dp
|
27
32
|
Given I have input file(s) named "test/data/input/multisample.vcf"
|
28
33
|
When I execute "./bin/bio-vcf -i --eval 'r.info.dp'"
|
@@ -44,5 +49,8 @@ Feature: Command-line interface (CLI)
|
|
44
49
|
When I execute "./bin/bio-vcf --rewrite rec.info[\'sample\']=\'XXXXX\'"
|
45
50
|
Then I expect the named output to match the named output "rewrite.info.sample"
|
46
51
|
|
47
|
-
|
52
|
+
Scenario: Test deadlock on failed filter with threads
|
53
|
+
Given I have input file(s) named "test/data/input/multisample.vcf"
|
54
|
+
When I execute "./bin/bio-vcf -i --num-threads 4 --thread-lines 4 --filter 't.info.dp>2'"
|
55
|
+
Then I expect an error and the named output to match the named output "thread4_4_failed_filter" in under 30 seconds
|
48
56
|
|
@@ -27,17 +27,17 @@ Feature: Multi-sample VCF
|
|
27
27
|
And I expect rec.info.dp to be 1537
|
28
28
|
And I expect rec.info.readposranksum to be 0.815
|
29
29
|
And I expect rec.sample['Original'].ad to be [189,25]
|
30
|
-
And I expect rec.sample['Original'].gt to be
|
30
|
+
And I expect rec.sample['Original'].gt to be "0/1"
|
31
31
|
And I expect rec.sample['s3t2'].ad to be [167,26]
|
32
32
|
And I expect rec.sample['s3t2'].dp to be 196
|
33
33
|
And I expect rec.sample['s3t2'].gq to be 20
|
34
34
|
And I expect rec.sample['s3t2'].pl to be [20,0,522]
|
35
35
|
# And the nicer self resolving
|
36
|
-
And I expect rec.sample.original.gt to be
|
36
|
+
And I expect rec.sample.original.gt to be "0/1"
|
37
37
|
And I expect rec.sample.s3t2.pl to be [20,0,522]
|
38
38
|
# And the even better
|
39
|
-
And I expect
|
40
|
-
And I expect rec.original.gt to be
|
39
|
+
And I expect rec.original.gt? to be true
|
40
|
+
And I expect rec.original.gt to be "0/1"
|
41
41
|
And I expect rec.s3t2.pl to be [20,0,522]
|
42
42
|
# Check for missing data
|
43
43
|
And I expect test rec.missing_samples? to be false
|
data/features/sfilter.feature
CHANGED
@@ -35,7 +35,7 @@ Feature: Sample filters
|
|
35
35
|
When I evaluate empty './.'
|
36
36
|
Then I expect s.empty? to be true
|
37
37
|
Then I expect s.dp? to be false
|
38
|
-
Then I expect s.dp to
|
38
|
+
Then I expect s.dp to be nil
|
39
39
|
And sfilter 's.dp>4' to throw an error
|
40
40
|
|
41
41
|
# Scenario: Missing sample with ignore missing set
|
@@ -10,3 +10,7 @@ end
|
|
10
10
|
Then(/^I expect the named output to match the named output "(.*?)"$/) do |arg1|
|
11
11
|
RegressionTest::CliExec::exec(@cmd,arg1,ignore: '##BioVcf=').should be_true
|
12
12
|
end
|
13
|
+
|
14
|
+
Then(/^I expect an error and the named output to match the named output "(.*?)" in under (\d+) seconds$/) do |arg1,arg2|
|
15
|
+
RegressionTest::CliExec::exec(@cmd,arg1,ignore: '(FATAL|Waiting|from|vcf|Options|Final pid)',should_fail: true,timeout:arg2.to_i).should be_true
|
16
|
+
end
|
@@ -37,6 +37,14 @@ Then(/^I expect rec\.info\.readposranksum to be (\d+)\.(\d+)$/) do |arg1, arg2|
|
|
37
37
|
expect(@rec1.info.readposranksum).to eq 0.815
|
38
38
|
end
|
39
39
|
|
40
|
+
Then(/^I expect rec\.sample\.original\.gt to be "(.*?)"$/) do |arg1|
|
41
|
+
expect(@rec1.sample['Original'].gt).to eq "0/1"
|
42
|
+
end
|
43
|
+
|
44
|
+
Then(/^I expect rec\.original\.gt to be "(.*?)"$/) do |arg1|
|
45
|
+
expect(@rec1.original.gt).to eq "0/1"
|
46
|
+
end
|
47
|
+
|
40
48
|
Then(/^I expect rec\.sample\['Original'\]\.gt to be "(.*?)"$/) do |arg1|
|
41
49
|
expect(@rec1.sample['Original'].gt).to eq "0/1"
|
42
50
|
end
|
@@ -97,10 +105,6 @@ Then(/^I expect rec\.original\? to be true$/) do
|
|
97
105
|
expect(@rec1.original?).to be true
|
98
106
|
end
|
99
107
|
|
100
|
-
Given(/^multisample vcf line with missing data$/) do |string|
|
101
|
-
pending # express the regexp above with the code you wish you had
|
102
|
-
end
|
103
|
-
|
104
108
|
Then(/^I expect rec\.original\? to be false$/) do
|
105
109
|
expect(@rec1.original?).to eq false
|
106
110
|
end
|
@@ -118,34 +122,42 @@ Then(/^I expect rec\.valid\? to be true$/) do
|
|
118
122
|
end
|
119
123
|
|
120
124
|
Then(/^I expect r\.original\.gt\? to be true$/) do
|
121
|
-
|
125
|
+
expect(@rec1.original.gt?).to be true
|
122
126
|
end
|
123
127
|
|
124
128
|
Then(/^I expect r\.original\? to be true$/) do
|
125
|
-
|
129
|
+
expect(@rec1.original?).to be true
|
130
|
+
end
|
131
|
+
|
132
|
+
Then(/^I expect rec\.original\? to be true$/) do
|
133
|
+
expect(@rec1.original?).to be true
|
134
|
+
end
|
135
|
+
|
136
|
+
Then(/^I expect rec\.original\.gt\? to be true$/) do
|
137
|
+
expect(@rec1.original.gt?).to be true
|
126
138
|
end
|
127
139
|
|
128
140
|
Then(/^I expect r\.original\.gti\? to be true$/) do
|
129
|
-
|
141
|
+
expect(@rec1.original.gti?).to eq true
|
130
142
|
end
|
131
143
|
|
132
144
|
Then(/^I expect r\.original\.gti to be \[(\d+),(\d+)\]$/) do |arg1, arg2|
|
133
|
-
|
145
|
+
expect(@rec1.original.gti).to eq [arg1.to_i,arg2.to_i]
|
134
146
|
end
|
135
147
|
|
136
148
|
Then(/^I expect r\.original\.gti\[(\d+)\] to be (\d+)$/) do |arg1, arg2|
|
137
|
-
|
149
|
+
expect(@rec1.original.gti[arg1.to_i]).to eq arg2.to_i
|
138
150
|
end
|
139
151
|
|
140
152
|
Then(/^I expect r\.original\.gts\? to be true$/) do
|
141
|
-
|
153
|
+
expect(@rec1.original.gts?).to eq true
|
142
154
|
end
|
143
155
|
|
144
156
|
Then(/^I expect r\.original\.gts to be \["(.*?)","(.*?)"\]$/) do |arg1, arg2|
|
145
|
-
|
157
|
+
expect(@rec1.original.gts).to eq [arg1,arg2]
|
146
158
|
end
|
147
159
|
|
148
160
|
Then(/^I expect r\.original\.gts\[(\d+)\] to be "(.*?)"$/) do |arg1, arg2|
|
149
|
-
|
161
|
+
expect(@rec1.original.gts[arg1.to_i]).to eq arg2
|
150
162
|
end
|
151
163
|
|
@@ -1,90 +1,139 @@
|
|
1
1
|
Given(/^the VCF line$/) do |string|
|
2
|
-
@header =
|
2
|
+
@header = VcfHeader.new
|
3
|
+
@header.add("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tSample")
|
3
4
|
@vcfline = string
|
4
5
|
end
|
5
6
|
|
6
7
|
When(/^I evaluate '([^']+)'$/) do |arg1|
|
8
|
+
# concat VCF line with sample (arg1)
|
7
9
|
@fields = VcfLine.parse((@vcfline.split(/\s+/)+[arg1]).join("\t"))
|
8
10
|
@rec = VcfRecord.new(@fields,@header)
|
9
11
|
p @rec
|
12
|
+
@g = @rec.sample['Sample']
|
13
|
+
p @g
|
14
|
+
expect(@g).not_to be nil
|
15
|
+
@s = VcfSample::Sample.new(@rec,@g)
|
16
|
+
@ignore_missing = false
|
10
17
|
end
|
11
18
|
|
12
19
|
Then(/^I expect s\.empty\? to be false$/) do
|
13
|
-
p @rec.sample[0]
|
14
20
|
expect(@s.empty?).to be false
|
21
|
+
expect(@s.sfilter("s.empty?",do_cache: false)).to be false
|
15
22
|
end
|
16
23
|
|
24
|
+
Then(/^I expect s\.dp\? to be true$/) do
|
25
|
+
p ['eval s.dp?',@s.eval("s.dp?",do_cache: false)]
|
26
|
+
p ['eval s.dp',@s.eval("s.dp",do_cache: false)]
|
27
|
+
p @g.dp
|
28
|
+
p @s.dp
|
29
|
+
p @s.sfilter("s.dp?",do_cache: false)
|
30
|
+
expect(@s.eval("s.dp?",do_cache: false)).to be true
|
31
|
+
end
|
17
32
|
|
18
33
|
Then(/^I expect s\.dp to be (\d+)$/) do |arg1|
|
19
|
-
|
34
|
+
# p @s.eval("s.dp")
|
35
|
+
p :now
|
36
|
+
p ['eval s.dp?',@s.eval("s.dp?",do_cache: false)]
|
37
|
+
p ['eval s.dp',@s.eval("s.dp",do_cache: false)]
|
38
|
+
expect(@s.eval("s.dp",do_cache: false)).to equal arg1.to_i
|
20
39
|
end
|
21
40
|
|
22
41
|
Then(/^sfilter 's\.dp>(\d+)' to be true$/) do |arg1|
|
23
|
-
|
42
|
+
expect(@s.sfilter("dp>#{arg1}",do_cache: false)).to be true
|
43
|
+
end
|
44
|
+
|
45
|
+
When(/^I evaluate missing '([^']+)'$/) do |arg1|
|
46
|
+
# concat VCF line with sample (arg1)
|
47
|
+
@fields = VcfLine.parse((@vcfline.split(/\s+/)+[arg1]).join("\t"))
|
48
|
+
@rec = VcfRecord.new(@fields,@header)
|
49
|
+
p @rec
|
50
|
+
@g = @rec.sample['Sample']
|
51
|
+
@s = VcfSample::Sample.new(@rec,@g)
|
52
|
+
p @s
|
53
|
+
expect(@s).not_to be nil
|
54
|
+
@ignore_missing = false
|
24
55
|
end
|
25
56
|
|
26
|
-
|
27
|
-
|
57
|
+
Then(/^I expect s\.dp\? to be false$/) do
|
58
|
+
expect(@s.eval("s.dp?",do_cache: false)).to be false
|
28
59
|
end
|
29
60
|
|
30
61
|
Then(/^I expect s\.dp to be nil$/) do
|
31
|
-
|
62
|
+
expect(@s.eval("s.dp",ignore_missing_data: @ignore_missing, do_cache: false)).to be nil
|
32
63
|
end
|
33
64
|
|
34
|
-
Then(/^sfilter 's\.dp>(\d+)' to
|
35
|
-
|
65
|
+
Then(/^sfilter 's\.dp>(\d+)' to throw an error$/) do |arg1|
|
66
|
+
expect { @s.eval("s.dp>#{arg1}",do_cache: false) }.to raise_error NoMethodError
|
36
67
|
end
|
37
68
|
|
38
|
-
|
39
|
-
|
69
|
+
Then(/^sfilter 's\.dp>(\d+)' to be false$/) do |arg1|
|
70
|
+
expect(@s.sfilter("s.dp>#{arg1}",ignore_missing_data: @ignore_missing, do_cache: false)).to be false
|
40
71
|
end
|
41
72
|
|
42
|
-
|
43
|
-
|
73
|
+
When(/^I evaluate empty '\.\/\.'$/) do
|
74
|
+
# concat VCF line with sample (arg1)
|
75
|
+
@fields = VcfLine.parse((@vcfline.split(/\s+/)+['./.']).join("\t"))
|
76
|
+
@rec = VcfRecord.new(@fields,@header)
|
77
|
+
p @rec
|
78
|
+
@g = @rec.sample['Sample']
|
79
|
+
@s = VcfSample::Sample.new(@rec,@g)
|
80
|
+
p @s
|
81
|
+
expect(@s).not_to be nil
|
82
|
+
@ignore_missing = false
|
44
83
|
end
|
45
84
|
|
46
|
-
When(/^I evaluate missing '(
|
47
|
-
|
85
|
+
When(/^I evaluate missing '([^']+)' with ignore missing$/) do |arg1|
|
86
|
+
# concat VCF line with sample (arg1)
|
87
|
+
@fields = VcfLine.parse((@vcfline.split(/\s+/)+[arg1]).join("\t"))
|
88
|
+
@rec = VcfRecord.new(@fields,@header)
|
89
|
+
p @rec
|
90
|
+
@g = @rec.sample['Sample']
|
91
|
+
@s = VcfSample::Sample.new(@rec,@g)
|
92
|
+
p @s
|
93
|
+
expect(@s).not_to be nil
|
94
|
+
@ignore_missing = true
|
48
95
|
end
|
49
96
|
|
50
97
|
Then(/^I expect s\.empty\? to be true$/) do
|
51
|
-
|
98
|
+
expect(@s.sfilter("s.empty?",do_cache: false)).to be true
|
52
99
|
end
|
53
100
|
|
54
101
|
Then(/^I expect s\.dp to throw an error$/) do
|
55
|
-
|
102
|
+
# @s.instance_eval { undef :dp }
|
103
|
+
p @s.eval("s.dp",do_cache: false)
|
104
|
+
expect { @s.eval("s.dp",do_cache: false) }.to raise_error NoMethodError
|
56
105
|
end
|
57
106
|
|
58
107
|
When(/^I evaluate empty '\.\/\.' with ignore missing$/) do
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
108
|
+
# concat VCF line with sample (arg1)
|
109
|
+
@fields = VcfLine.parse((@vcfline.split(/\s+/)+['./.']).join("\t"))
|
110
|
+
@rec = VcfRecord.new(@fields,@header)
|
111
|
+
p @rec
|
112
|
+
@g = @rec.sample['Sample']
|
113
|
+
@s = VcfSample::Sample.new(@rec,@g)
|
114
|
+
p @s
|
115
|
+
expect(@s).not_to be nil
|
116
|
+
@ignore_missing = true
|
68
117
|
end
|
69
118
|
|
70
119
|
Then(/^I expect s\.what\? to throw an error$/) do
|
71
|
-
|
120
|
+
expect { @s.eval("s.what?",do_cache: false) }.to raise_error RuntimeError
|
72
121
|
end
|
73
122
|
|
74
123
|
Then(/^I expect s\.what to throw an error$/) do
|
75
|
-
|
124
|
+
expect { @s.eval("s.what",do_cache: false) }.to raise_error NoMethodError
|
76
125
|
end
|
77
126
|
|
78
127
|
Then(/^I expect r\.chrom to be "(.*?)"$/) do |arg1|
|
79
|
-
|
128
|
+
expect(@s.eval("r.chrom",do_cache: false)).to eq "1"
|
80
129
|
end
|
81
130
|
|
82
131
|
Then(/^I expect r\.alt to be \["(.*?)"\]$/) do |arg1|
|
83
|
-
|
132
|
+
expect(@s.eval("r.alt",do_cache: false)).to eq ["G"]
|
84
133
|
end
|
85
134
|
|
86
135
|
Then(/^I expect r\.info\.af to be (\d+)\.(\d+)$/) do |arg1, arg2|
|
87
|
-
|
136
|
+
expect(@s.eval("r.info.af",do_cache: false)).to eq 0.667
|
88
137
|
end
|
89
138
|
|
90
139
|
|
data/lib/bio-vcf.rb
CHANGED
@@ -2,6 +2,15 @@ module BioVcf
|
|
2
2
|
|
3
3
|
MAXINT=100_000
|
4
4
|
|
5
|
+
class ValueError < Exception
|
6
|
+
end
|
7
|
+
|
8
|
+
module VcfValue
|
9
|
+
def VcfValue::empty? v
|
10
|
+
v == nil or v == '' or v == '.'
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
5
14
|
# Helper class for a list of (variant) values, such as A,G.
|
6
15
|
# The [] function does the hard work. You can pass in an index (integer)
|
7
16
|
# or nucleotide which translates to an index.
|
@@ -95,11 +104,12 @@ module BioVcf
|
|
95
104
|
|
96
105
|
attr_reader :format, :values, :header
|
97
106
|
|
98
|
-
def initialize s, format, header, alt
|
99
|
-
@is_empty = (s
|
107
|
+
def initialize s, format, header, ref, alt
|
108
|
+
@is_empty = VcfSample::empty?(s)
|
100
109
|
@original_s = s
|
101
110
|
@format = format
|
102
111
|
@header = header
|
112
|
+
@ref = ref
|
103
113
|
@alt = alt
|
104
114
|
end
|
105
115
|
|
@@ -116,7 +126,7 @@ module BioVcf
|
|
116
126
|
end
|
117
127
|
|
118
128
|
def valid?
|
119
|
-
|
129
|
+
!empty?
|
120
130
|
end
|
121
131
|
|
122
132
|
def dp4
|
@@ -141,14 +151,33 @@ module BioVcf
|
|
141
151
|
VcfAltInfoList.new(@alt,values[fetch('AMQ')])
|
142
152
|
end
|
143
153
|
|
154
|
+
def gti?
|
155
|
+
not VcfValue::empty?(fetch_value("GT"))
|
156
|
+
end
|
157
|
+
|
158
|
+
def gti
|
159
|
+
gt.split('/').map { |g| g.to_i }
|
160
|
+
end
|
161
|
+
|
162
|
+
def gts?
|
163
|
+
not VcfValue::empty?(fetch_value("GT"))
|
164
|
+
end
|
165
|
+
|
166
|
+
def gts
|
167
|
+
genotypes = [@ref] + @alt
|
168
|
+
gti.map { |i| genotypes[i] }
|
169
|
+
end
|
170
|
+
|
171
|
+
# Returns the value of a field
|
144
172
|
def method_missing(m, *args, &block)
|
145
173
|
return nil if @is_empty
|
146
174
|
if m =~ /\?$/
|
147
|
-
# query if a value exists, e.g., r.info.dp?
|
175
|
+
# query if a value exists, e.g., r.info.dp? or s.dp?
|
148
176
|
v = values[fetch(m.to_s.upcase.chop)]
|
149
|
-
|
177
|
+
return (not VcfValue::empty?(v))
|
150
178
|
else
|
151
179
|
v = values[fetch(m.to_s.upcase)]
|
180
|
+
return nil if VcfValue::empty?(v)
|
152
181
|
v = v.to_i if v =~ /^\d+$/
|
153
182
|
v = v.to_f if v =~ /^\d+\.\d+$/
|
154
183
|
v
|
@@ -157,13 +186,19 @@ module BioVcf
|
|
157
186
|
|
158
187
|
private
|
159
188
|
|
189
|
+
# Fetch a value and throw an error if it does not exist
|
160
190
|
def fetch name
|
161
191
|
raise "ERROR: Field with name #{name} does not exist!" if !@format[name]
|
162
192
|
@format[name]
|
163
193
|
end
|
164
194
|
|
195
|
+
def fetch_value name
|
196
|
+
values[fetch(name)]
|
197
|
+
end
|
198
|
+
|
199
|
+
# Return an integer list
|
165
200
|
def ilist name
|
166
|
-
v =
|
201
|
+
v = fetch_value(name)
|
167
202
|
return nil if not v
|
168
203
|
v.split(',').map{|i| i.to_i}
|
169
204
|
end
|
@@ -172,17 +207,18 @@ module BioVcf
|
|
172
207
|
|
173
208
|
# Holds all samples
|
174
209
|
class VcfGenotypeFields
|
175
|
-
def initialize fields, format, header, alt
|
210
|
+
def initialize fields, format, header, ref, alt
|
176
211
|
@fields = fields
|
177
212
|
@format = format
|
178
213
|
@header = header
|
214
|
+
@ref = ref
|
179
215
|
@alt = alt
|
180
216
|
@samples = {} # lazy cache
|
181
217
|
@sample_index = @header.sample_index()
|
182
218
|
end
|
183
219
|
|
184
220
|
def [] name
|
185
|
-
@samples[name] ||= VcfGenotypeField.new(@fields[@sample_index[name]],@format,@header,@alt)
|
221
|
+
@samples[name] ||= VcfGenotypeField.new(@fields[@sample_index[name]],@format,@header,@ref,@alt)
|
186
222
|
end
|
187
223
|
|
188
224
|
def method_missing(m, *args, &block)
|
@@ -191,7 +227,7 @@ module BioVcf
|
|
191
227
|
# test for valid sample
|
192
228
|
return !VcfSample::empty?(@fields[@sample_index[name.chop]])
|
193
229
|
else
|
194
|
-
@samples[name] ||= VcfGenotypeField.new(@fields[@sample_index[name]],@format,@header,@alt)
|
230
|
+
@samples[name] ||= VcfGenotypeField.new(@fields[@sample_index[name]],@format,@header,@ref,@alt)
|
195
231
|
end
|
196
232
|
end
|
197
233
|
|