bio-vcf 0.7.0 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +3 -2
- data/Gemfile +2 -5
- data/Gemfile.lock +3 -3
- data/README.md +101 -23
- data/Rakefile +4 -2
- data/VERSION +1 -1
- data/bin/bio-vcf +133 -73
- data/bio-vcf.gemspec +13 -10
- data/features/cli.feature +9 -1
- data/features/multisample.feature +4 -4
- data/features/sfilter.feature +1 -1
- data/features/step_definitions/cli-feature.rb +4 -0
- data/features/step_definitions/multisample.rb +24 -12
- data/features/step_definitions/sfilter.rb +80 -31
- data/lib/bio-vcf.rb +1 -0
- data/lib/bio-vcf/vcfgenotypefield.rb +45 -9
- data/lib/bio-vcf/vcfheader.rb +1 -1
- data/lib/bio-vcf/vcfrecord.rb +14 -8
- data/lib/bio-vcf/vcfsample.rb +101 -152
- data/lib/bio-vcf/vcfstatistics.rb +28 -0
- data/test/data/regression/ifilter_s.dp.ref +31 -0
- data/test/data/regression/thread4_4_failed_filter-stderr.ref +1 -0
- metadata +16 -12
data/bio-vcf.gemspec
CHANGED
@@ -5,12 +5,12 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "bio-vcf"
|
8
|
-
s.version = "0.7.0"
|
8
|
+
s.version = "0.7.3"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Pjotr Prins"]
|
12
|
-
s.date = "2014-
|
13
|
-
s.description = "Smart parser for VCF format"
|
12
|
+
s.date = "2014-09-01"
|
13
|
+
s.description = "Smart lazy multi-threaded parser for VCF format with useful filtering and output rewriting"
|
14
14
|
s.email = "pjotr.public01@thebird.nl"
|
15
15
|
s.executables = ["bio-vcf"]
|
16
16
|
s.extra_rdoc_files = [
|
@@ -49,10 +49,12 @@ Gem::Specification.new do |s|
|
|
49
49
|
"lib/bio-vcf/vcfrdf.rb",
|
50
50
|
"lib/bio-vcf/vcfrecord.rb",
|
51
51
|
"lib/bio-vcf/vcfsample.rb",
|
52
|
+
"lib/bio-vcf/vcfstatistics.rb",
|
52
53
|
"test/data/input/dbsnp.vcf",
|
53
54
|
"test/data/input/multisample.vcf",
|
54
55
|
"test/data/input/somaticsniper.vcf",
|
55
56
|
"test/data/regression/eval_r.info.dp.ref",
|
57
|
+
"test/data/regression/ifilter_s.dp.ref",
|
56
58
|
"test/data/regression/r.info.dp.ref",
|
57
59
|
"test/data/regression/rewrite.info.sample.ref",
|
58
60
|
"test/data/regression/s.dp.ref",
|
@@ -60,13 +62,14 @@ Gem::Specification.new do |s|
|
|
60
62
|
"test/data/regression/sfilter_seval_s.dp.ref",
|
61
63
|
"test/data/regression/thread4.ref",
|
62
64
|
"test/data/regression/thread4_4.ref",
|
65
|
+
"test/data/regression/thread4_4_failed_filter-stderr.ref",
|
63
66
|
"test/performance/metrics.md"
|
64
67
|
]
|
65
68
|
s.homepage = "http://github.com/pjotrp/bioruby-vcf"
|
66
69
|
s.licenses = ["MIT"]
|
67
70
|
s.require_paths = ["lib"]
|
68
71
|
s.rubygems_version = "2.0.3"
|
69
|
-
s.summary = "VCF parser"
|
72
|
+
s.summary = "Fast multi-threaded VCF parser"
|
70
73
|
|
71
74
|
if s.respond_to? :specification_version then
|
72
75
|
s.specification_version = 4
|
@@ -74,19 +77,19 @@ Gem::Specification.new do |s|
|
|
74
77
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
75
78
|
s.add_development_dependency(%q<rspec>, [">= 0"])
|
76
79
|
s.add_development_dependency(%q<cucumber>, [">= 0"])
|
77
|
-
s.add_development_dependency(%q<jeweler>, ["
|
78
|
-
s.add_development_dependency(%q<regressiontest>, ["
|
80
|
+
s.add_development_dependency(%q<jeweler>, ["~> 2.0.1"])
|
81
|
+
s.add_development_dependency(%q<regressiontest>, ["~> 0.0.3"])
|
79
82
|
else
|
80
83
|
s.add_dependency(%q<rspec>, [">= 0"])
|
81
84
|
s.add_dependency(%q<cucumber>, [">= 0"])
|
82
|
-
s.add_dependency(%q<jeweler>, ["
|
83
|
-
s.add_dependency(%q<regressiontest>, ["
|
85
|
+
s.add_dependency(%q<jeweler>, ["~> 2.0.1"])
|
86
|
+
s.add_dependency(%q<regressiontest>, ["~> 0.0.3"])
|
84
87
|
end
|
85
88
|
else
|
86
89
|
s.add_dependency(%q<rspec>, [">= 0"])
|
87
90
|
s.add_dependency(%q<cucumber>, [">= 0"])
|
88
|
-
s.add_dependency(%q<jeweler>, ["
|
89
|
-
s.add_dependency(%q<regressiontest>, ["
|
91
|
+
s.add_dependency(%q<jeweler>, ["~> 2.0.1"])
|
92
|
+
s.add_dependency(%q<regressiontest>, ["~> 0.0.3"])
|
90
93
|
end
|
91
94
|
end
|
92
95
|
|
data/features/cli.feature
CHANGED
@@ -23,6 +23,11 @@ Feature: Command-line interface (CLI)
|
|
23
23
|
When I execute "./bin/bio-vcf -i --sfilter 's.dp>20'"
|
24
24
|
Then I expect the named output to match the named output "s.dp"
|
25
25
|
|
26
|
+
Scenario: Test the include sample filter using dp
|
27
|
+
Given I have input file(s) named "test/data/input/multisample.vcf"
|
28
|
+
When I execute "./bin/bio-vcf -i --ifilter 's.dp>100' --seval s.dp"
|
29
|
+
Then I expect the named output to match the named output "ifilter_s.dp"
|
30
|
+
|
26
31
|
Scenario: Test the info eval using dp
|
27
32
|
Given I have input file(s) named "test/data/input/multisample.vcf"
|
28
33
|
When I execute "./bin/bio-vcf -i --eval 'r.info.dp'"
|
@@ -44,5 +49,8 @@ Feature: Command-line interface (CLI)
|
|
44
49
|
When I execute "./bin/bio-vcf --rewrite rec.info[\'sample\']=\'XXXXX\'"
|
45
50
|
Then I expect the named output to match the named output "rewrite.info.sample"
|
46
51
|
|
47
|
-
|
52
|
+
Scenario: Test deadlock on failed filter with threads
|
53
|
+
Given I have input file(s) named "test/data/input/multisample.vcf"
|
54
|
+
When I execute "./bin/bio-vcf -i --num-threads 4 --thread-lines 4 --filter 't.info.dp>2'"
|
55
|
+
Then I expect an error and the named output to match the named output "thread4_4_failed_filter" in under 30 seconds
|
48
56
|
|
@@ -27,17 +27,17 @@ Feature: Multi-sample VCF
|
|
27
27
|
And I expect rec.info.dp to be 1537
|
28
28
|
And I expect rec.info.readposranksum to be 0.815
|
29
29
|
And I expect rec.sample['Original'].ad to be [189,25]
|
30
|
-
And I expect rec.sample['Original'].gt to be
|
30
|
+
And I expect rec.sample['Original'].gt to be "0/1"
|
31
31
|
And I expect rec.sample['s3t2'].ad to be [167,26]
|
32
32
|
And I expect rec.sample['s3t2'].dp to be 196
|
33
33
|
And I expect rec.sample['s3t2'].gq to be 20
|
34
34
|
And I expect rec.sample['s3t2'].pl to be [20,0,522]
|
35
35
|
# And the nicer self resolving
|
36
|
-
And I expect rec.sample.original.gt to be
|
36
|
+
And I expect rec.sample.original.gt to be "0/1"
|
37
37
|
And I expect rec.sample.s3t2.pl to be [20,0,522]
|
38
38
|
# And the even better
|
39
|
-
And I expect
|
40
|
-
And I expect rec.original.gt to be
|
39
|
+
And I expect rec.original.gt? to be true
|
40
|
+
And I expect rec.original.gt to be "0/1"
|
41
41
|
And I expect rec.s3t2.pl to be [20,0,522]
|
42
42
|
# Check for missing data
|
43
43
|
And I expect test rec.missing_samples? to be false
|
data/features/sfilter.feature
CHANGED
@@ -35,7 +35,7 @@ Feature: Sample filters
|
|
35
35
|
When I evaluate empty './.'
|
36
36
|
Then I expect s.empty? to be true
|
37
37
|
Then I expect s.dp? to be false
|
38
|
-
Then I expect s.dp to
|
38
|
+
Then I expect s.dp to be nil
|
39
39
|
And sfilter 's.dp>4' to throw an error
|
40
40
|
|
41
41
|
# Scenario: Missing sample with ignore missing set
|
@@ -10,3 +10,7 @@ end
|
|
10
10
|
Then(/^I expect the named output to match the named output "(.*?)"$/) do |arg1|
|
11
11
|
RegressionTest::CliExec::exec(@cmd,arg1,ignore: '##BioVcf=').should be_true
|
12
12
|
end
|
13
|
+
|
14
|
+
Then(/^I expect an error and the named output to match the named output "(.*?)" in under (\d+) seconds$/) do |arg1,arg2|
|
15
|
+
RegressionTest::CliExec::exec(@cmd,arg1,ignore: '(FATAL|Waiting|from|vcf|Options|Final pid)',should_fail: true,timeout:arg2.to_i).should be_true
|
16
|
+
end
|
@@ -37,6 +37,14 @@ Then(/^I expect rec\.info\.readposranksum to be (\d+)\.(\d+)$/) do |arg1, arg2|
|
|
37
37
|
expect(@rec1.info.readposranksum).to eq 0.815
|
38
38
|
end
|
39
39
|
|
40
|
+
Then(/^I expect rec\.sample\.original\.gt to be "(.*?)"$/) do |arg1|
|
41
|
+
expect(@rec1.sample['Original'].gt).to eq "0/1"
|
42
|
+
end
|
43
|
+
|
44
|
+
Then(/^I expect rec\.original\.gt to be "(.*?)"$/) do |arg1|
|
45
|
+
expect(@rec1.original.gt).to eq "0/1"
|
46
|
+
end
|
47
|
+
|
40
48
|
Then(/^I expect rec\.sample\['Original'\]\.gt to be "(.*?)"$/) do |arg1|
|
41
49
|
expect(@rec1.sample['Original'].gt).to eq "0/1"
|
42
50
|
end
|
@@ -97,10 +105,6 @@ Then(/^I expect rec\.original\? to be true$/) do
|
|
97
105
|
expect(@rec1.original?).to be true
|
98
106
|
end
|
99
107
|
|
100
|
-
Given(/^multisample vcf line with missing data$/) do |string|
|
101
|
-
pending # express the regexp above with the code you wish you had
|
102
|
-
end
|
103
|
-
|
104
108
|
Then(/^I expect rec\.original\? to be false$/) do
|
105
109
|
expect(@rec1.original?).to eq false
|
106
110
|
end
|
@@ -118,34 +122,42 @@ Then(/^I expect rec\.valid\? to be true$/) do
|
|
118
122
|
end
|
119
123
|
|
120
124
|
Then(/^I expect r\.original\.gt\? to be true$/) do
|
121
|
-
|
125
|
+
expect(@rec1.original.gt?).to be true
|
122
126
|
end
|
123
127
|
|
124
128
|
Then(/^I expect r\.original\? to be true$/) do
|
125
|
-
|
129
|
+
expect(@rec1.original?).to be true
|
130
|
+
end
|
131
|
+
|
132
|
+
Then(/^I expect rec\.original\? to be true$/) do
|
133
|
+
expect(@rec1.original?).to be true
|
134
|
+
end
|
135
|
+
|
136
|
+
Then(/^I expect rec\.original\.gt\? to be true$/) do
|
137
|
+
expect(@rec1.original.gt?).to be true
|
126
138
|
end
|
127
139
|
|
128
140
|
Then(/^I expect r\.original\.gti\? to be true$/) do
|
129
|
-
|
141
|
+
expect(@rec1.original.gti?).to eq true
|
130
142
|
end
|
131
143
|
|
132
144
|
Then(/^I expect r\.original\.gti to be \[(\d+),(\d+)\]$/) do |arg1, arg2|
|
133
|
-
|
145
|
+
expect(@rec1.original.gti).to eq [arg1.to_i,arg2.to_i]
|
134
146
|
end
|
135
147
|
|
136
148
|
Then(/^I expect r\.original\.gti\[(\d+)\] to be (\d+)$/) do |arg1, arg2|
|
137
|
-
|
149
|
+
expect(@rec1.original.gti[arg1.to_i]).to eq arg2.to_i
|
138
150
|
end
|
139
151
|
|
140
152
|
Then(/^I expect r\.original\.gts\? to be true$/) do
|
141
|
-
|
153
|
+
expect(@rec1.original.gts?).to eq true
|
142
154
|
end
|
143
155
|
|
144
156
|
Then(/^I expect r\.original\.gts to be \["(.*?)","(.*?)"\]$/) do |arg1, arg2|
|
145
|
-
|
157
|
+
expect(@rec1.original.gts).to eq [arg1,arg2]
|
146
158
|
end
|
147
159
|
|
148
160
|
Then(/^I expect r\.original\.gts\[(\d+)\] to be "(.*?)"$/) do |arg1, arg2|
|
149
|
-
|
161
|
+
expect(@rec1.original.gts[arg1.to_i]).to eq arg2
|
150
162
|
end
|
151
163
|
|
@@ -1,90 +1,139 @@
|
|
1
1
|
Given(/^the VCF line$/) do |string|
|
2
|
-
@header =
|
2
|
+
@header = VcfHeader.new
|
3
|
+
@header.add("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tSample")
|
3
4
|
@vcfline = string
|
4
5
|
end
|
5
6
|
|
6
7
|
When(/^I evaluate '([^']+)'$/) do |arg1|
|
8
|
+
# concat VCF line with sample (arg1)
|
7
9
|
@fields = VcfLine.parse((@vcfline.split(/\s+/)+[arg1]).join("\t"))
|
8
10
|
@rec = VcfRecord.new(@fields,@header)
|
9
11
|
p @rec
|
12
|
+
@g = @rec.sample['Sample']
|
13
|
+
p @g
|
14
|
+
expect(@g).not_to be nil
|
15
|
+
@s = VcfSample::Sample.new(@rec,@g)
|
16
|
+
@ignore_missing = false
|
10
17
|
end
|
11
18
|
|
12
19
|
Then(/^I expect s\.empty\? to be false$/) do
|
13
|
-
p @rec.sample[0]
|
14
20
|
expect(@s.empty?).to be false
|
21
|
+
expect(@s.sfilter("s.empty?",do_cache: false)).to be false
|
15
22
|
end
|
16
23
|
|
24
|
+
Then(/^I expect s\.dp\? to be true$/) do
|
25
|
+
p ['eval s.dp?',@s.eval("s.dp?",do_cache: false)]
|
26
|
+
p ['eval s.dp',@s.eval("s.dp",do_cache: false)]
|
27
|
+
p @g.dp
|
28
|
+
p @s.dp
|
29
|
+
p @s.sfilter("s.dp?",do_cache: false)
|
30
|
+
expect(@s.eval("s.dp?",do_cache: false)).to be true
|
31
|
+
end
|
17
32
|
|
18
33
|
Then(/^I expect s\.dp to be (\d+)$/) do |arg1|
|
19
|
-
|
34
|
+
# p @s.eval("s.dp")
|
35
|
+
p :now
|
36
|
+
p ['eval s.dp?',@s.eval("s.dp?",do_cache: false)]
|
37
|
+
p ['eval s.dp',@s.eval("s.dp",do_cache: false)]
|
38
|
+
expect(@s.eval("s.dp",do_cache: false)).to equal arg1.to_i
|
20
39
|
end
|
21
40
|
|
22
41
|
Then(/^sfilter 's\.dp>(\d+)' to be true$/) do |arg1|
|
23
|
-
|
42
|
+
expect(@s.sfilter("dp>#{arg1}",do_cache: false)).to be true
|
43
|
+
end
|
44
|
+
|
45
|
+
When(/^I evaluate missing '([^']+)'$/) do |arg1|
|
46
|
+
# concat VCF line with sample (arg1)
|
47
|
+
@fields = VcfLine.parse((@vcfline.split(/\s+/)+[arg1]).join("\t"))
|
48
|
+
@rec = VcfRecord.new(@fields,@header)
|
49
|
+
p @rec
|
50
|
+
@g = @rec.sample['Sample']
|
51
|
+
@s = VcfSample::Sample.new(@rec,@g)
|
52
|
+
p @s
|
53
|
+
expect(@s).not_to be nil
|
54
|
+
@ignore_missing = false
|
24
55
|
end
|
25
56
|
|
26
|
-
|
27
|
-
|
57
|
+
Then(/^I expect s\.dp\? to be false$/) do
|
58
|
+
expect(@s.eval("s.dp?",do_cache: false)).to be false
|
28
59
|
end
|
29
60
|
|
30
61
|
Then(/^I expect s\.dp to be nil$/) do
|
31
|
-
|
62
|
+
expect(@s.eval("s.dp",ignore_missing_data: @ignore_missing, do_cache: false)).to be nil
|
32
63
|
end
|
33
64
|
|
34
|
-
Then(/^sfilter 's\.dp>(\d+)' to
|
35
|
-
|
65
|
+
Then(/^sfilter 's\.dp>(\d+)' to throw an error$/) do |arg1|
|
66
|
+
expect { @s.eval("s.dp>#{arg1}",do_cache: false) }.to raise_error NoMethodError
|
36
67
|
end
|
37
68
|
|
38
|
-
|
39
|
-
|
69
|
+
Then(/^sfilter 's\.dp>(\d+)' to be false$/) do |arg1|
|
70
|
+
expect(@s.sfilter("s.dp>#{arg1}",ignore_missing_data: @ignore_missing, do_cache: false)).to be false
|
40
71
|
end
|
41
72
|
|
42
|
-
|
43
|
-
|
73
|
+
When(/^I evaluate empty '\.\/\.'$/) do
|
74
|
+
# concat VCF line with sample (arg1)
|
75
|
+
@fields = VcfLine.parse((@vcfline.split(/\s+/)+['./.']).join("\t"))
|
76
|
+
@rec = VcfRecord.new(@fields,@header)
|
77
|
+
p @rec
|
78
|
+
@g = @rec.sample['Sample']
|
79
|
+
@s = VcfSample::Sample.new(@rec,@g)
|
80
|
+
p @s
|
81
|
+
expect(@s).not_to be nil
|
82
|
+
@ignore_missing = false
|
44
83
|
end
|
45
84
|
|
46
|
-
When(/^I evaluate missing '(
|
47
|
-
|
85
|
+
When(/^I evaluate missing '([^']+)' with ignore missing$/) do |arg1|
|
86
|
+
# concat VCF line with sample (arg1)
|
87
|
+
@fields = VcfLine.parse((@vcfline.split(/\s+/)+[arg1]).join("\t"))
|
88
|
+
@rec = VcfRecord.new(@fields,@header)
|
89
|
+
p @rec
|
90
|
+
@g = @rec.sample['Sample']
|
91
|
+
@s = VcfSample::Sample.new(@rec,@g)
|
92
|
+
p @s
|
93
|
+
expect(@s).not_to be nil
|
94
|
+
@ignore_missing = true
|
48
95
|
end
|
49
96
|
|
50
97
|
Then(/^I expect s\.empty\? to be true$/) do
|
51
|
-
|
98
|
+
expect(@s.sfilter("s.empty?",do_cache: false)).to be true
|
52
99
|
end
|
53
100
|
|
54
101
|
Then(/^I expect s\.dp to throw an error$/) do
|
55
|
-
|
102
|
+
# @s.instance_eval { undef :dp }
|
103
|
+
p @s.eval("s.dp",do_cache: false)
|
104
|
+
expect { @s.eval("s.dp",do_cache: false) }.to raise_error NoMethodError
|
56
105
|
end
|
57
106
|
|
58
107
|
When(/^I evaluate empty '\.\/\.' with ignore missing$/) do
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
108
|
+
# concat VCF line with sample (arg1)
|
109
|
+
@fields = VcfLine.parse((@vcfline.split(/\s+/)+['./.']).join("\t"))
|
110
|
+
@rec = VcfRecord.new(@fields,@header)
|
111
|
+
p @rec
|
112
|
+
@g = @rec.sample['Sample']
|
113
|
+
@s = VcfSample::Sample.new(@rec,@g)
|
114
|
+
p @s
|
115
|
+
expect(@s).not_to be nil
|
116
|
+
@ignore_missing = true
|
68
117
|
end
|
69
118
|
|
70
119
|
Then(/^I expect s\.what\? to throw an error$/) do
|
71
|
-
|
120
|
+
expect { @s.eval("s.what?",do_cache: false) }.to raise_error RuntimeError
|
72
121
|
end
|
73
122
|
|
74
123
|
Then(/^I expect s\.what to throw an error$/) do
|
75
|
-
|
124
|
+
expect { @s.eval("s.what",do_cache: false) }.to raise_error NoMethodError
|
76
125
|
end
|
77
126
|
|
78
127
|
Then(/^I expect r\.chrom to be "(.*?)"$/) do |arg1|
|
79
|
-
|
128
|
+
expect(@s.eval("r.chrom",do_cache: false)).to eq "1"
|
80
129
|
end
|
81
130
|
|
82
131
|
Then(/^I expect r\.alt to be \["(.*?)"\]$/) do |arg1|
|
83
|
-
|
132
|
+
expect(@s.eval("r.alt",do_cache: false)).to eq ["G"]
|
84
133
|
end
|
85
134
|
|
86
135
|
Then(/^I expect r\.info\.af to be (\d+)\.(\d+)$/) do |arg1, arg2|
|
87
|
-
|
136
|
+
expect(@s.eval("r.info.af",do_cache: false)).to eq 0.667
|
88
137
|
end
|
89
138
|
|
90
139
|
|
data/lib/bio-vcf.rb
CHANGED
@@ -2,6 +2,15 @@ module BioVcf
|
|
2
2
|
|
3
3
|
MAXINT=100_000
|
4
4
|
|
5
|
+
class ValueError < Exception
|
6
|
+
end
|
7
|
+
|
8
|
+
module VcfValue
|
9
|
+
def VcfValue::empty? v
|
10
|
+
v == nil or v == '' or v == '.'
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
5
14
|
# Helper class for a list of (variant) values, such as A,G.
|
6
15
|
# The [] function does the hard work. You can pass in an index (integer)
|
7
16
|
# or nucleotide which translates to an index.
|
@@ -95,11 +104,12 @@ module BioVcf
|
|
95
104
|
|
96
105
|
attr_reader :format, :values, :header
|
97
106
|
|
98
|
-
def initialize s, format, header, alt
|
99
|
-
@is_empty = (s
|
107
|
+
def initialize s, format, header, ref, alt
|
108
|
+
@is_empty = VcfSample::empty?(s)
|
100
109
|
@original_s = s
|
101
110
|
@format = format
|
102
111
|
@header = header
|
112
|
+
@ref = ref
|
103
113
|
@alt = alt
|
104
114
|
end
|
105
115
|
|
@@ -116,7 +126,7 @@ module BioVcf
|
|
116
126
|
end
|
117
127
|
|
118
128
|
def valid?
|
119
|
-
|
129
|
+
!empty?
|
120
130
|
end
|
121
131
|
|
122
132
|
def dp4
|
@@ -141,14 +151,33 @@ module BioVcf
|
|
141
151
|
VcfAltInfoList.new(@alt,values[fetch('AMQ')])
|
142
152
|
end
|
143
153
|
|
154
|
+
def gti?
|
155
|
+
not VcfValue::empty?(fetch_value("GT"))
|
156
|
+
end
|
157
|
+
|
158
|
+
def gti
|
159
|
+
gt.split('/').map { |g| g.to_i }
|
160
|
+
end
|
161
|
+
|
162
|
+
def gts?
|
163
|
+
not VcfValue::empty?(fetch_value("GT"))
|
164
|
+
end
|
165
|
+
|
166
|
+
def gts
|
167
|
+
genotypes = [@ref] + @alt
|
168
|
+
gti.map { |i| genotypes[i] }
|
169
|
+
end
|
170
|
+
|
171
|
+
# Returns the value of a field
|
144
172
|
def method_missing(m, *args, &block)
|
145
173
|
return nil if @is_empty
|
146
174
|
if m =~ /\?$/
|
147
|
-
# query if a value exists, e.g., r.info.dp?
|
175
|
+
# query if a value exists, e.g., r.info.dp? or s.dp?
|
148
176
|
v = values[fetch(m.to_s.upcase.chop)]
|
149
|
-
|
177
|
+
return (not VcfValue::empty?(v))
|
150
178
|
else
|
151
179
|
v = values[fetch(m.to_s.upcase)]
|
180
|
+
return nil if VcfValue::empty?(v)
|
152
181
|
v = v.to_i if v =~ /^\d+$/
|
153
182
|
v = v.to_f if v =~ /^\d+\.\d+$/
|
154
183
|
v
|
@@ -157,13 +186,19 @@ module BioVcf
|
|
157
186
|
|
158
187
|
private
|
159
188
|
|
189
|
+
# Fetch a value and throw an error if it does not exist
|
160
190
|
def fetch name
|
161
191
|
raise "ERROR: Field with name #{name} does not exist!" if !@format[name]
|
162
192
|
@format[name]
|
163
193
|
end
|
164
194
|
|
195
|
+
def fetch_value name
|
196
|
+
values[fetch(name)]
|
197
|
+
end
|
198
|
+
|
199
|
+
# Return an integer list
|
165
200
|
def ilist name
|
166
|
-
v =
|
201
|
+
v = fetch_value(name)
|
167
202
|
return nil if not v
|
168
203
|
v.split(',').map{|i| i.to_i}
|
169
204
|
end
|
@@ -172,17 +207,18 @@ module BioVcf
|
|
172
207
|
|
173
208
|
# Holds all samples
|
174
209
|
class VcfGenotypeFields
|
175
|
-
def initialize fields, format, header, alt
|
210
|
+
def initialize fields, format, header, ref, alt
|
176
211
|
@fields = fields
|
177
212
|
@format = format
|
178
213
|
@header = header
|
214
|
+
@ref = ref
|
179
215
|
@alt = alt
|
180
216
|
@samples = {} # lazy cache
|
181
217
|
@sample_index = @header.sample_index()
|
182
218
|
end
|
183
219
|
|
184
220
|
def [] name
|
185
|
-
@samples[name] ||= VcfGenotypeField.new(@fields[@sample_index[name]],@format,@header,@alt)
|
221
|
+
@samples[name] ||= VcfGenotypeField.new(@fields[@sample_index[name]],@format,@header,@ref,@alt)
|
186
222
|
end
|
187
223
|
|
188
224
|
def method_missing(m, *args, &block)
|
@@ -191,7 +227,7 @@ module BioVcf
|
|
191
227
|
# test for valid sample
|
192
228
|
return !VcfSample::empty?(@fields[@sample_index[name.chop]])
|
193
229
|
else
|
194
|
-
@samples[name] ||= VcfGenotypeField.new(@fields[@sample_index[name]],@format,@header,@alt)
|
230
|
+
@samples[name] ||= VcfGenotypeField.new(@fields[@sample_index[name]],@format,@header,@ref,@alt)
|
195
231
|
end
|
196
232
|
end
|
197
233
|
|