bio-vcf 0.0.3 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +145 -20
- data/VERSION +1 -1
- data/bin/bio-vcf +204 -62
- data/bio-vcf.gemspec +7 -3
- data/features/cli.feature +16 -0
- data/features/multisample.feature +10 -0
- data/features/sfilter.feature +60 -0
- data/features/step_definitions/cli-feature.rb +1 -1
- data/features/step_definitions/multisample.rb +32 -0
- data/features/step_definitions/sfilter.rb +90 -0
- data/lib/bio-vcf/utils.rb +12 -6
- data/lib/bio-vcf/vcfgenotypefield.rb +4 -1
- data/lib/bio-vcf/vcfheader.rb +24 -0
- data/lib/bio-vcf/vcfrdf.rb +15 -8
- data/lib/bio-vcf/vcfrecord.rb +45 -9
- data/lib/bio-vcf/vcfsample.rb +94 -5
- data/test/data/regression/sfilter_seval_s.dp.ref +31 -0
- data/test/data/regression/{sfilter001.ref → thread4.ref} +5 -0
- data/test/data/regression/thread4_4.ref +150 -0
- data/test/performance/metrics.md +53 -19
- metadata +7 -3
data/bio-vcf.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "bio-vcf"
|
8
|
-
s.version = "0.0.3"
|
8
|
+
s.version = "0.7.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Pjotr Prins"]
|
12
|
-
s.date = "2014-
|
12
|
+
s.date = "2014-06-24"
|
13
13
|
s.description = "Smart parser for VCF format"
|
14
14
|
s.email = "pjotr.public01@thebird.nl"
|
15
15
|
s.executables = ["bio-vcf"]
|
@@ -30,11 +30,13 @@ Gem::Specification.new do |s|
|
|
30
30
|
"features/cli.feature",
|
31
31
|
"features/diff_count.feature",
|
32
32
|
"features/multisample.feature",
|
33
|
+
"features/sfilter.feature",
|
33
34
|
"features/somaticsniper.feature",
|
34
35
|
"features/step_definitions/bio-vcf_steps.rb",
|
35
36
|
"features/step_definitions/cli-feature.rb",
|
36
37
|
"features/step_definitions/diff_count.rb",
|
37
38
|
"features/step_definitions/multisample.rb",
|
39
|
+
"features/step_definitions/sfilter.rb",
|
38
40
|
"features/step_definitions/somaticsniper.rb",
|
39
41
|
"features/support/env.rb",
|
40
42
|
"lib/bio-vcf.rb",
|
@@ -55,7 +57,9 @@ Gem::Specification.new do |s|
|
|
55
57
|
"test/data/regression/rewrite.info.sample.ref",
|
56
58
|
"test/data/regression/s.dp.ref",
|
57
59
|
"test/data/regression/seval_s.dp.ref",
|
58
|
-
"test/data/regression/sfilter001.ref",
|
60
|
+
"test/data/regression/sfilter_seval_s.dp.ref",
|
61
|
+
"test/data/regression/thread4.ref",
|
62
|
+
"test/data/regression/thread4_4.ref",
|
59
63
|
"test/performance/metrics.md"
|
60
64
|
]
|
61
65
|
s.homepage = "http://github.com/pjotrp/bioruby-vcf"
|
data/features/cli.feature
CHANGED
@@ -8,6 +8,16 @@ Feature: Command-line interface (CLI)
|
|
8
8
|
When I execute "./bin/bio-vcf -i --filter 'r.info.dp>100'"
|
9
9
|
Then I expect the named output to match the named output "r.info.dp"
|
10
10
|
|
11
|
+
Scenario: Test the info filter using dp and threads
|
12
|
+
Given I have input file(s) named "test/data/input/multisample.vcf"
|
13
|
+
When I execute "./bin/bio-vcf -i --num-threads 4 --filter 'r.info.dp>2'"
|
14
|
+
Then I expect the named output to match the named output "thread4"
|
15
|
+
|
16
|
+
Scenario: Test the info filter using dp and threads with lines
|
17
|
+
Given I have input file(s) named "test/data/input/multisample.vcf"
|
18
|
+
When I execute "./bin/bio-vcf -i --num-threads 4 --thread-lines 4 --filter 'r.info.dp>2'"
|
19
|
+
Then I expect the named output to match the named output "thread4_4"
|
20
|
+
|
11
21
|
Scenario: Test the sample filter using dp
|
12
22
|
Given I have input file(s) named "test/data/input/multisample.vcf"
|
13
23
|
When I execute "./bin/bio-vcf -i --sfilter 's.dp>20'"
|
@@ -23,6 +33,12 @@ Feature: Command-line interface (CLI)
|
|
23
33
|
When I execute "./bin/bio-vcf -i --seval 's.dp'"
|
24
34
|
Then I expect the named output to match the named output "seval_s.dp"
|
25
35
|
|
36
|
+
Scenario: Test the sample filter + eval using dp
|
37
|
+
Given I have input file(s) named "test/data/input/multisample.vcf"
|
38
|
+
When I execute "./bin/bio-vcf -i --sfilter 's.dp>10' --seval 's.dp'"
|
39
|
+
Then I expect the named output to match the named output "sfilter_seval_s.dp"
|
40
|
+
|
41
|
+
|
26
42
|
Scenario: Rewrite an info field
|
27
43
|
Given I have input file(s) named "test/data/input/multisample.vcf"
|
28
44
|
When I execute "./bin/bio-vcf --rewrite rec.info[\'sample\']=\'XXXXX\'"
|
@@ -36,11 +36,21 @@ Feature: Multi-sample VCF
|
|
36
36
|
And I expect rec.sample.original.gt to be [0,1]
|
37
37
|
And I expect rec.sample.s3t2.pl to be [20,0,522]
|
38
38
|
# And the even better
|
39
|
+
And I expect r.original.gt? to be true
|
39
40
|
And I expect rec.original.gt to be [0,1]
|
40
41
|
And I expect rec.s3t2.pl to be [20,0,522]
|
41
42
|
# Check for missing data
|
42
43
|
And I expect test rec.missing_samples? to be false
|
43
44
|
And I expect test rec.original? to be true
|
45
|
+
# Special functions
|
46
|
+
And I expect r.original? to be true
|
47
|
+
And I expect r.original.gti? to be true
|
48
|
+
And I expect r.original.gti to be [0,1]
|
49
|
+
And I expect r.original.gti[1] to be 1
|
50
|
+
And I expect r.original.gts? to be true
|
51
|
+
And I expect r.original.gts to be ["C","T"]
|
52
|
+
And I expect r.original.gts[1] to be "T"
|
53
|
+
|
44
54
|
Given multisample vcf line
|
45
55
|
"""
|
46
56
|
1 10723 . C G 73.85 . AC=4;AF=0.667;AN=6;BaseQRankSum=1.300;DP=18;Dels=0.00;FS=3.680;HaplotypeScore=0.0000;MLEAC=4;MLEAF=0.667;MQ=20.49;MQ0=11;MQRankSum=1.754;QD=8.21;ReadPosRankSum=0.000 GT:AD:DP:GQ:PL ./. ./. 1/1:2,2:4:6:66,6,0 1/1:4,1:5:3:36,3,0 ./. ./. 0/0:6,0:6:3:0,3,33
|
@@ -0,0 +1,60 @@
|
|
1
|
+
@sfilter
|
2
|
+
Feature: Sample filters
|
3
|
+
|
4
|
+
Bio-vcf supports sample filters, where every sample is evaluated
|
5
|
+
independently, though they have the rec information (chrom, pos, info)
|
6
|
+
available.
|
7
|
+
|
8
|
+
Scenario: Example of a sample
|
9
|
+
|
10
|
+
Given the VCF line
|
11
|
+
"""
|
12
|
+
1 10723 . C G 73.85 . AC=4;AF=0.667;AN=6;BaseQRankSum=1.300;DP=18;Dels=0.00;FS=3.680;HaplotypeScore=0.0000;MLEAC=4;MLEAF=0.667;MQ=20.49;MQ0=11;MQRankSum=1.754;QD=8.21;ReadPosRankSum=0.000 GT:AD:DP:GQ:PL
|
13
|
+
"""
|
14
|
+
When I evaluate '0/0:6,0:6:3:0,3,33'
|
15
|
+
Then I expect s.empty? to be false
|
16
|
+
Then I expect s.dp? to be true
|
17
|
+
Then I expect s.dp to be 6
|
18
|
+
And sfilter 's.dp>4' to be true
|
19
|
+
|
20
|
+
# Scenario: Sample with missing data
|
21
|
+
When I evaluate missing '0/0:6,0:.:3:0,3,33'
|
22
|
+
Then I expect s.empty? to be false
|
23
|
+
Then I expect s.dp? to be false
|
24
|
+
Then I expect s.dp to be nil
|
25
|
+
And sfilter 's.dp>4' to throw an error
|
26
|
+
|
27
|
+
# Scenario: Sample with missing data with ignore missing set
|
28
|
+
When I evaluate missing '0/0:6,0:.:3:0,3,33' with ignore missing
|
29
|
+
Then I expect s.empty? to be false
|
30
|
+
Then I expect s.dp? to be false
|
31
|
+
Then I expect s.dp to be nil
|
32
|
+
And sfilter 's.dp>4' to be false
|
33
|
+
|
34
|
+
# Scenario: Missing sample
|
35
|
+
When I evaluate empty './.'
|
36
|
+
Then I expect s.empty? to be true
|
37
|
+
Then I expect s.dp? to be false
|
38
|
+
Then I expect s.dp to throw an error
|
39
|
+
And sfilter 's.dp>4' to throw an error
|
40
|
+
|
41
|
+
# Scenario: Missing sample with ignore missing set
|
42
|
+
When I evaluate empty './.' with ignore missing
|
43
|
+
Then I expect s.empty? to be true
|
44
|
+
Then I expect s.dp? to be false
|
45
|
+
Then I expect s.dp to be nil
|
46
|
+
And sfilter 's.dp>4' to be false
|
47
|
+
|
48
|
+
# Scenario: Wrong field name in sample
|
49
|
+
When I evaluate '0/0:6,0:6:3:0,3,33'
|
50
|
+
Then I expect s.empty? to be false
|
51
|
+
Then I expect s.dp? to be true
|
52
|
+
Then I expect s.what? to throw an error
|
53
|
+
And I expect s.what to throw an error
|
54
|
+
|
55
|
+
# Scenario: Get other information for a sample
|
56
|
+
When I evaluate '0/0:6,0:6:3:0,3,33'
|
57
|
+
Then I expect r.chrom to be "1"
|
58
|
+
And I expect r.alt to be ["G"]
|
59
|
+
And I expect r.info.af to be 0.667
|
60
|
+
|
@@ -8,5 +8,5 @@ When /^I execute "(.*?)"$/ do |arg1|
|
|
8
8
|
end
|
9
9
|
|
10
10
|
Then(/^I expect the named output to match the named output "(.*?)"$/) do |arg1|
|
11
|
-
RegressionTest::CliExec::exec(@cmd,arg1).should be_true
|
11
|
+
RegressionTest::CliExec::exec(@cmd,arg1,ignore: '##BioVcf=').should be_true
|
12
12
|
end
|
@@ -117,3 +117,35 @@ Then(/^I expect rec\.valid\? to be true$/) do
|
|
117
117
|
expect(@rec1.valid?).to eq true
|
118
118
|
end
|
119
119
|
|
120
|
+
Then(/^I expect r\.original\.gt\? to be true$/) do
|
121
|
+
pending # express the regexp above with the code you wish you had
|
122
|
+
end
|
123
|
+
|
124
|
+
Then(/^I expect r\.original\? to be true$/) do
|
125
|
+
pending # express the regexp above with the code you wish you had
|
126
|
+
end
|
127
|
+
|
128
|
+
Then(/^I expect r\.original\.gti\? to be true$/) do
|
129
|
+
pending # express the regexp above with the code you wish you had
|
130
|
+
end
|
131
|
+
|
132
|
+
Then(/^I expect r\.original\.gti to be \[(\d+),(\d+)\]$/) do |arg1, arg2|
|
133
|
+
pending # express the regexp above with the code you wish you had
|
134
|
+
end
|
135
|
+
|
136
|
+
Then(/^I expect r\.original\.gti\[(\d+)\] to be (\d+)$/) do |arg1, arg2|
|
137
|
+
pending # express the regexp above with the code you wish you had
|
138
|
+
end
|
139
|
+
|
140
|
+
Then(/^I expect r\.original\.gts\? to be true$/) do
|
141
|
+
pending # express the regexp above with the code you wish you had
|
142
|
+
end
|
143
|
+
|
144
|
+
Then(/^I expect r\.original\.gts to be \["(.*?)","(.*?)"\]$/) do |arg1, arg2|
|
145
|
+
pending # express the regexp above with the code you wish you had
|
146
|
+
end
|
147
|
+
|
148
|
+
Then(/^I expect r\.original\.gts\[(\d+)\] to be "(.*?)"$/) do |arg1, arg2|
|
149
|
+
pending # express the regexp above with the code you wish you had
|
150
|
+
end
|
151
|
+
|
@@ -0,0 +1,90 @@
|
|
1
|
+
Given(/^the VCF line$/) do |string|
|
2
|
+
@header = nil
|
3
|
+
@vcfline = string
|
4
|
+
end
|
5
|
+
|
6
|
+
When(/^I evaluate '([^']+)'$/) do |arg1|
|
7
|
+
@fields = VcfLine.parse((@vcfline.split(/\s+/)+[arg1]).join("\t"))
|
8
|
+
@rec = VcfRecord.new(@fields,@header)
|
9
|
+
p @rec
|
10
|
+
end
|
11
|
+
|
12
|
+
Then(/^I expect s\.empty\? to be false$/) do
|
13
|
+
p @rec.sample[0]
|
14
|
+
expect(@s.empty?).to be false
|
15
|
+
end
|
16
|
+
|
17
|
+
|
18
|
+
Then(/^I expect s\.dp to be (\d+)$/) do |arg1|
|
19
|
+
pending # express the regexp above with the code you wish you had
|
20
|
+
end
|
21
|
+
|
22
|
+
Then(/^sfilter 's\.dp>(\d+)' to be true$/) do |arg1|
|
23
|
+
pending # express the regexp above with the code you wish you had
|
24
|
+
end
|
25
|
+
|
26
|
+
When(/^I evaluate missing '(\d+)\/(\d+):(\d+),(\d+):\.:(\d+):(\d+),(\d+),(\d+)'$/) do |arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8|
|
27
|
+
pending # express the regexp above with the code you wish you had
|
28
|
+
end
|
29
|
+
|
30
|
+
Then(/^I expect s\.dp to be nil$/) do
|
31
|
+
pending # express the regexp above with the code you wish you had
|
32
|
+
end
|
33
|
+
|
34
|
+
Then(/^sfilter 's\.dp>(\d+)' to be false$/) do |arg1|
|
35
|
+
pending # express the regexp above with the code you wish you had
|
36
|
+
end
|
37
|
+
|
38
|
+
When(/^I evaluate empty '\.\/\.'$/) do
|
39
|
+
pending # express the regexp above with the code you wish you had
|
40
|
+
end
|
41
|
+
|
42
|
+
Then(/^sfilter 's\.dp>(\d+)' to throw an error$/) do |arg1|
|
43
|
+
pending # express the regexp above with the code you wish you had
|
44
|
+
end
|
45
|
+
|
46
|
+
When(/^I evaluate missing '(\d+)\/(\d+):(\d+),(\d+):\.:(\d+):(\d+),(\d+),(\d+)' with ignore missing$/) do |arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8|
|
47
|
+
pending # express the regexp above with the code you wish you had
|
48
|
+
end
|
49
|
+
|
50
|
+
Then(/^I expect s\.empty\? to be true$/) do
|
51
|
+
pending # express the regexp above with the code you wish you had
|
52
|
+
end
|
53
|
+
|
54
|
+
Then(/^I expect s\.dp to throw an error$/) do
|
55
|
+
pending # express the regexp above with the code you wish you had
|
56
|
+
end
|
57
|
+
|
58
|
+
When(/^I evaluate empty '\.\/\.' with ignore missing$/) do
|
59
|
+
pending # express the regexp above with the code you wish you had
|
60
|
+
end
|
61
|
+
|
62
|
+
Then(/^I expect s\.dp\? to be true$/) do
|
63
|
+
pending # express the regexp above with the code you wish you had
|
64
|
+
end
|
65
|
+
|
66
|
+
Then(/^I expect s\.dp\? to be false$/) do
|
67
|
+
pending # express the regexp above with the code you wish you had
|
68
|
+
end
|
69
|
+
|
70
|
+
Then(/^I expect s\.what\? to throw an error$/) do
|
71
|
+
pending # express the regexp above with the code you wish you had
|
72
|
+
end
|
73
|
+
|
74
|
+
Then(/^I expect s\.what to throw an error$/) do
|
75
|
+
pending # express the regexp above with the code you wish you had
|
76
|
+
end
|
77
|
+
|
78
|
+
Then(/^I expect r\.chrom to be "(.*?)"$/) do |arg1|
|
79
|
+
pending # express the regexp above with the code you wish you had
|
80
|
+
end
|
81
|
+
|
82
|
+
Then(/^I expect r\.alt to be \["(.*?)"\]$/) do |arg1|
|
83
|
+
pending # express the regexp above with the code you wish you had
|
84
|
+
end
|
85
|
+
|
86
|
+
Then(/^I expect r\.info\.af to be (\d+)\.(\d+)$/) do |arg1, arg2|
|
87
|
+
pending # express the regexp above with the code you wish you had
|
88
|
+
end
|
89
|
+
|
90
|
+
|
data/lib/bio-vcf/utils.rb
CHANGED
@@ -9,14 +9,20 @@ module BioVcf
|
|
9
9
|
!!Float(str) rescue false
|
10
10
|
end
|
11
11
|
|
12
|
-
def self::convert
|
13
|
-
if
|
14
|
-
|
12
|
+
def self::convert str
|
13
|
+
if str =~ /,/
|
14
|
+
str.split(/,/).map { |item| convert(item) }
|
15
15
|
else
|
16
|
-
|
17
|
-
|
16
|
+
if integer?(str)
|
17
|
+
str.to_i
|
18
|
+
else
|
19
|
+
if float?(str)
|
20
|
+
str.to_f
|
21
|
+
else
|
22
|
+
str
|
23
|
+
end
|
24
|
+
end
|
18
25
|
end
|
19
|
-
v
|
20
26
|
end
|
21
27
|
end
|
22
28
|
|
@@ -103,6 +103,10 @@ module BioVcf
|
|
103
103
|
@alt = alt
|
104
104
|
end
|
105
105
|
|
106
|
+
def to_s
|
107
|
+
@original_s
|
108
|
+
end
|
109
|
+
|
106
110
|
def values
|
107
111
|
@cache_values ||= @original_s.split(/:/)
|
108
112
|
end
|
@@ -164,7 +168,6 @@ module BioVcf
|
|
164
168
|
v.split(',').map{|i| i.to_i}
|
165
169
|
end
|
166
170
|
|
167
|
-
|
168
171
|
end
|
169
172
|
|
170
173
|
# Holds all samples
|
data/lib/bio-vcf/vcfheader.rb
CHANGED
@@ -27,6 +27,16 @@ module BioVcf
|
|
27
27
|
@lines << line.strip
|
28
28
|
end
|
29
29
|
|
30
|
+
# Add a key value list to the header
|
31
|
+
def tag h
|
32
|
+
h2 = h.dup
|
33
|
+
[:show_help,:skip_header,:verbose,:quiet,:debug].each { |key| h2.delete(key) }
|
34
|
+
info = h2.map { |k,v| k.to_s.capitalize+'='+'"'+v.to_s+'"' }.join(',')
|
35
|
+
line = '##BioVcf=<'+info+'>'
|
36
|
+
@lines.insert(-2,line)
|
37
|
+
line
|
38
|
+
end
|
39
|
+
|
30
40
|
def version
|
31
41
|
@version ||= lines[0].scan(/##fileformat=VCFv(\d+\.\d+)/)[0][0]
|
32
42
|
end
|
@@ -39,10 +49,24 @@ module BioVcf
|
|
39
49
|
@column ||= column_names.size
|
40
50
|
end
|
41
51
|
|
52
|
+
def printable_header_line(fields)
|
53
|
+
fields.map { | field |
|
54
|
+
if field == '#samples'
|
55
|
+
samples
|
56
|
+
else
|
57
|
+
field
|
58
|
+
end
|
59
|
+
}.join("\t")
|
60
|
+
end
|
61
|
+
|
42
62
|
def samples
|
43
63
|
@samples ||= column_names[9..-1]
|
44
64
|
end
|
45
65
|
|
66
|
+
def samples_index_array
|
67
|
+
@all_samples_index ||= column_names[9..-1].fill{|i| i}
|
68
|
+
end
|
69
|
+
|
46
70
|
def sample_index
|
47
71
|
return @sample_index if @sample_index
|
48
72
|
index = {}
|
data/lib/bio-vcf/vcfrdf.rb
CHANGED
@@ -11,20 +11,27 @@ module BioVcf
|
|
11
11
|
@prefix dc: <http://purl.org/dc/elements/1.1/> .
|
12
12
|
@prefix hgnc: <http://identifiers.org/hgnc.symbol/> .
|
13
13
|
@prefix doi: <http://dx.doi.org/> .
|
14
|
-
@prefix : <http://biobeat.org/rdf/
|
14
|
+
@prefix db: <http://biobeat.org/rdf/db#> .
|
15
|
+
@prefix seq: <http://biobeat.org/rdf/seq#> .
|
16
|
+
@prefix : <http://biobeat.org/rdf/vcf#> .
|
15
17
|
EOB
|
16
18
|
end
|
17
19
|
|
18
|
-
def VcfRdf::record id,rec,
|
19
|
-
id2 = [id,'ch'+rec.chrom,rec.pos].join('_')
|
20
|
+
def VcfRdf::record id,rec,tags = "{}"
|
21
|
+
id2 = [id,'ch'+rec.chrom,rec.pos,rec.alt.join('')].join('_')
|
20
22
|
print <<OUT
|
21
|
-
:#{id2} :chr \"#{rec.chrom}\" .
|
22
|
-
:#{id2} :pos #{rec.pos} .
|
23
|
-
:#{id2} :
|
23
|
+
:#{id2} seq:chr \"#{rec.chrom}\" .
|
24
|
+
:#{id2} seq:pos #{rec.pos} .
|
25
|
+
:#{id2} seq:alt \"#{rec.alt[0]}\" .
|
26
|
+
:#{id2} db:vcf true .
|
24
27
|
OUT
|
25
|
-
hash
|
26
|
-
|
28
|
+
hash = eval(tags)
|
29
|
+
if hash
|
30
|
+
hash.each do |k,v|
|
31
|
+
print ":#{id2} #{k} #{v} .\n"
|
32
|
+
end
|
27
33
|
end
|
34
|
+
print "\n"
|
28
35
|
end
|
29
36
|
end
|
30
37
|
end
|
data/lib/bio-vcf/vcfrecord.rb
CHANGED
@@ -27,9 +27,10 @@ module BioVcf
|
|
27
27
|
v = if @h
|
28
28
|
@h[m.to_s.upcase]
|
29
29
|
else
|
30
|
-
@info =~ /#{m.to_s
|
30
|
+
@info =~ /#{m.to_s}=([^;]+)/i
|
31
31
|
value = $1
|
32
|
-
#
|
32
|
+
# p [m,value]
|
33
|
+
# m = @info.match(/#{m.to_s.upcase}=(?<value>[^;]+)/) slower!
|
33
34
|
# value = m[:value]
|
34
35
|
if value == nil
|
35
36
|
split_fields # no option but to split
|
@@ -117,6 +118,7 @@ module BioVcf
|
|
117
118
|
def initialize fields, header
|
118
119
|
@fields = fields
|
119
120
|
@header = header
|
121
|
+
@sample_by_index = []
|
120
122
|
end
|
121
123
|
|
122
124
|
def chrom
|
@@ -176,14 +178,15 @@ module BioVcf
|
|
176
178
|
sample[name]
|
177
179
|
end
|
178
180
|
|
181
|
+
def sample_by_index i
|
182
|
+
# p [i,@fields[i+9]]
|
183
|
+
@sample_by_index[i] ||= VcfGenotypeField.new(@fields[i+9],format,@header,alt)
|
184
|
+
end
|
185
|
+
|
186
|
+
# Walk the samples. list contains an Array of int (the index)
|
179
187
|
def each_sample(list = nil)
|
180
|
-
|
181
|
-
|
182
|
-
samples.each_with_index { |name,i|
|
183
|
-
# p [i,list]
|
184
|
-
next if list and not list.index(i.to_s)
|
185
|
-
yield VcfSample::Sample.new(self,sample[name])
|
186
|
-
}
|
188
|
+
list = @header.samples_index_array() if not list
|
189
|
+
list.each { |i| yield VcfSample::Sample.new(self,sample_by_index(i)) }
|
187
190
|
end
|
188
191
|
|
189
192
|
def missing_samples?
|
@@ -230,6 +233,39 @@ module BioVcf
|
|
230
233
|
end
|
231
234
|
end
|
232
235
|
|
236
|
+
def filter expr, ignore_missing_data, quiet
|
237
|
+
begin
|
238
|
+
if not respond_to?(:call_cached_filter)
|
239
|
+
code =
|
240
|
+
"""
|
241
|
+
def call_cached_filter(rec,fields)
|
242
|
+
r = rec
|
243
|
+
#{expr}
|
244
|
+
end
|
245
|
+
"""
|
246
|
+
self.class.class_eval(code)
|
247
|
+
end
|
248
|
+
res = call_cached_filter(self,@fields)
|
249
|
+
if res.kind_of?(Array)
|
250
|
+
res.join("\t")
|
251
|
+
else
|
252
|
+
res
|
253
|
+
end
|
254
|
+
rescue NoMethodError => e
|
255
|
+
if not quiet
|
256
|
+
$stderr.print "RECORD ERROR!\n"
|
257
|
+
$stderr.print [@fields],"\n"
|
258
|
+
$stderr.print expr,"\n"
|
259
|
+
end
|
260
|
+
if ignore_missing_data
|
261
|
+
$stderr.print e.message if not quiet
|
262
|
+
return false
|
263
|
+
else
|
264
|
+
raise
|
265
|
+
end
|
266
|
+
end
|
267
|
+
end
|
268
|
+
|
233
269
|
# Return the sample
|
234
270
|
def method_missing(m, *args, &block)
|
235
271
|
name = m.to_s
|