bio-vcf 0.0.3 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +145 -20
- data/VERSION +1 -1
- data/bin/bio-vcf +204 -62
- data/bio-vcf.gemspec +7 -3
- data/features/cli.feature +16 -0
- data/features/multisample.feature +10 -0
- data/features/sfilter.feature +60 -0
- data/features/step_definitions/cli-feature.rb +1 -1
- data/features/step_definitions/multisample.rb +32 -0
- data/features/step_definitions/sfilter.rb +90 -0
- data/lib/bio-vcf/utils.rb +12 -6
- data/lib/bio-vcf/vcfgenotypefield.rb +4 -1
- data/lib/bio-vcf/vcfheader.rb +24 -0
- data/lib/bio-vcf/vcfrdf.rb +15 -8
- data/lib/bio-vcf/vcfrecord.rb +45 -9
- data/lib/bio-vcf/vcfsample.rb +94 -5
- data/test/data/regression/sfilter_seval_s.dp.ref +31 -0
- data/test/data/regression/{sfilter001.ref → thread4.ref} +5 -0
- data/test/data/regression/thread4_4.ref +150 -0
- data/test/performance/metrics.md +53 -19
- metadata +7 -3
data/bio-vcf.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "bio-vcf"
|
8
|
-
s.version = "0.0
|
8
|
+
s.version = "0.7.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Pjotr Prins"]
|
12
|
-
s.date = "2014-
|
12
|
+
s.date = "2014-06-24"
|
13
13
|
s.description = "Smart parser for VCF format"
|
14
14
|
s.email = "pjotr.public01@thebird.nl"
|
15
15
|
s.executables = ["bio-vcf"]
|
@@ -30,11 +30,13 @@ Gem::Specification.new do |s|
|
|
30
30
|
"features/cli.feature",
|
31
31
|
"features/diff_count.feature",
|
32
32
|
"features/multisample.feature",
|
33
|
+
"features/sfilter.feature",
|
33
34
|
"features/somaticsniper.feature",
|
34
35
|
"features/step_definitions/bio-vcf_steps.rb",
|
35
36
|
"features/step_definitions/cli-feature.rb",
|
36
37
|
"features/step_definitions/diff_count.rb",
|
37
38
|
"features/step_definitions/multisample.rb",
|
39
|
+
"features/step_definitions/sfilter.rb",
|
38
40
|
"features/step_definitions/somaticsniper.rb",
|
39
41
|
"features/support/env.rb",
|
40
42
|
"lib/bio-vcf.rb",
|
@@ -55,7 +57,9 @@ Gem::Specification.new do |s|
|
|
55
57
|
"test/data/regression/rewrite.info.sample.ref",
|
56
58
|
"test/data/regression/s.dp.ref",
|
57
59
|
"test/data/regression/seval_s.dp.ref",
|
58
|
-
"test/data/regression/
|
60
|
+
"test/data/regression/sfilter_seval_s.dp.ref",
|
61
|
+
"test/data/regression/thread4.ref",
|
62
|
+
"test/data/regression/thread4_4.ref",
|
59
63
|
"test/performance/metrics.md"
|
60
64
|
]
|
61
65
|
s.homepage = "http://github.com/pjotrp/bioruby-vcf"
|
data/features/cli.feature
CHANGED
@@ -8,6 +8,16 @@ Feature: Command-line interface (CLI)
|
|
8
8
|
When I execute "./bin/bio-vcf -i --filter 'r.info.dp>100'"
|
9
9
|
Then I expect the named output to match the named output "r.info.dp"
|
10
10
|
|
11
|
+
Scenario: Test the info filter using dp and threads
|
12
|
+
Given I have input file(s) named "test/data/input/multisample.vcf"
|
13
|
+
When I execute "./bin/bio-vcf -i --num-threads 4 --filter 'r.info.dp>2'"
|
14
|
+
Then I expect the named output to match the named output "thread4"
|
15
|
+
|
16
|
+
Scenario: Test the info filter using dp and threads with lines
|
17
|
+
Given I have input file(s) named "test/data/input/multisample.vcf"
|
18
|
+
When I execute "./bin/bio-vcf -i --num-threads 4 --thread-lines 4 --filter 'r.info.dp>2'"
|
19
|
+
Then I expect the named output to match the named output "thread4_4"
|
20
|
+
|
11
21
|
Scenario: Test the sample filter using dp
|
12
22
|
Given I have input file(s) named "test/data/input/multisample.vcf"
|
13
23
|
When I execute "./bin/bio-vcf -i --sfilter 's.dp>20'"
|
@@ -23,6 +33,12 @@ Feature: Command-line interface (CLI)
|
|
23
33
|
When I execute "./bin/bio-vcf -i --seval 's.dp'"
|
24
34
|
Then I expect the named output to match the named output "seval_s.dp"
|
25
35
|
|
36
|
+
Scenario: Test the sample filter + eval using dp
|
37
|
+
Given I have input file(s) named "test/data/input/multisample.vcf"
|
38
|
+
When I execute "./bin/bio-vcf -i --sfilter 's.dp>10' --seval 's.dp'"
|
39
|
+
Then I expect the named output to match the named output "sfilter_seval_s.dp"
|
40
|
+
|
41
|
+
|
26
42
|
Scenario: Rewrite an info field
|
27
43
|
Given I have input file(s) named "test/data/input/multisample.vcf"
|
28
44
|
When I execute "./bin/bio-vcf --rewrite rec.info[\'sample\']=\'XXXXX\'"
|
@@ -36,11 +36,21 @@ Feature: Multi-sample VCF
|
|
36
36
|
And I expect rec.sample.original.gt to be [0,1]
|
37
37
|
And I expect rec.sample.s3t2.pl to be [20,0,522]
|
38
38
|
# And the even better
|
39
|
+
And I expect r.original.gt? to be true
|
39
40
|
And I expect rec.original.gt to be [0,1]
|
40
41
|
And I expect rec.s3t2.pl to be [20,0,522]
|
41
42
|
# Check for missing data
|
42
43
|
And I expect test rec.missing_samples? to be false
|
43
44
|
And I expect test rec.original? to be true
|
45
|
+
# Special functions
|
46
|
+
And I expect r.original? to be true
|
47
|
+
And I expect r.original.gti? to be true
|
48
|
+
And I expect r.original.gti to be [0,1]
|
49
|
+
And I expect r.original.gti[1] to be 1
|
50
|
+
And I expect r.original.gts? to be true
|
51
|
+
And I expect r.original.gts to be ["C","T"]
|
52
|
+
And I expect r.original.gts[1] to be "T"
|
53
|
+
|
44
54
|
Given multisample vcf line
|
45
55
|
"""
|
46
56
|
1 10723 . C G 73.85 . AC=4;AF=0.667;AN=6;BaseQRankSum=1.300;DP=18;Dels=0.00;FS=3.680;HaplotypeScore=0.0000;MLEAC=4;MLEAF=0.667;MQ=20.49;MQ0=11;MQRankSum=1.754;QD=8.21;ReadPosRankSum=0.000 GT:AD:DP:GQ:PL ./. ./. 1/1:2,2:4:6:66,6,0 1/1:4,1:5:3:36,3,0 ./. ./. 0/0:6,0:6:3:0,3,33
|
@@ -0,0 +1,60 @@
|
|
1
|
+
@sfilter
|
2
|
+
Feature: Sample filters
|
3
|
+
|
4
|
+
Bio-vcf supports sample filters, where every sample is evaluated
|
5
|
+
independently, though they have the rec information (chrom, pos, info)
|
6
|
+
available.
|
7
|
+
|
8
|
+
Scenario: Example of a sample
|
9
|
+
|
10
|
+
Given the VCF line
|
11
|
+
"""
|
12
|
+
1 10723 . C G 73.85 . AC=4;AF=0.667;AN=6;BaseQRankSum=1.300;DP=18;Dels=0.00;FS=3.680;HaplotypeScore=0.0000;MLEAC=4;MLEAF=0.667;MQ=20.49;MQ0=11;MQRankSum=1.754;QD=8.21;ReadPosRankSum=0.000 GT:AD:DP:GQ:PL
|
13
|
+
"""
|
14
|
+
When I evaluate '0/0:6,0:6:3:0,3,33'
|
15
|
+
Then I expect s.empty? to be false
|
16
|
+
Then I expect s.dp? to be true
|
17
|
+
Then I expect s.dp to be 6
|
18
|
+
And sfilter 's.dp>4' to be true
|
19
|
+
|
20
|
+
# Scenario: Sample with missing data
|
21
|
+
When I evaluate missing '0/0:6,0:.:3:0,3,33'
|
22
|
+
Then I expect s.empty? to be false
|
23
|
+
Then I expect s.dp? to be false
|
24
|
+
Then I expect s.dp to be nil
|
25
|
+
And sfilter 's.dp>4' to throw an error
|
26
|
+
|
27
|
+
# Scenario: Sample with missing data with ignore missing set
|
28
|
+
When I evaluate missing '0/0:6,0:.:3:0,3,33' with ignore missing
|
29
|
+
Then I expect s.empty? to be false
|
30
|
+
Then I expect s.dp? to be false
|
31
|
+
Then I expect s.dp to be nil
|
32
|
+
And sfilter 's.dp>4' to be false
|
33
|
+
|
34
|
+
# Scenario: Missing sample
|
35
|
+
When I evaluate empty './.'
|
36
|
+
Then I expect s.empty? to be true
|
37
|
+
Then I expect s.dp? to be false
|
38
|
+
Then I expect s.dp to throw an error
|
39
|
+
And sfilter 's.dp>4' to throw an error
|
40
|
+
|
41
|
+
# Scenario: Missing sample with ignore missing set
|
42
|
+
When I evaluate empty './.' with ignore missing
|
43
|
+
Then I expect s.empty? to be true
|
44
|
+
Then I expect s.dp? to be false
|
45
|
+
Then I expect s.dp to be nil
|
46
|
+
And sfilter 's.dp>4' to be false
|
47
|
+
|
48
|
+
# Scenario: Wrong field name in sample
|
49
|
+
When I evaluate '0/0:6,0:6:3:0,3,33'
|
50
|
+
Then I expect s.empty? to be false
|
51
|
+
Then I expect s.dp? to be true
|
52
|
+
Then I expect s.what? to throw an error
|
53
|
+
And I expect s.what to throw an error
|
54
|
+
|
55
|
+
# Scenario: Get other information for a sample
|
56
|
+
When I evaluate '0/0:6,0:6:3:0,3,33'
|
57
|
+
Then I expect r.chrom to be "1"
|
58
|
+
And I expect r.alt to be ["G"]
|
59
|
+
And I expect r.info.af to be 0.667
|
60
|
+
|
@@ -8,5 +8,5 @@ When /^I execute "(.*?)"$/ do |arg1|
|
|
8
8
|
end
|
9
9
|
|
10
10
|
Then(/^I expect the named output to match the named output "(.*?)"$/) do |arg1|
|
11
|
-
RegressionTest::CliExec::exec(@cmd,arg1).should be_true
|
11
|
+
RegressionTest::CliExec::exec(@cmd,arg1,ignore: '##BioVcf=').should be_true
|
12
12
|
end
|
@@ -117,3 +117,35 @@ Then(/^I expect rec\.valid\? to be true$/) do
|
|
117
117
|
expect(@rec1.valid?).to eq true
|
118
118
|
end
|
119
119
|
|
120
|
+
Then(/^I expect r\.original\.gt\? to be true$/) do
|
121
|
+
pending # express the regexp above with the code you wish you had
|
122
|
+
end
|
123
|
+
|
124
|
+
Then(/^I expect r\.original\? to be true$/) do
|
125
|
+
pending # express the regexp above with the code you wish you had
|
126
|
+
end
|
127
|
+
|
128
|
+
Then(/^I expect r\.original\.gti\? to be true$/) do
|
129
|
+
pending # express the regexp above with the code you wish you had
|
130
|
+
end
|
131
|
+
|
132
|
+
Then(/^I expect r\.original\.gti to be \[(\d+),(\d+)\]$/) do |arg1, arg2|
|
133
|
+
pending # express the regexp above with the code you wish you had
|
134
|
+
end
|
135
|
+
|
136
|
+
Then(/^I expect r\.original\.gti\[(\d+)\] to be (\d+)$/) do |arg1, arg2|
|
137
|
+
pending # express the regexp above with the code you wish you had
|
138
|
+
end
|
139
|
+
|
140
|
+
Then(/^I expect r\.original\.gts\? to be true$/) do
|
141
|
+
pending # express the regexp above with the code you wish you had
|
142
|
+
end
|
143
|
+
|
144
|
+
Then(/^I expect r\.original\.gts to be \["(.*?)","(.*?)"\]$/) do |arg1, arg2|
|
145
|
+
pending # express the regexp above with the code you wish you had
|
146
|
+
end
|
147
|
+
|
148
|
+
Then(/^I expect r\.original\.gts\[(\d+)\] to be "(.*?)"$/) do |arg1, arg2|
|
149
|
+
pending # express the regexp above with the code you wish you had
|
150
|
+
end
|
151
|
+
|
@@ -0,0 +1,90 @@
|
|
1
|
+
Given(/^the VCF line$/) do |string|
|
2
|
+
@header = nil
|
3
|
+
@vcfline = string
|
4
|
+
end
|
5
|
+
|
6
|
+
When(/^I evaluate '([^']+)'$/) do |arg1|
|
7
|
+
@fields = VcfLine.parse((@vcfline.split(/\s+/)+[arg1]).join("\t"))
|
8
|
+
@rec = VcfRecord.new(@fields,@header)
|
9
|
+
p @rec
|
10
|
+
end
|
11
|
+
|
12
|
+
Then(/^I expect s\.empty\? to be false$/) do
|
13
|
+
p @rec.sample[0]
|
14
|
+
expect(@s.empty?).to be false
|
15
|
+
end
|
16
|
+
|
17
|
+
|
18
|
+
Then(/^I expect s\.dp to be (\d+)$/) do |arg1|
|
19
|
+
pending # express the regexp above with the code you wish you had
|
20
|
+
end
|
21
|
+
|
22
|
+
Then(/^sfilter 's\.dp>(\d+)' to be true$/) do |arg1|
|
23
|
+
pending # express the regexp above with the code you wish you had
|
24
|
+
end
|
25
|
+
|
26
|
+
When(/^I evaluate missing '(\d+)\/(\d+):(\d+),(\d+):\.:(\d+):(\d+),(\d+),(\d+)'$/) do |arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8|
|
27
|
+
pending # express the regexp above with the code you wish you had
|
28
|
+
end
|
29
|
+
|
30
|
+
Then(/^I expect s\.dp to be nil$/) do
|
31
|
+
pending # express the regexp above with the code you wish you had
|
32
|
+
end
|
33
|
+
|
34
|
+
Then(/^sfilter 's\.dp>(\d+)' to be false$/) do |arg1|
|
35
|
+
pending # express the regexp above with the code you wish you had
|
36
|
+
end
|
37
|
+
|
38
|
+
When(/^I evaluate empty '\.\/\.'$/) do
|
39
|
+
pending # express the regexp above with the code you wish you had
|
40
|
+
end
|
41
|
+
|
42
|
+
Then(/^sfilter 's\.dp>(\d+)' to throw an error$/) do |arg1|
|
43
|
+
pending # express the regexp above with the code you wish you had
|
44
|
+
end
|
45
|
+
|
46
|
+
When(/^I evaluate missing '(\d+)\/(\d+):(\d+),(\d+):\.:(\d+):(\d+),(\d+),(\d+)' with ignore missing$/) do |arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8|
|
47
|
+
pending # express the regexp above with the code you wish you had
|
48
|
+
end
|
49
|
+
|
50
|
+
Then(/^I expect s\.empty\? to be true$/) do
|
51
|
+
pending # express the regexp above with the code you wish you had
|
52
|
+
end
|
53
|
+
|
54
|
+
Then(/^I expect s\.dp to throw an error$/) do
|
55
|
+
pending # express the regexp above with the code you wish you had
|
56
|
+
end
|
57
|
+
|
58
|
+
When(/^I evaluate empty '\.\/\.' with ignore missing$/) do
|
59
|
+
pending # express the regexp above with the code you wish you had
|
60
|
+
end
|
61
|
+
|
62
|
+
Then(/^I expect s\.dp\? to be true$/) do
|
63
|
+
pending # express the regexp above with the code you wish you had
|
64
|
+
end
|
65
|
+
|
66
|
+
Then(/^I expect s\.dp\? to be false$/) do
|
67
|
+
pending # express the regexp above with the code you wish you had
|
68
|
+
end
|
69
|
+
|
70
|
+
Then(/^I expect s\.what\? to throw an error$/) do
|
71
|
+
pending # express the regexp above with the code you wish you had
|
72
|
+
end
|
73
|
+
|
74
|
+
Then(/^I expect s\.what to throw an error$/) do
|
75
|
+
pending # express the regexp above with the code you wish you had
|
76
|
+
end
|
77
|
+
|
78
|
+
Then(/^I expect r\.chrom to be "(.*?)"$/) do |arg1|
|
79
|
+
pending # express the regexp above with the code you wish you had
|
80
|
+
end
|
81
|
+
|
82
|
+
Then(/^I expect r\.alt to be \["(.*?)"\]$/) do |arg1|
|
83
|
+
pending # express the regexp above with the code you wish you had
|
84
|
+
end
|
85
|
+
|
86
|
+
Then(/^I expect r\.info\.af to be (\d+)\.(\d+)$/) do |arg1, arg2|
|
87
|
+
pending # express the regexp above with the code you wish you had
|
88
|
+
end
|
89
|
+
|
90
|
+
|
data/lib/bio-vcf/utils.rb
CHANGED
@@ -9,14 +9,20 @@ module BioVcf
|
|
9
9
|
!!Float(str) rescue false
|
10
10
|
end
|
11
11
|
|
12
|
-
def self::convert
|
13
|
-
if
|
14
|
-
|
12
|
+
def self::convert str
|
13
|
+
if str =~ /,/
|
14
|
+
str.split(/,/).map { |item| convert(item) }
|
15
15
|
else
|
16
|
-
|
17
|
-
|
16
|
+
if integer?(str)
|
17
|
+
str.to_i
|
18
|
+
else
|
19
|
+
if float?(str)
|
20
|
+
str.to_f
|
21
|
+
else
|
22
|
+
str
|
23
|
+
end
|
24
|
+
end
|
18
25
|
end
|
19
|
-
v
|
20
26
|
end
|
21
27
|
end
|
22
28
|
|
@@ -103,6 +103,10 @@ module BioVcf
|
|
103
103
|
@alt = alt
|
104
104
|
end
|
105
105
|
|
106
|
+
def to_s
|
107
|
+
@original_s
|
108
|
+
end
|
109
|
+
|
106
110
|
def values
|
107
111
|
@cache_values ||= @original_s.split(/:/)
|
108
112
|
end
|
@@ -164,7 +168,6 @@ module BioVcf
|
|
164
168
|
v.split(',').map{|i| i.to_i}
|
165
169
|
end
|
166
170
|
|
167
|
-
|
168
171
|
end
|
169
172
|
|
170
173
|
# Holds all samples
|
data/lib/bio-vcf/vcfheader.rb
CHANGED
@@ -27,6 +27,16 @@ module BioVcf
|
|
27
27
|
@lines << line.strip
|
28
28
|
end
|
29
29
|
|
30
|
+
# Add a key value list to the header
|
31
|
+
def tag h
|
32
|
+
h2 = h.dup
|
33
|
+
[:show_help,:skip_header,:verbose,:quiet,:debug].each { |key| h2.delete(key) }
|
34
|
+
info = h2.map { |k,v| k.to_s.capitalize+'='+'"'+v.to_s+'"' }.join(',')
|
35
|
+
line = '##BioVcf=<'+info+'>'
|
36
|
+
@lines.insert(-2,line)
|
37
|
+
line
|
38
|
+
end
|
39
|
+
|
30
40
|
def version
|
31
41
|
@version ||= lines[0].scan(/##fileformat=VCFv(\d+\.\d+)/)[0][0]
|
32
42
|
end
|
@@ -39,10 +49,24 @@ module BioVcf
|
|
39
49
|
@column ||= column_names.size
|
40
50
|
end
|
41
51
|
|
52
|
+
def printable_header_line(fields)
|
53
|
+
fields.map { | field |
|
54
|
+
if field == '#samples'
|
55
|
+
samples
|
56
|
+
else
|
57
|
+
field
|
58
|
+
end
|
59
|
+
}.join("\t")
|
60
|
+
end
|
61
|
+
|
42
62
|
def samples
|
43
63
|
@samples ||= column_names[9..-1]
|
44
64
|
end
|
45
65
|
|
66
|
+
def samples_index_array
|
67
|
+
@all_samples_index ||= column_names[9..-1].fill{|i| i}
|
68
|
+
end
|
69
|
+
|
46
70
|
def sample_index
|
47
71
|
return @sample_index if @sample_index
|
48
72
|
index = {}
|
data/lib/bio-vcf/vcfrdf.rb
CHANGED
@@ -11,20 +11,27 @@ module BioVcf
|
|
11
11
|
@prefix dc: <http://purl.org/dc/elements/1.1/> .
|
12
12
|
@prefix hgnc: <http://identifiers.org/hgnc.symbol/> .
|
13
13
|
@prefix doi: <http://dx.doi.org/> .
|
14
|
-
@prefix : <http://biobeat.org/rdf/
|
14
|
+
@prefix db: <http://biobeat.org/rdf/db#> .
|
15
|
+
@prefix seq: <http://biobeat.org/rdf/seq#> .
|
16
|
+
@prefix : <http://biobeat.org/rdf/vcf#> .
|
15
17
|
EOB
|
16
18
|
end
|
17
19
|
|
18
|
-
def VcfRdf::record id,rec,
|
19
|
-
id2 = [id,'ch'+rec.chrom,rec.pos].join('_')
|
20
|
+
def VcfRdf::record id,rec,tags = "{}"
|
21
|
+
id2 = [id,'ch'+rec.chrom,rec.pos,rec.alt.join('')].join('_')
|
20
22
|
print <<OUT
|
21
|
-
:#{id2} :chr \"#{rec.chrom}\" .
|
22
|
-
:#{id2} :pos #{rec.pos} .
|
23
|
-
:#{id2} :
|
23
|
+
:#{id2} seq:chr \"#{rec.chrom}\" .
|
24
|
+
:#{id2} seq:pos #{rec.pos} .
|
25
|
+
:#{id2} seq:alt \"#{rec.alt[0]}\" .
|
26
|
+
:#{id2} db:vcf true .
|
24
27
|
OUT
|
25
|
-
hash
|
26
|
-
|
28
|
+
hash = eval(tags)
|
29
|
+
if hash
|
30
|
+
hash.each do |k,v|
|
31
|
+
print ":#{id2} #{k} #{v} .\n"
|
32
|
+
end
|
27
33
|
end
|
34
|
+
print "\n"
|
28
35
|
end
|
29
36
|
end
|
30
37
|
end
|
data/lib/bio-vcf/vcfrecord.rb
CHANGED
@@ -27,9 +27,10 @@ module BioVcf
|
|
27
27
|
v = if @h
|
28
28
|
@h[m.to_s.upcase]
|
29
29
|
else
|
30
|
-
@info =~ /#{m.to_s
|
30
|
+
@info =~ /#{m.to_s}=([^;]+)/i
|
31
31
|
value = $1
|
32
|
-
#
|
32
|
+
# p [m,value]
|
33
|
+
# m = @info.match(/#{m.to_s.upcase}=(?<value>[^;]+)/) slower!
|
33
34
|
# value = m[:value]
|
34
35
|
if value == nil
|
35
36
|
split_fields # no option but to split
|
@@ -117,6 +118,7 @@ module BioVcf
|
|
117
118
|
def initialize fields, header
|
118
119
|
@fields = fields
|
119
120
|
@header = header
|
121
|
+
@sample_by_index = []
|
120
122
|
end
|
121
123
|
|
122
124
|
def chrom
|
@@ -176,14 +178,15 @@ module BioVcf
|
|
176
178
|
sample[name]
|
177
179
|
end
|
178
180
|
|
181
|
+
def sample_by_index i
|
182
|
+
# p [i,@fields[i+9]]
|
183
|
+
@sample_by_index[i] ||= VcfGenotypeField.new(@fields[i+9],format,@header,alt)
|
184
|
+
end
|
185
|
+
|
186
|
+
# Walk the samples. list contains an Array of int (the index)
|
179
187
|
def each_sample(list = nil)
|
180
|
-
|
181
|
-
|
182
|
-
samples.each_with_index { |name,i|
|
183
|
-
# p [i,list]
|
184
|
-
next if list and not list.index(i.to_s)
|
185
|
-
yield VcfSample::Sample.new(self,sample[name])
|
186
|
-
}
|
188
|
+
list = @header.samples_index_array() if not list
|
189
|
+
list.each { |i| yield VcfSample::Sample.new(self,sample_by_index(i)) }
|
187
190
|
end
|
188
191
|
|
189
192
|
def missing_samples?
|
@@ -230,6 +233,39 @@ module BioVcf
|
|
230
233
|
end
|
231
234
|
end
|
232
235
|
|
236
|
+
def filter expr, ignore_missing_data, quiet
|
237
|
+
begin
|
238
|
+
if not respond_to?(:call_cached_filter)
|
239
|
+
code =
|
240
|
+
"""
|
241
|
+
def call_cached_filter(rec,fields)
|
242
|
+
r = rec
|
243
|
+
#{expr}
|
244
|
+
end
|
245
|
+
"""
|
246
|
+
self.class.class_eval(code)
|
247
|
+
end
|
248
|
+
res = call_cached_filter(self,@fields)
|
249
|
+
if res.kind_of?(Array)
|
250
|
+
res.join("\t")
|
251
|
+
else
|
252
|
+
res
|
253
|
+
end
|
254
|
+
rescue NoMethodError => e
|
255
|
+
if not quiet
|
256
|
+
$stderr.print "RECORD ERROR!\n"
|
257
|
+
$stderr.print [@fields],"\n"
|
258
|
+
$stderr.print expr,"\n"
|
259
|
+
end
|
260
|
+
if ignore_missing_data
|
261
|
+
$stderr.print e.message if not quiet
|
262
|
+
return false
|
263
|
+
else
|
264
|
+
raise
|
265
|
+
end
|
266
|
+
end
|
267
|
+
end
|
268
|
+
|
233
269
|
# Return the sample
|
234
270
|
def method_missing(m, *args, &block)
|
235
271
|
name = m.to_s
|