vcf 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/vcf.rb +24 -24
- metadata +1 -1
data/lib/vcf.rb
CHANGED
@@ -2,59 +2,59 @@ class Vcf
|
|
2
2
|
public
|
3
3
|
attr_accessor :chrom, :pos, :id, :ref, :alt, :qual, :filter, :info, :format, :samples
|
4
4
|
|
5
|
-
def initialize(line=nil)
|
6
|
-
|
5
|
+
def initialize(line=nil, sample_names=nil)
|
6
|
+
@info = {}
|
7
|
+
@samples = {}
|
8
|
+
parse_line(line, sample_names) if line != nil
|
7
9
|
end
|
8
10
|
|
9
11
|
def parse_line(line, sample_names=nil)
|
10
|
-
f = line.split("\t", -1)
|
12
|
+
f = line.chomp.split("\t", -1)
|
11
13
|
raise "VCF lines must have at least 8 fields" if f.size < 8
|
12
|
-
chrom = f[0]
|
13
|
-
pos = f[1].to_i
|
14
|
-
id = f[2]
|
15
|
-
ref = f[3]
|
16
|
-
alt = f[4]
|
17
|
-
qual = f[5].to_i
|
18
|
-
filter = f[6]
|
19
|
-
|
20
|
-
info = {}
|
14
|
+
@chrom = f[0]
|
15
|
+
@pos = f[1].to_i
|
16
|
+
@id = f[2]
|
17
|
+
@ref = f[3]
|
18
|
+
@alt = f[4]
|
19
|
+
@qual = f[5].to_i
|
20
|
+
@filter = f[6]
|
21
|
+
|
22
|
+
@info = {}
|
21
23
|
info_vec = f[7].split(";")
|
22
24
|
info_vec.each do |x|
|
23
25
|
keyval = x.split("=", -1)
|
24
26
|
if keyval.size == 2 # If it's key=value
|
25
|
-
info[keyval[0]] = keyval[1]
|
27
|
+
@info[keyval[0]] = keyval[1]
|
26
28
|
else # Otherwise, it's just a flag
|
27
|
-
info[x] = ""
|
29
|
+
@info[x] = ""
|
28
30
|
end
|
29
31
|
end
|
30
32
|
|
31
|
-
samples = {}
|
33
|
+
@samples = {}
|
32
34
|
return true if f.size == 8 # Has just up to info
|
33
35
|
raise "Can't have format with no samples" if f.size == 9
|
34
36
|
|
35
|
-
format = f[8]
|
37
|
+
@format = f[8]
|
36
38
|
|
37
|
-
sample_keys = format.split(":")
|
39
|
+
sample_keys = @format.split(":")
|
38
40
|
|
39
41
|
num_samples = f.size - 9 # How many fields are past the format
|
40
42
|
|
41
43
|
if sample_names == nil # Make the sample names just ["1", "2", ... , "#{num_samples}"]
|
42
44
|
sample_names = (1..num_samples).to_a.map{|i| i.to_s}
|
43
|
-
elsif
|
44
|
-
raise "
|
45
|
+
elsif sample_names.size != num_samples
|
46
|
+
raise "Unexpected number of samples (#{num_samples}) based on the provided sample names (#{sample_names.inspect})"
|
45
47
|
end
|
46
48
|
|
47
49
|
sample_names.each_with_index do |sample_name, sample_index|
|
48
50
|
i = sample_index + 9 # index into columns (f)
|
49
51
|
sample_values = f[i].split(":")
|
50
|
-
raise "Expected number of sample values to
|
51
|
-
samples[sample_name] = {}
|
52
|
-
sample_keys.each_with_index {|key, value_index| samples[sample_name][
|
52
|
+
raise "Expected number of sample values to be <= number of sample keys in FORMAT column Format=#{@format} but sample=#{f[i]}" if sample_values.size > sample_keys.size
|
53
|
+
@samples[sample_name] = {}
|
54
|
+
sample_keys.each_with_index {|key, value_index| @samples[sample_name][key] = sample_values[value_index] || ""}
|
53
55
|
end
|
54
56
|
|
55
57
|
return true;
|
56
58
|
end
|
57
59
|
|
58
|
-
private
|
59
|
-
attr_accessor :
|
60
60
|
end
|