vcf 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/vcf.rb +24 -24
- metadata +1 -1
data/lib/vcf.rb
CHANGED
@@ -2,59 +2,59 @@ class Vcf
|
|
2
2
|
public
|
3
3
|
attr_accessor :chrom, :pos, :id, :ref, :alt, :qual, :filter, :info, :format, :samples
|
4
4
|
|
5
|
-
def initialize(line=nil)
|
6
|
-
|
5
|
+
def initialize(line=nil, sample_names=nil)
|
6
|
+
@info = {}
|
7
|
+
@samples = {}
|
8
|
+
parse_line(line, sample_names) if line != nil
|
7
9
|
end
|
8
10
|
|
9
11
|
def parse_line(line, sample_names=nil)
|
10
|
-
f = line.split("\t", -1)
|
12
|
+
f = line.chomp.split("\t", -1)
|
11
13
|
raise "VCF lines must have at least 8 fields" if f.size < 8
|
12
|
-
chrom = f[0]
|
13
|
-
pos = f[1].to_i
|
14
|
-
id = f[2]
|
15
|
-
ref = f[3]
|
16
|
-
alt = f[4]
|
17
|
-
qual = f[5].to_i
|
18
|
-
filter = f[6]
|
19
|
-
|
20
|
-
info = {}
|
14
|
+
@chrom = f[0]
|
15
|
+
@pos = f[1].to_i
|
16
|
+
@id = f[2]
|
17
|
+
@ref = f[3]
|
18
|
+
@alt = f[4]
|
19
|
+
@qual = f[5].to_i
|
20
|
+
@filter = f[6]
|
21
|
+
|
22
|
+
@info = {}
|
21
23
|
info_vec = f[7].split(";")
|
22
24
|
info_vec.each do |x|
|
23
25
|
keyval = x.split("=", -1)
|
24
26
|
if keyval.size == 2 # If it's key=value
|
25
|
-
info[keyval[0]] = keyval[1]
|
27
|
+
@info[keyval[0]] = keyval[1]
|
26
28
|
else # Otherwise, it's just a flag
|
27
|
-
info[x] = ""
|
29
|
+
@info[x] = ""
|
28
30
|
end
|
29
31
|
end
|
30
32
|
|
31
|
-
samples = {}
|
33
|
+
@samples = {}
|
32
34
|
return true if f.size == 8 # Line has fields only up to INFO (no FORMAT/sample columns)
|
33
35
|
raise "Can't have format with no samples" if f.size == 9
|
34
36
|
|
35
|
-
format = f[8]
|
37
|
+
@format = f[8]
|
36
38
|
|
37
|
-
sample_keys = format.split(":")
|
39
|
+
sample_keys = @format.split(":")
|
38
40
|
|
39
41
|
num_samples = f.size - 9 # How many fields are past the format
|
40
42
|
|
41
43
|
if sample_names == nil # Default the sample names to ["1", "2", ..., "<num_samples>"]
|
42
44
|
sample_names = (1..num_samples).to_a.map{|i| i.to_s}
|
43
|
-
elsif
|
44
|
-
raise "
|
45
|
+
elsif sample_names.size != num_samples
|
46
|
+
raise "Unexpected number of samples (#{num_samples}) based on the provided sample names (#{sample_names.inspect})"
|
45
47
|
end
|
46
48
|
|
47
49
|
sample_names.each_with_index do |sample_name, sample_index|
|
48
50
|
i = sample_index + 9 # index into columns (f)
|
49
51
|
sample_values = f[i].split(":")
|
50
|
-
raise "Expected number of sample values to
|
51
|
-
samples[sample_name] = {}
|
52
|
-
sample_keys.each_with_index {|key, value_index| samples[sample_name][
|
52
|
+
raise "Expected number of sample values to be <= number of sample keys in FORMAT column Format=#{@format} but sample=#{f[i]}" if sample_values.size > sample_keys.size
|
53
|
+
@samples[sample_name] = {}
|
54
|
+
sample_keys.each_with_index {|key, value_index| @samples[sample_name][key] = sample_values[value_index] || ""}
|
53
55
|
end
|
54
56
|
|
55
57
|
return true;
|
56
58
|
end
|
57
59
|
|
58
|
-
private
|
59
|
-
attr_accessor :
|
60
60
|
end
|