germ 0.1 → 0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/ext/hash_table_aux/HashTableAux.c +19 -6
- data/lib/fasta.rb +122 -24
- data/lib/fastq.rb +45 -0
- data/lib/genetic_code.rb +141 -0
- data/lib/genomic_locus.rb +50 -0
- data/lib/germ/config.rb +64 -4
- data/lib/germ/flagstat.rb +4 -0
- data/lib/germ.rb +3 -0
- data/lib/go.rb +164 -0
- data/lib/gtf/gene.rb +293 -0
- data/lib/gtf.rb +34 -202
- data/lib/hash_table.rb +190 -54
- data/lib/intervals.rb +225 -250
- data/lib/maf.rb +42 -58
- data/lib/mutation.rb +41 -0
- data/lib/mutation_set.rb +60 -239
- data/lib/mutect.rb +22 -17
- data/lib/oncotator.rb +43 -1
- data/lib/sdrf.rb +14 -0
- data/lib/tcga.rb +41 -0
- data/lib/vcf.rb +77 -73
- metadata +33 -33
data/lib/vcf.rb
CHANGED
@@ -1,10 +1,13 @@
|
|
1
1
|
require 'mutation_set'
|
2
|
-
require 'oncotator'
|
3
2
|
require 'yaml'
|
4
3
|
|
5
|
-
class VCF <
|
6
|
-
|
4
|
+
class VCF < Mutation::Collection
|
5
|
+
header_on
|
6
|
+
requires :chrom => :str, :pos => :int, :id => :str, :ref => :str,
|
7
|
+
:alt => :str, :qual => :str, :filter => :str, :info => [ ";", "=" ]
|
8
|
+
might_have :format => :str
|
7
9
|
comments "##"
|
10
|
+
attr_reader :samples
|
8
11
|
|
9
12
|
class Preamble
|
10
13
|
def initialize lines
|
@@ -36,50 +39,49 @@ class VCF < MutationSet::Sample
|
|
36
39
|
end
|
37
40
|
end
|
38
41
|
|
39
|
-
def
|
42
|
+
def enforce_header
|
40
43
|
# kludge for empty vcf with no format line
|
41
|
-
|
42
|
-
raise "VCF lacks required headers" if !missing.empty? && !(missing.first == "format" && missing.size == 1)
|
44
|
+
super
|
43
45
|
|
44
|
-
if
|
45
|
-
@samples =
|
46
|
+
if header.size > required.size
|
47
|
+
@samples = @header - required - [ :format ]
|
48
|
+
# recover the original sample name from the sleeve
|
49
|
+
new_samples = @samples.map do |s|
|
50
|
+
@sleeve[s].to_sym
|
51
|
+
end
|
52
|
+
@header = @header - @samples + new_samples
|
53
|
+
@samples = new_samples
|
46
54
|
end
|
47
|
-
|
48
|
-
@headers = array.map &:to_sym
|
49
55
|
end
|
50
56
|
|
51
|
-
class Line <
|
52
|
-
|
57
|
+
class Line < Mutation::Record
|
58
|
+
alias_key :seqname, :chrom
|
53
59
|
alias_key :start, :pos
|
54
|
-
alias_key :
|
55
|
-
def
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
@
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
[ key.to_sym, value ]
|
70
|
-
end]
|
71
|
-
@format = @mutation[:format] = @mutation[:format].split(/:/).map(&:to_sym)
|
72
|
-
|
73
|
-
if @sample.samples
|
74
|
-
sample_fields = fields[required.size..-1]
|
75
|
-
@genotypes = {}
|
76
|
-
@sample.samples.each_with_index do |s,i|
|
77
|
-
next if !sample_fields[i]
|
78
|
-
@genotypes[s] = VCF::Genotype.new self, sample_fields[i].split(/:/)
|
60
|
+
alias_key :stop, :default_stop
|
61
|
+
def initialize(h, s)
|
62
|
+
super h, s
|
63
|
+
|
64
|
+
self.format = self.format.split(/:/).map{|f| f.to_sym} if self.format
|
65
|
+
|
66
|
+
build_genotypes
|
67
|
+
build_muts
|
68
|
+
end
|
69
|
+
|
70
|
+
def build_genotypes
|
71
|
+
@genotypes = {}
|
72
|
+
if @table.samples
|
73
|
+
@table.samples.each do |s|
|
74
|
+
@genotypes[s] = VCF::Genotype.new self, self.send(s)
|
79
75
|
end
|
80
76
|
end
|
81
77
|
end
|
82
78
|
|
79
|
+
def build_muts
|
80
|
+
@table.samples.each do |s|
|
81
|
+
@muts.push Mutation.new(chrom, pos, ref, alt, genotype(s).ref_count, genotype(s).alt_count)
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
83
85
|
def skip_genotype? g
|
84
86
|
name, geno = g.first
|
85
87
|
geno = genotype(geno)
|
@@ -91,72 +93,74 @@ class VCF < MutationSet::Sample
|
|
91
93
|
alt.split(/,/).first
|
92
94
|
end
|
93
95
|
|
94
|
-
def
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
mutation[h].map{|k,v| "#{k}=#{v}" }.join(";")
|
103
|
-
when :format
|
104
|
-
mutation[h].join(":")
|
105
|
-
else
|
106
|
-
mutation[h]
|
107
|
-
end
|
108
|
-
}.join("\t") + "\t" + sample.samples.map{|s| genotype(s).to_s }.join("\t")
|
96
|
+
def format_column column
|
97
|
+
if column == :format
|
98
|
+
self.format.join(":")
|
99
|
+
elsif genotype(column)
|
100
|
+
genotype(column).to_s
|
101
|
+
else
|
102
|
+
super(column)
|
103
|
+
end
|
109
104
|
end
|
110
105
|
|
111
106
|
def genotype(s)
|
112
|
-
@genotypes[s] if @genotypes
|
113
|
-
end
|
114
|
-
|
115
|
-
def clean_required
|
116
|
-
sample.clean_headers[0...required.size]
|
107
|
+
@genotypes[s.to_sym] if @genotypes
|
117
108
|
end
|
118
109
|
end
|
110
|
+
line_class VCF::Line
|
119
111
|
|
120
112
|
class Genotype
|
121
|
-
attr_reader :info
|
122
113
|
def initialize(line,field)
|
123
114
|
@line = line
|
124
|
-
@
|
115
|
+
@hash = Hash[line.format.map(&:downcase).zip(field.split /:/)]
|
116
|
+
end
|
117
|
+
|
118
|
+
def method_missing sym, *args, &block
|
119
|
+
if @hash[sym]
|
120
|
+
@hash[sym]
|
121
|
+
elsif sym.to_s =~ /(.*)=/
|
122
|
+
@hash[$1.to_sym] = args.first
|
123
|
+
else
|
124
|
+
super sym, *args, &block
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
def respond_to_missing? sym, include_all = false
|
129
|
+
@hash[sym] || super
|
125
130
|
end
|
126
131
|
|
127
132
|
def homozygous?
|
128
|
-
|
133
|
+
gt =~ /0.0/ || gt =~ /1.1/
|
129
134
|
end
|
130
135
|
|
131
136
|
def heterozygous?
|
132
|
-
|
137
|
+
gt =~ /0.1/ || gt =~ /1.0/
|
133
138
|
end
|
134
139
|
|
135
140
|
def empty?
|
136
|
-
|
141
|
+
gt =~ /\..\./
|
137
142
|
end
|
138
143
|
|
139
144
|
def callable?
|
140
|
-
|
145
|
+
gt !~ /\..\./
|
141
146
|
end
|
142
147
|
|
143
|
-
def
|
144
|
-
def approx_depth; @info[:DP].to_i; end
|
148
|
+
def approx_depth; dp.to_i; end
|
145
149
|
def depth; alt_count + ref_count; end
|
146
|
-
def alt_count; @
|
147
|
-
def ref_count; @
|
150
|
+
def alt_count; @alt_count ||= respond_to?(:ad) ? ad.split(/,/)[1].to_i : nil; end
|
151
|
+
def ref_count; @ref_count ||= respond_to?(:ad) ? ad.split(/,/)[0].to_i : nil; end
|
148
152
|
def alt_freq; alt_count / depth.to_f; end
|
149
153
|
def ref_freq; ref_count / depth.to_f; end
|
150
154
|
def ref_length; @line.ref.length; end
|
151
155
|
def alt_length; @line.alt.length; end
|
152
|
-
def alt_base_quality;
|
153
|
-
def alt_map_quality;
|
154
|
-
def alt_mismatch_rate;
|
155
|
-
def alt_mismatch_count;
|
156
|
-
def quality;
|
156
|
+
def alt_base_quality; respond_to?(:nqsbq) ? nqsbq.split(/,/)[0].to_f : nil; end
|
157
|
+
def alt_map_quality; respond_to?(:mqs) ? mqs.split(/,/)[0].to_f : nil; end
|
158
|
+
def alt_mismatch_rate; respond_to?(:nqsmm) ? nqsmm.split(/,/)[0].to_f : nil; end
|
159
|
+
def alt_mismatch_count; respond_to?(:mm) ? mm.split(/,/)[0].to_f : nil; end
|
160
|
+
def quality; gq.to_i; end
|
157
161
|
|
158
162
|
def to_s
|
159
|
-
@line.format.map{|f| @
|
163
|
+
@line.format.map(&:downcase).map{|f| @hash[f]}.join(":")
|
160
164
|
end
|
161
165
|
end
|
162
166
|
end
|
metadata
CHANGED
@@ -1,62 +1,55 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: germ
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.
|
5
|
-
prerelease:
|
4
|
+
version: '0.3'
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Saurabh Asthana
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date: 2014-
|
11
|
+
date: 2014-11-04 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: extlib
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
|
-
- -
|
17
|
+
- - '>='
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: '0'
|
22
20
|
type: :runtime
|
23
21
|
prerelease: false
|
24
22
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
23
|
requirements:
|
27
|
-
- -
|
24
|
+
- - '>='
|
28
25
|
- !ruby/object:Gem::Version
|
29
26
|
version: '0'
|
30
27
|
- !ruby/object:Gem::Dependency
|
31
28
|
name: net-http-persistent
|
32
29
|
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
30
|
requirements:
|
35
|
-
- -
|
31
|
+
- - '>='
|
36
32
|
- !ruby/object:Gem::Version
|
37
33
|
version: '0'
|
38
34
|
type: :runtime
|
39
35
|
prerelease: false
|
40
36
|
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
37
|
requirements:
|
43
|
-
- -
|
38
|
+
- - '>='
|
44
39
|
- !ruby/object:Gem::Version
|
45
40
|
version: '0'
|
46
41
|
- !ruby/object:Gem::Dependency
|
47
42
|
name: sequel
|
48
43
|
requirement: !ruby/object:Gem::Requirement
|
49
|
-
none: false
|
50
44
|
requirements:
|
51
|
-
- -
|
45
|
+
- - '>='
|
52
46
|
- !ruby/object:Gem::Version
|
53
47
|
version: '0'
|
54
48
|
type: :runtime
|
55
49
|
prerelease: false
|
56
50
|
version_requirements: !ruby/object:Gem::Requirement
|
57
|
-
none: false
|
58
51
|
requirements:
|
59
|
-
- -
|
52
|
+
- - '>='
|
60
53
|
- !ruby/object:Gem::Version
|
61
54
|
version: '0'
|
62
55
|
description: See summary
|
@@ -67,49 +60,56 @@ extensions:
|
|
67
60
|
- ext/hash_table_aux/extconf.rb
|
68
61
|
extra_rdoc_files: []
|
69
62
|
files:
|
70
|
-
- lib/gtf.rb
|
71
|
-
- lib/mutation_set.rb
|
72
|
-
- lib/indelocator.rb
|
73
|
-
- lib/maf.rb
|
74
|
-
- lib/hash_table.rb
|
75
|
-
- lib/sam.rb
|
76
|
-
- lib/germ/config.rb
|
77
|
-
- lib/germ/printer.rb
|
78
|
-
- lib/germ/data_types.rb
|
79
|
-
- lib/germ/flagstat.rb
|
80
63
|
- lib/oncotator.rb
|
64
|
+
- lib/gtf/gene.rb
|
65
|
+
- lib/sam.rb
|
66
|
+
- lib/mutation.rb
|
67
|
+
- lib/hash_table.rb
|
68
|
+
- lib/genomic_locus.rb
|
69
|
+
- lib/intervals.rb
|
70
|
+
- lib/genetic_code.rb
|
71
|
+
- lib/go.rb
|
72
|
+
- lib/germ.rb
|
73
|
+
- lib/mutation_set.rb
|
74
|
+
- lib/gtf.rb
|
81
75
|
- lib/mutect.rb
|
82
76
|
- lib/vcf.rb
|
83
|
-
- lib/germ.rb
|
84
|
-
- lib/intervals.rb
|
85
77
|
- lib/fasta.rb
|
78
|
+
- lib/indelocator.rb
|
79
|
+
- lib/sdrf.rb
|
80
|
+
- lib/tcga.rb
|
81
|
+
- lib/fastq.rb
|
82
|
+
- lib/maf.rb
|
83
|
+
- lib/germ/flagstat.rb
|
84
|
+
- lib/germ/data_types.rb
|
85
|
+
- lib/germ/printer.rb
|
86
|
+
- lib/germ/config.rb
|
86
87
|
- ext/fasta_aux/FastaAux.c
|
87
88
|
- ext/hash_table_aux/HashTableAux.c
|
88
89
|
- ext/fasta_aux/extconf.rb
|
89
90
|
- ext/hash_table_aux/extconf.rb
|
90
91
|
homepage: http://github.com/mountetna/germ
|
91
92
|
licenses: []
|
93
|
+
metadata: {}
|
92
94
|
post_install_message:
|
93
95
|
rdoc_options: []
|
94
96
|
require_paths:
|
95
97
|
- lib
|
96
98
|
- ext
|
97
99
|
required_ruby_version: !ruby/object:Gem::Requirement
|
98
|
-
none: false
|
99
100
|
requirements:
|
100
|
-
- -
|
101
|
+
- - '>='
|
101
102
|
- !ruby/object:Gem::Version
|
102
103
|
version: '0'
|
103
104
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
104
|
-
none: false
|
105
105
|
requirements:
|
106
|
-
- -
|
106
|
+
- - '>='
|
107
107
|
- !ruby/object:Gem::Version
|
108
108
|
version: '0'
|
109
109
|
requirements: []
|
110
110
|
rubyforge_project:
|
111
|
-
rubygems_version:
|
111
|
+
rubygems_version: 2.0.6
|
112
112
|
signing_key:
|
113
|
-
specification_version:
|
113
|
+
specification_version: 4
|
114
114
|
summary: Collection of utilities for use in computational genomics
|
115
115
|
test_files: []
|