germ 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,196 @@
1
+ require 'germ/data_types'
2
+ require 'germ/printer'
3
+ class Sam
4
+ include Enumerable
5
+ include Printer
6
+
7
# The header section of a SAM file: an ordered list of parsed "@" lines.
class Header
  # A single header line.
  #
  # "@CO" lines are kept as free-text comments (+type+ is :CO, text in
  # +comment+). Any other two-letter record type is parsed into +tags+, a
  # Hash of "TAG" => "value" strings. A line matching neither pattern
  # leaves +type+ and +tags+ nil.
  class Record
    attr_reader :type
    attr_reader :tags
    attr_reader :comment
    attr_reader :line

    # line:: the raw header line (a trailing newline is tolerated).
    def initialize(line)
      @line = line
      if line =~ /^@CO\t(.*)/
        @type = :CO
        @comment = $1
      end
      line.match(/^@([A-Za-z][A-Za-z])\t((?:[A-Za-z][A-Za-z0-9]:[ -~]+(?:\t|$))+)/) do |m|
        @type = m[1].to_sym
        # Split on the FIRST colon only: values such as command lines,
        # timestamps or URIs legitimately contain ':' and must stay whole.
        @tags = Hash[m[2].split(/\t/).map { |s| s.split(/:/, 2) }]
      end
    end

    # Renders the record back to SAM header text (no trailing newline).
    def to_s
      if @type == :CO
        "@#{@type}\t#{@comment}"
      elsif tags
        "@#{@type}\t#{tags.map { |t, v| "#{t}:#{v}" }.join("\t")}"
      else
        # The line could not be parsed; emit it unchanged rather than
        # raising on the missing tag hash.
        @line.to_s.chomp
      end
    end
  end

  attr_reader :records

  # sam::   the owning Sam object (stored, not otherwise used here).
  # lines:: the raw header lines, in file order.
  def initialize(sam, lines)
    @sam = sam
    @records = lines.map { |l| Record.new(l) }
  end

  # Writes every record to +f+ (anything responding to +puts+), one per line.
  def output(f)
    @records.each do |r|
      f.puts r
    end
  end
end
50
+
51
+ class Read
52
+ extend DataTypes
53
+ attr_sym :qname
54
+ attr_integer :flag, :pos, :mapq, :pnext, :tlen
55
+ attr_string :rname, :rnext, :other, :sam, :seq, :qual, :cigar_string
56
+ alias :chrom :rname
57
+ alias :start_pos :pos
58
+ alias :mate_pos :pnext
59
+
60
# Parses the CIGAR string into [length, operation] pairs (both Strings),
# e.g. "5M2I" => [["5", "M"], ["2", "I"]]. The result is memoized.
#
# The operation class lists the CIGAR operators explicitly: "\w" would
# miss "=" and, because it also matches digits, would mis-scan "15=" as
# ["1", "5"]. A nil or "*" CIGAR yields an empty list.
def cigar
  @cigar ||= @cigar_string.to_s.scan(/(\d+)([MIDNSHP=X])/)
end
63
+
64
# FLAG bit tests. Each returns the masked Integer: the bit value when the
# bit is set, 0 when it is not.
# NOTE: 0 is truthy in Ruby, so compare the result against 0 rather than
# using it directly as a condition.
def paired?; flag & 1; end
def mapped?; flag & 2; end
def unmapped?; flag & 4; end
def mate_unmapped?; flag & 8; end
def reversed?; flag & 16; end
def mate_reversed?; flag & 32; end
def first?; flag & 64; end
def second?; flag & 128; end
# 0x100; the previous mask of 258 also let the 0x2 bit through.
def secondary?; flag & 256; end
def unchaste?; flag & 512; end
# 0x400 is the duplicate bit in the SAM specification.
def duplicate?; flag & 1024; end
# 0x800 is the supplementary-alignment bit in the SAM specification.
def supplementary?; flag & 2048; end
75
+
76
# Returns +pos+ plus the summed lengths of all M, D, N, = and X CIGAR
# operations; every other operation leaves the running total unchanged.
def end_pos
  cigar.inject(pos) do |total, (width, op)|
    case op
    when 'M', 'D', 'N', '=', 'X' then total + width.to_i
    else total
    end
  end
end
86
+
87
# Returns the base of +seq+ aligned to reference position +p+, or nil when
# +p+ falls in a deletion/skip (D, N) or outside the alignment.
#
# Two cursors are walked through the CIGAR: a reference coordinate and an
# index into +seq+. Each operation advances only what it consumes:
#   M, =, X  -> reference and read
#   D, N     -> reference only
#   I, S     -> read only
#   H, P     -> neither
def seq_at(p)
  ref_pos = pos
  read_idx = 0
  cigar.each do |w, c|
    len = w.to_i
    case c
    when 'M', '=', 'X'
      if (ref_pos...(ref_pos + len)).include? p
        return seq[read_idx + p - ref_pos]
      end
      read_idx += len
      ref_pos += len
    when 'D', 'N'
      if (ref_pos...(ref_pos + len)).include? p
        return nil
      end
      # Deleted/skipped reference bases have no counterpart in the read,
      # so the read index must not move here.
      ref_pos += len
    when 'I', 'S'
      # Inserted and soft-clipped bases are present in seq but occupy no
      # reference positions.
      read_idx += len
    end
  end
  nil
end
114
+
115
# Collects one [left, right] pair per N operation in the CIGAR, where
# +left+ is the running reference cursor minus one before the skip and
# +right+ is the cursor after the skip. M, D, = and X advance the cursor;
# all other operations are ignored.
def junctions
  cursor = pos
  cigar.each_with_object([]) do |(width, op), found|
    span = width.to_i
    case op
    when 'M', 'D', '=', 'X'
      cursor += span
    when 'N'
      found << [cursor - 1, cursor + span]
      cursor += span
    end
  end
end
127
+
128
# Reference name of the mate: +rnext+, except that the "=" shorthand is
# resolved to this read's own +chrom+.
def mate_chrom
  rnext == "=" ? chrom : rnext
end
135
+
136
# Looks up this read's mate among the records registered in +sam.mates+
# under the same +qname+: the first OTHER record whose +pos+ equals
# +mate_pos+. Returns nil when there is none.
#
# The read itself is excluded by identity; without that, a pair whose two
# reads start at the same position would each report themselves as mate.
def mate
  candidates = sam.mates[qname] || []
  candidates.find { |l| !l.equal?(self) && l.pos == mate_pos }
end
139
+
140
# Builds a read from the split columns of one alignment line.
#
# s::      the owning Sam object; stored in @sam and asked to register
#          this read under its qname via +add_mate+.
# fields:: column values, assigned positionally to the attribute writers
#          named in @header (missing trailing columns are assigned nil).
def initialize s, fields
  @sam = s
  @header = [:qname, :flag, :rname, :pos, :mapq, :cigar_string, :rnext, :pnext, :tlen, :seq, :qual, :other]
  @header.each_index do |idx|
    writer = "#{@header[idx]}=".to_sym
    send writer, fields[idx]
  end
  sam.add_mate qname, self
end
148
+
149
# Writes this read to +f+ as one tab-separated line, columns in the order
# qname, flag, rname, pos, mapq, cigar_string, rnext, pnext, tlen, seq,
# qual, other (read straight from the instance variables).
def output f
  columns = [
    @qname, @flag, @rname, @pos, @mapq, @cigar_string,
    @rnext, @pnext, @tlen, @seq, @qual, @other
  ]
  row = columns.join("\t")
  f.puts row
end
152
+ end
153
+
154
+ attr_reader :reads, :mates
155
+ attr_accessor :header
156
# Starts with no reads and an empty qname => records index.
def initialize
  @reads, @mates = [], {}
end
160
+
161
# Appends +record+ to the list stored in @mates under key +mate+,
# creating the list on first use. Returns that list.
def add_mate mate, record
  bucket = (@mates[mate] ||= [])
  bucket.push record
end
165
+
166
# Yields every read in order. When called without a block, returns an
# Enumerator over the reads instead of raising LocalJumpError.
def each(&block)
  @reads.each(&block)
end
171
+
172
# Loads +file+ line by line into a new instance.
#
# Lines beginning with "@" are collected (unmodified) and turned into the
# Sam::Header once the whole file has been read; every other line is
# chomped, split on tabs into at most 12 columns and appended to +reads+
# as a Sam::Read. Returns the populated instance.
def self.read file
  sam = self.new
  header_lines = []
  File.foreach(file) do |line|
    if line =~ /^@/
      header_lines.push line
    else
      columns = line.chomp.split(/\t/,12)
      sam.reads.push Sam::Read.new(sam, columns)
    end
  end
  sam.header = Sam::Header.new(sam, header_lines)
  sam
end
185
+
186
# Writes the header to +f+, followed by every read in order, by
# delegating to each object's own +output+.
def output(f)
  @header.output(f)
  @reads.each { |read| read.output(f) }
end
192
+
193
# Short summary showing the class name, object id and number of reads,
# instead of dumping every read.
def inspect
  klass = self.class.name
  count = reads.size
  "#<#{klass}:#{object_id} @reads=#{count}>"
end
196
+ end
@@ -0,0 +1,162 @@
1
+ require 'mutation_set'
2
+ require 'oncotator'
3
+ require 'yaml'
4
+
5
+ class VCF < MutationSet::Sample
6
+ requires "#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "FORMAT"
7
+ comments "##"
8
+
9
# The "##key=value" meta-information lines of a VCF file, grouped by key.
#
# @items maps each key (a Symbol) to the list of its values in file order.
# A value written as "<A=1,B=2>" is stored as a Hash of String => String;
# anything else is stored as the raw String.
class Preamble
  # lines:: the raw "##" lines; trailing newlines are removed here.
  def initialize(lines)
    @items = {}
    lines.each do |line|
      add_key line.chomp
    end
  end

  protected

  # Records one "##key=value" line; lines of any other shape are ignored.
  def add_key(line)
    line.match(/^##(?<key>\w+)=(?<value>.*)$/) do |m|
      add_key_item m[:key].to_sym, m[:value]
    end
  end

  def add_key_item(key, value)
    @items[key] ||= []
    @items[key].push new_item(value)
  end

  # Converts a raw value into its stored form.
  #
  # Structured values are scanned pair by pair rather than split on ","
  # and "=", because a double-quoted value (typically Description) may
  # itself contain commas or equals signs. Quoted values keep their
  # surrounding quotes.
  def new_item(value)
    case value
    when /^<(.*)>$/
      Hash[$1.scan(/([^=,]+)=("(?:[^"\\]|\\.)*"|[^,]*)/)]
    else
      value
    end
  end
end
38
+
39
# Validates the column header row and records it.
#
# Raises RuntimeError when any required column is absent (compared
# case-insensitively), except that a header lacking only "format" is
# accepted (kludge for an empty VCF with no FORMAT column). When there
# are more columns than required ones, the non-required names are stored
# in @samples. Stores and returns the header names as Symbols.
def enforce_headers(array)
  present = array.map(&:downcase)
  absent = required.map(&:downcase).reject { |name| present.include?(name) }
  only_format_absent = absent.size == 1 && absent.first == "format"
  unless absent.empty? || only_format_absent
    raise "VCF lacks required headers"
  end

  @samples = array - required if array.size > required.size

  @headers = array.map(&:to_sym)
end
50
+
51
# One data row of a VCF file.
#
# The fixed columns are kept in +mutation+, keyed by the owning sample's
# cleaned header names; :info is parsed into a Hash and :format into a
# list of Symbols. Per-sample columns become VCF::Genotype objects.
class Line < MutationSet::Line
  attr_reader :format, :mutation
  alias_key :start, :pos
  alias_key :ref_allele, :ref

  def alt_allele; pick_alt; end
  # An explicitly assigned stop wins over the one derived from REF.
  def stop; @stop || end_pos; end
  def stop=(nc); @stop = nc; end

  def required
    sample.required
  end

  # fields:: the tab-split columns of the row.
  # s::      the owning VCF object (supplies required/clean_headers/samples).
  def initialize(fields, s)
    @sample = s
    @mutation = Hash[clean_required.zip(fields[0...required.size])]
    @mutation[:info] = parse_info(@mutation[:info])
    # to_s tolerates a row without a FORMAT column (yields an empty list).
    @format = @mutation[:format] = @mutation[:format].to_s.split(/:/).map(&:to_sym)

    return unless @sample.samples

    sample_fields = fields[required.size..-1] || []
    @genotypes = {}
    @sample.samples.each_with_index do |name, i|
      next if !sample_fields[i]
      @genotypes[name] = VCF::Genotype.new self, sample_fields[i].split(/:/)
    end
  end

  def skip_genotype?(g)
    name, geno = g.first
    geno = genotype(geno)

    !geno || geno.empty? || criteria_failed?(geno, name)
  end

  # The first of the comma-separated ALT alleles.
  def pick_alt
    alt.split(/,/).first
  end

  # Last position covered by REF (pos + length of REF - 1).
  def end_pos
    pos.to_i + ref.length - 1
  end

  # Renders the row back to tab-separated text: the fixed columns followed
  # by one column per sample (empty for a sample without a genotype).
  def to_s
    fixed = clean_required.map do |h|
      case h
      when :info
        # Flag entries were stored as +true+ and are written as the bare
        # key, so they round-trip instead of becoming "KEY=true".
        mutation[h].map { |k, v| v == true ? k.to_s : "#{k}=#{v}" }.join(";")
      when :format
        mutation[h].join(":")
      else
        mutation[h]
      end
    end
    # samples is nil when the file has no sample columns.
    per_sample = (sample.samples || []).map { |s| genotype(s).to_s }
    (fixed + per_sample).join("\t")
  end

  def genotype(s)
    @genotypes[s] if @genotypes
  end

  def clean_required
    sample.clean_headers[0...required.size]
  end

  private

  # Parses an INFO column ("A=1;B=x=y;FLAG") into a Hash keyed by Symbol.
  # Each entry is split on its FIRST "=" only, so values containing "="
  # stay intact; an entry with no "=" is a flag and maps to +true+.
  def parse_info(text)
    pairs = text.to_s.split(/;/).reject(&:empty?).map do |entry|
      key, value = entry.split(/=/, 2)
      value = true if value.nil?
      [key.to_sym, value]
    end
    Hash[pairs]
  end
end
119
+
120
# The per-sample column of a VCF row: the FORMAT keys of the owning line
# zipped with this sample's colon-separated values, exposed as +info+
# (Symbol => String).
class Genotype
  attr_reader :info

  # line::  the owning row; must respond to +format+, +ref+ and +alt+.
  # field:: this sample's values, in FORMAT order.
  def initialize(line, field)
    @line = line
    @info = Hash[line.format.zip(field)]
  end

  # The GT predicates below return the regexp match index (an Integer,
  # truthy) or nil, and match anywhere in the GT string with any single
  # separator character.
  def homozygous?
    @info[:GT] =~ /0.0/ || @info[:GT] =~ /1.1/
  end

  def heterozygous?
    @info[:GT] =~ /0.1/ || @info[:GT] =~ /1.0/
  end

  def empty?
    @info[:GT] =~ /\..\./
  end

  def callable?
    @info[:GT] !~ /\..\./
  end

  def gt; @info[:GT]; end
  def approx_depth; @info[:DP].to_i; end

  # Sum of the first two AD counts, or nil when the sample has no AD
  # value (matching alt_count/ref_count, which are nil in that case).
  def depth
    return nil unless @info[:AD]
    alt_count + ref_count
  end

  def alt_count; @info[:AD] ? @info[:AD].split(/,/)[1].to_i : nil; end
  def ref_count; @info[:AD] ? @info[:AD].split(/,/)[0].to_i : nil; end

  # Fraction of depth supporting ALT / REF; nil when AD is absent.
  # A depth of zero still yields the float result of dividing by 0.0.
  def alt_freq
    total = depth
    total && alt_count / total.to_f
  end

  def ref_freq
    total = depth
    total && ref_count / total.to_f
  end

  def ref_length; @line.ref.length; end
  def alt_length; @line.alt.length; end

  def alt_base_quality; first_float(:NQSBQ); end
  def alt_map_quality; first_float(:MQS); end
  def alt_mismatch_rate; first_float(:NQSMM); end
  def alt_mismatch_count; first_float(:MM); end
  def quality; @info[:GQ].to_i; end

  # The sample column as text: values in FORMAT order joined by ":".
  def to_s
    @line.format.map { |f| @info[f] }.join(":")
  end

  private

  # First comma-separated element of @info[key] as a Float, or nil when
  # the key has no value.
  def first_float(key)
    raw = @info[key]
    raw ? raw.split(/,/)[0].to_f : nil
  end
end
162
+ end
metadata ADDED
@@ -0,0 +1,115 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: germ
3
+ version: !ruby/object:Gem::Version
4
+ version: '0.1'
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Saurabh Asthana
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2014-07-16 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: extlib
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: net-http-persistent
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: sequel
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ description: See summary
63
+ email: Saurabh.Asthana@ucsf.edu
64
+ executables: []
65
+ extensions:
66
+ - ext/fasta_aux/extconf.rb
67
+ - ext/hash_table_aux/extconf.rb
68
+ extra_rdoc_files: []
69
+ files:
70
+ - lib/gtf.rb
71
+ - lib/mutation_set.rb
72
+ - lib/indelocator.rb
73
+ - lib/maf.rb
74
+ - lib/hash_table.rb
75
+ - lib/sam.rb
76
+ - lib/germ/config.rb
77
+ - lib/germ/printer.rb
78
+ - lib/germ/data_types.rb
79
+ - lib/germ/flagstat.rb
80
+ - lib/oncotator.rb
81
+ - lib/mutect.rb
82
+ - lib/vcf.rb
83
+ - lib/germ.rb
84
+ - lib/intervals.rb
85
+ - lib/fasta.rb
86
+ - ext/fasta_aux/FastaAux.c
87
+ - ext/hash_table_aux/HashTableAux.c
88
+ - ext/fasta_aux/extconf.rb
89
+ - ext/hash_table_aux/extconf.rb
90
+ homepage: http://github.com/mountetna/germ
91
+ licenses: []
92
+ post_install_message:
93
+ rdoc_options: []
94
+ require_paths:
95
+ - lib
96
+ - ext
97
+ required_ruby_version: !ruby/object:Gem::Requirement
98
+ none: false
99
+ requirements:
100
+ - - ! '>='
101
+ - !ruby/object:Gem::Version
102
+ version: '0'
103
+ required_rubygems_version: !ruby/object:Gem::Requirement
104
+ none: false
105
+ requirements:
106
+ - - ! '>='
107
+ - !ruby/object:Gem::Version
108
+ version: '0'
109
+ requirements: []
110
+ rubyforge_project:
111
+ rubygems_version: 1.8.23
112
+ signing_key:
113
+ specification_version: 3
114
+ summary: Collection of utilities for use in computational genomics
115
+ test_files: []