germ 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,351 @@
1
+ require 'oncotator'
2
+ require 'yaml'
3
+ require 'intervals'
4
+
5
+ module MutationSet
6
# One row of a mutation table. Field values live in a Hash keyed by the
# cleaned header symbols of the owning sample; any field can be read or
# written as if it were an attribute (see #method_missing).
class Line
  include IntervalList::Interval
  attr_reader :sample
  attr_accessor :invalid

  # Whole-string patterns (\A..\z) used to validate the Oncotator key parts.
  # ^/$ would accept a multi-line value whose first line happens to match.
  CHROM_POS = /\A[0-9]+\z/
  ALLELE_SEQ = /\A([A-Z]+|-)\z/

  # Defines reader +sym1+ and writer +sym1=+ that forward to +sym2+ / +sym2=+.
  def self.alias_key(sym1, sym2)
    define_method(sym1) { send sym2 }
    define_method("#{sym1}=") { |v| send "#{sym2}=", v }
  end

  # fields:: either a Hash of field values, or an Array of values that is
  #          zipped against sample.clean_headers.
  # sample:: the owning sample object (provides headers and mutation_config).
  def initialize(fields, sample)
    @mutation = fields.is_a?(Hash) ? fields : Hash[sample.clean_headers.zip(fields)]
    @sample = sample
  end

  # New line of the same class built on a shallow clone of the field hash.
  def copy
    self.class.new @mutation.clone, sample
  end

  def invalid?
    invalid
  end

  def invalidate!
    @invalid = true
  end

  # Key under which the owning sample indexes this line.
  def key
    "#{chrom}:#{start}:#{stop}"
  end

  # Chromosome name with a leading "chr" (memoized).
  def long_chrom
    @long_chrom ||= "chr#{short_chrom}"
  end

  # Chromosome name without a leading "chr" (memoized).
  def short_chrom
    @short_chrom ||= chrom.sub(/^chr/, '')
  end

  # Tab-joined field values in the order of the sample's cleaned headers.
  def to_s
    sample.clean_headers.map { |h| @mutation[h] }.join("\t")
  end

  # Returns the underlying field hash itself (not a copy).
  def to_hash
    @mutation
  end

  # Looks up the criteria found at path +name+ (a key or an Array of keys)
  # inside sample.mutation_config and checks each against +obj+.
  # Returns true when at least one criterion is not met, nil otherwise
  # (including when there is no config or no criteria at that path).
  def criteria_failed?(obj, name)
    return nil unless sample.mutation_config
    path = name.is_a?(Array) ? name : [name]
    crit = path.reduce(sample.mutation_config) do |h, n|
      h.is_a?(Hash) ? h[n] : nil
    end
    return nil unless crit
    crit.each do |attrib, value|
      return true unless criterion_ok? obj, attrib, value
    end
    nil
  end

  # Evaluates one criterion. The prefix of +attrib+ selects the comparison,
  # and the remainder names the method called on +obj+:
  #   min_X / max_X          numeric bounds (both sides compared via to_f)
  #   exclude_X / include_X  regexp (or Array of regexps) must not / must match
  #   has_X                  membership, presence, absence or equality
  #   either...              passes when any nested criterion passes
  #   whitelisted / blacklisted  interval-list intersection with this line
  # Any other +attrib+ is sent to +obj+ and tested for truthiness when
  # +value+ is true, or for falsiness when it is "nil", false or nil.
  # Returns a truthy value when the criterion is satisfied.
  def criterion_ok?(obj, attrib, value)
    case attrib
    when /^min_(.*)/
      return obj.send($1.to_sym).to_f >= value.to_f
    when /^max_(.*)/
      return obj.send($1.to_sym).to_f <= value.to_f
    when /^exclude_(.*)/
      v = obj.send($1.to_sym)
      # =~ (rather than v.match) so a nil field behaves the same in the
      # Array branch as it does in the scalar branch instead of raising.
      if value.is_a? Array
        return value.none? { |r| v =~ /#{r}/ }
      else
        return v !~ /#{value}/
      end
    when /^has_(.*)/
      v = obj.send($1.to_sym)
      if value.is_a? Array
        return value.include? v
      elsif value == true
        return v && (v.is_a?(String) ? v.size > 0 : v)
      elsif value == false || value == "nil"
        return !v
      else
        return value == v
      end
    when /^include_(.*)/
      v = obj.send($1.to_sym)
      if value.is_a? Array
        return value.any? { |r| v =~ /#{r}/ }
      else
        return v =~ /#{value}/
      end
    when /^either.*/
      return value.any? { |sub_attrib, sub_value| criterion_ok? obj, sub_attrib, sub_value }
    when /^whitelisted/
      return sample.whitelist(value).intersect(self)
    when /^blacklisted/
      return !sample.blacklist(value).intersect(self)
    else
      case value
      when "nil", false, nil
        return !obj.send(attrib.to_sym)
      when true
        return obj.send(attrib.to_sym)
      end
    end
    true
  end

  # Memoized Oncotator lookup for this line.
  # Raises ArgumentError (listing every malformed field) when the position
  # or allele fields cannot form a valid Oncotator key.
  def onco
    raise ArgumentError, @onco_error.join(', ') unless valid_onco_input?
    @onco ||= Oncotator.new :key => to_ot
  end

  # Drops the memoized Oncotator object so the next #onco call rebuilds it.
  def discard_onco
    @onco = nil
  end

  # Returns true when there is no usable annotation or the annotation fails
  # the criteria at +criteria+ (default :oncotator); nil otherwise.
  def skip_oncotator?(criteria = nil)
    return true if !onco || onco.empty? || criteria_failed?(onco, criteria || :oncotator)
  end

  def inspect
    "#<#{self.class.name}:#{object_id} @mutation=#{@mutation}>"
  end

  def in_cosmic
    onco.Cosmic_overlapping_mutations ? "YES" : "NO"
  end

  # Key string passed to Oncotator: chrom_start_stop_ref_alt.
  def to_ot
    [short_chrom, start, stop, ref_allele, alt_allele].join("_")
  end

  # Dynamic field access: +name=+ stores into the field hash, +name+ reads
  # an existing field. Anything else goes to super (NoMethodError).
  def method_missing(meth, *args, &block)
    if meth.to_s =~ /\A(\w+)=\z/
      @mutation[$1.to_sym] = args.first
    elsif @mutation.has_key?(meth.to_sym)
      @mutation[meth.to_sym]
    else
      super
    end
  end

  # Reports fields holding a non-nil value as respondable. Implemented as
  # respond_to_missing? so respond_to? keeps its standard two-argument form.
  def respond_to_missing?(name, include_private = false)
    !@mutation[name.to_sym].nil? || super
  end

  private

  # Collects a message in @onco_error for each malformed field and returns
  # true when none were found.
  def valid_onco_input?
    @onco_error = []
    @onco_error.push 'Malformed start position' unless start.to_s =~ CHROM_POS
    @onco_error.push 'Malformed stop position' unless stop.to_s =~ CHROM_POS
    @onco_error.push 'Malformed reference allele' unless ref_allele =~ ALLELE_SEQ
    @onco_error.push 'Malformed alt allele' unless alt_allele =~ ALLELE_SEQ
    @onco_error.empty?
  end
end
181
+
182
# A tab-delimited table of mutation lines: optional preamble lines, one
# header row, then data rows. Subclasses declare their mandatory columns
# with +requires+ and supply a nested +Line+ class used to build each row.
class Sample
  include Enumerable
  attr_reader :samples, :mutation_config, :lines, :preamble_lines
  attr_accessor :headers

  class << self
    attr_reader :required, :comment

    # Declares the required column names; the first one is also used to
    # recognise the header row while reading a file.
    def requires(*terms)
      @required = terms
    end

    # Records the comment marker for this format.
    def comments(c)
      @comment = c
    end

    # Builds a set without default headers and fills it from +filename+.
    def read(filename, mutation_config = nil)
      set = new mutation_config, true
      set.load_file filename
      set
    end
  end

  # mutation_config::  path of a YAML file with filter criteria (optional).
  # suppress_headers:: when truthy, headers stay unset until a file's
  #                    header row is read.
  def initialize(mutation_config = nil, suppress_headers = nil)
    @lines = []
    # NOTE(review): on newer Psych versions load_file may reject YAML that
    # contains symbol keys - confirm against the Ruby versions supported.
    @mutation_config = YAML.load_file(mutation_config) if mutation_config
    @headers = required.map(&:to_sym) unless suppress_headers
    @preamble_lines = []
    @index = {}
  end

  # Reads +filename+: lines before the header row are kept verbatim as
  # preamble, the header row is validated, and every later line is added.
  def load_file(filename)
    File.foreach(filename) do |l|
      fields = l.chomp.split(/\t/, -1)
      if !headers
        # An empty line splits to [], so to_s guards against a nil first field.
        if fields.first.to_s.downcase == required.first.downcase
          enforce_headers fields
        else
          preamble_lines.push l
        end
        next
      end
      add_line fields
    end

    post_read_hook
  end

  def preamble
    preamble_lines.join("")
  end

  def write(file)
    File.open(file, "w") do |f|
      output f
    end
  end

  # Writes to the file named +f+, or to STDOUT when +f+ is nil.
  def print(f = nil)
    if f
      write f
    else
      output STDOUT
    end
  end

  # Writes preamble, header row and every valid line to the IO +f+.
  # With a block, each line is first passed through it; a nil/false result
  # drops the line.
  def output(f)
    # puts of an empty string would emit a stray blank first line.
    f.puts preamble unless preamble_lines.empty?
    f.puts headers.join("\t")
    @lines.each do |l|
      l = yield l if block_given?
      next if !l || l.invalid?
      f.puts format_line(l)
    end
  end

  def format_line(l)
    l.to_s
  end

  # Header text -> symbol: whitespace runs become "_", other non-word
  # characters are removed, and the result is lower-cased.
  def clean_header(s)
    s.to_s.gsub(/\s+/, "_").gsub(/[^\w]+/, "").downcase.to_sym
  end

  def clean_headers
    @headers.map { |h| clean_header h }
  end

  # Wraps +fields+ in this class's nested Line, appends and indexes it.
  def add_line(fields)
    @lines.push self.class.const_get(:Line).new(clean_fields(fields), self)

    index_line @lines.last
  end

  # Replaces "NA" entries of an Array with ""; non-arrays pass through.
  def clean_fields(fields)
    fields.is_a?(Array) ? fields.map { |f| f == "NA" ? "" : f } : fields
  end

  def index_line(line)
    @index[line.key] = line
  end

  # Returns the stored line that has the same key as +line+, if any.
  def find_mutation(line)
    @index[line.key]
  end

  def required
    self.class.required
  end

  # Accepts +array+ as the header row, raising if any required column
  # (compared case-insensitively) is absent.
  def enforce_headers(array)
    missing = required.map(&:downcase) - array.map(&:downcase)
    raise "File lacks required headers: #{missing.join(", ")}" unless missing.empty?
    @headers = array
  end

  # Interval list loaded from +file+ (.gtf or .vcf), cached per file name.
  # Returns nil for any other extension.
  def whitelist(file)
    (@whitelists ||= {})[file] ||= load_interval_list(file)
  end

  # Same as #whitelist, with its own per-file cache.
  def blacklist(file)
    (@blacklists ||= {})[file] ||= load_interval_list(file)
  end

  def to_interval_list
    IntervalList.new self.map { |g| [g.chrom, g.start, g.stop, g] }
  end

  def inspect
    to_s
  end

  def [](key)
    @lines[key]
  end

  def sort_by!(&block)
    @lines.sort_by!(&block)
  end

  # Yields each line; without a block returns an Enumerator.
  def each
    return enum_for(:each) unless block_given?
    @lines.each do |l|
      yield l
    end
  end

  protected

  # Called after load_file has read every line; a no-op here.
  def post_read_hook
  end

  private

  # Builds an interval list from a GTF or VCF file, chosen by extension.
  # The parser libraries are required lazily, only when first needed.
  def load_interval_list(file)
    case file
    when /\.gtf\z/
      require 'gtf'
      GTF.new(file).to_interval_list
    when /\.vcf\z/
      require 'vcf'
      VCF.read(file).to_interval_list
    end
  end
end
351
+ end
@@ -0,0 +1,43 @@
1
+ require 'oncotator'
2
+ require 'yaml'
3
+ require 'mutation_set'
4
+
5
# Mutation set for MuTect output: declares the columns a file must contain
# and a Line subclass adding MuTect-specific derived values.
class MuTect < MutationSet::Sample
  requires(*%w[
    contig position context ref_allele alt_allele
    tumor_name normal_name score dbsnp_site covered power
    tumor_power normal_power total_pairs improper_pairs
    map_q0_reads t_lod_fstar tumor_f contaminant_fraction
    contaminant_lod t_ref_count t_alt_count t_ref_sum t_alt_sum
    t_ref_max_mapq t_alt_max_mapq t_ins_count t_del_count
    normal_best_gt init_n_lod n_ref_count n_alt_count n_ref_sum
    n_alt_sum judgement
  ])
  comments "##"

  class Line < MutationSet::Line
    # chrom/start are stored under the MuTect column names.
    alias_key :chrom, :contig
    alias_key :start, :position

    # An explicitly assigned stop overrides the computed end position.
    attr_writer :stop

    def stop
      @stop || end_position
    end

    # True unless the [:mutect, :somatic] criteria fail for this line.
    def keep_somatic?
      !criteria_failed?(self, [:mutect, :somatic])
    end

    # True unless the [:mutect, :germline] criteria fail for this line.
    def keep_germline?
      !criteria_failed?(self, [:mutect, :germline])
    end

    # Position of the last reference base: position + ref length - 1.
    def end_position
      position.to_i + ref_allele.length - 1
    end

    # map_q0_reads divided by the combined tumor + normal alt counts.
    def q0_ratio
      alt_reads = t_alt_count.to_i + n_alt_count.to_i
      map_q0_reads.to_f / alt_reads
    end

    # Normal / tumor variant frequency, or 0 when the tumor frequency
    # is not positive.
    def vf_ratio
      if t_var_freq > 0
        n_var_freq / t_var_freq
      else
        0
      end
    end

    def t_var_freq
      t_alt_count.to_f / t_depth
    end

    def n_var_freq
      n_alt_count.to_f / n_depth
    end

    def t_depth
      t_alt_count.to_i + t_ref_count.to_i
    end

    def n_depth
      n_alt_count.to_i + n_ref_count.to_i
    end
  end
end
@@ -0,0 +1,144 @@
1
+ require 'net/http/persistent'
2
+ require 'resolv-replace'
3
+ require 'json'
4
+ require 'sequel'
5
+ require 'yaml'
6
+ require 'germ/config'
7
+
8
# Annotation record for one mutation, parsed from JSON. The JSON comes
# from the text passed in, from a database cache of raw responses, or from
# an HTTP request to the Oncotator service (which is then cached).
# Top-level JSON fields are readable as methods (see #method_missing).
class Oncotator
  # Base of the lookup URL; the mutation key is appended as a path segment.
  SERVICE_URL = "http://69.173.64.101/oncotator/mutation"

  attr_accessor :mutation

  # Shared HTTP connection, created on first use.
  def self.persistent_connection
    @http ||= Net::HTTP::Persistent.new
  end

  # Overrides the database options used by db_cache.
  def self.db_connect(opts)
    @db_opts = opts
  end

  def self.db_opts
    @db_opts ||= TaylorlibConfig.get_conf :oncotator
  end

  # Sequel dataset for the onco_json_cache table.
  def self.db_cache
    @db ||= Sequel.connect(db_opts)
    @db[:onco_json_cache]
  end

  # Stores a { :CACHE_KEY, :RAW_JSON } row, through the Rails model when
  # Rails is loaded and through Sequel otherwise.
  def self.insert_onco(onco)
    if defined? Rails
      OncoJsonCache.create onco
    else
      db_cache.insert_ignore.insert onco
    end
  end

  # Cache backend: the Rails model when Rails is loaded, else the dataset.
  def self.db_obj
    if defined? Rails
      OncoJsonCache
    else
      Oncotator.db_cache
    end
  end

  def self.find_key(cache_key)
    db_obj.where(:CACHE_KEY => cache_key).first
  end

  def self.delete_key(cache_key)
    db_obj.where(:CACHE_KEY => cache_key).delete
  end

  # opts[:key]::  mutation key; looked up in the cache, then the service.
  # opts[:text]:: raw JSON to parse directly.
  def initialize(opts)
    if opts[:key]
      @mutation = opts[:key]
      @onco = get_json_object
    elsif opts[:text]
      @onco = get_json_object opts[:text]
    end
  end

  def onco_uri
    URI "#{SERVICE_URL}/#{@mutation}/"
  end

  # Returns the parsed annotation.
  #
  # text:: raw JSON to parse; when nil the cached JSON for the current
  #        mutation key is used.
  #
  # If that JSON is missing or unparseable and a mutation key is set, the
  # bad cache row (if any) is deleted and the service is queried. Without a
  # mutation key there is nothing to look up, so {} is returned rather
  # than querying the cache or service with a nil key.
  # Returns {} for a non-200 response. Raises JSON::ParserError when the
  # service answers 200 with an unparseable body; such a body is not cached.
  def get_json_object(text = nil)
    json = text.nil? ? cached_json : text

    if json
      begin
        return JSON.parse(json)
      rescue JSON::ParserError
        # A bad blob: drop it from the cache (when keyed) and fall through.
        Oncotator.delete_key @mutation if @mutation
      end
    end

    return {} unless @mutation

    response = Oncotator.persistent_connection.request(onco_uri)
    return {} if response.code != "200"

    body = response.body
    # Parse before saving so an invalid body never reaches the cache.
    parsed = JSON.parse(body)
    Oncotator.insert_onco(:CACHE_KEY => @mutation, :RAW_JSON => body)
    parsed
  end

  def empty?
    !@onco || @onco.size == 0
  end

  # Read-only view of one transcript hash; every method call is a lookup
  # of the same-named string key and yields nil when absent.
  class Transcript
    def initialize(txp)
      @txp = txp || {}
    end

    def method_missing(meth, *args, &block)
      @txp[meth.to_s] || nil
    end

    def respond_to_missing?(meth, include_private = false)
      @txp.key?(meth.to_s) || super
    end
  end

  # Transcript selected by the best_effect_transcript field (memoized);
  # nil when that field or the transcripts list is absent.
  def best_effect_txp
    @best_effect_txp ||= transcript_at(best_effect_transcript)
  end

  # Transcript selected by the best_canonical_transcript field (memoized);
  # nil when that field or the transcripts list is absent.
  def best_canonical_txp
    @best_canonical_txp ||= transcript_at(best_canonical_transcript)
  end

  def is_snp
    dbSNP_RS && dbSNP_Val_Status =~ /(byFrequency|by1000genomes)/
  end

  def txp
    best_effect_txp
  end

  def pph2_class
    pph2 ? pph2["pph2_class"] : nil
  end

  def is_cancerous
    self.Cosmic_overlapping_mutations || self.CGC_Tumor_Types_Somatic || self.CCLE_ONCOMAP_total_mutations_in_gene
  end

  # Dynamic field access: a truthy top-level JSON field of that name wins;
  # otherwise txp_NAME is forwarded to the best-effect transcript.
  # Unknown names return nil instead of raising.
  def method_missing(meth, *args, &block)
    name = meth.to_s
    # @onco is nil when neither :key nor :text was supplied.
    value = @onco && @onco[name]
    return value if value
    return nil unless name =~ /^txp_(.*)/

    field = $1
    transcript = txp
    transcript ? transcript.send(field) : nil
  end

  def respond_to_missing?(meth, include_private = false)
    name = meth.to_s
    (@onco.respond_to?(:key?) && @onco.key?(name)) || name.start_with?("txp_") || super
  end

  private

  # Raw JSON stored in the cache for the current mutation key, or nil.
  def cached_json
    return nil unless @mutation
    row = Oncotator.find_key @mutation
    row ? row[:RAW_JSON] : nil
  end

  # Wraps transcripts[index] in a Transcript; nil without index or list.
  def transcript_at(index)
    list = transcripts
    return nil unless index && list
    Transcript.new(list[index])
  end
end