germ 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ext/fasta_aux/FastaAux.c +137 -0
- data/ext/fasta_aux/extconf.rb +7 -0
- data/ext/hash_table_aux/HashTableAux.c +246 -0
- data/ext/hash_table_aux/extconf.rb +7 -0
- data/lib/fasta.rb +79 -0
- data/lib/germ.rb +11 -0
- data/lib/germ/config.rb +34 -0
- data/lib/germ/data_types.rb +47 -0
- data/lib/germ/flagstat.rb +23 -0
- data/lib/germ/printer.rb +15 -0
- data/lib/gtf.rb +248 -0
- data/lib/hash_table.rb +195 -0
- data/lib/indelocator.rb +46 -0
- data/lib/intervals.rb +337 -0
- data/lib/maf.rb +92 -0
- data/lib/mutation_set.rb +351 -0
- data/lib/mutect.rb +43 -0
- data/lib/oncotator.rb +144 -0
- data/lib/sam.rb +196 -0
- data/lib/vcf.rb +162 -0
- metadata +115 -0
data/lib/mutation_set.rb
ADDED
@@ -0,0 +1,351 @@
|
|
1
|
+
require 'oncotator'
|
2
|
+
require 'yaml'
|
3
|
+
require 'intervals'
|
4
|
+
|
5
|
+
module MutationSet
|
6
|
+
class Line
  include IntervalList::Interval
  attr_reader :sample
  attr_accessor :invalid

  # Validation patterns used by #valid_onco_input?. They are anchored with
  # \A and \z (not ^ and $) so that a value containing an embedded newline
  # cannot pass validation and end up in the Oncotator lookup key.
  CHROM_POS = /\A[0-9]+\z/
  ALLELE_SEQ = /\A([A-Z]+|-)\z/

  # Defines a reader +sym1+ and a writer +sym1=+ on the class that forward
  # to +sym2+ and +sym2=+ respectively.
  def self.alias_key(sym1, sym2)
    define_method(sym1) do
      send sym2
    end
    define_method("#{sym1}=") do |v|
      send "#{sym2}=", v
    end
  end

  # Returns a new line of the same class built from a shallow clone of the
  # field hash and the same sample.
  def copy
    self.class.new @mutation.clone, sample
  end

  def invalid?
    invalid
  end

  def invalidate!
    @invalid = true
  end

  # fields:: either a Hash of field values (used as-is) or an Array of
  #          values, which is zipped against sample.clean_headers.
  # sample:: the owning set; must respond to clean_headers.
  def initialize(fields, sample)
    @mutation = if fields.is_a? Hash
                  fields
                else
                  Hash[sample.clean_headers.zip(fields)]
                end
    @sample = sample
  end

  # Index key of the form "chrom:start:stop".
  def key
    "#{chrom}:#{start}:#{stop}"
  end

  # Chromosome name with a "chr" prefix (memoized on first call).
  def long_chrom
    @long_chrom ||= "chr#{short_chrom}"
  end

  # Chromosome name with a leading "chr" removed (memoized on first call).
  def short_chrom
    @short_chrom ||= chrom.sub(/^chr/,'')
  end

  # Tab-joined field values in the order of sample.clean_headers.
  def to_s
    sample.clean_headers.map { |h| @mutation[h] }.join("\t")
  end

  # Returns the internal field hash itself (not a copy).
  def to_hash
    @mutation
  end

  # Looks up the criteria stored under +name+ (a key, or an Array of nested
  # keys) in sample.mutation_config and checks each one against +obj+.
  #
  # Returns true as soon as one criterion is not satisfied, and nil when
  # there is no config, no such criteria entry, or every criterion passes.
  def criteria_failed?(obj, name)
    return nil unless sample.mutation_config
    path = name.is_a?(Array) ? name : [ name ]
    crit = path.reduce(sample.mutation_config) do |h, n|
      h.is_a?(Hash) ? h[n] : nil
    end
    return nil unless crit
    crit.each do |attrib, value|
      return true unless criterion_ok?(obj, attrib, value)
    end
    nil
  end

  # Evaluates one criterion against +obj+. The prefix of +attrib+ selects
  # the comparison; the remainder names the method sent to +obj+:
  #
  # min_X / max_X::   numeric lower / upper bound (both sides via to_f)
  # exclude_X::       X must match none of the pattern(s) in +value+
  # include_X::       X must match at least one pattern in +value+
  # has_X::           membership (Array), presence (true), absence
  #                   (false or "nil"), or equality (anything else)
  # either...::       +value+ is a collection of attrib/value pairs; passes
  #                   if any of them passes
  # whitelisted / blacklisted:: +value+ is handed to sample.whitelist /
  #                   sample.blacklist and the result is intersected with
  #                   this line
  # anything else::   +attrib+ is sent to +obj+ and checked for truthiness
  #                   (true) or falsiness ("nil", false, nil)
  #
  # Returns a truthy value when the criterion is satisfied. Criteria with
  # an unrecognised value in the final branch count as satisfied.
  def criterion_ok?(obj, attrib, value)
    case attrib
    when /^min_(.*)/
      return obj.send($1.to_sym).to_f >= value.to_f
    when /^max_(.*)/
      return obj.send($1.to_sym).to_f <= value.to_f
    when /^exclude_(.*)/
      v = obj.send($1.to_sym)
      if value.is_a? Array
        # =~ rather than #match so a nil field is handled the same way as
        # in the scalar branch instead of raising NoMethodError
        return value.none? { |r| v =~ /#{r}/ }
      else
        return v !~ /#{value}/
      end
    when /^has_(.*)/
      v = obj.send($1.to_sym)
      if value.is_a? Array
        return value.include? v
      elsif value == true
        return v && (v.is_a?(String) ? v.size > 0 : v)
      elsif value == false || value == "nil"
        return !v
      else
        return value == v
      end
    when /^include_(.*)/
      v = obj.send($1.to_sym)
      if value.is_a? Array
        return value.any? { |r| v =~ /#{r}/ }
      else
        return v =~ /#{value}/
      end
    when /^either.*/
      return value.any? { |sub_attrib, sub_value| criterion_ok?(obj, sub_attrib, sub_value) }
    when /^whitelisted/
      return sample.whitelist(value).intersect(self)
    when /^blacklisted/
      return !sample.blacklist(value).intersect(self)
    else
      # no recognised prefix: treat attrib as a predicate on obj
      case value
      when "nil", false, nil
        return !obj.send(attrib.to_sym)
      when true
        return obj.send(attrib.to_sym)
      end
    end
    true
  end

  # Returns the (memoized) Oncotator record for this line.
  #
  # Raises ArgumentError listing every malformed field when start, stop,
  # ref_allele or alt_allele do not pass validation.
  def onco
    raise ArgumentError, @onco_error.join(", ") unless valid_onco_input?
    @onco ||= Oncotator.new :key => self.to_ot
  end

  # Drops the memoized Oncotator record.
  def discard_onco
    @onco = nil
  end

  # Returns true when there is no usable Oncotator record or the record
  # fails the given criteria (default :oncotator); nil otherwise.
  def skip_oncotator?(criteria = nil)
    return true if !onco || onco.empty? || criteria_failed?(onco, criteria || :oncotator)
  end

  def inspect
    "#<#{self.class.name}:#{object_id} @mutation=#{@mutation}>"
  end

  def in_cosmic
    onco.Cosmic_overlapping_mutations ? "YES" : "NO"
  end

  # Oncotator lookup key: chrom (without "chr"), start, stop, ref and alt
  # alleles joined with underscores.
  def to_ot
    [ short_chrom, start, stop, ref_allele, alt_allele ].join("_")
  end

  # Field access by name: +foo=+ stores into the field hash under :foo and
  # +foo+ reads it back when that key exists. Anything else goes to super.
  #
  # The setter pattern is anchored to a plain identifier followed by "=",
  # so operator-like method names containing "=" are not mistaken for
  # field assignments.
  def method_missing(meth, *args, &block)
    if meth.to_s =~ /\A(\w+)=\z/
      @mutation[$1.to_sym] = args.first
    elsif @mutation.has_key?(meth.to_sym)
      @mutation[meth.to_sym]
    else
      super
    end
  end

  # Reports field readers whose value is non-nil as supported. Implemented
  # as respond_to_missing? so the standard two-argument form of
  # respond_to? keeps working.
  def respond_to_missing?(method, include_private = false)
    !@mutation[method.to_sym].nil? || super
  end

  private

  # Collects a message for every field that is not well-formed into
  # @onco_error and returns true when there are none.
  def valid_onco_input?
    @onco_error = []
    @onco_error.push 'Malformed start position' unless start.to_s =~ CHROM_POS
    @onco_error.push 'Malformed stop position' unless stop.to_s =~ CHROM_POS
    @onco_error.push 'Malformed reference allele' unless ref_allele.to_s =~ ALLELE_SEQ
    @onco_error.push 'Malformed alt allele' unless alt_allele.to_s =~ ALLELE_SEQ
    @onco_error.empty?
  end
end
|
181
|
+
|
182
|
+
# A tab-delimited set of mutation records: optional preamble lines, one
# header row, then one record per line. Subclasses declare their required
# header names with +requires+ and provide a nested +Line+ class used to
# wrap each record.
class Sample
  include Enumerable
  attr_reader :samples, :mutation_config, :lines, :preamble_lines
  attr_accessor :headers

  class << self
    attr_reader :required, :comment

    # Declares the header names a file of this type must contain. The
    # first one is also used by #load_file to recognise the header row.
    def requires(*terms)
      @required = terms
    end

    # Records the comment prefix for this file type.
    def comments(c)
      @comment = c
    end

    # Builds a set (without default headers) and loads +filename+ into it.
    def read(filename, mutation_config = nil)
      set = new mutation_config, true
      set.load_file filename
      set
    end
  end

  # Reads +filename+ line by line. Every line before the header row is
  # kept verbatim in preamble_lines; the header row is the first line whose
  # first field equals the first required header (case-insensitively).
  # Subsequent non-blank lines are added as records.
  def load_file(filename)
    File.foreach(filename) do |l|
      fields = l.chomp.split(/\t/, -1)
      unless headers
        # a blank line splits into [], so guard against a nil first field
        if fields.first.to_s.downcase == required.first.downcase
          enforce_headers fields
        else
          preamble_lines.push l
        end
        next
      end
      # a blank line carries no fields and would otherwise become a record
      # with every field nil
      next if fields.empty?
      add_line fields
    end

    post_read_hook
  end

  # The preamble lines joined back into one string.
  def preamble
    preamble_lines.join("")
  end

  # Writes the set to the file at path +file+.
  def write(file)
    File.open(file, "w") do |f|
      output f
    end
  end

  # Writes to the path +f+ when given, otherwise to STDOUT.
  def print(f = nil)
    if f
      write f
    else
      output STDOUT
    end
  end

  # Writes preamble, header row and every record to the IO +f+.
  #
  # When a block is given each line is passed through it first; lines for
  # which the block returns nil/false, and lines flagged invalid, are
  # skipped.
  def output(f)
    text = preamble
    # puts on an empty string would emit a stray blank first line
    f.puts text unless text.to_s.empty?
    f.puts headers.join("\t")
    @lines.each do |l|
      l = yield l if block_given?
      next if !l || l.invalid?
      f.puts format_line(l)
    end
  end

  def format_line(l)
    l.to_s
  end

  # Normalises a header into a symbol: whitespace runs become "_", any
  # other non-word characters are removed, and the result is downcased.
  def clean_header(s)
    s.to_s.gsub(/\s+/,"_").gsub(/[^\w]+/,"").downcase.to_sym
  end

  def clean_headers
    @headers.map { |h| clean_header h }
  end

  # Wraps +fields+ in this class's Line, appends it and indexes it by key.
  def add_line(fields)
    @lines.push self.class.const_get(:Line).new(clean_fields(fields), self)

    index_line @lines.last
  end

  # Replaces "NA" entries of an Array with ""; other inputs pass through.
  def clean_fields(fields)
    fields.is_a?(Array) ? fields.map { |f| f == "NA" ? "" : f } : fields
  end

  def index_line(line)
    @index[ line.key ] = line
  end

  # Returns the indexed line with the same key as +line+, if any.
  def find_mutation(line)
    @index[ line.key ]
  end

  def required
    self.class.required
  end

  # Accepts +array+ as the header row, raising when any required header
  # is absent (compared case-insensitively).
  def enforce_headers(array)
    missing = required.map(&:downcase) - array.map(&:downcase)
    raise "File lacks required headers: #{missing.join(", ")}" unless missing.empty?
    @headers = array
  end

  # mutation_config::  optional path to a YAML file of filter criteria
  # suppress_headers:: when truthy, headers are left unset so that
  #                    #load_file takes them from the file
  def initialize(mutation_config = nil, suppress_headers = nil)
    @lines = []

    @mutation_config = YAML.load_file(mutation_config) if mutation_config

    @headers = required.map(&:to_sym) unless suppress_headers

    @preamble_lines = []

    @index = {}
  end

  # Interval list loaded from +file+ (a .gtf or .vcf path), cached per
  # file so that different criteria may name different files. Returns nil
  # for any other extension.
  def whitelist(file)
    @whitelists ||= {}
    @whitelists[file] ||= load_interval_list(file)
  end

  # Same as #whitelist, with a separate cache.
  def blacklist(file)
    @blacklists ||= {}
    @blacklists[file] ||= load_interval_list(file)
  end

  def to_interval_list
    IntervalList.new self.map { |g| [ g.chrom, g.start, g.stop, g ] }
  end

  def inspect
    to_s
  end

  def [](key)
    @lines[key]
  end

  def sort_by!(&block)
    @lines.sort_by!(&block)
  end

  # Yields each line in order; returns an Enumerator when called without
  # a block.
  def each(&block)
    return enum_for(:each) unless block_given?
    @lines.each(&block)
  end

  protected

  # Called at the end of #load_file; a no-op here for subclasses to
  # override.
  def post_read_hook
  end

  private

  # Loads +file+ as an interval list based on its extension. The parser
  # libraries are required lazily, only when a file of that type is used.
  def load_interval_list(file)
    case file
    when /\.gtf\z/
      require 'gtf'
      GTF.new(file).to_interval_list
    when /\.vcf\z/
      require 'vcf'
      VCF.read(file).to_interval_list
    end
  end
end
|
351
|
+
end
|
data/lib/mutect.rb
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'oncotator'
|
2
|
+
require 'yaml'
|
3
|
+
require 'mutation_set'
|
4
|
+
|
5
|
+
# MuTect call-stats output as a MutationSet::Sample: declares the header
# columns a file must contain and a Line subclass with derived read-count
# statistics.
class MuTect < MutationSet::Sample
  requires(*%w[
    contig position context ref_allele alt_allele
    tumor_name normal_name score dbsnp_site covered power
    tumor_power normal_power total_pairs improper_pairs
    map_q0_reads t_lod_fstar tumor_f contaminant_fraction
    contaminant_lod t_ref_count t_alt_count t_ref_sum t_alt_sum
    t_ref_max_mapq t_alt_max_mapq t_ins_count t_del_count
    normal_best_gt init_n_lod n_ref_count n_alt_count n_ref_sum
    n_alt_sum judgement
  ])
  comments "##"

  class Line < MutationSet::Line
    alias_key :chrom, :contig
    alias_key :start, :position

    # Explicitly assigned stop if there is one, otherwise the position
    # derived from the reference allele length.
    def stop
      @stop || end_position
    end

    def stop=(nc)
      @stop = nc
    end

    # True unless the [:mutect, :somatic] criteria reject this line.
    def keep_somatic?
      failed = criteria_failed?(self, [ :mutect, :somatic ])
      !failed
    end

    # True unless the [:mutect, :germline] criteria reject this line.
    def keep_germline?
      failed = criteria_failed?(self, [ :mutect, :germline ])
      !failed
    end

    # Last position covered by the reference allele.
    def end_position
      first_base = position.to_i
      first_base + ref_allele.length - 1
    end

    # map_q0_reads divided by the combined tumor and normal alt counts.
    def q0_ratio
      q0_reads = map_q0_reads.to_f
      alt_reads = t_alt_count.to_i + n_alt_count.to_i
      q0_reads / alt_reads
    end

    # Normal variant frequency relative to tumor variant frequency, or 0
    # when the tumor frequency is not positive.
    def vf_ratio
      if t_var_freq > 0
        n_var_freq / t_var_freq
      else
        0
      end
    end

    def t_var_freq
      t_alt_count.to_f / t_depth
    end

    def n_var_freq
      n_alt_count.to_f / n_depth
    end

    # Tumor alt + ref read counts.
    def t_depth
      t_alt_count.to_i + t_ref_count.to_i
    end

    # Normal alt + ref read counts.
    def n_depth
      n_alt_count.to_i + n_ref_count.to_i
    end
  end
end
|
data/lib/oncotator.rb
ADDED
@@ -0,0 +1,144 @@
|
|
1
|
+
require 'net/http/persistent'
|
2
|
+
require 'resolv-replace'
|
3
|
+
require 'json'
|
4
|
+
require 'sequel'
|
5
|
+
require 'yaml'
|
6
|
+
require 'germ/config'
|
7
|
+
|
8
|
+
# Wrapper around one Oncotator annotation record (parsed JSON). Records are
# looked up by key in a local cache table first and fetched over HTTP when
# absent; fields of the record are exposed as methods via method_missing.
class Oncotator
  attr_accessor :mutation

  # Shared, lazily created persistent HTTP connection.
  def self.persistent_connection
    @http ||= Net::HTTP::Persistent.new
  end

  # Overrides the database connection options used by db_cache.
  def self.db_connect(opts)
    @db_opts = opts
  end

  # Connection options; default to the :oncotator entry of TaylorlibConfig.
  def self.db_opts
    @db_opts ||= TaylorlibConfig.get_conf :oncotator
  end

  # The :onco_json_cache dataset on a lazily opened Sequel connection.
  def self.db_cache
    @db ||= Sequel.connect(db_opts)
    @db[:onco_json_cache]
  end

  # Stores a cache row, through the Rails model when Rails is loaded and
  # through Sequel otherwise.
  def self.insert_onco(onco)
    if defined? Rails
      OncoJsonCache.create onco
    else
      db_cache.insert_ignore.insert onco
    end
  end

  # The object queried for cache rows: the Rails model when Rails is
  # loaded, otherwise the Sequel dataset.
  def self.db_obj
    if defined? Rails
      OncoJsonCache
    else
      Oncotator.db_cache
    end
  end

  def self.find_key(cache_key)
    db_obj.where(:CACHE_KEY => cache_key).first
  end

  def self.delete_key(cache_key)
    db_obj.where(:CACHE_KEY => cache_key).delete
  end

  def onco_uri
    URI "http://69.173.64.101/oncotator/mutation/#{@mutation}/"
  end

  # Returns the parsed record.
  #
  # text:: raw JSON to parse; when nil the cache is consulted for
  #        @mutation instead.
  #
  # If the JSON at hand does not parse, the cache row for @mutation is
  # deleted and the record is fetched over HTTP. Without a key there is
  # nothing to purge or fetch, so {} is returned. A non-200 response also
  # yields {}. A fetched body is written to the cache only after it has
  # parsed, so an unparseable response is never stored; in that case
  # JSON::ParserError propagates to the caller.
  def get_json_object(text = nil)
    json = text
    if json.nil?
      result = Oncotator.find_key @mutation
      json = result ? result[:RAW_JSON] : nil
    end

    if json
      begin
        return JSON.parse(json)
      rescue JSON::ParserError
        # you have a bad data blob
        return {} unless @mutation
        Oncotator.delete_key @mutation
      end
    end

    # if that doesn't work, query Oncotator.
    response = Oncotator.persistent_connection.request(onco_uri)

    return {} if response.code != "200"

    body = response.body
    parsed = JSON.parse(body)

    # save it, now that it is known to be valid JSON
    Oncotator.insert_onco(:CACHE_KEY => @mutation, :RAW_JSON => body)

    parsed
  end

  # opts:: :key  => lookup key (cache first, then HTTP), or
  #        :text => raw JSON to parse directly.
  #        With neither, the object holds no record.
  def initialize(opts)
    if opts[:key]
      @mutation = opts[:key]
      @onco = get_json_object
    elsif opts[:text]
      @onco = get_json_object opts[:text]
    end
  end

  def empty?
    !@onco || @onco.size == 0
  end

  # One transcript entry of a record; every method call is a lookup of
  # that name in the transcript data, nil when absent.
  class Transcript
    def initialize(txp)
      @txp = txp || {}
    end

    def method_missing(meth, *args, &block)
      @txp[meth.to_s] || nil
    end
  end

  def best_effect_txp
    @best_effect_txp ||= Transcript.new(transcripts[best_effect_transcript]) if best_effect_transcript
  end

  def best_canonical_txp
    @best_canonical_txp ||= Transcript.new(transcripts[best_canonical_transcript]) if best_canonical_transcript
  end

  def is_snp
    dbSNP_RS && dbSNP_Val_Status =~ /(byFrequency|by1000genomes)/
  end

  def txp
    best_effect_txp
  end

  def pph2_class
    pph2 ? pph2["pph2_class"] : nil
  end

  def is_cancerous
    self.Cosmic_overlapping_mutations || self.CGC_Tumor_Types_Somatic || self.CCLE_ONCOMAP_total_mutations_in_gene
  end

  # Any unknown method is a field lookup: first in the record itself,
  # then, for names of the form txp_X, as X on the best-effect transcript.
  # Returns nil when nothing matches, including when no record is loaded.
  def method_missing(meth, *args, &block)
    name = meth.to_s
    record = @onco || {}
    return record[name] if record[name]

    if name =~ /^txp_(.*)/
      field = $1
      transcript = txp
      return transcript ? transcript.send(field) : nil
    end

    nil
  end
end
|