germ 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ext/fasta_aux/FastaAux.c +137 -0
- data/ext/fasta_aux/extconf.rb +7 -0
- data/ext/hash_table_aux/HashTableAux.c +246 -0
- data/ext/hash_table_aux/extconf.rb +7 -0
- data/lib/fasta.rb +79 -0
- data/lib/germ.rb +11 -0
- data/lib/germ/config.rb +34 -0
- data/lib/germ/data_types.rb +47 -0
- data/lib/germ/flagstat.rb +23 -0
- data/lib/germ/printer.rb +15 -0
- data/lib/gtf.rb +248 -0
- data/lib/hash_table.rb +195 -0
- data/lib/indelocator.rb +46 -0
- data/lib/intervals.rb +337 -0
- data/lib/maf.rb +92 -0
- data/lib/mutation_set.rb +351 -0
- data/lib/mutect.rb +43 -0
- data/lib/oncotator.rb +144 -0
- data/lib/sam.rb +196 -0
- data/lib/vcf.rb +162 -0
- metadata +115 -0
data/lib/indelocator.rb
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'oncotator'
|
2
|
+
require 'yaml'
|
3
|
+
require 'mutation_set'
|
4
|
+
|
5
|
+
# Sample reader for Indelocator output, built on MutationSet::Sample.
#
# `comments` and `requires` are class-level macros inherited from the
# superclass (not visible in this file); here they receive the comment
# prefix "##" and the list of column names this format expects.
class Indelocator < MutationSet::Sample
  comments "##"

  requires "chrom", "start", "stop", "change",
    "n_obs_counts", "n_av_mm", "n_av_mapq", "n_nqs_mm_rate", "n_nqs_av_qual", "n_strand_counts",
    "t_obs_counts", "t_av_mm", "t_av_mapq", "t_nqs_mm_rate", "t_nqs_av_qual", "t_strand_counts",
    "status"

  # One record of an Indelocator file.
  class Line < MutationSet::Line
    # True when the [:mutect, :somatic] criteria do not fail for this line.
    # NOTE(review): the criteria key is :mutect, not an indelocator-specific
    # key -- confirm this is intended.
    def keep_somatic?
      failed = criteria_failed?(self, [ :mutect, :somatic ])
      !failed
    end

    # True when the [:mutect, :germline] criteria do not fail for this line.
    def keep_germline?
      failed = criteria_failed?(self, [ :mutect, :germline ])
      !failed
    end

    # Builds "<contig>_<pos>_<end>_<ref>_<alt>", with the first "chr" removed
    # from the contig and <end> = position + ref_allele.length - 1.
    # NOTE(review): contig/position/ref_allele/alt_allele are not among the
    # columns listed in `requires` above -- verify they are provided elsewhere.
    def to_ot
      contig_name = contig.sub(/chr/,"")
      last_base = position.to_i + ref_allele.length-1
      "#{contig_name}_#{position}_#{last_base}_#{ref_allele}_#{alt_allele}"
    end

    # Fraction of tumor reads supporting the alternate allele.
    def t_var_freq
      t_alt_count.to_f / t_depth
    end

    # Fraction of normal reads supporting the alternate allele.
    def n_var_freq
      n_alt_count.to_f / n_depth
    end

    # Tumor depth: alt count plus ref count, both coerced with to_i.
    def t_depth
      t_alt_count.to_i + t_ref_count.to_i
    end

    # Normal depth: alt count plus ref count, both coerced with to_i.
    def n_depth
      n_alt_count.to_i + n_ref_count.to_i
    end

    # fields - raw column values for this record
    # sample - owning sample; its clean_headers are zipped with fields
    #
    # Every column whose name starts with "n_" or "t_" is replaced by the
    # text after its first ":" split on "/" (so it becomes an Array).
    def initialize(fields, sample)
      @sample = sample
      @mutation = Hash[sample.clean_headers.zip(fields)]

      @mutation.each do |key, value|
        next unless key.to_s =~ /^[nt]_/
        payload = value.scan(/:(.*)/).flatten.first
        @mutation[key] = payload.split %r!/!
      end
    end
  end

  # Delegates to the superclass constructor, then sets @headers to the
  # required column names as symbols.
  def initialize(mutation_config = nil, suppress_headers = nil)
    super(mutation_config, suppress_headers)
    @headers = required.map { |name| name.to_sym }
  end
end
|
data/lib/intervals.rb
ADDED
@@ -0,0 +1,337 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
class IntervalList
|
4
|
+
include Enumerable
|
5
|
+
# Flat index over an array of intervals, queried with binary search.
#
# The searches rely on below?/above? being monotonic across @track; the only
# caller visible in this file passes a list sorted by start.
# NOTE(review): nested intervals may break monotonicity of below? -- confirm.
class OrderedList
  include Enumerable

  # ints - array of intervals, stored as-is (no copy, no sorting here)
  def initialize(ints)
    @track = ints
  end

  # Yields each stored interval in order.
  def each
    @track.each { |entry| yield entry }
  end

  # Returns the stored intervals overlapping +interval+, each clipped to it
  # via strict_overlap, or nil when nothing overlaps.
  def intersect(interval)
    hits = overlap(interval)
    return nil unless hits
    hits.map { |hit| hit.strict_overlap(interval) }
  end

  # Returns the slice of stored intervals that are neither entirely below nor
  # entirely above +interval+, or nil when that slice is empty.
  def overlap(interval)
    # index of the first stored interval that is not below the query
    first_idx = (0...@track.size).bsearch { |i| !@track[i].below?(interval) }

    # nil means everything is below the query; index 0 may already be above it
    return nil if first_idx.nil?
    return nil if first_idx == 0 && @track[first_idx].above?(interval)

    # index of the first stored interval that is above the query
    past_idx = (0...@track.size).bsearch { |i| @track[i].above?(interval) }

    # when nothing is above, the slice runs to the end of the track
    last_idx = past_idx ? past_idx - 1 : @track.size - 1

    found = @track[first_idx..last_idx]
    found.empty? ? nil : found
  end

  # Returns the stored interval closest to +interval+ by dist, choosing
  # between the first interval that is not below the query and its
  # predecessor. Ties go to the later one.
  def nearest(interval)
    idx = (0...@track.size).bsearch { |i| !@track[i].below?(interval) }

    return @track.last if idx.nil?

    candidate = @track[idx]
    return candidate if idx == 0

    before = @track[idx - 1]
    if candidate.dist(interval) > before.dist(interval)
      before
    else
      candidate
    end
  end
end
|
55
|
+
# Binary tree over intervals sorted by start. Each node stores one interval
# plus the largest stop found in its subtree (@max), which lets overlap
# queries skip subtrees that end before the query starts.
class BinaryTree
  attr_reader :max

  # Builds a tree from intervals in any order (sorts them by start first).
  def self.create(intervals)
    new intervals.sort_by(&:start)
  end

  # intervals - non-empty array of intervals, assumed sorted by start
  #
  # Raises ArgumentError for an empty array. each_slice(0) raised the same
  # class before; the message is now explicit.
  def initialize(intervals)
    raise ArgumentError, "cannot build a BinaryTree from an empty list" if intervals.empty?

    # split into a lower and an upper half; the last element of the lower
    # half becomes this node (each_slice builds new arrays, so the caller's
    # array is not mutated by pop)
    low, high = intervals.each_slice((intervals.size / 2.0).round).to_a
    @node = low.pop
    @left = BinaryTree.new low unless low.empty?
    @right = BinaryTree.new high unless high.nil?
    update_max
  end

  # Sets @max to the largest stop among this node and its children's maxima.
  def update_max
    @max = @node.stop
    @max = @left.max if @left && @left.max > @max
    @max = @right.max if @right && @right.max > @max
  end

  # Returns the stored interval with the smallest dist to +interval+.
  # This visits every node (linear scan); when distances tie, this node wins
  # over its children and the left subtree over the right.
  def nearest(interval)
    best = @node
    [ @left, @right ].each do |child|
      next unless child
      candidate = child.nearest(interval)
      best = candidate if candidate.dist(interval) < best.dist(interval)
    end
    best
  end

  # Returns an array (possibly empty) of stored intervals overlapping
  # +interval+, in start order.
  def overlap(interval)
    ols = []
    # nothing in this subtree reaches the query
    return ols if interval.start > @max
    ols.concat @left.overlap(interval) if @left
    ols.push @node if @node.overlaps? interval
    # the right subtree starts at or after this node, so skip it when this
    # node already lies above the query
    ols.concat @right.overlap(interval) if @right && !@node.above?(interval)
    ols
  end

  # Returns the overlapping intervals clipped to +interval+ via
  # strict_overlap, or nil when nothing overlaps (the same contract as
  # OrderedList#intersect).
  def intersect(interval)
    hits = overlap(interval)
    return nil if hits.empty?
    hits.map { |hit| hit.strict_overlap(interval) }
  end
end
|
89
|
+
# Centered tree over intervals: each node picks a midpoint, sends intervals
# that end before it to the left subtree and intervals that start after it
# to the right subtree, and keeps the rest as its center lists.
# Only construction is implemented here; there are no query methods.
class Tree
  # Builds a tree from intervals in any order.
  def self.create(intervals)
    new intervals.sort_by(&:start), intervals.sort_by(&:stop)
  end

  # starts - the intervals sorted by start
  # stops  - the same intervals sorted by stop
  #
  # Raises ArgumentError when given no intervals.
  def initialize(starts, stops)
    raise ArgumentError, "cannot build a Tree from an empty list" if starts.empty?

    # midpoint between the smallest start and the largest stop
    midp = (starts.first.start + stops.last.stop) / 2
    # a point interval at the midpoint, cloned from a member interval (the
    # original cloned the Array itself, which rejects the :pos option)
    @mid = starts.first.clone :pos => midp

    l = left_tree starts, stops
    r = right_tree starts, stops
    @left = self.class.new(*l) unless l.first.empty?
    @right = self.class.new(*r) unless r.first.empty?
    @center_start = starts - l.first - r.first
    @center_stop = stops - l.last - r.last
  end

  private

  # Returns [ starts, stops ] for the intervals lying entirely below @mid,
  # in the argument order initialize expects.
  def left_tree(starts, stops)
    low = (0...stops.size).bsearch do |i|
      !stops[i].below?(@mid)
    end
    # a nil low means every interval is below the midpoint
    left_stops = stops[0...(low || stops.size)]
    return [ [], [] ] if left_stops.empty?
    left_starts = starts & left_stops
    [ left_starts, left_stops ]
  end

  # Returns [ starts, stops ] for the intervals lying entirely above @mid.
  def right_tree(starts, stops)
    low = (0...starts.size).bsearch do |i|
      starts[i].above?(@mid)
    end
    right_starts = (low ? starts[low..-1] : [])
    return [ [], [] ] if right_starts.empty?
    right_stops = stops & right_starts
    [ right_starts, right_stops ]
  end
end
|
127
|
+
# Mixin with interval arithmetic. The including class must provide
# chrom/start/stop readers and writers, plus a `copy` method returning a
# duplicate of the object.
module Interval
  # Returns a copy of this interval with the given fields replaced.
  #
  # opts - :chrom, :start, :stop override single fields; :pos sets both
  #        start and stop and is applied last, so it wins over :start/:stop.
  def clone(opts = {})
    c = copy
    c.chrom = opts[:chrom] if opts[:chrom]
    c.start = opts[:start] if opts[:start]
    c.stop = opts[:stop] if opts[:stop]
    c.start = opts[:pos] if opts[:pos]
    c.stop = opts[:pos] if opts[:pos]
    c
  end
  #def start= ns; @start = ns; end
  #def stop= ns; @stop = ns; end

  # True when this interval ends before +interval+ starts (chrom ignored).
  def below?(interval)
    stop < interval.start
  end

  # True when this interval starts after +interval+ ends (chrom ignored).
  def above?(interval)
    start > interval.stop
  end

  # True when both are on the same chrom and share at least one position.
  def overlaps?(interval)
    chrom == interval.chrom && !below?(interval) && !above?(interval)
  end

  # With a Numeric, tests whether that position lies within start..stop.
  # With an interval, tests same chrom and full containment.
  def contains?(interval)
    if interval.is_a? Numeric
      start <= interval && stop >= interval
    else
      chrom == interval.chrom && start <= interval.start && stop >= interval.stop
    end
  end

  # Returns a copy of this interval clipped to the part shared with
  # +interval+, or nil when they do not overlap.
  def strict_overlap(interval)
    return nil if !overlaps? interval

    # clone takes a single options hash; passing positional arguments here
    # raised ArgumentError on every call
    clone :start => [ interval.start, start ].max, :stop => [ interval.stop, stop ].min
  end

  # Returns an IntervalList with the parts of this interval not covered by
  # +interval+: the interval itself when they do not overlap, one or two
  # pieces when partly covered, and nil when fully covered.
  def strict_diff(interval)
    ol = strict_overlap interval
    return IntervalList.new [ self ] if !ol
    ints = []
    if ol.start > start
      ints.push clone( :start => start, :stop => ol.start-1 )
    end
    if ol.stop < stop
      ints.push clone(:start => ol.stop+1, :stop => stop)
    end
    if !ints.empty?
      return IntervalList.new ints
    end
  end

  # Returns a copy spanning both intervals, or nil when +interval+ is nil or
  # does not overlap this one.
  def strict_union(interval)
    return nil unless interval && overlaps?(interval)
    clone :start => [ interval.start, start ].min, :stop => [ interval.stop, stop ].max
  end

  # Delegates to interval_list.overlap with this interval as the query.
  def overlap(interval_list)
    interval_list.overlap self
  end

  # Delegates to interval_list.nearest with this interval as the query.
  def nearest(interval_list)
    interval_list.nearest self
  end

  # Delegates to interval_list.intersect with this interval as the query.
  def intersect(interval_list)
    interval_list.intersect self
  end

  # Number of positions covered, counting both ends.
  def size
    stop - start + 1
  end

  # Midpoint as a Float.
  def center
    (stop + start)/2.0
  end

  # Absolute distance between the two centers.
  def dist(interval)
    (center-interval.center).abs
  end

  # Total size of the pieces of this interval shared with interval_list;
  # 0 when intersect returns nil.
  def intersection_size(interval_list)
    inters = intersect(interval_list)
    return 0 if !inters
    inters.inject(0) { |sum, int| sum + int.size }
  end
end
|
216
|
+
# Minimal concrete interval: chrom/start/stop plus an arbitrary data payload.
class BasicInterval
  include Interval

  attr_accessor :chrom, :start, :stop, :data

  # opts - hash with :chrom, :start, :stop, :data; a truthy :pos overrides
  #        both start and stop.
  def initialize(opts)
    @chrom = opts[:chrom]
    @start, @stop = opts[:start], opts[:stop]
    pos = opts[:pos]
    @start = @stop = pos if pos
    @data = opts[:data]
  end

  # Returns a new instance of the same class with the same four fields
  # (the data object itself is shared, not duplicated).
  def copy
    self.class.new(
      :chrom => @chrom,
      :start => @start,
      :stop => @stop,
      :data => @data
    )
  end

  # Short description showing class, an id derived from object_id, and the
  # coordinates; the data payload is left out.
  def inspect
    hex_id = '%x' % (object_id << 1)
    "#<#{self.class}:0x#{hex_id} @chrom=#{@chrom} @start=#{@start} @stop=#{@stop}>"
  end
end
|
235
|
+
|
236
|
+
# Yields every interval in @intervals in stored order; this is the method
# Enumerable builds on.
def each
  @intervals.each { |interval| yield(interval) }
end
|
241
|
+
|
242
|
+
# Looks up the index for interval.chrom and returns its overlap result,
# or nil when no intervals are stored for that chrom.
def overlap(interval)
  chrom_track = @ints_chrom[interval.chrom]
  chrom_track ? chrom_track.overlap(interval) : nil
end
|
247
|
+
|
248
|
+
# Looks up the index for interval.chrom and returns its nearest result,
# or nil when no intervals are stored for that chrom.
def nearest(interval)
  chrom_track = @ints_chrom[interval.chrom]
  chrom_track ? chrom_track.nearest(interval) : nil
end
|
253
|
+
|
254
|
+
# Looks up the index for interval.chrom and returns its intersect result,
# or nil when no intervals are stored for that chrom.
def intersect(interval)
  chrom_track = @ints_chrom[interval.chrom]
  chrom_track ? chrom_track.intersect(interval) : nil
end
|
259
|
+
|
260
|
+
# subtract this set of intervals from the given interval_list
|
261
|
+
# For each interval in interval_list, removes the parts covered by this
# list's intervals.
#
# Returns one entry per input interval: the interval itself when nothing
# here overlaps it, otherwise an array of the pieces left over (empty when
# it is completely covered).
def diff(interval_list)
  interval_list.map do |int|
    ols = overlap(int)
    # the flat index reports "no overlap" as nil, the tree index as []
    if ols.nil? || ols.empty?
      int
    else
      # subtract each overlapping interval in turn from whatever is left;
      # strict_diff may return nil (fully covered), which to_a turns into []
      ols.inject([ int ]) do |remaining, ol|
        remaining.flat_map { |piece| piece.strict_diff(ol).to_a }
      end
    end
  end
end
|
274
|
+
|
275
|
+
# array - enumerable of objects that include IntervalList::Interval
# opts  - :type is passed to sort_ints_chrom to choose the per-chrom index
#
# Keeps the intervals in input order in @intervals, groups them by chrom in
# @ints_chrom, then has sort_ints_chrom replace each group with its index.
#
# Raises ArgumentError for an item that is not an Interval. Before, such an
# item was pushed as nil and failed with NoMethodError on nil.chrom.
def initialize(array, opts = {})
  @intervals = []
  @ints_chrom = {}
  array.each do |item|
    unless item.is_a? IntervalList::Interval
      raise ArgumentError, "expected an IntervalList::Interval, got #{item.inspect}"
    end
    @intervals.push item
    (@ints_chrom[item.chrom] ||= []).push item
  end

  sort_ints_chrom opts[:type]
end
|
289
|
+
|
290
|
+
# Short description showing the class, an id derived from object_id, and
# how many intervals are held (not the intervals themselves).
def inspect
  hex_id = '%x' % (object_id << 1)
  interval_count = @intervals.size
  "#<#{self.class}:0x#{hex_id} @intervals=#{interval_count}>"
end
|
293
|
+
|
294
|
+
attr_reader :ints_chrom
|
295
|
+
|
296
|
+
# Merges overlapping intervals on each chrom, in place, and returns self.
#
# The per-chrom entries of @ints_chrom are index objects, not arrays, so the
# intervals are regrouped from @intervals, sorted by start, merged with
# collapsed_list, and wrapped again in the same index class that was there
# before. That keeps overlap/nearest/intersect working after a collapse.
def collapse!
  by_chrom = @intervals.group_by(&:chrom)

  merged = {}
  @ints_chrom.keys.each do |chrom|
    sorted = by_chrom.fetch(chrom, []).sort_by(&:start)
    merged[chrom] = collapsed_list sorted
  end

  merged.each do |chrom, list|
    @ints_chrom[chrom] = @ints_chrom[chrom].class.new list
  end

  @intervals = merged.values.flatten
  self
end
|
305
|
+
|
306
|
+
private
|
307
|
+
# Merges runs of consecutive intervals into single intervals.
#
# intervals - enumerable of intervals, expected to be sorted already
#
# Walks the list keeping one pending interval; each next interval is either
# merged into it via strict_union, or (when strict_union returns nil) the
# pending one is emitted and the next becomes pending. Returns a new array.
def collapsed_list(intervals)
  merged = []
  pending = nil

  intervals.each do |interval|
    unless pending
      pending = interval
      next
    end

    union = pending.strict_union(interval)
    if union
      pending = union
    else
      merged.push pending
      pending = interval
    end
  end

  merged.push pending if pending
  merged
end
|
326
|
+
|
327
|
+
# Replaces each per-chrom array in @ints_chrom with an index built from that
# array sorted by start.
#
# type - nil or :btree builds an IntervalList::BinaryTree, :flat builds an
#        IntervalList::OrderedList; any other value leaves the entry as it is.
def sort_ints_chrom(type)
  @ints_chrom.each do |chrom, list|
    index_class =
      if type.nil? || type == :btree
        IntervalList::BinaryTree
      elsif type == :flat
        IntervalList::OrderedList
      end
    next unless index_class

    @ints_chrom[chrom] = index_class.new(list.sort_by(&:start))
  end
end
|
337
|
+
end
|
data/lib/maf.rb
ADDED
@@ -0,0 +1,92 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'oncotator'
|
4
|
+
require 'yaml'
|
5
|
+
require 'mutation_set'
|
6
|
+
|
7
|
+
# Sample reader/writer for MAF (Mutation Annotation Format) files, built on
# MutationSet::Sample.
#
# `requires` and `comments` are class-level macros inherited from the
# superclass (not visible in this file); here they receive the expected
# column names and the comment prefix "#".
# NOTE(review): "verification_Status" starts with a lower-case "v" unlike the
# other column names -- confirm header matching is case-insensitive.
class Maf < MutationSet::Sample
  requires "Hugo_Symbol", "Entrez_Gene_Id", "Center",
    "NCBI_Build", "Chromosome",
    "Start_Position", "End_Position", "Strand",
    "Variant_Classification", "Variant_Type",
    "Reference_Allele", "Tumor_Seq_Allele1", "Tumor_Seq_Allele2",
    "dbSNP_RS", "dbSNP_Val_Status",
    "Tumor_Sample_Barcode", "Matched_Norm_Sample_Barcode",
    "Match_Norm_Seq_Allele1", "Match_Norm_Seq_Allele2",
    "Tumor_Validation_Allele1", "Tumor_Validation_Allele2",
    "Match_Norm_Validation_Allele1", "Match_Norm_Validation_Allele2",
    "verification_Status", "Validation_Status",
    "Mutation_Status", "Sequencing_Phase", "Sequence_Source",
    "Validation_Method", "Score" #, "BAM_File", "Sequencer"
  comments "#"

  # Version line for this format.
  def preamble
    "#version 2.2"
  end

  # One record of a MAF file.
  class Line < MutationSet::Line
    alias_key :chrom, :chromosome
    alias_key :start, :start_position
    alias_key :stop, :end_position
    alias_key :ref_allele, :reference_allele

    # Result of the :maf criteria check for this line.
    def skip_maf?
      criteria_failed?(self, :maf)
    end

    # "<tumor barcode>:<chrom>:<start>:<stop>".
    def key
      [ tumor_sample_barcode, chrom, start, stop ].join(":")
    end

    # The tumor allele that differs from the reference: allele 2 when
    # allele 1 equals the reference, otherwise allele 1.
    def alt_allele
      tumor_seq_allele1 == reference_allele ? tumor_seq_allele2 : tumor_seq_allele1
    end

    # Value of the first reference-count accessor this line responds to
    # (t_ref_count, tumor_ref_count, ref_count), or nil when there is none.
    def _ref_count
      name = [ :t_ref_count, :tumor_ref_count, :ref_count ].find { |s| respond_to? s }
      name ? send(name) : nil
    end

    # Value of the first alt-count accessor this line responds to
    # (t_alt_count, tumor_alt_count, alt_count), or nil when there is none.
    def _alt_count
      name = [ :t_alt_count, :tumor_alt_count, :alt_count ].find { |s| respond_to? s }
      name ? send(name) : nil
    end

    # Chromosome name with a "chr" prefix added when it has none.
    def chrom_name
      if chromosome =~ /chr/
        chromosome
      else
        "chr#{chromosome}"
      end
    end

    # Truthy (the match position) when the variant classification is one of
    # the listed coding classes, nil otherwise.
    def is_coding?
      variant_classification =~ /(Frame_Shift_Del|Frame_Shift_Ins|In_Frame_Del|In_Frame_Ins|Missense_Mutation|Nonsense_Mutation|Splice_Site|Translation_Start_Site)/
    end

    # Hugo symbol, falling back to onco.txp_gene when it is missing or empty.
    def gene_name
      if !hugo_symbol || hugo_symbol.size == 0
        onco.txp_gene
      else
        hugo_symbol
      end
    end

    # Fraction of reads supporting the alternate allele: alt / (ref + alt).
    #
    # Returns nil when either count is unavailable or blank, or when the
    # total depth is zero. (The original divided the reference count by the
    # depth and raised NoMethodError when a count accessor was missing.)
    def var_freq
      ref = _ref_count
      alt = _alt_count
      return nil if ref.nil? || alt.nil?
      return nil if ref.to_s.empty? || alt.to_s.empty?

      depth = ref.to_i + alt.to_i
      return nil if depth == 0

      alt.to_f / depth
    end
  end
end
|