germ 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,46 @@
1
+ require 'oncotator'
2
+ require 'yaml'
3
+ require 'mutation_set'
4
+
5
# Sample reader for indel call tables, built on MutationSet::Sample.
#
# Lines starting with "##" are declared as comments, and the columns named in
# `requires` are used as the header list for every instance.
class Indelocator < MutationSet::Sample
  comments "##"

  requires "chrom", "start", "stop", "change",
    "n_obs_counts", "n_av_mm", "n_av_mapq", "n_nqs_mm_rate", "n_nqs_av_qual", "n_strand_counts",
    "t_obs_counts", "t_av_mm", "t_av_mapq", "t_nqs_mm_rate", "t_nqs_av_qual", "t_strand_counts",
    "status"

  # One row of the table.
  class Line < MutationSet::Line
    # Builds the row from raw field values and the owning sample.
    #
    # Every column whose name starts with "n_" or "t_" is reduced to the text
    # after its first ":" and split on "/" into an array of strings.
    def initialize(fields, sample)
      @sample = sample
      @mutation = Hash[sample.clean_headers.zip(fields)]

      @mutation.each do |column, raw|
        next unless column.to_s =~ /^[nt]_/
        payload = raw[/:(.*)/, 1]
        @mutation[column] = payload.split %r!/!
      end
    end

    # True when the [:mutect, :somatic] criteria do not fail for this line.
    def keep_somatic?
      !criteria_failed?(self, [ :mutect, :somatic ])
    end

    # True when the [:mutect, :germline] criteria do not fail for this line.
    def keep_germline?
      !criteria_failed?(self, [ :mutect, :germline ])
    end

    # Underscore-joined key: contig (first "chr" removed), position, end
    # position (position + reference length - 1), reference and alternate allele.
    def to_ot
      pos = position
      ref = ref_allele
      last_base = pos.to_i + ref.length - 1
      "#{contig.sub(/chr/,"")}_#{pos}_#{last_base}_#{ref}_#{alt_allele}"
    end

    # Depth is the sum of the alternate and reference counts.
    def t_depth
      t_alt_count.to_i + t_ref_count.to_i
    end

    def n_depth
      n_alt_count.to_i + n_ref_count.to_i
    end

    # Variant frequency is the alternate count divided by the depth.
    def t_var_freq
      t_alt_count.to_f / t_depth
    end

    def n_var_freq
      n_alt_count.to_f / n_depth
    end
  end

  # Delegates to the parent constructor, then replaces the headers with the
  # required column names as symbols.
  def initialize(mutation_config = nil, suppress_headers = nil)
    super(mutation_config, suppress_headers)
    @headers = required.map { |name| name.to_sym }
  end
end
@@ -0,0 +1,337 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ class IntervalList
4
+ include Enumerable
5
# Flat index over a list of intervals, queried with binary search.
#
# The binary searches assume the list is ordered so that `below?` and `above?`
# change monotonically along it (the enclosing class passes intervals sorted by
# start).
class OrderedList
  include Enumerable

  def initialize(ints)
    @track = ints
  end

  # Yields each stored interval in order.
  def each
    @track.each { |entry| yield entry }
  end

  # Returns the stored intervals overlapping +interval+, each clipped to it via
  # `strict_overlap`, or nil when nothing overlaps.
  def intersect(interval)
    hits = overlap(interval)
    return nil unless hits
    hits.map { |hit| hit.strict_overlap(interval) }
  end

  # Returns the contiguous run of stored intervals that are neither below nor
  # above +interval+, or nil when that run is empty.
  def overlap(interval)
    first = first_not_below(interval)
    # nil means every stored interval lies below the query
    return nil if first.nil?
    # the very first interval may already lie beyond the query
    return nil if first.zero? && @track[first].above?(interval)

    beyond = (0...@track.size).bsearch { |i| @track[i].above?(interval) }
    # with nothing above the query, the run extends to the end of the list
    last = beyond ? beyond - 1 : @track.size - 1

    found = @track[first..last]
    found.empty? ? nil : found
  end

  # Returns the stored interval whose center is closest to the query's center,
  # choosing between the first interval not below the query and the one just
  # before it. Returns the last interval when every interval is below the query.
  def nearest(interval)
    idx = first_not_below(interval)
    return @track.last unless idx

    candidate = @track[idx]
    return candidate if idx.zero?

    before = @track[idx - 1]
    candidate.dist(interval) > before.dist(interval) ? before : candidate
  end

  private

  # Index of the first stored interval that is not below +interval+, or nil.
  def first_not_below(interval)
    (0...@track.size).bsearch { |i| !@track[i].below?(interval) }
  end
end
55
# Balanced binary tree of intervals keyed by start, where every node also
# records the largest `stop` found in its subtree (@max) so that searches can
# skip subtrees ending before the query starts.
class BinaryTree
  attr_reader :max

  # Builds a tree from intervals in any order.
  def self.create(intervals)
    new intervals.sort_by(&:start)
  end

  # Builds a tree from intervals that are already sorted by start.
  #
  # Raises ArgumentError when +intervals+ is empty.
  def initialize(intervals)
    raise ArgumentError, "BinaryTree requires at least one interval" if intervals.empty?

    # Split into a lower half (rounded up) and an upper half; the last element
    # of the lower half becomes this node.
    low, high = intervals.each_slice((intervals.size / 2.0).round).to_a
    @node = low.pop
    @left = BinaryTree.new(low) unless low.empty?
    @right = BinaryTree.new(high) unless high.nil?
    update_max
  end

  # Sets @max to the largest stop among this node and its children.
  def update_max
    @max = @node.stop
    @max = @left.max if @left && @left.max > @max
    @max = @right.max if @right && @right.max > @max
  end

  # Returns the stored interval with the smallest `dist` to +interval+.
  #
  # This is a plain scan of the whole subtree; it does not use the ordering to
  # prune, so it costs one `dist` comparison per stored interval.
  def nearest(interval)
    best = @node
    [@left, @right].each do |subtree|
      next unless subtree
      candidate = subtree.nearest(interval)
      best = candidate if candidate.dist(interval) < best.dist(interval)
    end
    best
  end

  # Returns an array (possibly empty) of the stored intervals that overlap
  # +interval+, in start order.
  def overlap(interval)
    ols = []
    # nothing in this subtree reaches the query
    return ols if interval.start > @max
    ols.concat @left.overlap(interval) if @left
    ols.push @node if @node.overlaps? interval
    # everything to the right starts at or after this node, so it can be
    # skipped once this node already lies beyond the query
    ols.concat @right.overlap(interval) if @right && !@node.above?(interval)
    ols
  end

  # Returns the overlapping intervals clipped to +interval+ via
  # `strict_overlap`, or nil when nothing overlaps.
  def intersect(interval)
    hits = overlap(interval)
    return nil if hits.empty?
    hits.map { |hit| hit.strict_overlap(interval) }
  end
end
89
# Centered interval tree: each node picks a midpoint, sends intervals that end
# before it to the left subtree and intervals that start after it to the right
# subtree, and keeps the rest (sorted by start and by stop) at the node.
#
# Only construction is implemented in this view; no query methods are defined.
class Tree
  # Builds a tree from intervals in any order.
  def self.create(intervals)
    new intervals.sort_by(&:start), intervals.sort_by(&:stop)
  end

  # +starts+ and +stops+ hold the same non-empty set of intervals, sorted by
  # start and by stop respectively.
  def initialize(starts, stops)
    # midpoint of the span covered by the whole set
    midp = (starts.first.start + stops.last.stop) / 2
    # a point interval at the midpoint, cloned from a member so that it
    # responds to the same interval interface
    @mid = starts.first.clone :pos => midp

    l = left_tree starts, stops
    r = right_tree starts, stops
    @left = self.class.new(*l) unless l.first.empty?
    @right = self.class.new(*r) unless r.first.empty?
    @center_start = starts - l.first - r.first
    @center_stop = stops - l.last - r.last
  end

  private

  # Returns [starts, stops] for the intervals lying entirely below @mid.
  def left_tree(starts, stops)
    low = (0...stops.size).bsearch do |i|
      !stops[i].below? @mid
    end
    # nil means every interval is below the midpoint
    left_stops = low.nil? ? stops : stops[0...low]
    return [ [], [] ] if left_stops.empty?
    left_starts = starts & left_stops
    [ left_starts, left_stops ]
  end

  # Returns [starts, stops] for the intervals lying entirely above @mid.
  def right_tree(starts, stops)
    low = (0...starts.size).bsearch do |i|
      starts[i].above? @mid
    end
    right_starts = (!low ? [] : starts[low..-1])
    return [ [], [] ] if right_starts.empty?
    right_stops = stops & right_starts
    [ right_starts, right_stops ]
  end
end
127
# Mixin providing interval arithmetic.
#
# The including class must provide readers and writers for :chrom, :start and
# :stop, and a :copy method returning a duplicate of the object.
module Interval
  # Returns a copy of this interval with any of :chrom, :start, :stop replaced.
  # :pos, when given, sets both start and stop and wins over :start/:stop.
  def clone opts={}
    c = copy
    c.chrom = opts[:chrom] if opts[:chrom]
    c.start = opts[:start] if opts[:start]
    c.stop = opts[:stop] if opts[:stop]
    c.start = opts[:pos] if opts[:pos]
    c.stop = opts[:pos] if opts[:pos]
    return c
  end
  #def start= ns; @start = ns; end
  #def stop= ns; @stop = ns; end

  # True when this interval ends before +interval+ starts (chrom is ignored).
  def below? interval
    stop < interval.start
  end

  # True when this interval starts after +interval+ ends (chrom is ignored).
  def above? interval
    start > interval.stop
  end

  # True when both intervals are on the same chrom and share at least one position.
  def overlaps? interval
    chrom == interval.chrom && !below?(interval) && !above?(interval)
  end

  # With a Numeric, tests whether that position lies inside this interval.
  # Otherwise tests whether +interval+ is on the same chrom and fully inside.
  def contains? interval
    if interval.is_a? Numeric
      start <= interval && stop >= interval
    else
      chrom == interval.chrom && start <= interval.start && stop >= interval.stop
    end
  end

  # Returns a copy of this interval clipped to the region shared with
  # +interval+, or nil when they do not overlap.
  def strict_overlap interval
    return nil if !overlaps? interval

    # clone takes a single options hash; chrom is already that of the copy
    clone :start => [ interval.start, start ].max, :stop => [ interval.stop, stop ].min
  end

  # Removes +interval+ from this interval.
  #
  # Returns an IntervalList holding the remaining piece(s): this interval
  # itself when there is no overlap, otherwise the part before and/or after
  # the overlap. Returns nil when nothing remains.
  def strict_diff interval
    ol = strict_overlap interval
    return IntervalList.new [ self ] if !ol
    ints = []
    if ol.start > start
      ints.push clone( :start => start, :stop => ol.start-1 )
    end
    if ol.stop < stop
      ints.push clone(:start => ol.stop+1, :stop => stop)
    end
    ints.empty? ? nil : IntervalList.new(ints)
  end

  # Returns a copy spanning both intervals, or nil when +interval+ is nil or
  # does not overlap this one.
  def strict_union interval
    return nil unless interval && overlaps?(interval)
    clone :start => [ interval.start, start ].min, :stop => [ interval.stop, stop ].max
  end

  # The next three methods delegate to the list, passing this interval as the query.
  def overlap interval_list
    interval_list.overlap self
  end

  def nearest interval_list
    interval_list.nearest self
  end

  def intersect interval_list
    interval_list.intersect self
  end

  # Number of positions covered; start and stop are both inclusive.
  def size
    stop - start + 1
  end

  # Midpoint as a Float.
  def center
    (stop + start)/2.0
  end

  # Absolute distance between the two centers (chrom is ignored).
  def dist interval
    (center-interval.center).abs
  end

  # Total size of the pieces of this interval shared with +interval_list+;
  # 0 when the list reports no intersection.
  def intersection_size interval_list
    inters = intersect(interval_list)
    return 0 if !inters
    # skip nil entries, which strict_overlap yields for non-overlapping items
    inters.compact.inject(0) { |sum, int| sum + int.size }
  end
end
216
# Minimal concrete interval: a chrom, inclusive start/stop coordinates and an
# arbitrary :data payload.
class BasicInterval
  include Interval

  attr_accessor :chrom, :start, :stop, :data

  # Accepts :chrom, :start, :stop and :data. A truthy :pos sets both start and
  # stop and overrides :start/:stop.
  def initialize(opts)
    point = opts[:pos]
    @chrom = opts[:chrom]
    @start, @stop = point ? [point, point] : [opts[:start], opts[:stop]]
    @data = opts[:data]
  end

  # Returns a new instance of the same class with the same four attributes.
  def copy
    attributes = { :chrom => @chrom, :start => @start, :stop => @stop, :data => @data }
    self.class.new(attributes)
  end

  # Compact representation showing only the coordinates.
  def inspect
    address = '%x' % (object_id << 1)
    "#<#{self.class}:0x#{address} @chrom=#{@chrom} @start=#{@start} @stop=#{@stop}>"
  end
end
235
+
236
# Yields every interval held by this list, in stored order.
def each
  @intervals.each { |entry| yield entry }
end
241
+
242
# Delegates to the index for the interval's chrom; nil when that chrom is unknown.
def overlap(interval)
  chrom_index = @ints_chrom[interval.chrom]
  chrom_index ? chrom_index.overlap(interval) : nil
end
247
+
248
# Delegates to the index for the interval's chrom; nil when that chrom is unknown.
def nearest(interval)
  chrom_index = @ints_chrom[interval.chrom]
  chrom_index ? chrom_index.nearest(interval) : nil
end
253
+
254
# Delegates to the index for the interval's chrom; nil when that chrom is unknown.
def intersect(interval)
  chrom_index = @ints_chrom[interval.chrom]
  chrom_index ? chrom_index.intersect(interval) : nil
end
259
+
260
# Subtracts this set of intervals from each interval in the given interval_list.
#
# Returns one entry per input interval: the interval itself when nothing in
# this list overlaps it, otherwise a flat array of the pieces left after every
# overlapping interval has been removed (empty when nothing is left).
def diff interval_list
  interval_list.map do |int|
    ols = overlap(int)
    # no overlaps (nil or an empty result): the interval survives untouched
    if ols.nil? || ols.empty?
      int
    else
      # remove each overlap in turn from whatever pieces are still left
      remaining = [ int ]
      ols.each do |ol|
        remaining = remaining.map { |piece| piece.strict_diff(ol).to_a }.flatten
      end
      remaining
    end
  end
end
274
+
275
# Builds the list from an array of objects that include IntervalList::Interval.
#
# Intervals are kept in input order in @intervals and grouped by chrom in
# @ints_chrom; the groups are then indexed by sort_ints_chrom according to
# opts[:type].
#
# Raises ArgumentError when an item is not an IntervalList::Interval.
def initialize array, opts = {}
  @intervals = []
  @ints_chrom = {}
  array.each do |item|
    # reject anything else up front instead of storing nil and failing on nil.chrom
    unless item.is_a? IntervalList::Interval
      raise ArgumentError, "expected an IntervalList::Interval, got #{item.inspect}"
    end
    @intervals.push item
    @ints_chrom[item.chrom] ||= []
    @ints_chrom[item.chrom].push item
  end

  sort_ints_chrom opts[:type]
end
289
+
290
+ def inspect
291
+ "#<#{self.class}:0x#{'%x' % (object_id << 1)} @intervals=#{@intervals.size}>"
292
+ end
293
+
294
+ attr_reader :ints_chrom
295
+
296
# Merges overlapping intervals on each chrom in place and returns self.
#
# The per-chrom index is rebuilt with the same class it had before, so
# overlap/nearest/intersect keep working afterwards. A chrom whose index is
# still a plain Array is left as a plain Array of merged intervals.
def collapse!
  by_chrom = @intervals.group_by(&:chrom)

  @ints_chrom.each do |chrom, track|
    # collapsed_list expects its input sorted by start
    merged = collapsed_list by_chrom.fetch(chrom, []).sort_by(&:start)
    by_chrom[chrom] = merged
    @ints_chrom[chrom] = track.is_a?(Array) ? merged : track.class.new(merged)
  end

  @intervals = @ints_chrom.keys.map { |chrom| by_chrom[chrom] }.flatten
  self
end
305
+
306
+ private
307
# Merges runs of overlapping intervals into single intervals.
#
# The input is expected to be sorted already. Each interval is united with the
# running merge via strict_union; when that yields nothing the running merge is
# emitted and a new one is started.
def collapsed_list(intervals)
  merged = []
  pending = nil

  intervals.each do |current|
    union = pending && pending.strict_union(current)
    if union
      pending = union
    else
      merged.push pending if pending
      pending = current
    end
  end

  merged.push pending if pending
  merged
end
326
+
327
# Replaces each per-chrom array with an index over its start-sorted intervals:
# a BinaryTree for nil or :btree, an OrderedList for :flat. Any other type
# leaves the arrays as they are.
def sort_ints_chrom(type)
  @ints_chrom.each do |chrom, list|
    index_class =
      if type.nil? || type == :btree
        IntervalList::BinaryTree
      elsif type == :flat
        IntervalList::OrderedList
      end
    next unless index_class

    @ints_chrom[chrom] = index_class.new(list.sort_by(&:start))
  end
end
337
+ end
@@ -0,0 +1,92 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'oncotator'
4
+ require 'yaml'
5
+ require 'mutation_set'
6
+
7
# Sample reader for MAF-style mutation tables, built on MutationSet::Sample.
#
# Declares the required columns, treats lines starting with "#" as comments and
# emits a "#version 2.2" preamble.
class Maf < MutationSet::Sample
  requires "Hugo_Symbol", "Entrez_Gene_Id", "Center",
    "NCBI_Build", "Chromosome",
    "Start_Position", "End_Position", "Strand",
    "Variant_Classification", "Variant_Type",
    "Reference_Allele", "Tumor_Seq_Allele1", "Tumor_Seq_Allele2",
    "dbSNP_RS", "dbSNP_Val_Status",
    "Tumor_Sample_Barcode", "Matched_Norm_Sample_Barcode",
    "Match_Norm_Seq_Allele1", "Match_Norm_Seq_Allele2",
    "Tumor_Validation_Allele1", "Tumor_Validation_Allele2",
    "Match_Norm_Validation_Allele1", "Match_Norm_Validation_Allele2",
    "verification_Status", "Validation_Status",
    "Mutation_Status", "Sequencing_Phase", "Sequence_Source",
    "Validation_Method", "Score" #, "BAM_File", "Sequencer"
  comments "#"

  def preamble
    "#version 2.2"
  end

  # One row of the table.
  class Line < MutationSet::Line
    alias_key :chrom, :chromosome
    alias_key :start, :start_position
    alias_key :stop, :end_position
    alias_key :ref_allele, :reference_allele

    # True when the :maf criteria fail for this line.
    def skip_maf?
      criteria_failed?(self, :maf)
    end

    # Colon-joined identifier: tumor sample barcode, chrom, start and stop.
    def key
      [ tumor_sample_barcode, chrom, start, stop ].join(":")
    end

    # The tumor allele that differs from the reference: allele 2 when allele 1
    # equals the reference, otherwise allele 1.
    def alt_allele
      tumor_seq_allele1 == reference_allele ? tumor_seq_allele2 : tumor_seq_allele1
    end

    # Reference read count from the first of t_ref_count, tumor_ref_count or
    # ref_count that this line responds to; nil when none is available.
    def _ref_count
      [ :t_ref_count, :tumor_ref_count, :ref_count ].each do |s|
        return send(s) if respond_to? s
      end
      nil
    end

    # Alternate read count from the first of t_alt_count, tumor_alt_count or
    # alt_count that this line responds to; nil when none is available.
    def _alt_count
      [ :t_alt_count, :tumor_alt_count, :alt_count ].each do |s|
        return send(s) if respond_to? s
      end
      nil
    end

    # Chromosome name carrying a "chr" prefix.
    def chrom_name
      if chromosome =~ /chr/
        chromosome
      else
        "chr#{chromosome}"
      end
    end

    # Truthy when the variant classification is one of the listed coding classes.
    def is_coding?
      variant_classification =~ /(Frame_Shift_Del|Frame_Shift_Ins|In_Frame_Del|In_Frame_Ins|Missense_Mutation|Nonsense_Mutation|Splice_Site|Translation_Start_Site)/
    end

    # Hugo symbol, falling back to onco.txp_gene when the symbol is missing or empty.
    def gene_name
      if !hugo_symbol || hugo_symbol.size == 0
        onco.txp_gene
      else
        hugo_symbol
      end
    end

    # Variant frequency: alternate count divided by total (ref + alt) count.
    # Returns nil when either count is missing or blank.
    def var_freq
      ref = _ref_count
      alt = _alt_count
      # _ref_count/_alt_count return nil when no count column is present
      return nil if ref.nil? || alt.nil?
      return nil if ref.to_s.empty? || alt.to_s.empty?
      alt.to_f / (ref.to_i + alt.to_i)
    end
  end
end