germ 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,11 @@
1
+ require 'fasta'
2
+ require 'gtf'
3
+ require 'hash_table'
4
+ require 'indelocator'
5
+ require 'intervals'
6
+ require 'maf'
7
+ require 'mutation_set'
8
+ require 'mutect'
9
+ require 'oncotator'
10
+ require 'sam'
11
+ require 'vcf'
@@ -0,0 +1,34 @@
1
+ require 'yaml'
2
# Reads library configuration from the YAML file named by the
# TAYLORLIB_CONF environment variable.
class TaylorlibConfig
  # Loads the configuration and walks +keys+ into it.
  #
  # @param keys [Array] successive keys used to index into the parsed YAML
  # @return [Object, nil] the value found; nil when no configuration was
  #   loaded or when a key along the path is missing
  def self.get_conf(*keys)
    config = TaylorlibConfig.new
    config.get_key(*keys) if config.loaded?
  end

  # Parses the configuration file when TAYLORLIB_CONF points at an existing file.
  def initialize
    load_file if file_exists?
  end

  # @return [Boolean] true once a configuration file has been parsed
  def loaded?
    !@config.nil?
  end

  # Indexes into the parsed configuration one key at a time.
  #
  # @param keys [Array] successive keys; with no keys the whole configuration
  #   is returned
  # @return [Object, nil] the nested value, or nil as soon as a level is missing
  def get_key(*keys)
    keys.inject(@config) do |obj, key|
      # Stop descending instead of calling [] on nil.
      obj.nil? ? nil : obj[key]
    end
  end

  private

  # Path of the configuration file, taken from the environment.
  def config_file
    ENV["TAYLORLIB_CONF"]
  end

  def file_exists?
    # File.exists? was removed in Ruby 3.2; File.exist? is the supported name.
    config_file && File.exist?(config_file)
  end

  def load_file
    # NOTE(review): YAML.load on older Psych versions can instantiate arbitrary
    # objects; consider YAML.safe_load if the file may not be trusted.
    @config = YAML.load File.read(config_file)
  end
end
@@ -0,0 +1,47 @@
1
# Class-level macros that define attribute accessors whose writers coerce the
# assigned value. Intended to be made available on a class (e.g. via +extend+)
# so the macros can be called in the class body.
module DataTypes
  # Defines a reader and a coercing writer for +name+.
  #
  # @param name [Symbol, String] attribute name; stored in @<name>
  # @param type [Symbol, nil] conversion method sent to the assigned value
  #   when no block is given (e.g. :to_i)
  # @yield [v] optional converter; when given, its result is stored and
  #   +type+ is ignored
  # @raise [ArgumentError] (from the generated writer) when no block was given
  #   and the value does not respond to +type+
  def attr_accessor_of_type(name, type = nil, &converter)
    ivar = "@#{name}"

    define_method(name) do
      instance_variable_get(ivar)
    end

    define_method("#{name}=") do |v|
      coerced =
        if converter
          converter.call(v)
        elsif type && v.respond_to?(type)
          v.send(type)
        else
          # ArgumentException is not a Ruby constant; ArgumentError is the
          # standard error for an unacceptable argument.
          raise ArgumentError, "cannot convert #{v.inspect} for #{name} using #{type.inspect}"
        end
      instance_variable_set(ivar, coerced)
    end
  end

  # Accessors whose writers store +value.to_s+.
  def attr_string(*names)
    names.each { |name| attr_accessor_of_type(name, :to_s) }
  end

  # Accessors whose writers store +value.to_i+.
  def attr_integer(*names)
    names.each { |name| attr_accessor_of_type(name, :to_i) }
  end

  # Accessors whose writers store +value.to_sym+.
  def attr_sym(*names)
    names.each { |name| attr_accessor_of_type(name, :to_sym) }
  end

  # Accessors whose writers store an array built from the assigned value.
  #
  # @yield [v] optional splitter; when omitted the value is split into
  #   single characters with +split(//)+
  def attr_array(*names, &splitter)
    names.each do |name|
      attr_accessor_of_type(name) do |v|
        splitter ? splitter.call(v) : v.split(//)
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
# Parses a flagstat-style report in which each line carries a pair of counts
# written as "<n> + <m>", and exposes the counts by name.
class Flagstat
  # @param file [String] path of the report; its lines are matched, in order,
  #   against the names in @headers
  def initialize(file)
    @headers = [ :total, :duplicates, :mapped, :paired_in_sequence, :read1, :read2,
      :properly_paired, :both_mapped, :singletons, :mate_mapped_chr, :mate_mapped_chr_highq ]
    # Map each line directly: chaining each_with_index before map hands the
    # block a [line, index] pair, and Array has no #scan.
    counts = File.foreach(file).map do |line|
      line.scan(/([0-9]+) \+ ([0-9]+)/).flatten
    end
    @flags = Hash[@headers.zip(counts)]
  end

  # Yields each header name with its array of captured count strings.
  def each
    @flags.each do |name, counts|
      yield name, counts
    end
  end

  # Dynamic readers:
  #   <header>          -> first count on that line, as an Integer
  #   chastity_<header> -> last count on that line, as an Integer
  # Anything else falls through to the default NoMethodError.
  def method_missing(method, *args, &block)
    counts = @flags[method]
    return counts.first.to_i if counts

    chastity = chastity_counts(method)
    return chastity.last.to_i if chastity

    super
  end

  # Keeps respond_to? consistent with the dynamic readers above.
  def respond_to_missing?(method, include_private = false)
    return super unless @flags
    (@flags[method] || chastity_counts(method)) ? true : super
  end

  private

  # Counts for a "chastity_<header>" method name, or nil when the name has a
  # different shape or the header has no parsed line.
  def chastity_counts(method)
    match = method.to_s.match(/^chastity_(.*)/)
    match && @flags[match[1].to_sym]
  end
end
@@ -0,0 +1,15 @@
1
# Output helpers for classes that implement +output(io)+: the including
# class supplies +output+, and these methods choose where it writes.
module Printer
  # Opens +file+ for writing and passes the handle, together with any block,
  # to +output+.
  def write(file, &block)
    File.open(file, "w") { |handle| output(handle, &block) }
  end

  # Writes to the file named +f+ when one is given, otherwise to STDOUT.
  def print(f = nil, &block)
    return write(f, &block) if f
    output(STDOUT, &block)
  end
end
@@ -0,0 +1,248 @@
1
+ require 'hash_table'
2
+ require 'intervals'
3
+
4
# HashTable specialised for GTF annotation records, with helpers that group
# records into genes and transcripts.
class GTF < HashTable
  header_off

  # One GTF record; mixes in IntervalList::Interval so it can be used as an
  # interval.
  class GTFLine < HashTable::HashLine
    include IntervalList::Interval

    # Chromosome name, stored in the :seqname field.
    def chrom; seqname; end

    # Writes the chromosome through to the :seqname field. A bare
    # `seqname = nc` would only create a local variable and leave the record
    # unchanged.
    def chrom= nc; self[:seqname] = nc; end

    # New line of the same class built from a shallow copy of the field hash.
    def copy
      self.class.new @hash.clone
    end
  end
  line_class GTFLine

  # All records belonging to one gene, grouped into transcripts.
  class Gene
    # The records sharing one transcript name, plus derived introns.
    class Transcript
      attr_reader :name, :intervals, :introns

      # @param array [Array] records for this transcript; intron records are
      #   appended to it by build_introns
      # @param name [String] transcript name
      def initialize array, name
        @intervals = array
        @name = name

        @transcript = @intervals.find { |t| t.feature == "transcript" }

        build_introns
      end

      # Classifies +pos+ against this transcript's features.
      #
      # @return [Hash] a hash with a :type key (:cds, :intron, :utr or
      #   :transcript) and type-specific extras
      def site pos
        point = @transcript.clone :pos => pos
        overlaps = @intervals.select { |f| f.contains? point }

        # NOTE(review): #cds selects features named "CDS", so this lowercase
        # "cds" test looks like it can never match — confirm the intended
        # spelling. It is left as-is because cds_pos does integer arithmetic
        # on its argument and would be handed an interval here.
        return cds_pos(point) if overlaps.any? { |f| f.feature == "cds" }

        intron = overlaps.find { |f| f.feature == "intron" }
        return intron_pos(intron) if intron

        return utr_pos if overlaps.any? { |f| f.feature =~ /UTR/ }
        { :type => :transcript }
      end


      def utr_pos
        { :type => :utr }
      end

      def intron_frame intron
        # find the terminal frame of the leading exon
        if strand == "+"
          (intron.prev_exon.frame + intron.prev_exon.size) % 3
        else
          intron.post_exon.frame
        end
      end

      # Counts coding bases up to and including +pos+, walking the CDS
      # records in transcript order, and reports the result divided by 3.
      #
      # @param pos position to locate; presumably an Integer coordinate —
      #   TODO confirm against Interval#contains?
      def cds_pos pos
        bases = 0
        # Use the strand reader: Transcript never assigns @strand, so testing
        # the instance variable always took the reverse-strand branch.
        if strand == "+"
          cds.each do |c|
            if c.contains? pos
              bases += pos - c.start + 1
              break
            else
              bases += c.size
            end
          end
        else
          cds.reverse.each do |c|
            if c.contains? pos
              bases += c.stop - pos + 1
              break
            else
              bases += c.size
            end
          end
        end
        { :type => :cds, :pos => bases / 3 }
      end

      def intron_pos intron
        { :type => :intron, :pos => cds_pos(intron.start - 1), :frame => intron_frame(intron) }
      end

      # Memoised list of records relabelled "3' UTR", derived from the exons
      # and one boundary CDS record.
      def utr3
        return @utr3 if @utr3
        cs = strand == "+" ? cds.first : cds.last
        @utr3 = exons.select { |e| strand == "+" ? !e.above?(cs) : !e.below?(cs) }
          .map { |e| e.strict_diff(cs) }
          .compact.map(&:to_a)
        @utr3.each do |u|
          u.feature = "3' UTR"
        end
      end

      # Memoised list of records relabelled "5' UTR", derived from the exons
      # and one boundary CDS record.
      def utr5
        return @utr5 if @utr5
        cs = strand == "+" ? cds.last : cds.first
        @utr5 = exons.select { |e| strand == "+" ? !e.below?(cs) : !e.above?(cs) }
          .map { |e| e.strict_diff(cs) }
          .compact.map(&:to_a)
        @utr5.each do |u|
          u.feature = "5' UTR"
        end
      end

      # Creates one "intron" record for each pair of start-sorted neighbouring
      # exons, spanning the gap between them, and appends the new records to
      # @intervals.
      def build_introns
        return unless exons
        @introns = exons.each_cons(2).map do |e1, e2|
          intron = e1.clone(:start => e1.stop + 1, :stop => e2.start - 1)
          intron.feature = "intron"
          intron.prev_exon = e1
          intron.post_exon = e2
          intron
        end
        @intervals.concat @introns
      end

      # Appends any UTR lists that have already been computed to @intervals.
      def build_utrs
        @intervals.concat @utr3 if @utr3
        @intervals.concat @utr5 if @utr5
      end

      def start
        @transcript.start
      end

      def stop
        @transcript.stop
      end

      def strand
        @transcript.strand
      end

      def contains? pos
        start <= pos && stop >= pos
      end

      # Exon records sorted by start (memoised).
      def exons
        @exons ||= @intervals.select { |e| e.feature == "exon" }.sort_by(&:start)
      end

      # CDS records sorted by start (memoised).
      def cds
        @cds ||= @intervals.select { |e| e.feature == "CDS" }.sort_by(&:start)
      end
    end

    attr_reader :name, :strand, :transcripts, :intervals

    # @param array [Array] every record of the gene, including its "gene" record
    def initialize array
      @intervals = array
      @gene = @intervals.find { |l| l.feature == "gene" }
      @name = @gene.attribute[:gene_name]
      @strand = @gene.strand
      @transcripts = build_transcripts
    end

    def start
      @gene.start
    end

    def stop
      @gene.stop
    end

    # Best classification of +pos+ over all transcripts containing it; lower
    # scores win, and :igr is the fallback when no transcript contains +pos+.
    def site pos
      score = { :cds => 1, :exon => 2, :utr => 3, :intron => 4, :transcript => 5, :igr => 6 }
      sites = @transcripts.map do |t|
        { :gene => name }.update(t.site pos) if t.contains? pos
      end.compact
      sites.push(:type => :igr)
      sites.sort_by { |s| score[s[:type]] }.first
    end

    # compute unified intervals from the list of intervals
    # (optionally restricted to those for which the block returns true)
    def unified
      ints = @intervals
      ints = ints.select { |i| yield i } if block_given?
      list = IntervalList.new ints, :type => :flat
      list.collapse!
      list.to_a
    end

    def canonical
      # find out which transcript has the longest cds
      @transcripts.max_by do |t|
        t.cds.inject(0) { |total, c| total + c.size }
      end
    end

    def inspect
      "#<#{self.class.name}:#{object_id} @transcripts=#{@transcripts.count}>"
    end

    private

    # One Transcript per "transcript" record, each given every record that
    # shares its transcript_name attribute.
    def build_transcripts
      @intervals.select { |l| l.feature == "transcript" }.map do |t|
        name = t.attribute[:transcript_name]
        Transcript.new @intervals.select { |l| l.attribute[:transcript_name] == name }, name
      end
    end
  end

  # Gene object for +name+, built from the gene_name index on first use and
  # cached; nil when the index has no records for +name+.
  def gene name
    intervals = gene_name[name]
    @genes[name] ||= GTF::Gene.new intervals if intervals
  end

  # @param file [String] path of the GTF file
  # @param opts [Hash, nil] :comment (default "#"), :sep (separator used
  #   between an attribute key and its value, default " ") and :idx (keys to index)
  def initialize file, opts=nil
    opts = { :comment => "#", :sep => " "}.merge(opts || {})

    @sep = opts[:sep]

    @genes = {}

    super file, :comment => opts[:comment], :idx => opts[:idx],
      :header => [ :seqname, :source, :feature, :start, :stop, :score, :strand, :frame, :attribute ],
      :types => [ :str, :str, :str, :int, :int, :int, :str, :int, [ ";", @sep ] ]
  end

  def inspect
    "#<#{self.class}:0x#{'%x' % (object_id << 1)} @lines=#{@lines.count}>"
  end

  def to_interval_list
    IntervalList.new self
  end

  # Renders one record as a tab-separated line; the attribute hash is written
  # as "key<sep>value" pairs joined by "; ".
  def format_line g
    [ :seqname, :source, :feature, :start, :stop, :score, :strand, :frame, :attribute ].map do |h|
      if h == :attribute
        g[:attribute].map do |k,v|
          "#{k}#{@sep}#{v}"
        end.join("; ")
      else
        g[h]
      end
    end.join("\t")
  end

  protected

  # Files +line+ under every configured index key, looking the key up first
  # among the record's columns and then among its attributes.
  def add_index line
    @index.each do |key,ind|
      ikey = line[key] || line[:attribute][key]
      next if !ikey
      (ind[ ikey ] ||= []) << line
    end
  end
end
@@ -0,0 +1,195 @@
1
+ require 'zlib'
2
+ require 'extlib'
3
+ require 'germ/printer'
4
+ require 'hash_table_aux/hash_table_aux'
5
+
6
# In-memory table of records with optional per-column indexes.
# File loading (+load_file+) is expected to come from HashTableAux and
# +write+/+print+ from Printer.
class HashTable
  include Enumerable
  include Printer
  include HashTableAux

  # One record of the table: a thin wrapper around a Hash of field values.
  class HashLine
    # @param h [Array, Hash] an array of [key, value] pairs or a ready hash
    def initialize h
      if h.is_a? Array
        @hash = Hash[h]
      elsif h.is_a? Hash
        @hash = h
      end
    end

    def update hash
      @hash.update hash
    end

    def [] ind
      @hash[ind]
    end

    def []= ind,v
      @hash[ind] = v
    end

    # Marks the line so that HashTable#output skips it.
    def invalidate!
      @invalid = true
    end

    def approve!
      @invalid = nil
    end

    def invalid?
      @invalid
    end

    # Field access by method name: +line.foo+ reads @hash[:foo] and
    # +line.foo = v+ writes it. Any other unknown method returns nil.
    def method_missing sym, *args, &block
      if @hash.key?(sym)
        @hash[sym]
      elsif sym.to_s =~ /\A(\w+)=\z/
        # Anchored to plain "name=" setters so that operator calls which end
        # in or contain "=" are not treated as field writes.
        @hash[$1.to_sym] = args.first
      else
        nil
      end
    end
  end

  class << self
    # Class used to wrap each record; HashLine unless line_class was called.
    def line_type
      @line_type || HashLine
    end

    # Sets the record class. Accepts the class itself, or a name that is
    # camel-cased and resolved with const_get.
    def line_class klass
      @line_type = klass.is_a?(Class) ? klass : const_get(klass.to_s.camel_case)
    end

    def use_header?
      @use_header
    end

    def header_on
      @use_header = true
    end

    def header_off
      @use_header = nil
    end
  end
  header_on

  attr_accessor :header

  def [](ind)
    @lines[ind]
  end

  # Exposes each index as a method named after its key.
  def method_missing sym, *args, &block
    if @index && @index.key?(sym)
      @index[sym]
    else
      super sym, *args, &block
    end
  end

  # Keeps respond_to? consistent with the index readers above.
  def respond_to_missing? sym, include_private = false
    (@index && @index.key?(sym)) || super
  end

  # Sum of column +col+ over all lines, each value converted with to_f.
  def sum(col)
    inject(0) { |total, line| total + line[col].to_f }
  end

  def select! &block
    @lines.select!(&block)
  end

  def sort_by! &block
    @lines.sort_by!(&block)
  end

  def use_header?
    self.class.use_header?
  end

  # Writes the table to +f+: the header row when enabled, then every line.
  # With a block, each line is replaced by the block's result; a falsy result
  # or an invalidated line is skipped.
  def output f
    f.puts @header.join("\t") if use_header?
    @lines.each do |l|
      l = yield l if block_given?
      next if !l || l.invalid?
      f.puts format_line(l)
    end
  end

  def inspect
    "#<#{self.class.name}:#{object_id} @lines=#{@lines.count}>"
  end

  def each
    @lines.each do |l|
      yield l
    end
  end

  # @param file [String, nil] path to parse; skipped when nil or not present
  # @param opts [Hash] :header (Array of column names, or Hash of
  #   name => type), :skip_header, :idx (column or columns to index),
  #   :comment, :types
  def initialize(file,opts={})
    @header = opts[:header]
    @skip_header = opts[:skip_header] && opts[:header]
    if @header.is_a? Hash
      @types = @header.values
      @header = @header.keys
    end
    create_index opts[:idx]
    @lines = []
    @comment = opts[:comment]
    @types ||= opts[:types]

    # File.exists? was removed in Ruby 3.2; File.exist? is the supported name.
    parse_file(file) if file && File.exist?(file)
  end

  # Appends a record, wrapping it in the line class unless it is already a
  # HashLine. The record is not added to the indexes.
  def add_line hash
    if hash.is_a? HashLine
      @lines.push hash
    else
      @lines.push create_line(hash)
    end
  end

  private

  # Loads the file through load_file, then wraps every entry of @lines in the
  # line class and indexes it.
  def parse_file file
    load_file file
    @lines.each_index do |i|
      @lines[i] = create_line @lines[i]
      add_index @lines[i] unless @index.empty?
    end
  end

  # Builds one empty lookup hash per requested index key.
  def create_index idx
    if !idx
      @index = {}
      return
    end
    idx = [ idx ] if !idx.is_a? Array
    @index = Hash[idx.map { |i| [ i, {} ] }]
  end

  # Takes the header from a tab-separated line unless one is already set.
  def set_header s, downcase=nil
    return nil if @header
    @header = s.chomp.split(/\t/).map { |col| downcase ? col.downcase.to_sym : col.to_sym }
  end

  def format_line l
    @header.map { |h| l[h] }.join("\t")
  end

  def line_hash s
    @header.zip(s.split(/\t/))
  end

  # NOTE(review): String#=~ raises TypeError when @comment is a String rather
  # than a Regexp — confirm what callers pass as :comment.
  def is_comment? s
    @comment && s =~ @comment
  end

  protected

  def create_line s
    self.class.line_type.new s
  end

  # Files +line+ under each index key for which it has a value.
  def add_index line
    @index.each do |key,ind|
      next if !line[key]
      (ind[ line[key] ] ||= []) << line
    end
  end
end