bio-synreport 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/bio-synreport.gemspec +1 -1
- data/lib/bio/utils/bio-synreport.rb +1 -124
- metadata +2 -2
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.
|
1
|
+
0.1.2
|
data/bio-synreport.gemspec
CHANGED
@@ -5,7 +5,6 @@ require 'bio'
|
|
5
5
|
module Bio
|
6
6
|
class Util
|
7
7
|
|
8
|
-
<<<<<<< HEAD
|
9
8
|
class MrnaModel < Bio::GFF::GFF3::Record
|
10
9
|
attr_accessor :seq, :cds
|
11
10
|
def initialize(gff_line)
|
@@ -78,69 +77,15 @@ module Bio
|
|
78
77
|
return {#:id => self.gffid,
|
79
78
|
:chr => self.seqname,
|
80
79
|
:strand => self.strand,
|
81
|
-
=======
|
82
|
-
class MrnaModel
|
83
|
-
attr_accessor :seqname, :gff_id, :strand, :cds, :sequences
|
84
|
-
|
85
|
-
def initialize(chr, id, strand, cds_arr, seq_arr)
|
86
|
-
@seqname, @gff_id, @strand, @cds, @sequences = chr, id, strand, cds_arr, seq_arr
|
87
|
-
end
|
88
|
-
|
89
|
-
def includes?(seq, point)
|
90
|
-
@cds.each {|start, stop| return true if @seqname == seq and point.to_i >= start and point.to_i <= stop}
|
91
|
-
false
|
92
|
-
end
|
93
|
-
|
94
|
-
def seq
|
95
|
-
@sequences.join
|
96
|
-
end
|
97
|
-
|
98
|
-
def substitution_info(chr,point,alt)
|
99
|
-
cds_start = @cds.first.first
|
100
|
-
running_total = 0
|
101
|
-
@cds.each do |start,stop|
|
102
|
-
if point.to_i >= start and point.to_i <= stop
|
103
|
-
offset = case @strand
|
104
|
-
when "+"
|
105
|
-
#offset =
|
106
|
-
(point.to_i - start) + running_total
|
107
|
-
when "-"
|
108
|
-
(stop - point.to_i) + running_total
|
109
|
-
end #offset = how far into cds SNP is
|
110
|
-
codon_number = offset / 3
|
111
|
-
position_in_codon = offset % 3
|
112
|
-
#pp [offset, codon_number, position_in_codon]
|
113
|
-
codon_array = []; Bio::Sequence::NA.new(self.seq).window_search(3,3) {|b| codon_array << b}
|
114
|
-
codon = codon_array[codon_number]
|
115
|
-
nt = codon[position_in_codon]
|
116
|
-
new_codon = codon.dup
|
117
|
-
new_codon[position_in_codon] = alt.downcase
|
118
|
-
#pp [codon, position_in_codon, nt, new_codon]
|
119
|
-
a = Bio::Sequence::NA.new(codon).translate.codes.first
|
120
|
-
b = Bio::Sequence::NA.new(new_codon).translate.codes.first
|
121
|
-
sub_type = a == b ? "SYN" : "NON_SYN"
|
122
|
-
return {:id => @gff_id,
|
123
|
-
:chr => @seqname,
|
124
|
-
:strand => @strand,
|
125
|
-
>>>>>>> 188a1a611ad6334046551c7bba186dc1c7ae85af
|
126
80
|
:position => point,
|
127
81
|
:original_codon => codon,
|
128
82
|
:original_residue => a || 'stop',
|
129
83
|
:mutant_codon => new_codon,
|
130
84
|
:mutant_residue =>b || 'stop',
|
131
|
-
<<<<<<< HEAD
|
132
85
|
:position_in_codon => position + 1,
|
133
86
|
:substitution_type => sub_type
|
134
87
|
}
|
135
|
-
|
136
|
-
:position_in_codon => position_in_codon + 1,
|
137
|
-
:substitution_type => sub_type
|
138
|
-
}
|
139
|
-
end
|
140
|
-
running_total += (stop - start)
|
141
|
-
running_total += 1 if @strand == '-' #how far we are into the cds
|
142
|
-
end
|
143
|
-
>>>>>>> 188a1a611ad6334046551c7bba186dc1c7ae85af
|
88
|
+
|
144
89
|
end
|
145
90
|
|
146
91
|
end#class end
|
@@ -150,7 +95,6 @@ module Bio
|
|
150
95
|
#attr_accessor :cdshash, :cds_list, :mRNAhash, :seqhash
|
151
96
|
|
152
97
|
def initialize(opts)
|
153
|
-
<<<<<<< HEAD
|
154
98
|
cdses = []
|
155
99
|
mrna_list = []
|
156
100
|
seqs = Hash.new
|
@@ -190,68 +134,10 @@ module Bio
|
|
190
134
|
|
191
135
|
def is_in_cds?(chr,point)
|
192
136
|
self.mutation_info(chr,point,"a") ? true : false
|
193
|
-
=======
|
194
|
-
@gene_array = []
|
195
|
-
@cdshash = Hash.new {|h,k| h[k] = Hash.new {|a,b| a[b] = [] } }
|
196
|
-
@mRNAhash = Hash.new {|h,k| h[k] = Hash.new {|a,b| a[b] = [] } }
|
197
|
-
File.open(opts[:gff], "r").each do |gffline|
|
198
|
-
record=Bio::GFF::GFF3::Record.new(gffline)
|
199
|
-
if(record.feature_type == 'gene')
|
200
|
-
@gene_array << [record.seqname, record.id]
|
201
|
-
elsif(record.feature_type == 'CDS' or record.feature_type == 'mRNA')
|
202
|
-
parents = record.get_attributes('Parent')
|
203
|
-
parents.each do |parent|
|
204
|
-
if record.feature_type == 'CDS'
|
205
|
-
@cdshash[record.seqname][parent] << record
|
206
|
-
else
|
207
|
-
@mRNAhash[record.seqname][parent] << record
|
208
|
-
end
|
209
|
-
end
|
210
|
-
end
|
211
|
-
end
|
212
|
-
$stderr.puts "Loaded GFF..." if opts[:verbose]
|
213
|
-
@seqhash = {}
|
214
|
-
Bio::FastaFormat.open(opts[:fasta]).each { |seq| @seqhash[seq.entry_id] = seq.to_seq }
|
215
|
-
$stderr.puts "Loaded Seq..." if opts[:verbose]
|
216
|
-
|
217
|
-
@models = Hash.new {|h,k| h[k] = [] }
|
218
|
-
$stderr.puts "Building models..." if opts[:verbose]
|
219
|
-
@gene_array.each do |gene|
|
220
|
-
|
221
|
-
mRNAs=@mRNAhash[gene.first][gene.last]
|
222
|
-
mRNAs.each do |mRNA|
|
223
|
-
next if @seqhash[gene.first].nil?
|
224
|
-
cdsa = []
|
225
|
-
seqs = []
|
226
|
-
cdsary=@cdshash[gene.first][mRNA.id]
|
227
|
-
cdsary.each {|c| cdsa << [c.start, c.end]}
|
228
|
-
cdsa.sort!
|
229
|
-
cdsa.reverse! if mRNA.strand == '-'
|
230
|
-
|
231
|
-
cdsa.each do |cds|
|
232
|
-
|
233
|
-
#cdsa << [cds.start, cds.end]
|
234
|
-
if mRNA.strand == '+'
|
235
|
-
seqs << Bio::Sequence::NA.new(@seqhash[mRNA.seqname].splicing("#{cds.first}..#{cds.last}") )
|
236
|
-
elsif mRNA.strand == "-"
|
237
|
-
seqs << Bio::Sequence::NA.new(@seqhash[mRNA.seqname].splicing("#{cds.first}..#{cds.last}") ).complement
|
238
|
-
end
|
239
|
-
end
|
240
|
-
@models[mRNA.seqname] << Bio::Util::MrnaModel.new(mRNA.seqname, mRNA.id, mRNA.strand, cdsa, seqs )
|
241
|
-
#pp @models[mRNA.seqname][-1].cds if mRNA.id == 'AT2G17530.1' or mRNA.id == 'AT2G17550.1'
|
242
|
-
end
|
243
|
-
end
|
244
|
-
$stderr.puts "Models built..." if opts[:verbose]
|
245
|
-
end#init end
|
246
|
-
|
247
|
-
def is_in_cds?(chr,point)
|
248
|
-
@self.mutation_info(chr,point) ? true : false
|
249
|
-
>>>>>>> 188a1a611ad6334046551c7bba186dc1c7ae85af
|
250
137
|
end
|
251
138
|
|
252
139
|
#returns mutation info if point in CDS, if not in CDS returns false
|
253
140
|
def mutation_info(chr,pos,alt)
|
254
|
-
<<<<<<< HEAD
|
255
141
|
pos = pos.to_i
|
256
142
|
#cant do indels ...
|
257
143
|
return nil if alt.length > 1
|
@@ -266,15 +152,6 @@ module Bio
|
|
266
152
|
#somthing unpredicatable went wrong and we couldnt do the conversion ...
|
267
153
|
return nil
|
268
154
|
end
|
269
|
-
=======
|
270
|
-
|
271
|
-
@models[chr].each do |m|
|
272
|
-
if m.includes?(chr,pos)
|
273
|
-
return m.substitution_info(chr,pos,alt)
|
274
|
-
end
|
275
|
-
end
|
276
|
-
false
|
277
|
-
>>>>>>> 188a1a611ad6334046551c7bba186dc1c7ae85af
|
278
155
|
end
|
279
156
|
|
280
157
|
|