bio-synreport 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/bio-synreport.gemspec +1 -1
- data/lib/bio/utils/bio-synreport.rb +1 -124
- metadata +2 -2
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.
|
1
|
+
0.1.2
|
data/bio-synreport.gemspec
CHANGED
@@ -5,7 +5,6 @@ require 'bio'
|
|
5
5
|
module Bio
|
6
6
|
class Util
|
7
7
|
|
8
|
-
<<<<<<< HEAD
|
9
8
|
class MrnaModel < Bio::GFF::GFF3::Record
|
10
9
|
attr_accessor :seq, :cds
|
11
10
|
def initialize(gff_line)
|
@@ -78,69 +77,15 @@ module Bio
|
|
78
77
|
return {#:id => self.gffid,
|
79
78
|
:chr => self.seqname,
|
80
79
|
:strand => self.strand,
|
81
|
-
=======
|
82
|
-
class MrnaModel
|
83
|
-
attr_accessor :seqname, :gff_id, :strand, :cds, :sequences
|
84
|
-
|
85
|
-
def initialize(chr, id, strand, cds_arr, seq_arr)
|
86
|
-
@seqname, @gff_id, @strand, @cds, @sequences = chr, id, strand, cds_arr, seq_arr
|
87
|
-
end
|
88
|
-
|
89
|
-
def includes?(seq, point)
|
90
|
-
@cds.each {|start, stop| return true if @seqname == seq and point.to_i >= start and point.to_i <= stop}
|
91
|
-
false
|
92
|
-
end
|
93
|
-
|
94
|
-
def seq
|
95
|
-
@sequences.join
|
96
|
-
end
|
97
|
-
|
98
|
-
def substitution_info(chr,point,alt)
|
99
|
-
cds_start = @cds.first.first
|
100
|
-
running_total = 0
|
101
|
-
@cds.each do |start,stop|
|
102
|
-
if point.to_i >= start and point.to_i <= stop
|
103
|
-
offset = case @strand
|
104
|
-
when "+"
|
105
|
-
#offset =
|
106
|
-
(point.to_i - start) + running_total
|
107
|
-
when "-"
|
108
|
-
(stop - point.to_i) + running_total
|
109
|
-
end #offset = how far into cds SNP is
|
110
|
-
codon_number = offset / 3
|
111
|
-
position_in_codon = offset % 3
|
112
|
-
#pp [offset, codon_number, position_in_codon]
|
113
|
-
codon_array = []; Bio::Sequence::NA.new(self.seq).window_search(3,3) {|b| codon_array << b}
|
114
|
-
codon = codon_array[codon_number]
|
115
|
-
nt = codon[position_in_codon]
|
116
|
-
new_codon = codon.dup
|
117
|
-
new_codon[position_in_codon] = alt.downcase
|
118
|
-
#pp [codon, position_in_codon, nt, new_codon]
|
119
|
-
a = Bio::Sequence::NA.new(codon).translate.codes.first
|
120
|
-
b = Bio::Sequence::NA.new(new_codon).translate.codes.first
|
121
|
-
sub_type = a == b ? "SYN" : "NON_SYN"
|
122
|
-
return {:id => @gff_id,
|
123
|
-
:chr => @seqname,
|
124
|
-
:strand => @strand,
|
125
|
-
>>>>>>> 188a1a611ad6334046551c7bba186dc1c7ae85af
|
126
80
|
:position => point,
|
127
81
|
:original_codon => codon,
|
128
82
|
:original_residue => a || 'stop',
|
129
83
|
:mutant_codon => new_codon,
|
130
84
|
:mutant_residue =>b || 'stop',
|
131
|
-
<<<<<<< HEAD
|
132
85
|
:position_in_codon => position + 1,
|
133
86
|
:substitution_type => sub_type
|
134
87
|
}
|
135
|
-
|
136
|
-
:position_in_codon => position_in_codon + 1,
|
137
|
-
:substitution_type => sub_type
|
138
|
-
}
|
139
|
-
end
|
140
|
-
running_total += (stop - start)
|
141
|
-
running_total += 1 if @strand == '-' #how far we are into the cds
|
142
|
-
end
|
143
|
-
>>>>>>> 188a1a611ad6334046551c7bba186dc1c7ae85af
|
88
|
+
|
144
89
|
end
|
145
90
|
|
146
91
|
end#class end
|
@@ -150,7 +95,6 @@ module Bio
|
|
150
95
|
#attr_accessor :cdshash, :cds_list, :mRNAhash, :seqhash
|
151
96
|
|
152
97
|
def initialize(opts)
|
153
|
-
<<<<<<< HEAD
|
154
98
|
cdses = []
|
155
99
|
mrna_list = []
|
156
100
|
seqs = Hash.new
|
@@ -190,68 +134,10 @@ module Bio
|
|
190
134
|
|
191
135
|
def is_in_cds?(chr,point)
|
192
136
|
self.mutation_info(chr,point,"a") ? true : false
|
193
|
-
=======
|
194
|
-
@gene_array = []
|
195
|
-
@cdshash = Hash.new {|h,k| h[k] = Hash.new {|a,b| a[b] = [] } }
|
196
|
-
@mRNAhash = Hash.new {|h,k| h[k] = Hash.new {|a,b| a[b] = [] } }
|
197
|
-
File.open(opts[:gff], "r").each do |gffline|
|
198
|
-
record=Bio::GFF::GFF3::Record.new(gffline)
|
199
|
-
if(record.feature_type == 'gene')
|
200
|
-
@gene_array << [record.seqname, record.id]
|
201
|
-
elsif(record.feature_type == 'CDS' or record.feature_type == 'mRNA')
|
202
|
-
parents = record.get_attributes('Parent')
|
203
|
-
parents.each do |parent|
|
204
|
-
if record.feature_type == 'CDS'
|
205
|
-
@cdshash[record.seqname][parent] << record
|
206
|
-
else
|
207
|
-
@mRNAhash[record.seqname][parent] << record
|
208
|
-
end
|
209
|
-
end
|
210
|
-
end
|
211
|
-
end
|
212
|
-
$stderr.puts "Loaded GFF..." if opts[:verbose]
|
213
|
-
@seqhash = {}
|
214
|
-
Bio::FastaFormat.open(opts[:fasta]).each { |seq| @seqhash[seq.entry_id] = seq.to_seq }
|
215
|
-
$stderr.puts "Loaded Seq..." if opts[:verbose]
|
216
|
-
|
217
|
-
@models = Hash.new {|h,k| h[k] = [] }
|
218
|
-
$stderr.puts "Building models..." if opts[:verbose]
|
219
|
-
@gene_array.each do |gene|
|
220
|
-
|
221
|
-
mRNAs=@mRNAhash[gene.first][gene.last]
|
222
|
-
mRNAs.each do |mRNA|
|
223
|
-
next if @seqhash[gene.first].nil?
|
224
|
-
cdsa = []
|
225
|
-
seqs = []
|
226
|
-
cdsary=@cdshash[gene.first][mRNA.id]
|
227
|
-
cdsary.each {|c| cdsa << [c.start, c.end]}
|
228
|
-
cdsa.sort!
|
229
|
-
cdsa.reverse! if mRNA.strand == '-'
|
230
|
-
|
231
|
-
cdsa.each do |cds|
|
232
|
-
|
233
|
-
#cdsa << [cds.start, cds.end]
|
234
|
-
if mRNA.strand == '+'
|
235
|
-
seqs << Bio::Sequence::NA.new(@seqhash[mRNA.seqname].splicing("#{cds.first}..#{cds.last}") )
|
236
|
-
elsif mRNA.strand == "-"
|
237
|
-
seqs << Bio::Sequence::NA.new(@seqhash[mRNA.seqname].splicing("#{cds.first}..#{cds.last}") ).complement
|
238
|
-
end
|
239
|
-
end
|
240
|
-
@models[mRNA.seqname] << Bio::Util::MrnaModel.new(mRNA.seqname, mRNA.id, mRNA.strand, cdsa, seqs )
|
241
|
-
#pp @models[mRNA.seqname][-1].cds if mRNA.id == 'AT2G17530.1' or mRNA.id == 'AT2G17550.1'
|
242
|
-
end
|
243
|
-
end
|
244
|
-
$stderr.puts "Models built..." if opts[:verbose]
|
245
|
-
end#init end
|
246
|
-
|
247
|
-
def is_in_cds?(chr,point)
|
248
|
-
@self.mutation_info(chr,point) ? true : false
|
249
|
-
>>>>>>> 188a1a611ad6334046551c7bba186dc1c7ae85af
|
250
137
|
end
|
251
138
|
|
252
139
|
#returns mutation info if point in CDS, if not in CDS returns false
|
253
140
|
def mutation_info(chr,pos,alt)
|
254
|
-
<<<<<<< HEAD
|
255
141
|
pos = pos.to_i
|
256
142
|
#can't do indels ...
|
257
143
|
return nil if alt.length > 1
|
@@ -266,15 +152,6 @@ module Bio
|
|
266
152
|
#something unpredictable went wrong and we couldn't do the conversion ...
|
267
153
|
return nil
|
268
154
|
end
|
269
|
-
=======
|
270
|
-
|
271
|
-
@models[chr].each do |m|
|
272
|
-
if m.includes?(chr,pos)
|
273
|
-
return m.substitution_info(chr,pos,alt)
|
274
|
-
end
|
275
|
-
end
|
276
|
-
false
|
277
|
-
>>>>>>> 188a1a611ad6334046551c7bba186dc1c7ae85af
|
278
155
|
end
|
279
156
|
|
280
157
|
|