bio-synreport 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/bio-synreport.gemspec +1 -1
- data/lib/bio/utils/bio-synreport.rb +1 -124
- metadata +2 -2
    
        data/VERSION
    CHANGED
    
    | @@ -1 +1 @@ | |
| 1 | 
            -
            0.1.1 | 
| 1 | 
            +
            0.1.2
         | 
    
        data/lib/bio/utils/bio-synreport.rb
    CHANGED
    
    
| @@ -5,7 +5,6 @@ require 'bio' | |
| 5 5 | 
             
            module Bio
         | 
| 6 6 | 
             
              class Util
         | 
| 7 7 |  | 
| 8 | 
            -
            <<<<<<< HEAD
         | 
| 9 8 | 
             
                class MrnaModel < Bio::GFF::GFF3::Record
         | 
| 10 9 | 
             
                  attr_accessor :seq, :cds
         | 
| 11 10 | 
             
                  def initialize(gff_line)
         | 
| @@ -78,69 +77,15 @@ module Bio | |
| 78 77 | 
             
                        return {#:id => self.gffid, 
         | 
| 79 78 | 
             
                                :chr => self.seqname, 
         | 
| 80 79 | 
             
                                :strand => self.strand, 
         | 
| 81 | 
            -
            =======
         | 
| 82 | 
            -
                class MrnaModel
         | 
| 83 | 
            -
                  attr_accessor :seqname, :gff_id, :strand, :cds, :sequences
         | 
| 84 | 
            -
              
         | 
| 85 | 
            -
                  def initialize(chr, id, strand, cds_arr, seq_arr)
         | 
| 86 | 
            -
                    @seqname, @gff_id, @strand, @cds, @sequences = chr, id, strand, cds_arr, seq_arr
         | 
| 87 | 
            -
                  end
         | 
| 88 | 
            -
              
         | 
| 89 | 
            -
                  def includes?(seq, point)
         | 
| 90 | 
            -
                    @cds.each {|start, stop| return true if @seqname == seq and point.to_i >= start and point.to_i <= stop}
         | 
| 91 | 
            -
                    false
         | 
| 92 | 
            -
                  end
         | 
| 93 | 
            -
              
         | 
| 94 | 
            -
                  def seq
         | 
| 95 | 
            -
                    @sequences.join
         | 
| 96 | 
            -
                  end
         | 
| 97 | 
            -
              
         | 
| 98 | 
            -
                  def substitution_info(chr,point,alt)
         | 
| 99 | 
            -
                    cds_start = @cds.first.first
         | 
| 100 | 
            -
                    running_total = 0
         | 
| 101 | 
            -
                    @cds.each do |start,stop|
         | 
| 102 | 
            -
                      if point.to_i >= start and point.to_i <= stop
         | 
| 103 | 
            -
                        offset = case @strand
         | 
| 104 | 
            -
                        when "+"
         | 
| 105 | 
            -
                          #offset = 
         | 
| 106 | 
            -
                          (point.to_i - start) + running_total
         | 
| 107 | 
            -
                        when "-"
         | 
| 108 | 
            -
                          (stop - point.to_i) + running_total 
         | 
| 109 | 
            -
                        end  #offset = how far into cds SNP is
         | 
| 110 | 
            -
                        codon_number = offset / 3
         | 
| 111 | 
            -
                        position_in_codon = offset % 3
         | 
| 112 | 
            -
                        #pp [offset, codon_number, position_in_codon] 
         | 
| 113 | 
            -
                        codon_array = []; Bio::Sequence::NA.new(self.seq).window_search(3,3) {|b| codon_array << b}
         | 
| 114 | 
            -
                        codon = codon_array[codon_number]
         | 
| 115 | 
            -
                        nt = codon[position_in_codon]
         | 
| 116 | 
            -
                        new_codon = codon.dup
         | 
| 117 | 
            -
                        new_codon[position_in_codon] = alt.downcase
         | 
| 118 | 
            -
                        #pp [codon, position_in_codon, nt, new_codon]
         | 
| 119 | 
            -
                        a = Bio::Sequence::NA.new(codon).translate.codes.first
         | 
| 120 | 
            -
                        b =  Bio::Sequence::NA.new(new_codon).translate.codes.first
         | 
| 121 | 
            -
                        sub_type = a == b ? "SYN" : "NON_SYN"
         | 
| 122 | 
            -
                        return {:id => @gff_id, 
         | 
| 123 | 
            -
                                :chr => @seqname, 
         | 
| 124 | 
            -
                                :strand => @strand, 
         | 
| 125 | 
            -
            >>>>>>> 188a1a611ad6334046551c7bba186dc1c7ae85af
         | 
| 126 80 | 
             
                                :position => point,
         | 
| 127 81 | 
             
                                :original_codon => codon, 
         | 
| 128 82 | 
             
                                :original_residue => a || 'stop', 
         | 
| 129 83 | 
             
                                :mutant_codon => new_codon, 
         | 
| 130 84 | 
             
                                :mutant_residue =>b || 'stop', 
         | 
| 131 | 
            -
            <<<<<<< HEAD
         | 
| 132 85 | 
             
                                :position_in_codon => position + 1, 
         | 
| 133 86 | 
             
                                :substitution_type => sub_type
         | 
| 134 87 | 
             
                                }
         | 
| 135 | 
            -
             | 
| 136 | 
            -
                                :position_in_codon => position_in_codon + 1, 
         | 
| 137 | 
            -
                                :substitution_type => sub_type
         | 
| 138 | 
            -
                                }
         | 
| 139 | 
            -
                      end
         | 
| 140 | 
            -
                      running_total += (stop - start)
         | 
| 141 | 
            -
                      running_total += 1 if @strand == '-' #how far we are into the cds
         | 
| 142 | 
            -
                    end
         | 
| 143 | 
            -
            >>>>>>> 188a1a611ad6334046551c7bba186dc1c7ae85af
         | 
| 88 | 
            +
             | 
| 144 89 | 
             
                  end
         | 
| 145 90 |  | 
| 146 91 | 
             
                end#class end
         | 
| @@ -150,7 +95,6 @@ module Bio | |
| 150 95 | 
             
                  #attr_accessor :cdshash, :cds_list, :mRNAhash, :seqhash
         | 
| 151 96 |  | 
| 152 97 | 
             
                  def initialize(opts)
         | 
| 153 | 
            -
            <<<<<<< HEAD
         | 
| 154 98 | 
             
                    cdses = []
         | 
| 155 99 | 
             
                    mrna_list = []
         | 
| 156 100 | 
             
                    seqs = Hash.new
         | 
| @@ -190,68 +134,10 @@ module Bio | |
| 190 134 |  | 
| 191 135 | 
             
                    def is_in_cds?(chr,point)
         | 
| 192 136 | 
             
                      self.mutation_info(chr,point,"a") ? true : false
         | 
| 193 | 
            -
            =======
         | 
| 194 | 
            -
                    @gene_array = []
         | 
| 195 | 
            -
                    @cdshash = Hash.new {|h,k| h[k] = Hash.new {|a,b| a[b] = [] } }
         | 
| 196 | 
            -
                    @mRNAhash = Hash.new {|h,k| h[k] = Hash.new {|a,b| a[b] = [] } }
         | 
| 197 | 
            -
                    File.open(opts[:gff], "r").each do |gffline|
         | 
| 198 | 
            -
                      record=Bio::GFF::GFF3::Record.new(gffline)
         | 
| 199 | 
            -
                      if(record.feature_type == 'gene')
         | 
| 200 | 
            -
                          @gene_array << [record.seqname, record.id]
         | 
| 201 | 
            -
                      elsif(record.feature_type == 'CDS' or record.feature_type == 'mRNA')
         | 
| 202 | 
            -
                        parents = record.get_attributes('Parent')
         | 
| 203 | 
            -
                        parents.each do |parent|  
         | 
| 204 | 
            -
                          if record.feature_type == 'CDS'
         | 
| 205 | 
            -
                            @cdshash[record.seqname][parent] << record
         | 
| 206 | 
            -
                          else
         | 
| 207 | 
            -
                            @mRNAhash[record.seqname][parent] << record
         | 
| 208 | 
            -
                          end
         | 
| 209 | 
            -
                        end
         | 
| 210 | 
            -
                      end
         | 
| 211 | 
            -
                    end
         | 
| 212 | 
            -
                    $stderr.puts "Loaded GFF..." if opts[:verbose]
         | 
| 213 | 
            -
                    @seqhash = {}
         | 
| 214 | 
            -
                    Bio::FastaFormat.open(opts[:fasta]).each { |seq| @seqhash[seq.entry_id] = seq.to_seq }
         | 
| 215 | 
            -
                    $stderr.puts "Loaded Seq..." if opts[:verbose]
         | 
| 216 | 
            -
                    
         | 
| 217 | 
            -
                    @models = Hash.new {|h,k| h[k] =  []  }
         | 
| 218 | 
            -
                    $stderr.puts "Building models..." if opts[:verbose] 
         | 
| 219 | 
            -
                    @gene_array.each do |gene|
         | 
| 220 | 
            -
             | 
| 221 | 
            -
                      mRNAs=@mRNAhash[gene.first][gene.last]
         | 
| 222 | 
            -
                      mRNAs.each do |mRNA|
         | 
| 223 | 
            -
                        next if @seqhash[gene.first].nil?
         | 
| 224 | 
            -
                        cdsa = []
         | 
| 225 | 
            -
                        seqs = []
         | 
| 226 | 
            -
                        cdsary=@cdshash[gene.first][mRNA.id]
         | 
| 227 | 
            -
                        cdsary.each {|c| cdsa << [c.start, c.end]} 
         | 
| 228 | 
            -
                        cdsa.sort!
         | 
| 229 | 
            -
                        cdsa.reverse! if mRNA.strand == '-'
         | 
| 230 | 
            -
                        
         | 
| 231 | 
            -
                        cdsa.each do |cds|
         | 
| 232 | 
            -
             | 
| 233 | 
            -
                          #cdsa << [cds.start, cds.end]
         | 
| 234 | 
            -
                          if mRNA.strand == '+'
         | 
| 235 | 
            -
                            seqs << Bio::Sequence::NA.new(@seqhash[mRNA.seqname].splicing("#{cds.first}..#{cds.last}") )
         | 
| 236 | 
            -
                          elsif mRNA.strand == "-"
         | 
| 237 | 
            -
                            seqs << Bio::Sequence::NA.new(@seqhash[mRNA.seqname].splicing("#{cds.first}..#{cds.last}") ).complement
         | 
| 238 | 
            -
                          end
         | 
| 239 | 
            -
                        end
         | 
| 240 | 
            -
                        @models[mRNA.seqname] << Bio::Util::MrnaModel.new(mRNA.seqname, mRNA.id, mRNA.strand, cdsa, seqs )
         | 
| 241 | 
            -
                        #pp @models[mRNA.seqname][-1].cds if mRNA.id == 'AT2G17530.1' or mRNA.id == 'AT2G17550.1'
         | 
| 242 | 
            -
                      end
         | 
| 243 | 
            -
                    end
         | 
| 244 | 
            -
                    $stderr.puts "Models built..." if opts[:verbose]
         | 
| 245 | 
            -
                  end#init end
         | 
| 246 | 
            -
                    
         | 
| 247 | 
            -
                    def is_in_cds?(chr,point)
         | 
| 248 | 
            -
                      @self.mutation_info(chr,point) ? true : false
         | 
| 249 | 
            -
            >>>>>>> 188a1a611ad6334046551c7bba186dc1c7ae85af
         | 
| 250 137 | 
             
                    end
         | 
| 251 138 |  | 
| 252 139 | 
             
                    #returns mutation info if point in CDS, if not in CDS returns false
         | 
| 253 140 | 
             
                    def mutation_info(chr,pos,alt)
         | 
| 254 | 
            -
            <<<<<<< HEAD
         | 
| 255 141 | 
             
                      pos = pos.to_i
         | 
| 256 142 | 
             
                      #cant do indels ...
         | 
| 257 143 | 
             
                      return nil if alt.length > 1
         | 
| @@ -266,15 +152,6 @@ module Bio | |
| 266 152 | 
             
                        #somthing unpredicatable went wrong and we couldnt do the conversion ...
         | 
| 267 153 | 
             
                        return nil
         | 
| 268 154 | 
             
                      end
         | 
| 269 | 
            -
            =======
         | 
| 270 | 
            -
             | 
| 271 | 
            -
                      @models[chr].each do |m|
         | 
| 272 | 
            -
                         if m.includes?(chr,pos)
         | 
| 273 | 
            -
                           return m.substitution_info(chr,pos,alt)   
         | 
| 274 | 
            -
                         end
         | 
| 275 | 
            -
                      end
         | 
| 276 | 
            -
                      false
         | 
| 277 | 
            -
            >>>>>>> 188a1a611ad6334046551c7bba186dc1c7ae85af
         | 
| 278 155 | 
             
                    end
         | 
| 279 156 |  | 
| 280 157 |  |