bio-ensembl 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
 - data/Gemfile +20 -0
 - data/Gemfile.lock +40 -0
 - data/LICENSE.txt +20 -0
 - data/README.rdoc +19 -0
 - data/Rakefile +71 -0
 - data/VERSION +1 -0
 - data/bin/ensembl +40 -0
 - data/bin/variation_effect_predictor +106 -0
 - data/bio-ensembl.gemspec +190 -0
 - data/lib/bio-ensembl.rb +65 -0
 - data/lib/bio-ensembl/core/activerecord.rb +1812 -0
 - data/lib/bio-ensembl/core/collection.rb +64 -0
 - data/lib/bio-ensembl/core/project.rb +262 -0
 - data/lib/bio-ensembl/core/slice.rb +657 -0
 - data/lib/bio-ensembl/core/transcript.rb +409 -0
 - data/lib/bio-ensembl/core/transform.rb +95 -0
 - data/lib/bio-ensembl/db_connection.rb +205 -0
 - data/lib/bio-ensembl/variation/activerecord.rb +536 -0
 - data/lib/bio-ensembl/variation/variation_feature.rb +376 -0
 - data/lib/bio-ensembl/variation/variation_feature62.rb +444 -0
 - data/samples/ensembl_genomes_example.rb +60 -0
 - data/samples/examples_perl_tutorial.rb +125 -0
 - data/samples/small_example_ruby_api.rb +34 -0
 - data/samples/variation_effect_predictor_data.txt +4 -0
 - data/samples/variation_example.rb +67 -0
 - data/test/data/seq_c6qbl.fa +10 -0
 - data/test/data/seq_cso19_coding.fa +16 -0
 - data/test/data/seq_cso19_transcript.fa +28 -0
 - data/test/data/seq_drd3_gene.fa +838 -0
 - data/test/data/seq_drd3_transcript.fa +22 -0
 - data/test/data/seq_drd4_transcript.fa +24 -0
 - data/test/data/seq_forward_composite.fa +1669 -0
 - data/test/data/seq_par_boundary.fa +169 -0
 - data/test/data/seq_rnd3_transcript.fa +47 -0
 - data/test/data/seq_ub2r1_coding.fa +13 -0
 - data/test/data/seq_ub2r1_gene.fa +174 -0
 - data/test/data/seq_ub2r1_transcript.fa +26 -0
 - data/test/data/seq_y.fa +2 -0
 - data/test/default/test_connection.rb +60 -0
 - data/test/default/test_releases.rb +130 -0
 - data/test/ensembl_genomes/test_collection.rb +122 -0
 - data/test/ensembl_genomes/test_gene.rb +46 -0
 - data/test/ensembl_genomes/test_slice.rb +65 -0
 - data/test/ensembl_genomes/test_variation.rb +38 -0
 - data/test/helper.rb +18 -0
 - data/test/release_50/core/test_project.rb +210 -0
 - data/test/release_50/core/test_project_human.rb +52 -0
 - data/test/release_50/core/test_relationships.rb +72 -0
 - data/test/release_50/core/test_sequence.rb +170 -0
 - data/test/release_50/core/test_slice.rb +116 -0
 - data/test/release_50/core/test_transcript.rb +125 -0
 - data/test/release_50/core/test_transform.rb +217 -0
 - data/test/release_50/variation/test_activerecord.rb +138 -0
 - data/test/release_50/variation/test_variation.rb +79 -0
 - data/test/release_53/core/test_gene.rb +61 -0
 - data/test/release_53/core/test_project.rb +91 -0
 - data/test/release_53/core/test_project_human.rb +61 -0
 - data/test/release_53/core/test_slice.rb +42 -0
 - data/test/release_53/core/test_transform.rb +57 -0
 - data/test/release_53/variation/test_activerecord.rb +137 -0
 - data/test/release_53/variation/test_variation.rb +66 -0
 - data/test/release_56/core/test_gene.rb +61 -0
 - data/test/release_56/core/test_project.rb +91 -0
 - data/test/release_56/core/test_slice.rb +49 -0
 - data/test/release_56/core/test_transform.rb +57 -0
 - data/test/release_56/variation/test_activerecord.rb +141 -0
 - data/test/release_56/variation/test_consequence.rb +131 -0
 - data/test/release_56/variation/test_variation.rb +63 -0
 - data/test/release_60/core/test_gene.rb +61 -0
 - data/test/release_60/core/test_project_human.rb +34 -0
 - data/test/release_60/core/test_slice.rb +42 -0
 - data/test/release_60/core/test_transcript.rb +120 -0
 - data/test/release_60/core/test_transform.rb +57 -0
 - data/test/release_60/variation/test_activerecord.rb +216 -0
 - data/test/release_60/variation/test_consequence.rb +153 -0
 - data/test/release_60/variation/test_variation.rb +64 -0
 - data/test/release_62/core/test_gene.rb +42 -0
 - data/test/release_62/variation/test_activerecord.rb +86 -0
 - data/test/release_62/variation/test_consequence.rb +191 -0
 - metadata +287 -0
 
| 
         @@ -0,0 +1,376 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #
         
     | 
| 
      
 2 
     | 
    
         
            +
            # = ensembl/variation/variation_feature.rb - Extension of ActiveRecord classes for Ensembl variation features
         
     | 
| 
      
 3 
     | 
    
         
            +
            #
         
     | 
| 
      
 4 
     | 
    
         
            +
            # Copyright::   Copyright (C) 2008 Francesco Strozzi <francesco.strozzi@gmail.com>
         
     | 
| 
      
 5 
     | 
    
         
            +
            # License::     The Ruby License
         
     | 
| 
      
 6 
     | 
    
         
            +
            #
         
     | 
| 
      
 7 
     | 
    
         
            +
            # @author Francesco Strozzi
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
             
     | 
| 
      
 10 
     | 
    
         
            +
            module Ensembl
         
     | 
| 
      
 11 
     | 
    
         
            +
             
     | 
| 
      
 12 
     | 
    
         
            +
              module Variation
         
     | 
| 
      
 13 
     | 
    
         
            +
                
         
     | 
| 
      
 14 
     | 
    
         
            +
                
         
     | 
| 
      
 15 
     | 
    
         
            +
                # The VariationFeature class gives information about the genomic position of 
         
     | 
| 
      
 16 
     | 
    
         
            +
                # each Variation, including also validation status and consequence type. 
         
     | 
| 
      
 17 
     | 
    
         
            +
                #
         
     | 
| 
      
 18 
     | 
    
         
            +
                # This class uses ActiveRecord to access data in the Ensembl database.
         
     | 
| 
      
 19 
     | 
    
         
            +
                # See the general documentation of the Ensembl module for
         
     | 
| 
      
 20 
     | 
    
         
            +
                # more information on what this means and what methods are available.
         
     | 
| 
      
 21 
     | 
    
         
            +
                #
         
     | 
| 
      
 22 
     | 
    
         
            +
                # @example
         
     | 
| 
      
 23 
     | 
    
         
            +
                #   # SLOWER QUERY
         
     | 
| 
      
 24 
     | 
    
         
            +
                #   vf = VariationFeature.find_by_variation_name('rs10111')
         
     | 
| 
      
 25 
     | 
    
         
            +
                #   # FASTER QUERY
         
     | 
| 
      
 26 
     | 
    
         
            +
                #   vf = Variation.find_by_name('rs10111').variation_feature
         
     | 
| 
      
 27 
     | 
    
         
            +
                #   
         
     | 
| 
      
 28 
     | 
    
         
            +
                #   puts vf.seq_region_start, vf.seq_region_end, vf.allele_string
         
     | 
| 
      
 29 
     | 
    
         
            +
                #   puts vf.variation.ancestral_allele
         
     | 
| 
      
 30 
     | 
    
         
            +
                #   genomic_region = vf.fetch_region (returns an Ensembl::Core::Slice)
         
     | 
| 
      
 31 
     | 
    
         
            +
                #   genomic_region.genes
         
     | 
| 
      
 32 
     | 
    
         
            +
                #   up_region,down_region = vf.flanking_seq (returns two Ensembl::Core::Slice)
         
     | 
| 
      
 33 
     | 
    
         
            +
                #
         
     | 
| 
      
 34 
     | 
    
         
            +
                class VariationFeature < DBConnection
         
     | 
| 
      
 35 
     | 
    
         
            +
                  set_primary_key "variation_feature_id"
         
     | 
| 
      
 36 
     | 
    
         
            +
                  belongs_to :variation
         
     | 
| 
      
 37 
     | 
    
         
            +
                  has_many :tagged_variation_features
         
     | 
| 
      
 38 
     | 
    
         
            +
                  has_many :samples, :through => :tagged_variation_features
         
     | 
| 
      
 39 
     | 
    
         
            +
                  belongs_to :seq_region
         
     | 
| 
      
 40 
     | 
    
         
            +
                  validates_inclusion_of :consequence_type, :in => ['ESSENTIAL_SPLICE_SITE',
         
     | 
| 
      
 41 
     | 
    
         
            +
                                                                    'STOP_GAINED',
         
     | 
| 
      
 42 
     | 
    
         
            +
                                                                    'STOP_LOST',
         
     | 
| 
      
 43 
     | 
    
         
            +
                                                                    'COMPLEX_INDEL',
         
     | 
| 
      
 44 
     | 
    
         
            +
                                                                    'FRAMESHIFT_CODING',
         
     | 
| 
      
 45 
     | 
    
         
            +
                                                                    'NON_SYNONYMOUS_CODING',
         
     | 
| 
      
 46 
     | 
    
         
            +
                                                                    'SPLICE_SITE',
         
     | 
| 
      
 47 
     | 
    
         
            +
                                                                    'PARTIAL_CODON',
         
     | 
| 
      
 48 
     | 
    
         
            +
                                                                    'SYNONYMOUS_CODING',
         
     | 
| 
      
 49 
     | 
    
         
            +
                                                                    'REGULATORY_REGION',
         
     | 
| 
      
 50 
     | 
    
         
            +
                                                                    'WITHIN_MATURE_miRNA',
         
     | 
| 
      
 51 
     | 
    
         
            +
                                                                    '5PRIME_UTR',
         
     | 
| 
      
 52 
     | 
    
         
            +
                                                                    '3PRIME_UTR',
         
     | 
| 
      
 53 
     | 
    
         
            +
                                                                    'INTRONIC',
         
     | 
| 
      
 54 
     | 
    
         
            +
                                                                    'NMD_TRANSCRIPT',
         
     | 
| 
      
 55 
     | 
    
         
            +
                                                                    'UPSTREAM',
         
     | 
| 
      
 56 
     | 
    
         
            +
                                                                    'DOWNSTREAM',
         
     | 
| 
      
 57 
     | 
    
         
            +
                                                                    'WITHIN_NON_CODING_GENE',
         
     | 
| 
      
 58 
     | 
    
         
            +
                                                                    'HGMD_MUTATION'
         
     | 
| 
      
 59 
     | 
    
         
            +
                                                                    ], :message => "Consequence type not allowed!"      
         
     | 
| 
      
 60 
     | 
    
         
            +
                  
         
     | 
| 
      
 61 
     | 
    
         
            +
                  # Returns the raw +consequence_type+ column value as a String.
                  #
                  # Workaround: ActiveRecord does not parse the MySQL SET field, so the
                  # value is read before type casting and rendered through string
                  # interpolation (a missing value therefore comes back as "").
                  #
                  # @return [String] the uncast consequence type
                  def consequence_type
                    raw_value = attributes_before_type_cast['consequence_type']
                    "#{raw_value}"
                  end
         
     | 
| 
      
 64 
     | 
    
         
            +
                  
         
     | 
| 
      
 65 
     | 
    
         
            +
                  # Based on Perl API 'get_all_Genes' method for Variation class. Get a genomic region
         
     | 
| 
      
 66 
     | 
    
         
            +
                  # starting from the Variation coordinates, expanding the region upstream and
         
     | 
| 
      
 67 
     | 
    
         
            +
                  # downstream.
         
     | 
| 
      
 68 
     | 
    
         
            +
                  #
         
     | 
| 
      
 69 
     | 
    
         
            +
                  # @param [Integer] up Length of upstream flanking region
         
     | 
| 
      
 70 
     | 
    
         
            +
                  # @param [Integer] down Length of downstream flanking region
         
     | 
| 
      
 71 
     | 
    
         
            +
                  # @return [Slice] Slice object containing the variation
         
     | 
| 
      
 72 
     | 
    
         
            +
                  def fetch_region(up = 5000, down = 5000)
                    sr = core_connection(self.seq_region_id)
                    # Name of the coordinate system this seq_region is recorded under
                    coord_system_name = Ensembl::Core::CoordSystem.find(sr.coord_system_id).name
                    region_name = sr.name
                    # Widen the variation's own coordinates by the requested flanks
                    region_start = self.seq_region_start - up
                    region_stop = self.seq_region_end + down
                    Ensembl::Core::Slice.fetch_by_region(coord_system_name, region_name, region_start, region_stop)
                  end
         
     | 
| 
      
 77 
     | 
    
         
            +
                  
         
     | 
| 
      
 78 
     | 
    
         
            +
                  # Fetch the two slices flanking this variation, using the coordinates
                  # stored on the flanking_sequence of the associated Variation. Both
                  # slices are requested on this feature's strand.
                  #
                  # @return [Array<Slice>] the upstream slice followed by the downstream slice
                  def flanking_seq
                    sr = core_connection(self.seq_region_id)
                    f = Variation.find(self.variation_id).flanking_sequence
                    # Both slices live on the same seq_region, so resolve the coordinate
                    # system name once instead of querying it for every slice.
                    coord_system_name = Ensembl::Core::CoordSystem.find(sr.coord_system_id).name
                    slice_up = Ensembl::Core::Slice.fetch_by_region(coord_system_name, sr.name, f.up_seq_region_start, f.up_seq_region_end, self.seq_region_strand)
                    slice_down = Ensembl::Core::Slice.fetch_by_region(coord_system_name, sr.name, f.down_seq_region_start, f.down_seq_region_end, self.seq_region_strand)
                    return slice_up, slice_down
                  end
         
     | 
| 
      
 85 
     | 
    
         
            +
                  
         
     | 
| 
      
 86 
     | 
    
         
            +
                  # Returns the TranscriptVariations of this feature: the rows stored in
                  # the database when the lookup finds any, otherwise the ones computed
                  # by custom_transcript_variation.
                  def transcript_variations
                    stored = TranscriptVariation.find_all_by_variation_feature_id(self.variation_feature_id)
                    # The variation is already present in the database
                    return stored unless stored[0].nil?

                    # Nothing stored for this variation, so run the calculations
                    sr = core_connection(self.seq_region_id)
                    custom_transcript_variation(self, sr)
                  end
         
     | 
| 
      
 95 
     | 
    
         
            +
                  
         
     | 
| 
      
 96 
     | 
    
         
            +
                  private 
         
     | 
| 
      
 97 
     | 
    
         
            +
                  
         
     | 
| 
      
 98 
     | 
    
         
            +
                  # Return the core SeqRegion with the given id, opening a connection to
                  # the core database first when none exists yet. The connection reuses
                  # the settings reported by the variation connection. Loaded records are
                  # kept in Ensembl::SESSION.seq_regions and reused on later calls.
                  #
                  # @param seq_region_id id of the seq_region to look up
                  # @return the cached or freshly loaded Ensembl::Core::SeqRegion
                  # @raise [NameError] when connecting to the core database fails
                  def core_connection(seq_region_id)
                    unless Ensembl::Core::DBConnection.connected?
                      host, user, password, db_name, port, species, release = Ensembl::Variation::DBConnection.get_info
                      begin
                        Ensembl::Core::DBConnection.connect(species, release.to_i, :username => user, :password => password, :host => host, :port => port)
                      rescue StandardError
                        raise NameError, "Can't derive Core database name from #{db_name}. Are you using non conventional names?"
                      end
                    end

                    # Reuse the SeqRegion when it already exists in Ensembl::SESSION
                    known_regions = Ensembl::SESSION.seq_regions
                    return known_regions[seq_region_id] if known_regions.has_key?(seq_region_id)

                    loaded = Ensembl::Core::SeqRegion.find(seq_region_id)
                    known_regions[loaded.id] = loaded
                    loaded
                  end
         
     | 
| 
      
 117 
     | 
    
         
            +
                  
         
     | 
| 
      
 118 
     | 
    
         
            +
                  # Calculate the consequence types of a user-defined variation, i.e. one
                  # that has no stored TranscriptVariation rows.
                  #
                  # A TranscriptVariation is built for every transcript of every gene found
                  # in the region running from 5000 bases before the variation start to
                  # 4999 bases after its end. The checks run in a fixed order and each
                  # later check only runs while the consequence type is still an empty
                  # string. When the region yields no transcripts a single "INTERGENIC"
                  # entry is returned.
                  #
                  # @param vf the variation feature being evaluated
                  # @param sr the core seq_region the variation lies on
                  # @return [Array<TranscriptVariation>] the calculated TranscriptVariations
                  def custom_transcript_variation(vf,sr)
                    # NOTE(review): not read in the visible part of the file; presumably
                    # used by helpers defined further down -- confirm
                    @variation_name = vf.variation_name
                    @seq_region = sr

                    flank_down = 5000
                    flank_up = 5000
                    results = [] # all the calculated TranscriptVariations

                    # Retrieve the slice of the genomic region where the variation is located
                    coord_system_name = Ensembl::Core::CoordSystem.find(sr.coord_system_id).name
                    region = Ensembl::Core::Slice.fetch_by_region(coord_system_name, sr.name, vf.seq_region_start - flank_up, vf.seq_region_end + flank_down - 1)
                    genes = region.genes(true)

                    # Iterate through all the transcripts present in the region
                    unless genes.first.nil?
                      genes.each do |gene|
                        gene.transcripts.each do |transcript|
                          @cache = {} # reset per transcript; check_splice_site stores exons in it
                          tv = TranscriptVariation.new
                          unresolved = lambda { tv.consequence_type == "" }

                          # Is the variation intergenic for this transcript (no effects)?
                          tv.consequence_type = check_intergenic(vf, transcript)

                          # Is the variation upstream or downstream of the transcript?
                          tv.consequence_type = check_upstream_downstream(vf, transcript) if unresolved.call

                          # From here on the variation is inside the transcript:
                          # non coding gene first
                          tv.consequence_type = check_non_coding(vf, transcript) if unresolved.call && transcript.biotype != 'protein_coding'

                          # then intron / exon boundaries
                          tv.consequence_type = check_splice_site(vf, transcript) if unresolved.call

                          # then UTRs
                          tv.consequence_type = check_utr(vf, transcript) if unresolved.call

                          # otherwise the variation is inside an exon: check the codon change
                          if unresolved.call
                            tv.consequence_type, tv.peptide_allele_string = check_aa_change(vf, transcript)
                          end

                          begin # this changed from release 58
                            tv.transcript_stable_id = transcript.stable_id
                          rescue NoMethodError
                            tv.transcript_id = transcript.id
                          end

                          tv.consequence_type = "INTERGENIC" if unresolved.call
                          results << tv
                        end
                      end
                    end

                    # No transcripts/genes within the flanks: the variation is INTERGENIC (no effects)
                    if results.empty?
                      placeholder = TranscriptVariation.new
                      placeholder.consequence_type = "INTERGENIC"
                      results << placeholder
                    end

                    results
                  end
         
     | 
| 
      
 179 
     | 
    
         
            +
                  
         
     | 
| 
      
 180 
     | 
    
         
            +
                  ## CONSEQUENCE CALCULATION FUNCTIONS ##
         
     | 
| 
      
 181 
     | 
    
         
            +
                  
         
     | 
| 
      
 182 
     | 
    
         
            +
                  # Report "INTERGENIC" when the variation lies entirely outside the
                  # transcript and more than 5000 bases away from it, on either side.
                  #
                  # @param vf object exposing seq_region_start / seq_region_end
                  # @param t transcript exposing seq_region_start / seq_region_end
                  # @return [String, nil] "INTERGENIC", or nil when the variation is closer
                  def check_intergenic(vf,t)
                    far_before = vf.seq_region_end < t.seq_region_start &&
                                 (t.seq_region_start - vf.seq_region_end) > 5000
                    return "INTERGENIC" if far_before

                    far_after = vf.seq_region_start > t.seq_region_end &&
                                (vf.seq_region_start - t.seq_region_end) > 5000
                    far_after ? "INTERGENIC" : nil
                  end
         
     | 
| 
      
 190 
     | 
    
         
            +
                  
         
     | 
| 
      
 191 
     | 
    
         
            +
                  # Classify a variation lying outside the transcript but within 5000
                  # bases of it as "UPSTREAM" or "DOWNSTREAM" (the two labels swap when
                  # the transcript strand is not 1), or as "COMPLEX_INDEL" when the
                  # variation spans the transcript start or end.
                  #
                  # @param vf object exposing seq_region_start / seq_region_end
                  # @param t transcript exposing seq_region_start / seq_region_end / strand
                  # @return [String, nil] the consequence, or nil when none applies
                  def check_upstream_downstream(vf,t)
                    # Variation ends before the transcript starts (genomic coordinates)
                    if vf.seq_region_end < t.seq_region_start && (t.seq_region_start - vf.seq_region_end) <= 5000
                      return "UPSTREAM" if t.strand == 1
                      return "DOWNSTREAM"
                    end

                    # Variation starts after the transcript ends
                    if vf.seq_region_start > t.seq_region_end && (vf.seq_region_start - t.seq_region_end) <= 5000
                      return "DOWNSTREAM" if t.strand == 1
                      return "UPSTREAM"
                    end

                    # An InDel overlapping the transcript start / end
                    spans_start = t.seq_region_start > vf.seq_region_start && t.seq_region_start < vf.seq_region_end
                    return "COMPLEX_INDEL" if spans_start

                    spans_end = t.seq_region_end > vf.seq_region_start && t.seq_region_end < vf.seq_region_end
                    return "COMPLEX_INDEL" if spans_end

                    nil
                  end
         
     | 
| 
      
 205 
     | 
    
         
            +
                  
         
     | 
| 
      
 206 
     | 
    
         
            +
                  # Consequence of a variation on a transcript that is not protein coding.
                  #
                  # A variation whose start and end differ is always "COMPLEX_INDEL";
                  # a single-position variation is labelled according to the transcript
                  # biotype. Every path returns a label, so this method never returns nil.
                  #
                  # @param vf object exposing seq_region_start / seq_region_end
                  # @param t transcript exposing biotype
                  # @return [String] the consequence type
                  def check_non_coding(vf,t)
                    return "COMPLEX_INDEL" unless vf.seq_region_start == vf.seq_region_end

                    case t.biotype
                    when "miRNA" then "WITHIN_MATURE_miRNA"
                    when "nonsense_mediated_decay" then "NMD_TRANSCRIPT"
                    else "WITHIN_NON_CODING_GENE"
                    end
                  end
         
     | 
| 
      
 216 
     | 
    
         
            +
                  
         
     | 
| 
      
 217 
     | 
    
         
            +
                  # Decides whether a variation lies in one of the untranslated regions of
                  # a transcript.
                  #
                  # The region between the transcript start and the coding region start is
                  # the 5' UTR on the forward strand (strand == 1) and the 3' UTR otherwise;
                  # the region between the coding region end and the transcript end is the
                  # opposite one.
                  #
                  # The transcript's own first and last positions are treated as part of the
                  # transcript (inclusive comparison), so a variation sitting exactly on
                  # them is reported as UTR instead of being skipped.
                  #
                  # @param vf variation feature; must respond to +seq_region_start+ and +seq_region_end+
                  # @param t transcript; must respond to +seq_region_start+, +seq_region_end+,
                  #   +coding_region_genomic_start+, +coding_region_genomic_end+ and +strand+
                  # @return [String, nil] "5PRIME_UTR", "3PRIME_UTR", or nil when the
                  #   variation is not fully inside either flank
                  def check_utr(vf,t)
                    if vf.seq_region_start >= t.seq_region_start && vf.seq_region_end < t.coding_region_genomic_start
                      # Flank located before the coding region in genomic coordinates.
                      t.strand == 1 ? "5PRIME_UTR" : "3PRIME_UTR"
                    elsif vf.seq_region_start > t.coding_region_genomic_end && vf.seq_region_end <= t.seq_region_end
                      # Flank located after the coding region in genomic coordinates.
                      t.strand == 1 ? "3PRIME_UTR" : "5PRIME_UTR"
                    end
                  end
         
     | 
| 
      
 225 
     | 
    
         
            +
                  
         
     | 
| 
      
 226 
     | 
    
         
            +
                  # Classifies a variation relative to the exon/intron structure of a
                  # transcript.
                  #
                  # The exon coordinate ranges of the transcript are rebuilt on every call
                  # and stored in @cache[:exons].
                  #
                  # Outcomes:
                  # * both ends outside every exon: "ESSENTIAL_SPLICE_SITE" when an exon lies
                  #   within 2 positions, "SPLICE_SITE" when one lies within 8 positions,
                  #   "INTRONIC" otherwise;
                  # * both ends inside an exon: "SPLICE_SITE" when within 3 positions of the
                  #   exon start (for the start) or the exon end (for the end), nil otherwise;
                  # * only one end inside an exon: "COMPLEX_INDEL".
                  #
                  # @param vf variation feature; must respond to +seq_region_start+,
                  #   +seq_region_end+ and +seq_region_strand+
                  # @param t transcript; must respond to +exons+, each exon responding to
                  #   +seq_region_start+ and +seq_region_end+
                  # @return [String, nil] consequence label, or nil for an exonic variation
                  #   away from the exon edges
                  def check_splice_site(vf,t)
                    # On any strand other than 1 the two coordinates are swapped.
                    # NOTE(review): the windows below are still built with plain genomic
                    # arithmetic after the swap; confirm this is intended for multi-base
                    # reverse-strand features.
                    var_start, var_end =
                      if vf.seq_region_strand == 1
                        [vf.seq_region_start, vf.seq_region_end]
                      else
                        [vf.seq_region_end, vf.seq_region_start]
                      end

                    exons = t.exons.map { |ex| Range.new(ex.seq_region_start, ex.seq_region_end) }
                    @cache[:exons] = exons

                    exon_up = check_near_exons(var_start, exons)
                    exon_down = check_near_exons(var_end, exons)

                    if exon_up && exon_down
                      # The variation is inside an exon: splice site only near the edges.
                      near_edge = (var_start - exon_up.first) <= 3 || (exon_down.last - var_end) <= 3
                      return near_edge ? "SPLICE_SITE" : nil
                    end

                    # Exactly one end inside an exon: spans an intron/exon boundary.
                    return "COMPLEX_INDEL" if exon_up || exon_down

                    # From here on the variation is inside an intron.
                    if check_near_exons((var_start - 2)..var_start, exons) ||
                       check_near_exons(var_end..(var_end + 2), exons)
                      return "ESSENTIAL_SPLICE_SITE"
                    end

                    # The upstream window must run from var_start-8 up to var_start; a
                    # window written the other way round never overlaps an upstream exon.
                    if check_near_exons((var_start - 8)..var_start, exons) ||
                       check_near_exons(var_end..(var_end + 8), exons)
                      return "SPLICE_SITE"
                    end

                    "INTRONIC"
                  end
         
     | 
| 
      
 258 
     | 
    
         
            +
                  
         
     | 
| 
      
 259 
     | 
    
         
            +
                  # Predicts the effect of a variation on the protein encoded by a
                  # transcript.
                  #
                  # A variation spanning more than one position, or listing "-" among its
                  # alleles, is reported as a frameshift without further analysis.
                  # Otherwise the second allele of +allele_string+ is written into a copy
                  # of the CDS at the position given by +genomic2cds+, and the affected
                  # codon is translated with codon table 1 before and after the change.
                  # Only the second allele is evaluated.
                  #
                  # @param vf variation feature; must respond to +allele_string+,
                  #   +seq_region_start+ and +seq_region_end+
                  # @param t transcript; must respond to +genomic2cds+, +cds_seq+ and
                  #   +seq_region_strand+
                  # @return [Array] two elements: the consequence label ("FRAMESHIFT_CODING",
                  #   "STOP_GAINED", "STOP_LOST", "NON_SYNONYMOUS_CODING" or
                  #   "SYNONYMOUS_CODING") and the "ref/mut" amino acid string (nil for a
                  #   frameshift)
                  # @raise [RuntimeError] when the reference or the mutated codon is not
                  #   present in the codon table
                  def check_aa_change(vf,t)
                    alleles = vf.allele_string.split('/') # the different alleles of this variation

                    # An InDel is always reported as a frameshift.
                    if vf.seq_region_start != vf.seq_region_end || alleles.include?("-")
                      return "FRAMESHIFT_CODING", nil
                    end

                    # Position of the variation inside the CDS.
                    mutation_position = t.genomic2cds(vf.seq_region_start)

                    mutation_base = Bio::Sequence::NA.new(alleles[1])
                    mutation_base.reverse_complement! if t.seq_region_strand == -1

                    cds_sequence = t.cds_seq
                    mut_sequence = cds_sequence.dup
                    # Replace the base with the variant allele.
                    mut_sequence[mutation_position] = mutation_base.seq

                    # First index of the codon containing the mutated position.
                    codon_start = (mutation_position / 3) * 3
                    codon_span = codon_start..(codon_start + 2)

                    # Both codons are lower-cased before the table lookup so that the
                    # reference and the mutated codon are treated the same way.
                    refcodon = cds_sequence[codon_span].downcase
                    mutcodon = mut_sequence[codon_span].downcase

                    codontable = Bio::CodonTable[1]
                    refaa = codontable[refcodon]
                    mutaa = codontable[mutcodon]
                    raise RuntimeError, "Codon #{mutcodon} wasn't recognized." if mutaa.nil?
                    raise RuntimeError, "Codon #{refcodon} wasn't recognized." if refaa.nil?

                    pep_string = refaa + "/" + mutaa
                    consequence =
                      if mutaa == refaa
                        "SYNONYMOUS_CODING"
                      elsif mutaa == "*"
                        "STOP_GAINED"
                      elsif refaa == "*"
                        "STOP_LOST"
                      else
                        "NON_SYNONYMOUS_CODING"
                      end

                    return consequence, pep_string
                  end
         
     | 
| 
      
 301 
     | 
    
         
            +
                   
         
     | 
| 
      
 302 
     | 
    
         
            +
                   
         
     | 
| 
      
 303 
     | 
    
         
            +
                   # Finds the first exon range that contains a position or overlaps a
                   # window.
                   #
                   # @param feature [Integer, Range] a single coordinate, or a range of
                   #   coordinates
                   # @param exons_ranges [Array<Range>] exon coordinate ranges, scanned in
                   #   the given order
                   # @return [Range, false] the first matching exon range, or false when
                   #   none matches
                   def check_near_exons(feature,exons_ranges)
                     # The kind of feature does not change while scanning, so it is
                     # tested once rather than once per exon.
                     hit =
                       if feature.is_a?(Range)
                         exons_ranges.find { |exon| feature.first <= exon.last && exon.first <= feature.last }
                       else
                         exons_ranges.find { |exon| exon.include?(feature) }
                       end

                     # Callers rely on an explicit false for "no exon found".
                     hit || false
                   end
         
     | 
| 
      
 313 
     | 
    
         
            +
                  
         
     | 
| 
      
 314 
     | 
    
         
            +
                  
         
     | 
| 
      
 315 
     | 
    
         
            +
                end # VariationFeature
         
     | 
| 
      
 316 
     | 
    
         
            +
                
         
     | 
| 
      
 317 
     | 
    
         
            +
                # A TranscriptVariation describes where a VariationFeature falls on an
                # annotated transcript.
                #
                # The class is an ActiveRecord model on top of the Ensembl database; the
                # general documentation of the Ensembl module explains what that implies
                # and which methods are available.
                #
                # @example
                #   vf = Variation.find_by_name('rs10111').variation_feature
                #   vf.transcript_variations.each do |tv|
                #     puts tv.peptide_allele_string, tv.transcript.stable_id
                #   end
                #
                class TranscriptVariation < DBConnection
                  set_primary_key "transcript_variation_id"
                  belongs_to :variation_feature
                  validates_inclusion_of :consequence_type,
                                         :in => %w[
                                           ESSENTIAL_SPLICE_SITE
                                           STOP_GAINED
                                           STOP_LOST
                                           COMPLEX_INDEL
                                           FRAMESHIFT_CODING
                                           NON_SYNONYMOUS_CODING
                                           SPLICE_SITE
                                           PARTIAL_CODON
                                           SYNONYMOUS_CODING
                                           REGULATORY_REGION
                                           WITHIN_MATURE_miRNA
                                           5PRIME_UTR
                                           3PRIME_UTR
                                           INTRONIC
                                           NMD_TRANSCRIPT
                                           UPSTREAM
                                           DOWNSTREAM
                                           WITHIN_NON_CODING_GENE
                                           HGMD_MUTATION
                                         ],
                                         :message => "Consequence type not allowed!"

                  # Returns the consequence type as the raw string stored in the
                  # database (workaround: ActiveRecord does not parse MySQL SET fields).
                  #
                  # @return [String] the value before type casting, converted to a string
                  def consequence_type
                    raw_value = attributes_before_type_cast['consequence_type']
                    "#{raw_value}"
                  end

                  # Fetches the core transcript this record refers to, connecting to the
                  # core database first when no core connection is open yet. The
                  # connection parameters are taken from the variation connection.
                  #
                  # @return the record returned by Ensembl::Core::Transcript
                  def transcript
                    host, user, password, _db_name, port, species, release = Ensembl::Variation::DBConnection.get_info
                    unless Ensembl::Core::DBConnection.connected?
                      Ensembl::Core::DBConnection.connect(species, release.to_i,
                                                          :username => user, :password => password,
                                                          :host => host, :port => port)
                    end

                    # The lookup changed from release 58 (original author's note): try the
                    # stable id first and fall back to the numeric transcript id when a
                    # NoMethodError is raised.
                    begin
                      Ensembl::Core::Transcript.find_by_stable_id(self.transcript_stable_id)
                    rescue NoMethodError
                      Ensembl::Core::Transcript.find(self.transcript_id)
                    end
                  end
                end
         
     | 
| 
      
 373 
     | 
    
         
            +
                
         
     | 
| 
      
 374 
     | 
    
         
            +
              end
         
     | 
| 
      
 375 
     | 
    
         
            +
              
         
     | 
| 
      
 376 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -0,0 +1,444 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #
         
     | 
| 
      
 2 
     | 
    
         
            +
            # = ensembl/variation/variation.rb - Extension of ActiveRecord classes for Ensembl variation features
         
     | 
| 
      
 3 
     | 
    
         
            +
            #
         
     | 
| 
      
 4 
     | 
    
         
            +
            # Copyright::   Copyright (C) 2008 Francesco Strozzi <francesco.strozzi@gmail.com>
         
     | 
| 
      
 5 
     | 
    
         
            +
            # License::     The Ruby License
         
     | 
| 
      
 6 
     | 
    
         
            +
            #
         
     | 
| 
      
 7 
     | 
    
         
            +
            # @author Francesco Strozzi
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
             
     | 
| 
      
 10 
     | 
    
         
            +
            module Ensembl
         
     | 
| 
      
 11 
     | 
    
         
            +
              
         
     | 
| 
      
 12 
     | 
    
         
            +
              module Variation
         
     | 
| 
      
 13 
     | 
    
         
            +
                
         
     | 
| 
      
 14 
     | 
    
         
            +
                
         
     | 
| 
      
 15 
     | 
    
         
            +
                # The VariationFeature class gives information about the genomic position of 
         
     | 
| 
      
 16 
     | 
    
         
            +
                # each Variation, including also validation status and consequence type. 
         
     | 
| 
      
 17 
     | 
    
         
            +
                #
         
     | 
| 
      
 18 
     | 
    
         
            +
                # This class uses ActiveRecord to access data in the Ensembl database.
         
     | 
| 
      
 19 
     | 
    
         
            +
                # See the general documentation of the Ensembl module for
         
     | 
| 
      
 20 
     | 
    
         
            +
                # more information on what this means and what methods are available.
         
     | 
| 
      
 21 
     | 
    
         
            +
                #
         
     | 
| 
      
 22 
     | 
    
         
            +
                # @example
         
     | 
| 
      
 23 
     | 
    
         
            +
                #   # SLOWER QUERY
         
     | 
| 
      
 24 
     | 
    
         
            +
                #   vf = VariationFeature.find_by_variation_name('rs10111')
         
     | 
| 
      
 25 
     | 
    
         
            +
                #   # FASTER QUERY
         
     | 
| 
      
 26 
     | 
    
         
            +
                #   vf = Variation.find_by_name('rs10111').variation_feature
         
     | 
| 
      
 27 
     | 
    
         
            +
                #   
         
     | 
| 
      
 28 
     | 
    
         
            +
                #   puts vf.seq_region_start, vf.seq_region_end, vf.allele_string
         
     | 
| 
      
 29 
     | 
    
         
            +
                #   puts vf.variation.ancestral_allele
         
     | 
| 
      
 30 
     | 
    
         
            +
                #   genomic_region = vf.fetch_region (returns an Ensembl::Core::Slice)
         
     | 
| 
      
 31 
     | 
    
         
            +
                #   genomic_region.genes
         
     | 
| 
      
 32 
     | 
    
         
            +
                #   up_region,down_region = vf.flanking_seq (returns two Ensembl::Core::Slice)
         
     | 
| 
      
 33 
     | 
    
         
            +
                #
         
     | 
| 
      
 34 
     | 
    
         
            +
                class VariationFeature < DBConnection
         
     | 
| 
      
 35 
     | 
    
         
            +
                  # Table mapping and associations.
                  set_primary_key "variation_feature_id"
                  belongs_to :variation
                  has_many :tagged_variation_features
                  has_many :samples, :through => :tagged_variation_features
                  belongs_to :seq_region

                  # Consequence terms accepted for the consequence_types attribute.
                  validates_inclusion_of :consequence_types,
                                         :in => %w[
                                           intergenic_variant
                                           splice_acceptor_variant
                                           splice_donor_variant
                                           complex_change_in_transcript
                                           stop_lost
                                           coding_sequence_variant
                                           non_synonymous_codon
                                           stop_gained
                                           synonymous_codon
                                           frameshift_variant
                                           nc_transcript_variant
                                           mature_miRNA_variant
                                           NMD_transcript_variant
                                           5_prime_UTR_variant
                                           3_prime_UTR_variant
                                           incomplete_terminal_codon_variant
                                           intron_variant
                                           splice_region_variant
                                           5KB_downstream_variant
                                           500B_downstream_variant
                                           5KB_upstream_variant
                                           2KB_upstream_variant
                                           initiator_codon_change
                                           stop_retained_variant
                                           inframe_codon_gain
                                           inframe_codon_loss
                                           miRNA_target_site_variant
                                           pre_miRNA_variant
                                           regulatory_region_variant
                                           increased_binding_affinity
                                           decreased_binding_affinity
                                           binding_site_variant
                                         ],
                                         :message => "Consequence type not allowed!"
         
     | 
| 
      
 73 
     | 
    
         
            +
                  
         
     | 
| 
      
 74 
     | 
    
         
            +
                  # Returns the consequence types as the raw string stored in the
                  # database (workaround: ActiveRecord does not parse MySQL SET fields).
                  #
                  # @return [String] the value before type casting, converted to a string
                  def consequence_types
                    raw_value = attributes_before_type_cast['consequence_types']
                    "#{raw_value}"
                  end
         
     | 
| 
      
 77 
     | 
    
         
            +
                  
         
     | 
| 
      
 78 
     | 
    
         
            +
                  # Based on Perl API 'get_all_Genes' method for Variation class. Get a genomic region
         
     | 
| 
      
 79 
     | 
    
         
            +
                  # starting from the Variation coordinates, expanding the region upstream and
         
     | 
| 
      
 80 
     | 
    
         
            +
                  # downstream.
         
     | 
| 
      
 81 
     | 
    
         
            +
                  #
         
     | 
| 
      
 82 
     | 
    
         
            +
                  # @param [Integer] up Length of upstream flanking region
         
     | 
| 
      
 83 
     | 
    
         
            +
                  # @param [Integer] down Length of downstream flanking region
         
     | 
| 
      
 84 
     | 
    
         
            +
                  # @return [Slice] Slice object containing the variation
         
     | 
| 
      
 85 
     | 
    
         
            +
                  def fetch_region(up = 5000, down = 5000)
                    # SeqRegion record of the core database this feature is located on
                    region_record = core_connection(self.seq_region_id)
                    coord_system = Ensembl::Core::CoordSystem.find(region_record.coord_system_id)
                    # Widen the feature coordinates by the requested flanks on each side
                    Ensembl::Core::Slice.fetch_by_region(
                      coord_system.name,
                      region_record.name,
                      self.seq_region_start - up,
                      self.seq_region_end + down
                    )
                  end
         
     | 
| 
      
 90 
     | 
    
         
            +
                  
         
     | 
| 
      
 91 
     | 
    
         
            +
                  # Fetches the two slices flanking this variation feature.
                  #
                  # The coordinates come from the flanking_sequence record of the
                  # associated Variation; both slices are fetched on this feature's
                  # strand.
                  #
                  # @return [Array] two elements: the upstream slice, then the downstream slice
                  def flanking_seq
                    sr = core_connection(self.seq_region_id)
                    f = Variation.find(self.variation_id).flanking_sequence
                    # Look the coordinate system name up once; it is the same for both slices
                    coord_system_name = Ensembl::Core::CoordSystem.find(sr.coord_system_id).name
                    slice_up = Ensembl::Core::Slice.fetch_by_region(coord_system_name, sr.name, f.up_seq_region_start, f.up_seq_region_end, self.seq_region_strand)
                    slice_down = Ensembl::Core::Slice.fetch_by_region(coord_system_name, sr.name, f.down_seq_region_start, f.down_seq_region_end, self.seq_region_strand)
                    return slice_up, slice_down
                  end
         
     | 
| 
      
 98 
     | 
    
         
            +
                  
         
     | 
| 
      
 99 
     | 
    
         
            +
                  # Returns the TranscriptVariation records stored for this feature.
                  # When none are stored, the consequences are calculated instead.
                  def transcript_variations
                    stored = TranscriptVariation.find_all_by_variation_feature_id(self.variation_feature_id)
                    # the variation is already present in the database
                    return stored unless stored[0].nil?

                    # the variation is not stored in the database, so run the calculations
                    custom_transcript_variation(self, core_connection(self.seq_region_id))
                  end
         
     | 
| 
      
 108 
     | 
    
         
            +
                  
         
     | 
| 
      
 109 
     | 
    
         
            +
                  private 
         
     | 
| 
      
 110 
     | 
    
         
            +
                  
         
     | 
| 
      
 111 
     | 
    
         
            +
                  # Returns the core SeqRegion record for +seq_region_id+.
                  #
                  # Connects to the core database first when no connection exists,
                  # using the settings reported by the variation connection, and
                  # keeps fetched records in Ensembl::SESSION.seq_regions.
                  #
                  # Raises NameError when the core connection attempt fails.
                  def core_connection(seq_region_id)
                    unless Ensembl::Core::DBConnection.connected?
                      host, user, password, db_name, port, species, release = Ensembl::Variation::DBConnection.get_info
                      begin
                        Ensembl::Core::DBConnection.connect(species, release.to_i, :username => user, :password => password, :host => host, :port => port)
                      rescue
                        raise NameError, "Can't derive Core database name from #{db_name}. Are you using non conventional names?"
                      end
                    end

                    # Reuse the SeqRegion if it already exists in Ensembl::SESSION
                    if Ensembl::SESSION.seq_regions.has_key?(seq_region_id)
                      return Ensembl::SESSION.seq_regions[seq_region_id]
                    end

                    # Otherwise load it and remember it under its own id
                    fetched = Ensembl::Core::SeqRegion.find(seq_region_id)
                    Ensembl::SESSION.seq_regions[fetched.id] = fetched
                    fetched
                  end
         
     | 
| 
      
 130 
     | 
    
         
            +
                  
         
     | 
| 
      
 131 
     | 
    
         
            +
                  # Calculate a consequence type for a user-defined variation
         
     | 
| 
      
 132 
     | 
    
         
            +
                  # vf:: the variation feature to annotate
                  # sr:: the core SeqRegion record the feature lies on
                  #
                  # Returns an Array of new (unsaved) TranscriptVariation objects, one per
                  # transcript found within 5000 bases of the variation, or a single
                  # "intergenic_variant" entry when no transcript is found.
                  def custom_transcript_variation(vf,sr)
                    @variation_name = vf.variation_name
                    @seq_region = sr

                    flank_up = 5000
                    flank_down = 5000

                    # retrieve the slice of the genomic region where the variation is located;
                    # the coordinates are ordered first in case start is greater than end
                    low, high = [vf.seq_region_start, vf.seq_region_end].minmax
                    region = Ensembl::Core::Slice.fetch_by_region(
                      Ensembl::Core::CoordSystem.find(sr.coord_system_id).name,
                      sr.name,
                      low - flank_up,
                      high + flank_down
                    )

                    calculated = [] # store all the calculated TranscriptVariations

                    # iterate through all the transcripts present in the region
                    genes = region.genes(true)
                    unless genes[0].nil?
                      genes.each do |gene|
                        gene.transcripts.each do |transcript|
                          @cache = {}

                          # create a new TranscriptVariation object for every transcript present
                          tv = TranscriptVariation.new()

                          # check if the variation is intergenic for this transcript (no effects)
                          tv.consequence_types = check_intergenic(vf, transcript)

                          # check if the variation is upstream or downstream of the transcript
                          if tv.consequence_types == ""
                            tv.consequence_types = check_upstream_downstream(vf, transcript)
                          end

                          # check partial codon
                          if tv.consequence_types == ""
                            tv.consequence_types = check_partial_codon(vf, transcript)
                          end

                          # if no consequence type is found, then the variation is inside the transcript:
                          # check for non coding gene
                          if tv.consequence_types == "" && transcript.biotype != 'protein_coding'
                            tv.consequence_types = check_non_coding(vf, transcript)
                          end

                          # if no consequence type is found, then check intron / exon boundaries
                          if tv.consequence_types == ""
                            tv.consequence_types = check_splice_site(vf, transcript)
                          end

                          # if no consequence type is found, check if the variation is inside UTRs
                          if tv.consequence_types == ""
                            tv.consequence_types = check_utr(vf, transcript)
                          end

                          # if no consequence type is found, then variation is inside an exon.
                          # Check the codon change
                          if tv.consequence_types == ""
                            tv.consequence_types, tv.pep_allele_string = check_aa_change(vf, transcript)
                          end

                          tv.feature_stable_id = transcript.stable_id

                          #tv.consequence_types = "intergenic_variant" if tv.consequence_types == ""
                          calculated << tv
                        end
                      end
                    end

                    # if there are no transcripts/genes within 5000 bases upstream and downstream
                    # set the variation as INTERGENIC (no effects)
                    if calculated.empty?
                      intergenic = TranscriptVariation.new()
                      intergenic.consequence_types = "intergenic_variant"
                      calculated << intergenic
                    end

                    calculated
                  end
         
     | 
| 
      
 198 
     | 
    
         
            +
                  
         
     | 
| 
      
 199 
     | 
    
         
            +
                  ## CONSEQUENCE CALCULATION METHODS ##
         
     | 
| 
      
 200 
     | 
    
         
            +
                  
         
     | 
| 
      
 201 
     | 
    
         
            +
                  # Returns "intergenic_variant" when the variation lies more than
                  # 5000 bases before the start or after the end of transcript +t+,
                  # otherwise nil.
                  def check_intergenic(vf,t)
                    far_before = vf.seq_region_end < t.seq_region_start &&
                                 (t.seq_region_start - vf.seq_region_end) > 5000
                    return "intergenic_variant" if far_before

                    far_after = vf.seq_region_start > t.seq_region_end &&
                                (vf.seq_region_start - t.seq_region_end) > 5000
                    return "intergenic_variant" if far_after

                    nil
                  end
         
     | 
| 
      
 209 
     | 
    
         
            +
                  
         
     | 
| 
      
 210 
     | 
    
         
            +
                  # Classifies a variation that lies outside transcript +t+, or that
                  # straddles one of its ends.
                  #
                  # Returns one of the upstream/downstream consequence strings for a
                  # variation entirely before or after the transcript (which one depends
                  # on the transcript strand and the distance), "complex_change_in_transcript"
                  # when the variation spans the transcript start or end, and nil otherwise.
                  def check_upstream_downstream(vf,t)
                    if vf.seq_region_end < t.seq_region_start
                      # variation ends before the transcript starts
                      gap = t.seq_region_start - vf.seq_region_end + 1
                      return "2KB_upstream_variant" if t.strand == 1 && gap <= 2000
                      return "500B_downstream_variant" if t.strand == -1 && gap <= 500

                      if t.strand == 1
                        return "5KB_upstream_variant"
                      else
                        return "5KB_downstream_variant"
                      end
                    end

                    if vf.seq_region_start > t.seq_region_end
                      # variation starts after the transcript ends
                      gap = vf.seq_region_start - t.seq_region_end + 1
                      return "2KB_upstream_variant" if t.strand == -1 && gap <= 2000
                      return "500B_downstream_variant" if t.strand == 1 && gap <= 500

                      if t.strand == 1
                        return "5KB_downstream_variant"
                      else
                        return "5KB_upstream_variant"
                      end
                    end

                    # check if it's an InDel and if it overlaps the transcript start / end
                    spans_start = t.seq_region_start > vf.seq_region_start && t.seq_region_start < vf.seq_region_end
                    return "complex_change_in_transcript" if spans_start

                    spans_end = t.seq_region_end > vf.seq_region_start && t.seq_region_end < vf.seq_region_end
                    return "complex_change_in_transcript" if spans_end

                    nil
                  end
         
     | 
| 
      
 237 
     | 
    
         
            +
                  
         
     | 
| 
      
 238 
     | 
    
         
            +
                  # Maps the biotype of a non-coding transcript +t+ to a consequence
                  # string. +vf+ is accepted for symmetry with the other checks but
                  # is not read.
                  def check_non_coding(vf,t)
                    case t.biotype
                    when "miRNA"
                      "mature_miRNA_variant"
                    when "nonsense_mediated_decay"
                      "NMD_transcript_variant"
                    else
                      "nc_transcript_variant"
                    end
                  end
         
     | 
| 
      
 247 
     | 
    
         
            +
                  
         
     | 
| 
      
 248 
     | 
    
         
            +
                  # Classifies a variation lying in an untranslated region of transcript +t+.
                  #
                  # The region between the transcript start and the coding start is the
                  # 5' UTR on the forward strand (strand == 1) and the 3' UTR otherwise;
                  # the region between the coding end and the transcript end is the reverse.
                  # The transcript boundaries are inclusive, so a variation sitting on the
                  # very first or very last base of the transcript is still a UTR variant.
                  #
                  # Returns the UTR consequence string, or nil when the variation is not
                  # fully contained in either UTR.
                  def check_utr(vf,t)
                    before_coding = vf.seq_region_start >= t.seq_region_start &&
                                    vf.seq_region_end < t.coding_region_genomic_start
                    if before_coding
                      return (t.strand == 1) ? "5_prime_UTR_variant" : "3_prime_UTR_variant"
                    end

                    after_coding = vf.seq_region_start > t.coding_region_genomic_end &&
                                   vf.seq_region_end <= t.seq_region_end
                    if after_coding
                      return (t.strand == 1) ? "3_prime_UTR_variant" : "5_prime_UTR_variant"
                    end

                    nil
                  end
         
     | 
| 
      
 256 
     | 
    
         
            +
                  
         
     | 
| 
      
 257 
     | 
    
         
            +
                  # Classifies a variation relative to the intron/exon boundaries of
                  # transcript +t+.
                  #
                  # The exon coordinates are stored as Ranges in @cache[:exons] and are
                  # probed through check_near_exons, first with the variation ends and
                  # then with 2-base and 8-base windows around them.
                  #
                  # Returns "splice_donor_variant", "splice_acceptor_variant",
                  # "splice_region_variant", "intron_variant" or
                  # "complex_change_in_transcript"; nil when the variation is inside an
                  # exon and more than 3 bases away from both exon ends.
                  def check_splice_site(vf,t)
                    # the two coordinates are swapped when the feature is not on strand 1
                    var_start, var_end = (vf.seq_region_strand == 1) ? [vf.seq_region_start, vf.seq_region_end] : [vf.seq_region_end, vf.seq_region_start]
                    exon_ranges = t.exons.map { |ex| Range.new(ex.seq_region_start, ex.seq_region_end) }
                    @cache[:exons] = exon_ranges

                    exon_up = check_near_exons(var_start, exon_ranges)
                    exon_down = check_near_exons(var_end, exon_ranges)

                    if !exon_up && !exon_down # we are inside an intron
                      # checking boundaries: 2 bases on either side first
                      if check_near_exons(var_start-2..var_start, exon_ranges)
                        return (t.strand == 1) ? "splice_donor_variant" : "splice_acceptor_variant"
                      end
                      if check_near_exons(var_end..var_end+2, exon_ranges)
                        return (t.strand == 1) ? "splice_acceptor_variant" : "splice_donor_variant"
                      end

                      # then 8 bases on either side; the upstream window has to run from
                      # var_start-8 up to var_start (a range starting at var_start+8 is
                      # empty and could never match an exon)
                      near_exon_up_8bp = check_near_exons(var_start-8..var_start, exon_ranges)
                      near_exon_down_8bp = check_near_exons(var_end..var_end+8, exon_ranges)
                      return (near_exon_up_8bp || near_exon_down_8bp) ? "splice_region_variant" : "intron_variant"
                    elsif exon_up && exon_down # the variation is inside an exon
                      # check if it is a splice site
                      if (var_start - exon_up.first) <= 3 || (exon_down.last - var_end) <= 3
                        return "splice_region_variant"
                      end
                    else # a complex indel spanning intron/exon boundary
                      return "complex_change_in_transcript"
                    end

                    nil
                  end
         
     | 
| 
      
 291 
     | 
    
         
            +
                  
         
     | 
| 
      
 292 
     | 
    
         
            +
                  def check_aa_change(vf,t)
         
     | 
| 
      
 293 
     | 
    
         
            +
                      alleles = vf.allele_string.split('/') # get the different alleles for this variation          
         
     | 
| 
      
 294 
     | 
    
         
            +
             
     | 
| 
      
 295 
     | 
    
         
            +
                      # Find the position inside the CDS    
         
     | 
| 
      
 296 
     | 
    
         
            +
                      mutation_position = (@cache[:mutation_positon]) ? @cache[:mutation_positon] : t.genomic2cds(vf.seq_region_start)
         
     | 
| 
      
 297 
     | 
    
         
            +
                      cds_sequence = (@cache[:cds_sequence]) ? @cache[:cds_sequence] : t.cds_seq
         
     | 
| 
      
 298 
     | 
    
         
            +
                      
         
     | 
| 
      
 299 
     | 
    
         
            +
                      if vf.allele_string =~/INSERTION|DELETION|MUTATION/
         
     | 
| 
      
 300 
     | 
    
         
            +
                        return "coding_sequence_variant",nil
         
     | 
| 
      
 301 
     | 
    
         
            +
                      end  
         
     | 
| 
      
 302 
     | 
    
         
            +
                      
         
     | 
| 
      
 303 
     | 
    
         
            +
                      mutation_base = Bio::Sequence::NA.new(alleles[1])
         
     | 
| 
      
 304 
     | 
    
         
            +
                      if t.seq_region_strand == -1
         
     | 
| 
      
 305 
     | 
    
         
            +
                         mutation_base.reverse_complement!
         
     | 
| 
      
 306 
     | 
    
         
            +
                      end
         
     | 
| 
      
 307 
     | 
    
         
            +
                      # The rank of the codon 
         
     | 
| 
      
 308 
     | 
    
         
            +
                      target_codon = (mutation_position)/3 + 1
         
     | 
| 
      
 309 
     | 
    
         
            +
                      mut_sequence = cds_sequence.dup
         
     | 
| 
      
 310 
     | 
    
         
            +
                      
         
     | 
| 
      
 311 
     | 
    
         
            +
                      # Replace base with the variant allele
         
     | 
| 
      
 312 
     | 
    
         
            +
                      if alleles[1] == "-" # a deletion
         
     | 
| 
      
 313 
     | 
    
         
            +
                        mut_sequence.gsub!(/#{alleles[0]}/,'')
         
     | 
| 
      
 314 
     | 
    
         
            +
                      else # insertion or SNP  
         
     | 
| 
      
 315 
     | 
    
         
            +
                        mut_sequence[mutation_position] = mutation_base.seq
         
     | 
| 
      
 316 
     | 
    
         
            +
                      end
         
     | 
| 
      
 317 
     | 
    
         
            +
                      
         
     | 
| 
      
 318 
     | 
    
         
            +
                      mutcodon =  mut_sequence[(target_codon*3 -3)..(target_codon*3-1 + alleles[1].length-1)]
         
     | 
| 
      
 319 
     | 
    
         
            +
                      refcodon =  cds_sequence[(target_codon*3 -3)..(target_codon*3-1 + alleles[0].length-1)]
         
     | 
| 
      
 320 
     | 
    
         
            +
                      codontable = Bio::CodonTable[1]
         
     | 
| 
      
 321 
     | 
    
         
            +
                      refaa = codontable[refcodon]
         
     | 
| 
      
 322 
     | 
    
         
            +
                      mutaa = codontable[mutcodon.downcase]
         
     | 
| 
      
 323 
     | 
    
         
            +
             
     | 
| 
      
 324 
     | 
    
         
            +
                      pep_string = refaa.to_s+"/"+mutaa.to_s
         
     | 
| 
      
 325 
     | 
    
         
            +
                      transcript_start = (t.strand == 1) ? t.coding_region_genomic_start : t.coding_region_genomic_end
         
     | 
| 
      
 326 
     | 
    
         
            +
                      if (vf.seq_region_start - transcript_start).abs <= 3
         
     | 
| 
      
 327 
     | 
    
         
            +
                        return "initiator_codon_change",pep_string
         
     | 
| 
      
 328 
     | 
    
         
            +
                      elsif (mutcodon.length > refcodon.length) && (mutcodon =~/^#{refcodon}/ || mutcodon =~/#{refcodon}$/)
         
     | 
| 
      
 329 
     | 
    
         
            +
                        return "inframe_codon_gain",pep_string
         
     | 
| 
      
 330 
     | 
    
         
            +
                      elsif (mutcodon.length < refcodon.length) && (refcodon =~/^#{mutcodon}/ || refcodon =~/#{mutcodon}$/)
         
     | 
| 
      
 331 
     | 
    
         
            +
                        return "inframe_codon_loss",pep_string
         
     | 
| 
      
 332 
     | 
    
         
            +
                      elsif vf.seq_region_start != vf.seq_region_end
         
     | 
| 
      
 333 
     | 
    
         
            +
                        # if the variation is an InDel then it produces a frameshift
         
     | 
| 
      
 334 
     | 
    
         
            +
                        return "frameshift_variant",nil   
         
     | 
| 
      
 335 
     | 
    
         
            +
                      elsif (mutaa == "*" and refaa == "*") && (refcodon != mutcodon.downcase)
         
     | 
| 
      
 336 
     | 
    
         
            +
                        return "stop_retained_variant"    
         
     | 
| 
      
 337 
     | 
    
         
            +
                      elsif mutaa == "*" and refaa != "*"
         
     | 
| 
      
 338 
     | 
    
         
            +
                        return "stop_gained",pep_string
         
     | 
| 
      
 339 
     | 
    
         
            +
                      elsif mutaa != "*" and refaa == "*"
         
     | 
| 
      
 340 
     | 
    
         
            +
                        return "stop_lost",pep_string
         
     | 
| 
      
 341 
     | 
    
         
            +
                      elsif mutaa != refaa
         
     | 
| 
      
 342 
     | 
    
         
            +
                        return "non_synonymous_codon",pep_string
         
     | 
| 
      
 343 
     | 
    
         
            +
                      elsif mutaa == refaa
         
     | 
| 
      
 344 
     | 
    
         
            +
                        return "synonymous_codon",pep_string
         
     | 
| 
      
 345 
     | 
    
         
            +
                      end
         
     | 
| 
      
 346 
     | 
    
         
            +
                       
         
     | 
| 
      
 347 
     | 
    
         
            +
                   end
         
     | 
| 
      
 348 
     | 
    
         
            +
                   
         
     | 
| 
      
 349 
     | 
    
         
            +
                   # Checks whether the variation sits on an incomplete terminal codon of
                   # the transcript's coding sequence.
                   #
                   # Side effect: the mapped CDS position and the CDS sequence are stored
                   # in @cache under :mutation_position and :cds_sequence.
                   #
                   # @param vf object responding to #seq_region_start
                   # @param t  object responding to #genomic2cds and #cds_seq
                   # @return [String, nil] "incomplete_terminal_codon_variant" when the
                   #   mapped position is within 3 of the CDS length and that length is
                   #   not a multiple of 3; nil in every other case, including when a
                   #   StandardError is raised while mapping or caching
                   def check_partial_codon(vf,t)
                     mutation_position = t.genomic2cds(vf.seq_region_start)
                     cds_sequence = t.cds_seq
                     @cache[:mutation_position] = mutation_position
                     @cache[:cds_sequence] = cds_sequence
                     # check if the mutation is on the last codon and if it's a partial codon
                     if (cds_sequence.length - mutation_position) <= 3
                       (cds_sequence.length % 3 == 0) ? nil : "incomplete_terminal_codon_variant"
                     end
                   rescue StandardError
                     # Best effort: ordinary failures simply mean "no partial codon found".
                     # Interrupt, SystemExit and other non-StandardError exceptions are
                     # deliberately left to propagate instead of being swallowed.
                     nil
                   end
         
     | 
| 
      
 363 
     | 
    
         
            +
                   
         
     | 
| 
      
 364 
     | 
    
         
            +
                   # Finds the first exon range that the given feature touches.
                   #
                   # @param feature a single position, or a Range of positions
                   # @param exons_ranges collection of exon ranges, scanned in order
                   # @return the first range that includes the position (or overlaps the
                   #   feature Range), or false when no range matches
                   def check_near_exons(feature,exons_ranges)
                     # Pick the matching rule once, based on the kind of feature given.
                     touches =
                       if feature.is_a? Range
                         lambda { |exon| (feature.first <= exon.last) && (exon.first <= feature.last) }
                       else
                         lambda { |exon| exon.include? feature }
                       end

                     exons_ranges.each { |exon| return exon if touches.call(exon) }
                     false
                   end
         
     | 
| 
      
 374 
     | 
    
         
            +
                  
         
     | 
| 
      
 375 
     | 
    
         
            +
                  
         
     | 
| 
      
 376 
     | 
    
         
            +
                end # VariationFeature
         
     | 
| 
      
 377 
     | 
    
         
            +
                
         
     | 
| 
      
 378 
     | 
    
         
            +
                # The TranscriptVariation class gives information about the position of 
         
     | 
| 
      
 379 
     | 
    
         
            +
                # a VariationFeature, mapped on an annotated transcript.
         
     | 
| 
      
 380 
     | 
    
         
            +
                #
         
     | 
| 
      
 381 
     | 
    
         
            +
                # This class uses ActiveRecord to access data in the Ensembl database.
         
     | 
| 
      
 382 
     | 
    
         
            +
                # See the general documentation of the Ensembl module for
         
     | 
| 
      
 383 
     | 
    
         
            +
                # more information on what this means and what methods are available.
         
     | 
| 
      
 384 
     | 
    
         
            +
                #
         
     | 
| 
      
 385 
     | 
    
         
            +
                # @example 
         
     | 
| 
      
 386 
     | 
    
         
            +
                #   vf = Variation.find_by_name('rs10111').variation_feature
         
     | 
| 
      
 387 
     | 
    
         
            +
                #   vf.transcript_variations.each do |tv|
         
     | 
| 
      
 388 
     | 
    
         
            +
                #     puts tv.peptide_allele_string, tv.transcript.stable_id    
         
     | 
| 
      
 389 
     | 
    
         
            +
                #   end
         
     | 
| 
      
 390 
     | 
    
         
            +
                #
         
     | 
| 
      
 391 
     | 
    
         
            +
                class TranscriptVariation < DBConnection
                  set_primary_key "transcript_variation_id"
                  belongs_to :variation_feature

                  # Only the consequence terms listed here pass validation.
                  validates_inclusion_of :consequence_types,
                                         :in => %w[
                                           intergenic_variant
                                           splice_acceptor_variant
                                           splice_donor_variant
                                           complex_change_in_transcript
                                           stop_lost
                                           coding_sequence_variant
                                           non_synonymous_codon
                                           stop_gained
                                           synonymous_codon
                                           frameshift_variant
                                           nc_transcript_variant
                                           mature_miRNA_variant
                                           NMD_transcript_variant
                                           5_prime_UTR_variant
                                           3_prime_UTR_variant
                                           incomplete_terminal_codon_variant
                                           intron_variant
                                           splice_region_variant
                                           5KB_downstream_variant
                                           500B_downstream_variant
                                           5KB_upstream_variant
                                           2KB_upstream_variant
                                           initiator_codon_change
                                           stop_retained_variant
                                           inframe_codon_gain
                                           inframe_codon_loss
                                           miRNA_target_site_variant
                                           pre_miRNA_variant
                                           regulatory_region_variant
                                           increased_binding_affinity
                                           decreased_binding_affinity
                                           binding_site_variant
                                         ],
                                         :message => "Consequence type not allowed!"

                  # Returns the 'consequence_types' value as a String built from the
                  # attribute before type casting (workaround: ActiveRecord does not
                  # parse SET fields in MySQL).
                  def consequence_types
                    raw_value = attributes_before_type_cast['consequence_types']
                    "#{raw_value}"
                  end

                  # Looks up the Core transcript whose stable id equals this record's
                  # feature_stable_id. If the Core database is not connected yet, it is
                  # connected first using the settings reported by the Variation
                  # connection (species, release, user, password, host, port).
                  def transcript
                    host, user, password, _db_name, port, species, release = Ensembl::Variation::DBConnection.get_info
                    unless Ensembl::Core::DBConnection.connected?
                      Ensembl::Core::DBConnection.connect(species, release.to_i,
                                                          :username => user,
                                                          :password => password,
                                                          :host => host,
                                                          :port => port)
                    end
                    Ensembl::Core::Transcript.find_by_stable_id(self.feature_stable_id)
                  end

                end
         
     | 
| 
      
 441 
     | 
    
         
            +
                
         
     | 
| 
      
 442 
     | 
    
         
            +
              end
         
     | 
| 
      
 443 
     | 
    
         
            +
              
         
     | 
| 
      
 444 
     | 
    
         
            +
            end
         
     |