bio-ensembl 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/Gemfile +20 -0
- data/Gemfile.lock +40 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +19 -0
- data/Rakefile +71 -0
- data/VERSION +1 -0
- data/bin/ensembl +40 -0
- data/bin/variation_effect_predictor +106 -0
- data/bio-ensembl.gemspec +190 -0
- data/lib/bio-ensembl.rb +65 -0
- data/lib/bio-ensembl/core/activerecord.rb +1812 -0
- data/lib/bio-ensembl/core/collection.rb +64 -0
- data/lib/bio-ensembl/core/project.rb +262 -0
- data/lib/bio-ensembl/core/slice.rb +657 -0
- data/lib/bio-ensembl/core/transcript.rb +409 -0
- data/lib/bio-ensembl/core/transform.rb +95 -0
- data/lib/bio-ensembl/db_connection.rb +205 -0
- data/lib/bio-ensembl/variation/activerecord.rb +536 -0
- data/lib/bio-ensembl/variation/variation_feature.rb +376 -0
- data/lib/bio-ensembl/variation/variation_feature62.rb +444 -0
- data/samples/ensembl_genomes_example.rb +60 -0
- data/samples/examples_perl_tutorial.rb +125 -0
- data/samples/small_example_ruby_api.rb +34 -0
- data/samples/variation_effect_predictor_data.txt +4 -0
- data/samples/variation_example.rb +67 -0
- data/test/data/seq_c6qbl.fa +10 -0
- data/test/data/seq_cso19_coding.fa +16 -0
- data/test/data/seq_cso19_transcript.fa +28 -0
- data/test/data/seq_drd3_gene.fa +838 -0
- data/test/data/seq_drd3_transcript.fa +22 -0
- data/test/data/seq_drd4_transcript.fa +24 -0
- data/test/data/seq_forward_composite.fa +1669 -0
- data/test/data/seq_par_boundary.fa +169 -0
- data/test/data/seq_rnd3_transcript.fa +47 -0
- data/test/data/seq_ub2r1_coding.fa +13 -0
- data/test/data/seq_ub2r1_gene.fa +174 -0
- data/test/data/seq_ub2r1_transcript.fa +26 -0
- data/test/data/seq_y.fa +2 -0
- data/test/default/test_connection.rb +60 -0
- data/test/default/test_releases.rb +130 -0
- data/test/ensembl_genomes/test_collection.rb +122 -0
- data/test/ensembl_genomes/test_gene.rb +46 -0
- data/test/ensembl_genomes/test_slice.rb +65 -0
- data/test/ensembl_genomes/test_variation.rb +38 -0
- data/test/helper.rb +18 -0
- data/test/release_50/core/test_project.rb +210 -0
- data/test/release_50/core/test_project_human.rb +52 -0
- data/test/release_50/core/test_relationships.rb +72 -0
- data/test/release_50/core/test_sequence.rb +170 -0
- data/test/release_50/core/test_slice.rb +116 -0
- data/test/release_50/core/test_transcript.rb +125 -0
- data/test/release_50/core/test_transform.rb +217 -0
- data/test/release_50/variation/test_activerecord.rb +138 -0
- data/test/release_50/variation/test_variation.rb +79 -0
- data/test/release_53/core/test_gene.rb +61 -0
- data/test/release_53/core/test_project.rb +91 -0
- data/test/release_53/core/test_project_human.rb +61 -0
- data/test/release_53/core/test_slice.rb +42 -0
- data/test/release_53/core/test_transform.rb +57 -0
- data/test/release_53/variation/test_activerecord.rb +137 -0
- data/test/release_53/variation/test_variation.rb +66 -0
- data/test/release_56/core/test_gene.rb +61 -0
- data/test/release_56/core/test_project.rb +91 -0
- data/test/release_56/core/test_slice.rb +49 -0
- data/test/release_56/core/test_transform.rb +57 -0
- data/test/release_56/variation/test_activerecord.rb +141 -0
- data/test/release_56/variation/test_consequence.rb +131 -0
- data/test/release_56/variation/test_variation.rb +63 -0
- data/test/release_60/core/test_gene.rb +61 -0
- data/test/release_60/core/test_project_human.rb +34 -0
- data/test/release_60/core/test_slice.rb +42 -0
- data/test/release_60/core/test_transcript.rb +120 -0
- data/test/release_60/core/test_transform.rb +57 -0
- data/test/release_60/variation/test_activerecord.rb +216 -0
- data/test/release_60/variation/test_consequence.rb +153 -0
- data/test/release_60/variation/test_variation.rb +64 -0
- data/test/release_62/core/test_gene.rb +42 -0
- data/test/release_62/variation/test_activerecord.rb +86 -0
- data/test/release_62/variation/test_consequence.rb +191 -0
- metadata +287 -0
@@ -0,0 +1,376 @@
|
|
1
|
+
#
|
2
|
+
# = ensembl/variation/variation.rb - Extension of ActiveRecord classes for Ensembl variation features
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2008 Francesco Strozzi <francesco.strozzi@gmail.com>
|
5
|
+
# License:: The Ruby License
|
6
|
+
#
|
7
|
+
# @author Francesco Strozzi
|
8
|
+
|
9
|
+
|
10
|
+
module Ensembl
|
11
|
+
|
12
|
+
module Variation
|
13
|
+
|
14
|
+
|
15
|
+
# The VariationFeature class gives information about the genomic position of
|
16
|
+
# each Variation, including also validation status and consequence type.
|
17
|
+
#
|
18
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
19
|
+
# See the general documentation of the Ensembl module for
|
20
|
+
# more information on what this means and what methods are available.
|
21
|
+
#
|
22
|
+
# @example
|
23
|
+
# # SLOWER QUERY
|
24
|
+
# vf = VariationFeature.find_by_variation_name('rs10111')
|
25
|
+
# # FASTER QUERY
|
26
|
+
# vf = Variation.find_by_name('rs10111').variation_feature
|
27
|
+
#
|
28
|
+
# puts vf.seq_region_start, vf.seq_region_end, vf.allele_string
|
29
|
+
# puts vf.variation.ancestral_allele
|
30
|
+
# genomic_region = vf.fetch_region (returns an Ensembl::Core::Slice)
|
31
|
+
# genomic_region.genes
|
32
|
+
# up_region,down_region = vf.flanking_seq (returns two Ensembl::Core::Slice)
|
33
|
+
#
|
34
|
+
class VariationFeature < DBConnection
|
35
|
+
# ActiveRecord configuration for VariationFeature: primary key, associations
# and the consequence_type values accepted by validation.
set_primary_key "variation_feature_id"
belongs_to :variation
has_many :tagged_variation_features
has_many :samples, :through => :tagged_variation_features
belongs_to :seq_region
validates_inclusion_of :consequence_type,
  :in => %w[
    ESSENTIAL_SPLICE_SITE
    STOP_GAINED
    STOP_LOST
    COMPLEX_INDEL
    FRAMESHIFT_CODING
    NON_SYNONYMOUS_CODING
    SPLICE_SITE
    PARTIAL_CODON
    SYNONYMOUS_CODING
    REGULATORY_REGION
    WITHIN_MATURE_miRNA
    5PRIME_UTR
    3PRIME_UTR
    INTRONIC
    NMD_TRANSCRIPT
    UPSTREAM
    DOWNSTREAM
    WITHIN_NON_CODING_GENE
    HGMD_MUTATION
  ],
  :message => "Consequence type not allowed!"
|
60
|
+
|
61
|
+
# Returns the raw consequence_type column value as a String.
#
# Workaround: ActiveRecord does not parse MySQL SET fields, so the value is
# read before type casting. A nil raw value is returned as "".
#
# @return [String] the untyped column value, "" when it is nil
def consequence_type
  "#{attributes_before_type_cast['consequence_type']}"
end
|
64
|
+
|
65
|
+
# Based on Perl API 'get_all_Genes' method for Variation class. Get a genomic region
# starting from the Variation coordinates, expanding the region upstream and
# downstream.
#
# @param [Integer] up Length of upstream flanking region
# @param [Integer] down Length of downstream flanking region
# @return [Slice] Slice object containing the variation
def fetch_region(up = 5000, down = 5000)
  sr = core_connection(seq_region_id)
  coord_system_name = Ensembl::Core::CoordSystem.find(sr.coord_system_id).name
  Ensembl::Core::Slice.fetch_by_region(coord_system_name, sr.name,
                                       seq_region_start - up,
                                       seq_region_end + down)
end
|
77
|
+
|
78
|
+
# Fetches the regions flanking this variation as two slices.
#
# The flank coordinates are read from the flanking_sequence of the Variation
# found by variation_id; both slices are requested with this feature's strand.
#
# @return [Array] the upstream slice followed by the downstream slice
def flanking_seq
  sr = core_connection(seq_region_id)
  flank = Variation.find(variation_id).flanking_sequence
  # Resolve the coordinate system name once and reuse it for both slices.
  coord_system_name = Ensembl::Core::CoordSystem.find(sr.coord_system_id).name
  slice_up = Ensembl::Core::Slice.fetch_by_region(coord_system_name, sr.name,
                                                  flank.up_seq_region_start,
                                                  flank.up_seq_region_end,
                                                  seq_region_strand)
  slice_down = Ensembl::Core::Slice.fetch_by_region(coord_system_name, sr.name,
                                                    flank.down_seq_region_start,
                                                    flank.down_seq_region_end,
                                                    seq_region_strand)
  [slice_up, slice_down]
end
|
85
|
+
|
86
|
+
# Returns the TranscriptVariation records stored for this feature. When none
# are stored, the consequences are calculated on the fly with
# custom_transcript_variation.
#
# @return [Array<TranscriptVariation>] stored or freshly calculated entries
def transcript_variations
  stored = TranscriptVariation.find_all_by_variation_feature_id(variation_feature_id)
  # the variation is already present in the database
  return stored unless stored[0].nil?

  # the variation is not stored in the database, so run the calculations
  sr = core_connection(seq_region_id)
  custom_transcript_variation(self, sr)
end
|
95
|
+
|
96
|
+
private
|
97
|
+
|
98
|
+
# Makes sure a Core database connection is open and returns the SeqRegion
# record for the given id, caching it in Ensembl::SESSION.seq_regions.
#
# @param [Integer] seq_region_id Identifier of the sequence region to load
# @return [Ensembl::Core::SeqRegion] the cached or freshly loaded record
# @raise [NameError] if connecting to the Core database fails
def core_connection(seq_region_id)
  unless Ensembl::Core::DBConnection.connected?
    host, user, password, db_name, port, species, release = Ensembl::Variation::DBConnection.get_info
    begin
      Ensembl::Core::DBConnection.connect(species, release.to_i,
                                          :username => user, :password => password,
                                          :host => host, :port => port)
    rescue
      # Every connection failure is reported as a NameError about the database name.
      raise NameError, "Can't derive Core database name from #{db_name}. Are you using non conventional names?"
    end
  end

  # Check if SeqRegion already exists in Ensembl::SESSION
  cache = Ensembl::SESSION.seq_regions
  return cache[seq_region_id] if cache.has_key?(seq_region_id)

  seq_region = Ensembl::Core::SeqRegion.find(seq_region_id)
  cache[seq_region.id] = seq_region
  seq_region
end
|
117
|
+
|
118
|
+
# Calculate a consequence type for a user-defined variation.
#
# Fetches the slice from seq_region_start - 5000 to seq_region_end + 4999 and
# builds one TranscriptVariation per transcript of every gene returned for
# it, trying the check_* methods in a fixed order until one yields a
# consequence. When no transcript is found, a single "INTERGENIC" entry is
# returned instead.
#
# @param [VariationFeature] vf Variation to classify
# @param [Ensembl::Core::SeqRegion] sr Sequence region the variation lies on
# @return [Array<TranscriptVariation>] calculated entries, never empty
def custom_transcript_variation(vf, sr)
  @variation_name = vf.variation_name
  @seq_region = sr

  downstream = 5000
  upstream = 5000
  tvs = [] # store all the calculated TranscriptVariations

  # retrieve the slice of the genomic region where the variation is located
  coord_system_name = Ensembl::Core::CoordSystem.find(sr.coord_system_id).name
  region = Ensembl::Core::Slice.fetch_by_region(coord_system_name, sr.name,
                                                vf.seq_region_start - upstream,
                                                vf.seq_region_end + downstream - 1)

  # iterate through all the transcripts present in the region
  genes = region.genes(true) # true = inclusive
  unless genes[0].nil?
    genes.each do |g|
      g.transcripts.each do |t|
        @cache = {} # per-transcript scratch space for the check_* methods
        tv = TranscriptVariation.new # one TranscriptVariation per transcript

        # The consequence_type reader returns "" while nothing has been
        # assigned, so "" means "no consequence found yet".

        # check if the variation is intergenic for this transcript (no effects)
        tv.consequence_type = check_intergenic(vf, t)

        # check if the variation is upstream or downstream of the transcript
        tv.consequence_type = check_upstream_downstream(vf, t) if tv.consequence_type == ""

        # from here on the variation is inside the transcript:
        # check for non coding gene
        tv.consequence_type = check_non_coding(vf, t) if tv.consequence_type == "" && t.biotype != 'protein_coding'

        # check intron / exon boundaries
        tv.consequence_type = check_splice_site(vf, t) if tv.consequence_type == ""

        # check if the variation is inside UTRs
        tv.consequence_type = check_utr(vf, t) if tv.consequence_type == ""

        # otherwise the variation is inside an exon: check the codon change
        if tv.consequence_type == ""
          tv.consequence_type, tv.peptide_allele_string = check_aa_change(vf, t)
        end

        begin # this changed from release 58
          tv.transcript_stable_id = t.stable_id
        rescue NoMethodError
          tv.transcript_id = t.id
        end

        # NOTE(review): "INTERGENIC" is not in the validates_inclusion_of list
        # declared on TranscriptVariation, so such an object would presumably
        # fail validation if it were saved -- confirm this is intended.
        tv.consequence_type = "INTERGENIC" if tv.consequence_type == ""
        tvs << tv
      end
    end
  end

  # if there are no transcripts/genes within 5000 bases upstream and
  # downstream set the variation as INTERGENIC (no effects)
  if tvs.size == 0
    tv = TranscriptVariation.new
    tv.consequence_type = "INTERGENIC"
    tvs << tv
  end

  tvs
end
|
179
|
+
|
180
|
+
## CONSEQUENCE CALCULATION FUNCTIONS ##
|
181
|
+
|
182
|
+
# Reports a variation lying more than 5000 bases away from a transcript.
#
# @param vf variation; seq_region_start and seq_region_end are read
# @param t transcript; seq_region_start and seq_region_end are read
# @return [String, nil] "INTERGENIC" when the variation ends more than 5000
#   bases before the transcript start or starts more than 5000 bases after
#   the transcript end, nil otherwise
def check_intergenic(vf, t)
  if vf.seq_region_end < t.seq_region_start && (t.seq_region_start - vf.seq_region_end) > 5000
    "INTERGENIC"
  elsif vf.seq_region_start > t.seq_region_end && (vf.seq_region_start - t.seq_region_end) > 5000
    "INTERGENIC"
  end
end
|
190
|
+
|
191
|
+
# Classifies a variation lying within 5000 bases outside a transcript, or an
# indel that spans one of the transcript ends.
#
# The genomic side is translated into UPSTREAM/DOWNSTREAM using the
# transcript strand: a strand of 1 maps "before the start" to UPSTREAM, any
# other strand value maps it to DOWNSTREAM (and vice versa after the end).
#
# @param vf variation; seq_region_start and seq_region_end are read
# @param t transcript; seq_region_start, seq_region_end and strand are read
# @return [String, nil] "UPSTREAM", "DOWNSTREAM", "COMPLEX_INDEL" or nil when
#   none of the conditions applies
def check_upstream_downstream(vf, t)
  if vf.seq_region_end < t.seq_region_start && (t.seq_region_start - vf.seq_region_end) <= 5000
    t.strand == 1 ? "UPSTREAM" : "DOWNSTREAM"
  elsif vf.seq_region_start > t.seq_region_end && (vf.seq_region_start - t.seq_region_end) <= 5000
    t.strand == 1 ? "DOWNSTREAM" : "UPSTREAM"
  # check if it's an InDel and if overlaps the transcript start / end
  elsif t.seq_region_start > vf.seq_region_start && t.seq_region_start < vf.seq_region_end
    "COMPLEX_INDEL"
  elsif t.seq_region_end > vf.seq_region_start && t.seq_region_end < vf.seq_region_end
    "COMPLEX_INDEL"
  end
end
|
205
|
+
|
206
|
+
# Classifies a variation located in a transcript that is not protein coding.
#
# A variation whose start and end differ is always reported as
# "COMPLEX_INDEL"; otherwise the result depends on the transcript biotype.
#
# @param vf variation; seq_region_start and seq_region_end are read
# @param t transcript; biotype is read
# @return [String] "COMPLEX_INDEL", "WITHIN_MATURE_miRNA", "NMD_TRANSCRIPT"
#   or "WITHIN_NON_CODING_GENE"
def check_non_coding(vf, t)
  return "COMPLEX_INDEL" unless vf.seq_region_start == vf.seq_region_end

  case t.biotype
  when "miRNA" then "WITHIN_MATURE_miRNA"
  when "nonsense_mediated_decay" then "NMD_TRANSCRIPT"
  else "WITHIN_NON_CODING_GENE"
  end
end
|
216
|
+
|
217
|
+
# Classifies a variation lying between a transcript end and its coding region.
#
# All comparisons are strict, so a variation touching the transcript
# boundary or the coding-region boundary itself is not reported here.
#
# @param vf variation; seq_region_start and seq_region_end are read
# @param t transcript; seq_region_start, seq_region_end,
#   coding_region_genomic_start, coding_region_genomic_end and strand are read
# @return [String, nil] "5PRIME_UTR", "3PRIME_UTR" or nil when the variation
#   is in neither flank
def check_utr(vf, t)
  if vf.seq_region_start > t.seq_region_start && vf.seq_region_end < t.coding_region_genomic_start
    # genomic left flank: 5' on strand 1, 3' otherwise
    t.strand == 1 ? "5PRIME_UTR" : "3PRIME_UTR"
  elsif vf.seq_region_start > t.coding_region_genomic_end && vf.seq_region_end < t.seq_region_end
    # genomic right flank: 3' on strand 1, 5' otherwise
    t.strand == 1 ? "3PRIME_UTR" : "5PRIME_UTR"
  end
end
|
225
|
+
|
226
|
+
# Classifies a variation relative to the exon/intron boundaries of a transcript.
#
# The exon ranges of the transcript are stored in @cache[:exons]. Both
# variation ends are then looked up in those ranges:
# * neither end in an exon: "ESSENTIAL_SPLICE_SITE" when an exon lies within
#   2 bases, "SPLICE_SITE" when one lies within 8 bases, else "INTRONIC";
# * both ends in an exon: "SPLICE_SITE" when within 3 bases of the exon edge,
#   otherwise nil so that later checks can classify the variation;
# * only one end in an exon: "COMPLEX_INDEL".
#
# @param vf variation; seq_region_start, seq_region_end and
#   seq_region_strand are read
# @param t transcript; exons (each with seq_region_start/seq_region_end) is read
# @return [String, nil] the consequence, or nil for a plain exonic variation
def check_splice_site(vf, t)
  var_start, var_end =
    if vf.seq_region_strand == 1
      [vf.seq_region_start, vf.seq_region_end]
    else
      [vf.seq_region_end, vf.seq_region_start]
    end
  exons = t.exons.map { |ex| Range.new(ex.seq_region_start, ex.seq_region_end) }
  @cache[:exons] = exons

  exon_up = check_near_exons(var_start, exons)
  exon_down = check_near_exons(var_end, exons)

  if !exon_up && !exon_down # we are inside an intron
    # checking boundaries
    if check_near_exons((var_start - 2)..var_start, exons) ||
       check_near_exons(var_end..(var_end + 2), exons)
      return "ESSENTIAL_SPLICE_SITE"
    end
    # The upstream window must reach back 8 bases. The previous
    # `var_start+8..var_start` was a reversed window that could never match.
    if check_near_exons((var_start - 8)..var_start, exons) ||
       check_near_exons(var_end..(var_end + 8), exons)
      return "SPLICE_SITE"
    end
    return "INTRONIC"
  end

  # a complex indel spanning intron/exon boundary
  return "COMPLEX_INDEL" unless exon_up && exon_down

  # the variation is inside an exon: check if it is a splice site
  return "SPLICE_SITE" if (var_start - exon_up.first) <= 3 || (exon_down.last - var_end) <= 3

  nil
end
|
258
|
+
|
259
|
+
# Determines the effect of a variation on the encoded amino acid.
#
# A variation whose start and end differ, or whose allele string contains
# "-", is reported as a frameshift. Otherwise the second allele replaces the
# base at the CDS position given by t.genomic2cds (reverse-complemented for a
# transcript on strand -1) and the reference and mutated codons are
# translated with codon table 1.
#
# @param vf variation; allele_string, seq_region_start and seq_region_end are read
# @param t transcript; genomic2cds, cds_seq and seq_region_strand are used
# @return [Array] consequence name and peptide string "ref/mut"; the peptide
#   string is nil for "FRAMESHIFT_CODING"
# @raise [RuntimeError] if a codon is not present in the codon table
def check_aa_change(vf, t)
  alleles = vf.allele_string.split('/') # get the different alleles for this variation
  # if the variation is an InDel then it produces a frameshift
  if vf.seq_region_start != vf.seq_region_end || alleles.include?("-")
    return "FRAMESHIFT_CODING", nil
  end

  # Find the position inside the CDS
  mutation_position = t.genomic2cds(vf.seq_region_start)

  mutation_base = Bio::Sequence::NA.new(alleles[1])
  mutation_base.reverse_complement! if t.seq_region_strand == -1

  cds_sequence = t.cds_seq
  # Work on a copy so the transcript's own CDS is left untouched.
  mut_sequence = cds_sequence.dup
  # Replace base with the variant allele
  mut_sequence[mutation_position] = mutation_base.seq

  # First index of the codon containing the mutated base
  codon_start = (mutation_position / 3) * 3
  codon_range = codon_start..(codon_start + 2)
  # Both codons are downcased before the table lookup.
  refcodon = cds_sequence[codon_range].downcase
  mutcodon = mut_sequence[codon_range].downcase

  codontable = Bio::CodonTable[1]
  refaa = codontable[refcodon]
  mutaa = codontable[mutcodon]
  raise RuntimeError, "Codon #{refcodon} wasn't recognized." if refaa.nil?
  raise RuntimeError, "Codon #{mutcodon} wasn't recognized." if mutaa.nil?

  pep_string = "#{refaa}/#{mutaa}"
  if mutaa == "*" && refaa != "*"
    return "STOP_GAINED", pep_string
  elsif mutaa != "*" && refaa == "*"
    return "STOP_LOST", pep_string
  elsif mutaa != refaa
    return "NON_SYNONYMOUS_CODING", pep_string
  else
    return "SYNONYMOUS_CODING", pep_string
  end
end
|
301
|
+
|
302
|
+
|
303
|
+
# Finds the first exon range that contains a position or overlaps a window.
#
# @param [Integer, Range] feature a single position, or a window compared by
#   its two end points
# @param [Array<Range>] exons_ranges exon coordinate ranges, searched in order
# @return [Range, false] the first matching exon range, false when none matches
def check_near_exons(feature, exons_ranges)
  match =
    if feature.is_a?(Range)
      exons_ranges.find { |exon| feature.first <= exon.last && exon.first <= feature.last }
    else
      exons_ranges.find { |exon| exon.include?(feature) }
    end
  match || false
end
|
313
|
+
|
314
|
+
|
315
|
+
end # VariationFeature
|
316
|
+
|
317
|
+
# The TranscriptVariation class gives information about the position of
# a VariationFeature, mapped on an annotated transcript.
#
# This class uses ActiveRecord to access data in the Ensembl database.
# See the general documentation of the Ensembl module for
# more information on what this means and what methods are available.
#
# @example
#   vf = Variation.find_by_name('rs10111').variation_feature
#   vf.transcript_variations.each do |tv|
#     puts tv.peptide_allele_string, tv.transcript.stable_id
#   end
#
class TranscriptVariation < DBConnection
  set_primary_key "transcript_variation_id"
  belongs_to :variation_feature
  validates_inclusion_of :consequence_type,
    :in => %w[
      ESSENTIAL_SPLICE_SITE
      STOP_GAINED
      STOP_LOST
      COMPLEX_INDEL
      FRAMESHIFT_CODING
      NON_SYNONYMOUS_CODING
      SPLICE_SITE
      PARTIAL_CODON
      SYNONYMOUS_CODING
      REGULATORY_REGION
      WITHIN_MATURE_miRNA
      5PRIME_UTR
      3PRIME_UTR
      INTRONIC
      NMD_TRANSCRIPT
      UPSTREAM
      DOWNSTREAM
      WITHIN_NON_CODING_GENE
      HGMD_MUTATION
    ],
    :message => "Consequence type not allowed!"

  # Returns the raw consequence_type column value as a String.
  #
  # Workaround: ActiveRecord does not parse MySQL SET fields, so the value is
  # read before type casting. A nil raw value is returned as "".
  #
  # @return [String] the untyped column value, "" when it is nil
  def consequence_type
    "#{attributes_before_type_cast['consequence_type']}"
  end

  # Looks up the Core transcript this record refers to, opening a Core
  # database connection first when none is open.
  #
  # @return [Ensembl::Core::Transcript] result of the Core lookup
  def transcript
    unless Ensembl::Core::DBConnection.connected?
      # The connection details are only needed when a connection must be opened.
      host, user, password, db_name, port, species, release = Ensembl::Variation::DBConnection.get_info
      Ensembl::Core::DBConnection.connect(species, release.to_i,
                                          :username => user, :password => password,
                                          :host => host, :port => port)
    end

    begin # this changed from release 58
      Ensembl::Core::Transcript.find_by_stable_id(transcript_stable_id)
    rescue NoMethodError
      # Fall back to the numeric transcript id when the stable-id lookup
      # raises NoMethodError.
      Ensembl::Core::Transcript.find(transcript_id)
    end
  end
end
|
373
|
+
|
374
|
+
end
|
375
|
+
|
376
|
+
end
|
@@ -0,0 +1,444 @@
|
|
1
|
+
#
|
2
|
+
# = ensembl/variation/variation.rb - Extension of ActiveRecord classes for Ensembl variation features
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2008 Francesco Strozzi <francesco.strozzi@gmail.com>
|
5
|
+
# License:: The Ruby License
|
6
|
+
#
|
7
|
+
# @author Francesco Strozzi
|
8
|
+
|
9
|
+
|
10
|
+
module Ensembl
|
11
|
+
|
12
|
+
module Variation
|
13
|
+
|
14
|
+
|
15
|
+
# The VariationFeature class gives information about the genomic position of
|
16
|
+
# each Variation, including also validation status and consequence type.
|
17
|
+
#
|
18
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
19
|
+
# See the general documentation of the Ensembl module for
|
20
|
+
# more information on what this means and what methods are available.
|
21
|
+
#
|
22
|
+
# @example
|
23
|
+
# # SLOWER QUERY
|
24
|
+
# vf = VariationFeature.find_by_variation_name('rs10111')
|
25
|
+
# # FASTER QUERY
|
26
|
+
# vf = Variation.find_by_name('rs10111').variation_feature
|
27
|
+
#
|
28
|
+
# puts vf.seq_region_start, vf.seq_region_end, vf.allele_string
|
29
|
+
# puts vf.variation.ancestral_allele
|
30
|
+
# genomic_region = vf.fetch_region (returns an Ensembl::Core::Slice)
|
31
|
+
# genomic_region.genes
|
32
|
+
# up_region,down_region = vf.flanking_seq (returns two Ensembl::Core::Slice)
|
33
|
+
#
|
34
|
+
class VariationFeature < DBConnection
|
35
|
+
# ActiveRecord configuration for VariationFeature: primary key, associations
# and the consequence_types values accepted by validation.
set_primary_key "variation_feature_id"
belongs_to :variation
has_many :tagged_variation_features
has_many :samples, :through => :tagged_variation_features
belongs_to :seq_region
validates_inclusion_of :consequence_types,
  :in => %w[
    intergenic_variant
    splice_acceptor_variant
    splice_donor_variant
    complex_change_in_transcript
    stop_lost
    coding_sequence_variant
    non_synonymous_codon
    stop_gained
    synonymous_codon
    frameshift_variant
    nc_transcript_variant
    mature_miRNA_variant
    NMD_transcript_variant
    5_prime_UTR_variant
    3_prime_UTR_variant
    incomplete_terminal_codon_variant
    intron_variant
    splice_region_variant
    5KB_downstream_variant
    500B_downstream_variant
    5KB_upstream_variant
    2KB_upstream_variant
    initiator_codon_change
    stop_retained_variant
    inframe_codon_gain
    inframe_codon_loss
    miRNA_target_site_variant
    pre_miRNA_variant
    regulatory_region_variant
    increased_binding_affinity
    decreased_binding_affinity
    binding_site_variant
  ],
  :message => "Consequence type not allowed!"
|
73
|
+
|
74
|
+
# Returns the raw consequence_types column value as a String.
#
# Workaround: ActiveRecord does not parse MySQL SET fields, so the value is
# read before type casting. A nil raw value is returned as "".
#
# @return [String] the untyped column value, "" when it is nil
def consequence_types
  "#{attributes_before_type_cast['consequence_types']}"
end
|
77
|
+
|
78
|
+
# Based on Perl API 'get_all_Genes' method for Variation class. Get a genomic region
# starting from the Variation coordinates, expanding the region upstream and
# downstream.
#
# @param [Integer] up Length of upstream flanking region
# @param [Integer] down Length of downstream flanking region
# @return [Slice] Slice object containing the variation
def fetch_region(up = 5000, down = 5000)
  sr = core_connection(seq_region_id)
  coord_system_name = Ensembl::Core::CoordSystem.find(sr.coord_system_id).name
  Ensembl::Core::Slice.fetch_by_region(coord_system_name, sr.name,
                                       seq_region_start - up,
                                       seq_region_end + down)
end
|
90
|
+
|
91
|
+
# Fetches the regions flanking this variation as two slices.
#
# The flank coordinates are read from the flanking_sequence of the Variation
# found by variation_id; both slices are requested with this feature's strand.
#
# @return [Array] the upstream slice followed by the downstream slice
def flanking_seq
  sr = core_connection(seq_region_id)
  flank = Variation.find(variation_id).flanking_sequence
  # Resolve the coordinate system name once and reuse it for both slices.
  coord_system_name = Ensembl::Core::CoordSystem.find(sr.coord_system_id).name
  slice_up = Ensembl::Core::Slice.fetch_by_region(coord_system_name, sr.name,
                                                  flank.up_seq_region_start,
                                                  flank.up_seq_region_end,
                                                  seq_region_strand)
  slice_down = Ensembl::Core::Slice.fetch_by_region(coord_system_name, sr.name,
                                                    flank.down_seq_region_start,
                                                    flank.down_seq_region_end,
                                                    seq_region_strand)
  [slice_up, slice_down]
end
|
98
|
+
|
99
|
+
# Returns the TranscriptVariation records stored for this feature. When none
# are stored, the consequences are calculated on the fly with
# custom_transcript_variation.
#
# @return [Array<TranscriptVariation>] stored or freshly calculated entries
def transcript_variations
  stored = TranscriptVariation.find_all_by_variation_feature_id(variation_feature_id)
  # the variation is already present in the database
  return stored unless stored[0].nil?

  # the variation is not stored in the database, so run the calculations
  sr = core_connection(seq_region_id)
  custom_transcript_variation(self, sr)
end
|
108
|
+
|
109
|
+
private
|
110
|
+
|
111
|
+
# Makes sure a Core database connection is open and returns the SeqRegion
# record for the given id, caching it in Ensembl::SESSION.seq_regions.
#
# @param [Integer] seq_region_id Identifier of the sequence region to load
# @return [Ensembl::Core::SeqRegion] the cached or freshly loaded record
# @raise [NameError] if connecting to the Core database fails
def core_connection(seq_region_id)
  unless Ensembl::Core::DBConnection.connected?
    host, user, password, db_name, port, species, release = Ensembl::Variation::DBConnection.get_info
    begin
      Ensembl::Core::DBConnection.connect(species, release.to_i,
                                          :username => user, :password => password,
                                          :host => host, :port => port)
    rescue
      # Every connection failure is reported as a NameError about the database name.
      raise NameError, "Can't derive Core database name from #{db_name}. Are you using non conventional names?"
    end
  end

  # Check if SeqRegion already exists in Ensembl::SESSION
  cache = Ensembl::SESSION.seq_regions
  return cache[seq_region_id] if cache.has_key?(seq_region_id)

  seq_region = Ensembl::Core::SeqRegion.find(seq_region_id)
  cache[seq_region.id] = seq_region
  seq_region
end
|
130
|
+
|
131
|
+
# Calculate a consequence type for a user-defined variation.
#
# Orders the variation coordinates, fetches the slice extended by 5000 bases
# on both sides and builds one TranscriptVariation per transcript of every
# gene returned for it, trying the check_* methods in a fixed order until one
# yields a consequence. When no transcript is found, a single
# "intergenic_variant" entry is returned instead.
#
# @param [VariationFeature] vf Variation to classify
# @param [Ensembl::Core::SeqRegion] sr Sequence region the variation lies on
# @return [Array<TranscriptVariation>] calculated entries, never empty
def custom_transcript_variation(vf, sr)
  @variation_name = vf.variation_name
  @seq_region = sr

  downstream = 5000
  upstream = 5000
  tvs = [] # store all the calculated TranscriptVariations

  # retrieve the slice of the genomic region where the variation is located;
  # the coordinates are put in ascending order first
  var_start, var_end = [vf.seq_region_start, vf.seq_region_end].minmax
  coord_system_name = Ensembl::Core::CoordSystem.find(sr.coord_system_id).name
  region = Ensembl::Core::Slice.fetch_by_region(coord_system_name, sr.name,
                                                var_start - upstream,
                                                var_end + downstream)

  # iterate through all the transcripts present in the region
  genes = region.genes(true) # true = inclusive
  unless genes[0].nil?
    genes.each do |g|
      g.transcripts.each do |t|
        @cache = {} # per-transcript scratch space for the check_* methods
        tv = TranscriptVariation.new # one TranscriptVariation per transcript

        # NOTE(review): the comparisons with "" presumably rely on the
        # consequence_types reader returning "" while nothing is assigned --
        # confirm against TranscriptVariation.

        # check if the variation is intergenic for this transcript (no effects)
        tv.consequence_types = check_intergenic(vf, t)

        # check if the variation is upstream or downstream of the transcript
        tv.consequence_types = check_upstream_downstream(vf, t) if tv.consequence_types == ""

        # check partial codon
        tv.consequence_types = check_partial_codon(vf, t) if tv.consequence_types == ""

        # from here on the variation is inside the transcript:
        # check for non coding gene
        tv.consequence_types = check_non_coding(vf, t) if tv.consequence_types == "" && t.biotype != 'protein_coding'

        # check intron / exon boundaries
        tv.consequence_types = check_splice_site(vf, t) if tv.consequence_types == ""

        # check if the variation is inside UTRs
        tv.consequence_types = check_utr(vf, t) if tv.consequence_types == ""

        # otherwise the variation is inside an exon: check the codon change
        if tv.consequence_types == ""
          tv.consequence_types, tv.pep_allele_string = check_aa_change(vf, t)
        end

        tv.feature_stable_id = t.stable_id

        #tv.consequence_types = "intergenic_variant" if tv.consequence_types == ""
        tvs << tv
      end
    end
  end

  # if there are no transcripts/genes within 5000 bases upstream and
  # downstream set the variation as intergenic (no effects)
  if tvs.size == 0
    tv = TranscriptVariation.new
    tv.consequence_types = "intergenic_variant"
    tvs << tv
  end

  tvs
end
|
198
|
+
|
199
|
+
## CONSEQUENCE CALCULATION METHODS ##
|
200
|
+
|
201
|
+
# Reports a variation lying more than 5000 bases away from a transcript.
#
# @param vf variation; seq_region_start and seq_region_end are read
# @param t transcript; seq_region_start and seq_region_end are read
# @return [String, nil] "intergenic_variant" when the variation ends more
#   than 5000 bases before the transcript start or starts more than 5000
#   bases after the transcript end, nil otherwise
def check_intergenic(vf, t)
  if vf.seq_region_end < t.seq_region_start && (t.seq_region_start - vf.seq_region_end) > 5000
    "intergenic_variant"
  elsif vf.seq_region_start > t.seq_region_end && (vf.seq_region_start - t.seq_region_end) > 5000
    "intergenic_variant"
  end
end
|
209
|
+
|
210
|
+
# Classifies a variation lying outside a transcript, or an indel that spans
# one of the transcript ends.
#
# For a variation before the transcript start or after its end, the distance
# is computed as the coordinate difference plus one. Within 2000 bases on the
# upstream side the result is "2KB_upstream_variant", within 500 bases on the
# downstream side "500B_downstream_variant", otherwise the 5KB variant for
# that side. Which genomic side is upstream depends on t.strand.
#
# NOTE(review): this method does not itself cap the distance at 5000 bases;
# it presumably relies on check_intergenic having run first -- confirm.
#
# @param vf variation; seq_region_start and seq_region_end are read
# @param t transcript; seq_region_start, seq_region_end and strand are read
# @return [String, nil] the consequence name, or nil when the variation is
#   neither outside the transcript nor spanning one of its ends
def check_upstream_downstream(vf, t)
  if vf.seq_region_end < t.seq_region_start
    # variation lies before the transcript start
    distance = t.seq_region_start - vf.seq_region_end + 1
    return "2KB_upstream_variant" if t.strand == 1 && distance <= 2000
    return "500B_downstream_variant" if t.strand == -1 && distance <= 500
    return t.strand == 1 ? "5KB_upstream_variant" : "5KB_downstream_variant"
  end

  if vf.seq_region_start > t.seq_region_end
    # variation lies after the transcript end
    distance = vf.seq_region_start - t.seq_region_end + 1
    return "2KB_upstream_variant" if t.strand == -1 && distance <= 2000
    return "500B_downstream_variant" if t.strand == 1 && distance <= 500
    return t.strand == 1 ? "5KB_downstream_variant" : "5KB_upstream_variant"
  end

  # check if it's an InDel and if overlaps the transcript start / end
  if t.seq_region_start > vf.seq_region_start && t.seq_region_start < vf.seq_region_end
    "complex_change_in_transcript"
  elsif t.seq_region_end > vf.seq_region_start && t.seq_region_end < vf.seq_region_end
    "complex_change_in_transcript"
  end
end
|
237
|
+
|
238
|
+
# Classifies a variation located in a transcript that is not protein coding,
# based only on the transcript biotype.
#
# @param vf variation (not inspected by this method)
# @param t transcript; biotype is read
# @return [String] "mature_miRNA_variant", "NMD_transcript_variant" or
#   "nc_transcript_variant"
def check_non_coding(vf, t)
  case t.biotype
  when "miRNA" then "mature_miRNA_variant"
  when "nonsense_mediated_decay" then "NMD_transcript_variant"
  else "nc_transcript_variant"
  end
end
|
247
|
+
|
248
|
+
# Classifies a variation lying between a transcript end and its coding region.
#
# All comparisons are strict, so a variation touching the transcript
# boundary or the coding-region boundary itself is not reported here.
#
# @param vf variation; seq_region_start and seq_region_end are read
# @param t transcript; seq_region_start, seq_region_end,
#   coding_region_genomic_start, coding_region_genomic_end and strand are read
# @return [String, nil] "5_prime_UTR_variant", "3_prime_UTR_variant" or nil
#   when the variation is in neither flank
def check_utr(vf, t)
  if vf.seq_region_start > t.seq_region_start && vf.seq_region_end < t.coding_region_genomic_start
    # genomic left flank: 5' on strand 1, 3' otherwise
    t.strand == 1 ? "5_prime_UTR_variant" : "3_prime_UTR_variant"
  elsif vf.seq_region_start > t.coding_region_genomic_end && vf.seq_region_end < t.seq_region_end
    # genomic right flank: 3' on strand 1, 5' otherwise
    t.strand == 1 ? "3_prime_UTR_variant" : "5_prime_UTR_variant"
  end
end
|
256
|
+
|
257
|
+
# Classifies a variation relative to the exon/intron boundaries of a transcript.
#
# The exon ranges of the transcript are stored in @cache[:exons]. Both
# variation ends are then looked up in those ranges:
# * neither end in an exon: a donor/acceptor variant when an exon lies within
#   2 bases (which of the two depends on the side and on t.strand),
#   "splice_region_variant" when one lies within 8 bases, else "intron_variant";
# * both ends in an exon: "splice_region_variant" when within 3 bases of the
#   exon edge, otherwise nil so that later checks can classify the variation;
# * only one end in an exon: "complex_change_in_transcript".
#
# @param vf variation; seq_region_start, seq_region_end and
#   seq_region_strand are read
# @param t transcript; exons (each with seq_region_start/seq_region_end) and
#   strand are read
# @return [String, nil] the consequence, or nil for a plain exonic variation
def check_splice_site(vf, t)
  var_start, var_end =
    if vf.seq_region_strand == 1
      [vf.seq_region_start, vf.seq_region_end]
    else
      [vf.seq_region_end, vf.seq_region_start]
    end
  exons = t.exons.map { |ex| Range.new(ex.seq_region_start, ex.seq_region_end) }
  @cache[:exons] = exons

  exon_up = check_near_exons(var_start, exons)
  exon_down = check_near_exons(var_end, exons)

  if !exon_up && !exon_down # we are inside an intron
    # checking boundaries
    near_exon_up_2bp = check_near_exons((var_start - 2)..var_start, exons)
    near_exon_down_2bp = check_near_exons(var_end..(var_end + 2), exons)
    if near_exon_up_2bp
      return t.strand == 1 ? "splice_donor_variant" : "splice_acceptor_variant"
    elsif near_exon_down_2bp
      return t.strand == 1 ? "splice_acceptor_variant" : "splice_donor_variant"
    end
    # The upstream window must reach back 8 bases. The previous
    # `var_start+8..var_start` was a reversed window that could never match.
    if check_near_exons((var_start - 8)..var_start, exons) ||
       check_near_exons(var_end..(var_end + 8), exons)
      return "splice_region_variant"
    end
    return "intron_variant"
  end

  # a complex indel spanning intron/exon boundary
  return "complex_change_in_transcript" unless exon_up && exon_down

  # the variation is inside an exon: check if it is a splice site
  return "splice_region_variant" if (var_start - exon_up.first) <= 3 || (exon_down.last - var_end) <= 3

  nil
end
|
291
|
+
|
292
|
+
# Predicts the effect of a variation on the encoded peptide.
#
# The reference codon is cut out of the CDS of the transcript, the mutated
# codon out of a copy of the CDS that carries the variant allele, and both
# are translated with Bio::CodonTable[1]. Only the first alternative allele
# (the second element of the allele string) is evaluated.
#
# @param vf the variation feature; must respond to +allele_string+,
#   +seq_region_start+ and +seq_region_end+
# @param t the transcript; must respond to +genomic2cds+, +cds_seq+,
#   +strand+, +seq_region_strand+, +coding_region_genomic_start+ and
#   +coding_region_genomic_end+
# @return [Array] two elements: the consequence term and the peptide string
#   "ref/mut" (nil for "coding_sequence_variant" and "frameshift_variant")
def check_aa_change(vf,t)
  alleles = vf.allele_string.split('/') # get the different alleles for this variation

  # Find the position inside the CDS. check_partial_codon stores both values
  # under these @cache keys, so they are reused when present.
  mutation_position = @cache[:mutation_position] || t.genomic2cds(vf.seq_region_start)
  cds_sequence = @cache[:cds_sequence] || t.cds_seq

  # symbolic allele strings carry no sequence to build a codon from
  if vf.allele_string =~/INSERTION|DELETION|MUTATION/
    return "coding_sequence_variant",nil
  end

  mutation_base = Bio::Sequence::NA.new(alleles[1])
  if t.seq_region_strand == -1
    mutation_base.reverse_complement!
  end
  # The rank of the codon
  target_codon = (mutation_position)/3 + 1
  mut_sequence = cds_sequence.dup

  # Replace base with the variant allele
  if alleles[1] == "-" # a deletion
    # NOTE(review): this removes every match of the reference allele from the
    # CDS copy, not only the one at mutation_position - confirm the intent.
    mut_sequence.gsub!(/#{alleles[0]}/,'')
  else # insertion or SNP
    mut_sequence[mutation_position] = mutation_base.seq
  end

  codon_first = target_codon*3 - 3
  mutcodon = mut_sequence[codon_first..(target_codon*3-1 + alleles[1].length-1)]
  refcodon = cds_sequence[codon_first..(target_codon*3-1 + alleles[0].length-1)]
  codontable = Bio::CodonTable[1]
  refaa = codontable[refcodon]
  mutaa = codontable[mutcodon.downcase]

  pep_string = refaa.to_s+"/"+mutaa.to_s
  # genomic coordinate at which translation starts, depending on the strand
  transcript_start = (t.strand == 1) ? t.coding_region_genomic_start : t.coding_region_genomic_end
  if (vf.seq_region_start - transcript_start).abs <= 3
    return "initiator_codon_change",pep_string
  elsif (mutcodon.length > refcodon.length) && (mutcodon =~/^#{refcodon}/ || mutcodon =~/#{refcodon}$/)
    return "inframe_codon_gain",pep_string
  elsif (mutcodon.length < refcodon.length) && (refcodon =~/^#{mutcodon}/ || refcodon =~/#{mutcodon}$/)
    return "inframe_codon_loss",pep_string
  elsif vf.seq_region_start != vf.seq_region_end
    # if the variation is an InDel then it produces a frameshift
    return "frameshift_variant",nil
  elsif (mutaa == "*" && refaa == "*") && (refcodon != mutcodon.downcase)
    # returned as a pair like every other branch
    return "stop_retained_variant",pep_string
  elsif mutaa == "*" && refaa != "*"
    return "stop_gained",pep_string
  elsif mutaa != "*" && refaa == "*"
    return "stop_lost",pep_string
  elsif mutaa != refaa
    return "non_synonymous_codon",pep_string
  else
    return "synonymous_codon",pep_string
  end
end
|
348
|
+
|
349
|
+
# Checks whether a variation hits an incomplete codon at the end of the CDS.
#
# The CDS position of the variation and the CDS sequence are stored in
# @cache[:mutation_position] and @cache[:cds_sequence] as a side effect.
#
# @param vf the variation feature; must respond to +seq_region_start+
# @param t the transcript; must respond to +genomic2cds+ and +cds_seq+
# @return [String, nil] "incomplete_terminal_codon_variant" when the
#   variation is at most 3 positions away from the end of a CDS whose length
#   is not a multiple of three; nil otherwise, and nil when the lookup raises
#   a StandardError
def check_partial_codon(vf,t)
  mutation_position = t.genomic2cds(vf.seq_region_start)
  cds_sequence = t.cds_seq
  @cache[:mutation_position] = mutation_position
  @cache[:cds_sequence] = cds_sequence

  # check if the mutation is on the last codon and if it's a partial codon
  if (cds_sequence.length - mutation_position) <= 3
    return (cds_sequence.length % 3 == 0) ? nil : "incomplete_terminal_codon_variant"
  end
  nil
rescue StandardError
  # Best effort: a failed lookup means "no partial codon". Only StandardError
  # is swallowed so that Interrupt, SystemExit and similar still propagate.
  nil
end
|
363
|
+
|
364
|
+
# Looks up the first exon range that is hit by a feature.
#
# @param feature a single position, or a Range of positions
# @param exons_ranges [Array<Range>] the exon ranges to search, in order
# @return [Range, false] the first exon range that includes the position
#   (or, for a Range feature, that satisfies the overlap test below);
#   false when there is none
def check_near_exons(feature,exons_ranges)
  hit = exons_ranges.find do |candidate|
    if feature.is_a? Range
      # overlap test on the end points of both ranges
      (feature.first <= candidate.last) && (candidate.first <= feature.last)
    else
      candidate.include? feature
    end
  end
  hit || false
end
|
374
|
+
|
375
|
+
|
376
|
+
end # VariationFeature
|
377
|
+
|
378
|
+
# The TranscriptVariation class gives information about the position of
|
379
|
+
# a VariationFeature, mapped on an annotated transcript.
|
380
|
+
#
|
381
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
382
|
+
# See the general documentation of the Ensembl module for
|
383
|
+
# more information on what this means and what methods are available.
|
384
|
+
#
|
385
|
+
# @example
|
386
|
+
# vf = Variation.find_by_name('rs10111').variation_feature
|
387
|
+
# vf.transcript_variations.each do |tv|
|
388
|
+
# puts tv.peptide_allele_string, tv.transcript.stable_id
|
389
|
+
# end
|
390
|
+
#
|
391
|
+
class TranscriptVariation < DBConnection
  set_primary_key "transcript_variation_id"
  belongs_to :variation_feature

  # Only the consequence terms listed here pass the validation.
  validates_inclusion_of :consequence_types, :in => %w[
    intergenic_variant
    splice_acceptor_variant
    splice_donor_variant
    complex_change_in_transcript
    stop_lost
    coding_sequence_variant
    non_synonymous_codon
    stop_gained
    synonymous_codon
    frameshift_variant
    nc_transcript_variant
    mature_miRNA_variant
    NMD_transcript_variant
    5_prime_UTR_variant
    3_prime_UTR_variant
    incomplete_terminal_codon_variant
    intron_variant
    splice_region_variant
    5KB_downstream_variant
    500B_downstream_variant
    5KB_upstream_variant
    2KB_upstream_variant
    initiator_codon_change
    stop_retained_variant
    inframe_codon_gain
    inframe_codon_loss
    miRNA_target_site_variant
    pre_miRNA_variant
    regulatory_region_variant
    increased_binding_affinity
    decreased_binding_affinity
    binding_site_variant
  ], :message => "Consequence type not allowed!"

  # Returns the consequence_types column as a String built from the value
  # before type casting. Workaround: ActiveRecord does not parse SET fields
  # in MySQL.
  #
  # @return [String]
  def consequence_types
    raw_value = attributes_before_type_cast['consequence_types']
    "#{raw_value}"
  end

  # Fetches the transcript this record refers to from the Core database.
  # When no Core connection is open yet, one is opened with the settings
  # reported by the Variation connection.
  #
  # @return the result of Ensembl::Core::Transcript.find_by_stable_id for
  #   the feature_stable_id of this record
  def transcript
    host, user, password, _db_name, port, species, release = Ensembl::Variation::DBConnection.get_info
    unless Ensembl::Core::DBConnection.connected?
      Ensembl::Core::DBConnection.connect(species, release.to_i,
                                          :username => user, :password => password,
                                          :host => host, :port => port)
    end
    Ensembl::Core::Transcript.find_by_stable_id(self.feature_stable_id)
  end

end
|
441
|
+
|
442
|
+
end
|
443
|
+
|
444
|
+
end
|