bio-ensembl 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/Gemfile +20 -0
- data/Gemfile.lock +40 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +19 -0
- data/Rakefile +71 -0
- data/VERSION +1 -0
- data/bin/ensembl +40 -0
- data/bin/variation_effect_predictor +106 -0
- data/bio-ensembl.gemspec +190 -0
- data/lib/bio-ensembl.rb +65 -0
- data/lib/bio-ensembl/core/activerecord.rb +1812 -0
- data/lib/bio-ensembl/core/collection.rb +64 -0
- data/lib/bio-ensembl/core/project.rb +262 -0
- data/lib/bio-ensembl/core/slice.rb +657 -0
- data/lib/bio-ensembl/core/transcript.rb +409 -0
- data/lib/bio-ensembl/core/transform.rb +95 -0
- data/lib/bio-ensembl/db_connection.rb +205 -0
- data/lib/bio-ensembl/variation/activerecord.rb +536 -0
- data/lib/bio-ensembl/variation/variation_feature.rb +376 -0
- data/lib/bio-ensembl/variation/variation_feature62.rb +444 -0
- data/samples/ensembl_genomes_example.rb +60 -0
- data/samples/examples_perl_tutorial.rb +125 -0
- data/samples/small_example_ruby_api.rb +34 -0
- data/samples/variation_effect_predictor_data.txt +4 -0
- data/samples/variation_example.rb +67 -0
- data/test/data/seq_c6qbl.fa +10 -0
- data/test/data/seq_cso19_coding.fa +16 -0
- data/test/data/seq_cso19_transcript.fa +28 -0
- data/test/data/seq_drd3_gene.fa +838 -0
- data/test/data/seq_drd3_transcript.fa +22 -0
- data/test/data/seq_drd4_transcript.fa +24 -0
- data/test/data/seq_forward_composite.fa +1669 -0
- data/test/data/seq_par_boundary.fa +169 -0
- data/test/data/seq_rnd3_transcript.fa +47 -0
- data/test/data/seq_ub2r1_coding.fa +13 -0
- data/test/data/seq_ub2r1_gene.fa +174 -0
- data/test/data/seq_ub2r1_transcript.fa +26 -0
- data/test/data/seq_y.fa +2 -0
- data/test/default/test_connection.rb +60 -0
- data/test/default/test_releases.rb +130 -0
- data/test/ensembl_genomes/test_collection.rb +122 -0
- data/test/ensembl_genomes/test_gene.rb +46 -0
- data/test/ensembl_genomes/test_slice.rb +65 -0
- data/test/ensembl_genomes/test_variation.rb +38 -0
- data/test/helper.rb +18 -0
- data/test/release_50/core/test_project.rb +210 -0
- data/test/release_50/core/test_project_human.rb +52 -0
- data/test/release_50/core/test_relationships.rb +72 -0
- data/test/release_50/core/test_sequence.rb +170 -0
- data/test/release_50/core/test_slice.rb +116 -0
- data/test/release_50/core/test_transcript.rb +125 -0
- data/test/release_50/core/test_transform.rb +217 -0
- data/test/release_50/variation/test_activerecord.rb +138 -0
- data/test/release_50/variation/test_variation.rb +79 -0
- data/test/release_53/core/test_gene.rb +61 -0
- data/test/release_53/core/test_project.rb +91 -0
- data/test/release_53/core/test_project_human.rb +61 -0
- data/test/release_53/core/test_slice.rb +42 -0
- data/test/release_53/core/test_transform.rb +57 -0
- data/test/release_53/variation/test_activerecord.rb +137 -0
- data/test/release_53/variation/test_variation.rb +66 -0
- data/test/release_56/core/test_gene.rb +61 -0
- data/test/release_56/core/test_project.rb +91 -0
- data/test/release_56/core/test_slice.rb +49 -0
- data/test/release_56/core/test_transform.rb +57 -0
- data/test/release_56/variation/test_activerecord.rb +141 -0
- data/test/release_56/variation/test_consequence.rb +131 -0
- data/test/release_56/variation/test_variation.rb +63 -0
- data/test/release_60/core/test_gene.rb +61 -0
- data/test/release_60/core/test_project_human.rb +34 -0
- data/test/release_60/core/test_slice.rb +42 -0
- data/test/release_60/core/test_transcript.rb +120 -0
- data/test/release_60/core/test_transform.rb +57 -0
- data/test/release_60/variation/test_activerecord.rb +216 -0
- data/test/release_60/variation/test_consequence.rb +153 -0
- data/test/release_60/variation/test_variation.rb +64 -0
- data/test/release_62/core/test_gene.rb +42 -0
- data/test/release_62/variation/test_activerecord.rb +86 -0
- data/test/release_62/variation/test_consequence.rb +191 -0
- metadata +287 -0
@@ -0,0 +1,376 @@
|
|
1
|
+
#
|
2
|
+
# = ensembl/variation/variation_feature.rb - Extension of ActiveRecord classes for Ensembl variation features
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2008 Francesco Strozzi <francesco.strozzi@gmail.com>
|
5
|
+
# License:: The Ruby License
|
6
|
+
#
|
7
|
+
# @author Francesco Strozzi
|
8
|
+
|
9
|
+
|
10
|
+
module Ensembl
|
11
|
+
|
12
|
+
module Variation
|
13
|
+
|
14
|
+
|
15
|
+
# The VariationFeature class gives information about the genomic position of
|
16
|
+
# each Variation, including also validation status and consequence type.
|
17
|
+
#
|
18
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
19
|
+
# See the general documentation of the Ensembl module for
|
20
|
+
# more information on what this means and what methods are available.
|
21
|
+
#
|
22
|
+
# @example
|
23
|
+
# # SLOWER QUERY
|
24
|
+
# vf = VariationFeature.find_by_variation_name('rs10111')
|
25
|
+
# # FASTER QUERY
|
26
|
+
# vf = Variation.find_by_name('rs10111').variation_feature
|
27
|
+
#
|
28
|
+
# puts vf.seq_region_start, vf.seq_region_end, vf.allele_string
|
29
|
+
# puts vf.variation.ancestral_allele
|
30
|
+
# genomic_region = vf.fetch_region (returns an Ensembl::Core::Slice)
|
31
|
+
# genomic_region.genes
|
32
|
+
# up_region,down_region = vf.flanking_seq (returns two Ensembl::Core::Slice)
|
33
|
+
#
|
34
|
+
class VariationFeature < DBConnection
|
35
|
+
set_primary_key "variation_feature_id"
|
36
|
+
belongs_to :variation
|
37
|
+
has_many :tagged_variation_features
|
38
|
+
has_many :samples, :through => :tagged_variation_features
|
39
|
+
belongs_to :seq_region
|
40
|
+
validates_inclusion_of :consequence_type, :in => ['ESSENTIAL_SPLICE_SITE',
|
41
|
+
'STOP_GAINED',
|
42
|
+
'STOP_LOST',
|
43
|
+
'COMPLEX_INDEL',
|
44
|
+
'FRAMESHIFT_CODING',
|
45
|
+
'NON_SYNONYMOUS_CODING',
|
46
|
+
'SPLICE_SITE',
|
47
|
+
'PARTIAL_CODON',
|
48
|
+
'SYNONYMOUS_CODING',
|
49
|
+
'REGULATORY_REGION',
|
50
|
+
'WITHIN_MATURE_miRNA',
|
51
|
+
'5PRIME_UTR',
|
52
|
+
'3PRIME_UTR',
|
53
|
+
'INTRONIC',
|
54
|
+
'NMD_TRANSCRIPT',
|
55
|
+
'UPSTREAM',
|
56
|
+
'DOWNSTREAM',
|
57
|
+
'WITHIN_NON_CODING_GENE',
|
58
|
+
'HGMD_MUTATION'
|
59
|
+
], :message => "Consequence type not allowed!"
|
60
|
+
|
61
|
+
# Returns the raw (pre-typecast) value of the consequence_type column as a
# String; an unset value yields "".
# Workaround: ActiveRecord does not parse SET fields in MySQL.
def consequence_type
  raw_value = attributes_before_type_cast['consequence_type']
  "#{raw_value}"
end
|
64
|
+
|
65
|
+
# Based on Perl API 'get_all_Genes' method for Variation class. Get a genomic region
|
66
|
+
# starting from the Variation coordinates, expanding the region upstream and
|
67
|
+
# downstream.
|
68
|
+
#
|
69
|
+
# @param [Integer] up Length of upstream flanking region
|
70
|
+
# @param [Integer] down Length of downstream flanking region
|
71
|
+
# @return [Slice] Slice object containing the variation
|
72
|
+
def fetch_region(up = 5000, down = 5000)
  region = core_connection(self.seq_region_id)
  coord_system_name = Ensembl::Core::CoordSystem.find(region.coord_system_id).name
  # Widen the variation coordinates by the requested flanks on both sides.
  Ensembl::Core::Slice.fetch_by_region(coord_system_name,
                                       region.name,
                                       self.seq_region_start - up,
                                       self.seq_region_end + down)
end
|
77
|
+
|
78
|
+
# Fetches the two slices flanking this variation, using the coordinates
# stored in the variation's flanking_sequence record.
#
# @return [Array] two-element array: [upstream slice, downstream slice],
#   both fetched on this feature's seq_region_strand
def flanking_seq
  sr = core_connection(self.seq_region_id)
  f = Variation.find(self.variation_id).flanking_sequence
  # Both slices live on the same seq_region, so resolve the coordinate
  # system name once instead of issuing the same lookup twice.
  coord_system_name = Ensembl::Core::CoordSystem.find(sr.coord_system_id).name
  slice_up = Ensembl::Core::Slice.fetch_by_region(coord_system_name, sr.name, f.up_seq_region_start, f.up_seq_region_end, self.seq_region_strand)
  slice_down = Ensembl::Core::Slice.fetch_by_region(coord_system_name, sr.name, f.down_seq_region_start, f.down_seq_region_end, self.seq_region_strand)
  return slice_up, slice_down
end
|
85
|
+
|
86
|
+
# Returns the TranscriptVariation records stored for this feature, or, when
# none are stored, the ones computed by custom_transcript_variation.
def transcript_variations
  stored = TranscriptVariation.find_all_by_variation_feature_id(self.variation_feature_id)
  # Rows already present in the database are returned untouched.
  return stored unless stored[0].nil?

  # Nothing stored for this variation: run the calculations.
  seq_region = core_connection(self.seq_region_id)
  custom_transcript_variation(self, seq_region)
end
|
95
|
+
|
96
|
+
private
|
97
|
+
|
98
|
+
# Ensures a connection to the Core database exists and returns the SeqRegion
# with the given id, using Ensembl::SESSION.seq_regions as a cache.
#
# @param seq_region_id id of the SeqRegion to load
# @return the cached or freshly loaded Ensembl::Core::SeqRegion
# @raise [NameError] when the Core connection cannot be established
def core_connection(seq_region_id)
  unless Ensembl::Core::DBConnection.connected?
    host, user, password, db_name, port, species, release = Ensembl::Variation::DBConnection.get_info
    begin
      Ensembl::Core::DBConnection.connect(species, release.to_i, :username => user, :password => password, :host => host, :port => port)
    rescue => e
      # Keep the historical NameError and message, but append the underlying
      # failure so that e.g. authentication or network errors are not
      # misreported as a pure naming problem.
      raise NameError, "Can't derive Core database name from #{db_name}. Are you using non conventional names? (#{e.class}: #{e.message})"
    end
  end

  # Check if SeqRegion already exists in Ensembl::SESSION
  cache = Ensembl::SESSION.seq_regions
  return cache[seq_region_id] if cache.has_key?(seq_region_id)

  seq_region = Ensembl::Core::SeqRegion.find(seq_region_id)
  cache[seq_region.id] = seq_region
  seq_region
end
|
117
|
+
|
118
|
+
# Calculate a consequence type for a user-defined variation
|
119
|
+
# Builds one unsaved TranscriptVariation per transcript found around the
# variation, running the check_* methods in a fixed order until one of them
# yields a consequence type.
#
# @param vf the variation feature being evaluated
# @param sr the seq_region the variation lies on
# @return [Array] the computed TranscriptVariation objects (never empty)
def custom_transcript_variation(vf,sr)
  @variation_name = vf.variation_name
  @seq_region = sr

  flank_down = 5000
  flank_up = 5000
  results = [] # every calculated TranscriptVariation ends up here

  # slice of the genomic region surrounding the variation
  coord_system_name = Ensembl::Core::CoordSystem.find(sr.coord_system_id).name
  window = Ensembl::Core::Slice.fetch_by_region(coord_system_name,
                                                sr.name,
                                                vf.seq_region_start - flank_up,
                                                vf.seq_region_end + flank_down - 1)

  # walk through all the transcripts present in the region
  genes = window.genes(true)
  unless genes[0].nil?
    genes.each do |gene|
      gene.transcripts.each do |transcript|
        @cache = {}
        tv = TranscriptVariation.new # one object per transcript

        # is the variation intergenic for this transcript (no effects)?
        tv.consequence_type = check_intergenic(vf, transcript)

        # is the variation upstream or downstream of the transcript?
        if tv.consequence_type == ""
          tv.consequence_type = check_upstream_downstream(vf, transcript)
        end

        # still nothing: the variation is inside the transcript.
        # Handle non coding genes first.
        if tv.consequence_type == "" && transcript.biotype != 'protein_coding'
          tv.consequence_type = check_non_coding(vf, transcript)
        end

        # then intron / exon boundaries
        if tv.consequence_type == ""
          tv.consequence_type = check_splice_site(vf, transcript)
        end

        # then UTRs
        if tv.consequence_type == ""
          tv.consequence_type = check_utr(vf, transcript)
        end

        # finally the variation is inside an exon: check the codon change
        if tv.consequence_type == ""
          tv.consequence_type, tv.peptide_allele_string = check_aa_change(vf, transcript)
        end

        begin # this changed from release 58
          tv.transcript_stable_id = transcript.stable_id
        rescue NoMethodError
          tv.transcript_id = transcript.id
        end

        tv.consequence_type = "INTERGENIC" if tv.consequence_type == ""
        results << tv
      end
    end
  end

  # no transcripts/genes within 5000 bases upstream and downstream:
  # report the variation as INTERGENIC (no effects)
  if results.size == 0
    fallback = TranscriptVariation.new
    fallback.consequence_type = "INTERGENIC"
    results << fallback
  end

  results
end
|
179
|
+
|
180
|
+
## CONSEQUENCE CALCULATION FUNCTIONS ##
|
181
|
+
|
182
|
+
# Returns "INTERGENIC" when the variation lies more than 5000 bases before
# the transcript start or after the transcript end, nil otherwise.
def check_intergenic(vf,t)
  gap_before = t.seq_region_start - vf.seq_region_end
  gap_after = vf.seq_region_start - t.seq_region_end
  # A gap above 5000 is necessarily positive, i.e. the variation is fully
  # on that side of the transcript.
  return "INTERGENIC" if gap_before > 5000 || gap_after > 5000

  nil
end
|
190
|
+
|
191
|
+
# Classifies a variation lying within 5000 bases outside the transcript as
# "UPSTREAM" or "DOWNSTREAM" (depending on t.strand), or as "COMPLEX_INDEL"
# when it spans the transcript start or end. Returns nil otherwise.
def check_upstream_downstream(vf,t)
  v_start, v_end = vf.seq_region_start, vf.seq_region_end
  t_start, t_end = t.seq_region_start, t.seq_region_end

  if v_end < t_start && (t_start - v_end) <= 5000
    return t.strand == 1 ? "UPSTREAM" : "DOWNSTREAM"
  end
  if v_start > t_end && (v_start - t_end) <= 5000
    return t.strand == 1 ? "DOWNSTREAM" : "UPSTREAM"
  end

  # an InDel overlapping the transcript start / end
  spans_start = t_start > v_start && t_start < v_end
  return "COMPLEX_INDEL" if spans_start
  spans_end = t_end > v_start && t_end < v_end
  return "COMPLEX_INDEL" if spans_end

  nil
end
|
205
|
+
|
206
|
+
# Consequence for a variation inside a non protein-coding transcript.
# Anything that is not a single-position variation is a "COMPLEX_INDEL";
# otherwise the label is chosen from the transcript biotype.
def check_non_coding(vf,t)
  return "COMPLEX_INDEL" unless vf.seq_region_start == vf.seq_region_end

  case t.biotype
  when "miRNA" then "WITHIN_MATURE_miRNA"
  when "nonsense_mediated_decay" then "NMD_TRANSCRIPT"
  else "WITHIN_NON_CODING_GENE"
  end
end
|
216
|
+
|
217
|
+
# Returns "5PRIME_UTR" / "3PRIME_UTR" when the variation lies strictly
# between a transcript boundary and the coding region, nil otherwise.
# Which end counts as 5' depends on t.strand.
def check_utr(vf,t)
  v_start, v_end = vf.seq_region_start, vf.seq_region_end

  # between the transcript start and the genomic start of the coding region
  if v_start > t.seq_region_start && v_end < t.coding_region_genomic_start
    return t.strand == 1 ? "5PRIME_UTR" : "3PRIME_UTR"
  end

  # between the genomic end of the coding region and the transcript end
  if v_start > t.coding_region_genomic_end && v_end < t.seq_region_end
    return t.strand == 1 ? "3PRIME_UTR" : "5PRIME_UTR"
  end

  nil
end
|
225
|
+
|
226
|
+
# Classifies a variation relative to the intron / exon boundaries of a
# transcript. Exon ranges are stored in @cache[:exons] as a side effect.
#
# @param vf variation feature (seq_region_start/end/strand are read)
# @param t transcript (its exons are read)
# @return [String, nil] "ESSENTIAL_SPLICE_SITE" (intronic, within 2 bases of
#   an exon), "SPLICE_SITE" (intronic within 8 bases of an exon, or exonic
#   within 3 bases of an exon edge), "INTRONIC", "COMPLEX_INDEL" (only one
#   end falls inside an exon), or nil for a plain exonic position
def check_splice_site(vf,t)
  @cache[:exons] = t.exons.map { |ex| Range.new(ex.seq_region_start, ex.seq_region_end) }
  exons = @cache[:exons]
  var_start, var_end = (vf.seq_region_strand == 1) ? [vf.seq_region_start, vf.seq_region_end] : [vf.seq_region_end, vf.seq_region_start]

  exon_up = check_near_exons(var_start, exons)
  exon_down = check_near_exons(var_end, exons)

  if !exon_up && !exon_down # we are inside an intron
    # checking boundaries
    near_exon_up_2bp = check_near_exons((var_start - 2)..var_start, exons)
    near_exon_down_2bp = check_near_exons(var_end..(var_end + 2), exons)
    return "ESSENTIAL_SPLICE_SITE" if near_exon_up_2bp || near_exon_down_2bp

    # The upstream window must extend 8 bases *before* the variation; it was
    # previously written var_start+8..var_start, a descending range that can
    # never overlap an exon, so upstream splice sites were missed.
    near_exon_up_8bp = check_near_exons((var_start - 8)..var_start, exons)
    near_exon_down_8bp = check_near_exons(var_end..(var_end + 8), exons)
    return "SPLICE_SITE" if near_exon_up_8bp || near_exon_down_8bp

    return "INTRONIC"
  elsif exon_up && exon_down # the variation is inside an exon
    # check if it is a splice site
    if (var_start - exon_up.first) <= 3 || (exon_down.last - var_end) <= 3
      return "SPLICE_SITE"
    end
  else # a complex indel spanning intron/exon boundary
    return "COMPLEX_INDEL"
  end
  nil
end
|
258
|
+
|
259
|
+
# Determines the coding consequence of a variation located inside the CDS.
#
# Only the second allele of vf.allele_string is evaluated. The base is
# reverse-complemented when the transcript is on strand -1.
#
# @param vf variation feature (allele_string, seq_region_start/end are read)
# @param t transcript (genomic2cds, cds_seq, seq_region_strand are used)
# @return [Array] [consequence_type, peptide_allele_string]; the peptide
#   string is nil for "FRAMESHIFT_CODING"
# @raise [RuntimeError] when the mutated codon is not in the codon table
def check_aa_change(vf,t)
  alleles = vf.allele_string.split('/') # the different alleles for this variation

  # if the variation is an InDel then it produces a frameshift
  if vf.seq_region_start != vf.seq_region_end || alleles.include?("-")
    return "FRAMESHIFT_CODING", nil
  end

  # Find the position inside the CDS
  mutation_position = t.genomic2cds(vf.seq_region_start)

  mutation_base = Bio::Sequence::NA.new(alleles[1])
  mutation_base.reverse_complement! if t.seq_region_strand == -1

  # The rank of the codon and the CDS offsets it covers
  target_codon = mutation_position / 3 + 1
  codon_span = (target_codon * 3 - 3)..(target_codon * 3 - 1)

  cds_sequence = t.cds_seq
  mut_sequence = cds_sequence.dup
  # Replace base with the variant allele
  mut_sequence[mutation_position] = mutation_base.seq

  refcodon = cds_sequence[codon_span]
  mutcodon = mut_sequence[codon_span]
  codontable = Bio::CodonTable[1]
  refaa = codontable[refcodon]
  mutaa = codontable[mutcodon.downcase]
  if mutaa.nil?
    # The comma matters: without it Ruby parses RuntimeError("...") as a
    # method call and raises NoMethodError instead of this error.
    raise RuntimeError, "Codon #{mutcodon.downcase} wasn't recognized."
  end

  pep_string = refaa + "/" + mutaa
  if mutaa == "*" && refaa != "*"
    return "STOP_GAINED", pep_string
  elsif mutaa != "*" && refaa == "*"
    return "STOP_LOST", pep_string
  elsif mutaa != refaa
    return "NON_SYNONYMOUS_CODING", pep_string
  else
    return "SYNONYMOUS_CODING", pep_string
  end
end
|
301
|
+
|
302
|
+
|
303
|
+
# Finds the first exon range touched by +feature+.
#
# @param feature a single position, or a Range of positions
# @param exons_ranges [Array<Range>] exon coordinate ranges
# @return the first matching exon Range, or false when none matches
def check_near_exons(feature,exons_ranges)
  hit = exons_ranges.find do |exon|
    if feature.is_a?(Range)
      # two ranges overlap when each one starts before the other ends
      feature.first <= exon.last && exon.first <= feature.last
    else
      exon.include?(feature)
    end
  end
  hit || false
end
|
313
|
+
|
314
|
+
|
315
|
+
end # VariationFeature
|
316
|
+
|
317
|
+
# The TranscriptVariation class gives information about the position of
|
318
|
+
# a VariationFeature, mapped on an annotated transcript.
|
319
|
+
#
|
320
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
321
|
+
# See the general documentation of the Ensembl module for
|
322
|
+
# more information on what this means and what methods are available.
|
323
|
+
#
|
324
|
+
# @example
|
325
|
+
# vf = Variation.find_by_name('rs10111').variation_feature
|
326
|
+
# vf.transcript_variations.each do |tv|
|
327
|
+
# puts tv.peptide_allele_string, tv.transcript.stable_id
|
328
|
+
# end
|
329
|
+
#
|
330
|
+
class TranscriptVariation < DBConnection
|
331
|
+
set_primary_key "transcript_variation_id"
|
332
|
+
belongs_to :variation_feature
|
333
|
+
validates_inclusion_of :consequence_type, :in => ['ESSENTIAL_SPLICE_SITE',
|
334
|
+
'STOP_GAINED',
|
335
|
+
'STOP_LOST',
|
336
|
+
'COMPLEX_INDEL',
|
337
|
+
'FRAMESHIFT_CODING',
|
338
|
+
'NON_SYNONYMOUS_CODING',
|
339
|
+
'SPLICE_SITE',
|
340
|
+
'PARTIAL_CODON',
|
341
|
+
'SYNONYMOUS_CODING',
|
342
|
+
'REGULATORY_REGION',
|
343
|
+
'WITHIN_MATURE_miRNA',
|
344
|
+
'5PRIME_UTR',
|
345
|
+
'3PRIME_UTR',
|
346
|
+
'INTRONIC',
|
347
|
+
'NMD_TRANSCRIPT',
|
348
|
+
'UPSTREAM',
|
349
|
+
'DOWNSTREAM',
|
350
|
+
'WITHIN_NON_CODING_GENE',
|
351
|
+
'HGMD_MUTATION'
|
352
|
+
], :message => "Consequence type not allowed!"
|
353
|
+
|
354
|
+
# Returns the raw (pre-typecast) value of the consequence_type column as a
# String; an unset value yields "".
# Workaround: ActiveRecord does not parse SET fields in MySQL.
def consequence_type
  raw_value = attributes_before_type_cast['consequence_type']
  "#{raw_value}"
end
|
357
|
+
|
358
|
+
# Looks up the Core transcript this record refers to, connecting to the
# Core database first when no connection is open.
def transcript
  host, user, password, _db_name, port, species, release = Ensembl::Variation::DBConnection.get_info
  unless Ensembl::Core::DBConnection.connected?
    Ensembl::Core::DBConnection.connect(species,
                                        release.to_i,
                                        :username => user,
                                        :password => password,
                                        :host => host,
                                        :port => port)
  end

  # this changed from release 58: prefer the stable id and fall back to the
  # numeric transcript_id when the stable id accessor is missing
  begin
    Ensembl::Core::Transcript.find_by_stable_id(self.transcript_stable_id)
  rescue NoMethodError
    Ensembl::Core::Transcript.find(self.transcript_id)
  end
end
|
371
|
+
|
372
|
+
end
|
373
|
+
|
374
|
+
end
|
375
|
+
|
376
|
+
end
|
@@ -0,0 +1,444 @@
|
|
1
|
+
#
|
2
|
+
# = ensembl/variation/variation_feature62.rb - Extension of ActiveRecord classes for Ensembl variation features
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2008 Francesco Strozzi <francesco.strozzi@gmail.com>
|
5
|
+
# License:: The Ruby License
|
6
|
+
#
|
7
|
+
# @author Francesco Strozzi
|
8
|
+
|
9
|
+
|
10
|
+
module Ensembl
|
11
|
+
|
12
|
+
module Variation
|
13
|
+
|
14
|
+
|
15
|
+
# The VariationFeature class gives information about the genomic position of
|
16
|
+
# each Variation, including also validation status and consequence type.
|
17
|
+
#
|
18
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
19
|
+
# See the general documentation of the Ensembl module for
|
20
|
+
# more information on what this means and what methods are available.
|
21
|
+
#
|
22
|
+
# @example
|
23
|
+
# # SLOWER QUERY
|
24
|
+
# vf = VariationFeature.find_by_variation_name('rs10111')
|
25
|
+
# # FASTER QUERY
|
26
|
+
# vf = Variation.find_by_name('rs10111').variation_feature
|
27
|
+
#
|
28
|
+
# puts vf.seq_region_start, vf.seq_region_end, vf.allele_string
|
29
|
+
# puts vf.variation.ancestral_allele
|
30
|
+
# genomic_region = vf.fetch_region (returns an Ensembl::Core::Slice)
|
31
|
+
# genomic_region.genes
|
32
|
+
# up_region,down_region = vf.flanking_seq (returns two Ensembl::Core::Slice)
|
33
|
+
#
|
34
|
+
class VariationFeature < DBConnection
|
35
|
+
set_primary_key "variation_feature_id"
|
36
|
+
belongs_to :variation
|
37
|
+
has_many :tagged_variation_features
|
38
|
+
has_many :samples, :through => :tagged_variation_features
|
39
|
+
belongs_to :seq_region
|
40
|
+
validates_inclusion_of :consequence_types, :in => ['intergenic_variant',
|
41
|
+
'splice_acceptor_variant',
|
42
|
+
'splice_donor_variant',
|
43
|
+
'complex_change_in_transcript',
|
44
|
+
'stop_lost',
|
45
|
+
'coding_sequence_variant',
|
46
|
+
'non_synonymous_codon',
|
47
|
+
'stop_gained',
|
48
|
+
'synonymous_codon',
|
49
|
+
'frameshift_variant',
|
50
|
+
'nc_transcript_variant',
|
51
|
+
'mature_miRNA_variant',
|
52
|
+
'NMD_transcript_variant',
|
53
|
+
'5_prime_UTR_variant',
|
54
|
+
'3_prime_UTR_variant',
|
55
|
+
'incomplete_terminal_codon_variant',
|
56
|
+
'intron_variant',
|
57
|
+
'splice_region_variant',
|
58
|
+
'5KB_downstream_variant',
|
59
|
+
'500B_downstream_variant',
|
60
|
+
'5KB_upstream_variant',
|
61
|
+
'2KB_upstream_variant',
|
62
|
+
'initiator_codon_change',
|
63
|
+
'stop_retained_variant',
|
64
|
+
'inframe_codon_gain',
|
65
|
+
'inframe_codon_loss',
|
66
|
+
'miRNA_target_site_variant',
|
67
|
+
'pre_miRNA_variant',
|
68
|
+
'regulatory_region_variant',
|
69
|
+
'increased_binding_affinity',
|
70
|
+
'decreased_binding_affinity',
|
71
|
+
'binding_site_variant'
|
72
|
+
], :message => "Consequence type not allowed!"
|
73
|
+
|
74
|
+
# Returns the raw (pre-typecast) value of the consequence_types column as a
# String; an unset value yields "".
# Workaround: ActiveRecord does not parse SET fields in MySQL.
def consequence_types
  raw_value = attributes_before_type_cast['consequence_types']
  "#{raw_value}"
end
|
77
|
+
|
78
|
+
# Based on Perl API 'get_all_Genes' method for Variation class. Get a genomic region
|
79
|
+
# starting from the Variation coordinates, expanding the region upstream and
|
80
|
+
# downstream.
|
81
|
+
#
|
82
|
+
# @param [Integer] up Length of upstream flanking region
|
83
|
+
# @param [Integer] down Length of downstream flanking region
|
84
|
+
# @return [Slice] Slice object containing the variation
|
85
|
+
def fetch_region(up = 5000, down = 5000)
  region = core_connection(self.seq_region_id)
  coord_system_name = Ensembl::Core::CoordSystem.find(region.coord_system_id).name
  # Widen the variation coordinates by the requested flanks on both sides.
  Ensembl::Core::Slice.fetch_by_region(coord_system_name,
                                       region.name,
                                       self.seq_region_start - up,
                                       self.seq_region_end + down)
end
|
90
|
+
|
91
|
+
# Fetches the two slices flanking this variation, using the coordinates
# stored in the variation's flanking_sequence record.
#
# @return [Array] two-element array: [upstream slice, downstream slice],
#   both fetched on this feature's seq_region_strand
def flanking_seq
  sr = core_connection(self.seq_region_id)
  f = Variation.find(self.variation_id).flanking_sequence
  # Both slices live on the same seq_region, so resolve the coordinate
  # system name once instead of issuing the same lookup twice.
  coord_system_name = Ensembl::Core::CoordSystem.find(sr.coord_system_id).name
  slice_up = Ensembl::Core::Slice.fetch_by_region(coord_system_name, sr.name, f.up_seq_region_start, f.up_seq_region_end, self.seq_region_strand)
  slice_down = Ensembl::Core::Slice.fetch_by_region(coord_system_name, sr.name, f.down_seq_region_start, f.down_seq_region_end, self.seq_region_strand)
  return slice_up, slice_down
end
|
98
|
+
|
99
|
+
# Returns the TranscriptVariation records stored for this feature, or, when
# none are stored, the ones computed by custom_transcript_variation.
def transcript_variations
  stored = TranscriptVariation.find_all_by_variation_feature_id(self.variation_feature_id)
  # Rows already present in the database are returned untouched.
  return stored unless stored[0].nil?

  # Nothing stored for this variation: run the calculations.
  seq_region = core_connection(self.seq_region_id)
  custom_transcript_variation(self, seq_region)
end
|
108
|
+
|
109
|
+
private
|
110
|
+
|
111
|
+
# Ensures a connection to the Core database exists and returns the SeqRegion
# with the given id, using Ensembl::SESSION.seq_regions as a cache.
#
# @param seq_region_id id of the SeqRegion to load
# @return the cached or freshly loaded Ensembl::Core::SeqRegion
# @raise [NameError] when the Core connection cannot be established
def core_connection(seq_region_id)
  unless Ensembl::Core::DBConnection.connected?
    host, user, password, db_name, port, species, release = Ensembl::Variation::DBConnection.get_info
    begin
      Ensembl::Core::DBConnection.connect(species, release.to_i, :username => user, :password => password, :host => host, :port => port)
    rescue => e
      # Keep the historical NameError and message, but append the underlying
      # failure so that e.g. authentication or network errors are not
      # misreported as a pure naming problem.
      raise NameError, "Can't derive Core database name from #{db_name}. Are you using non conventional names? (#{e.class}: #{e.message})"
    end
  end

  # Check if SeqRegion already exists in Ensembl::SESSION
  cache = Ensembl::SESSION.seq_regions
  return cache[seq_region_id] if cache.has_key?(seq_region_id)

  seq_region = Ensembl::Core::SeqRegion.find(seq_region_id)
  cache[seq_region.id] = seq_region
  seq_region
end
|
130
|
+
|
131
|
+
# Calculate a consequence type for a user-defined variation
|
132
|
+
# Builds one unsaved TranscriptVariation per transcript found around the
# variation, running the check_* methods in a fixed order until one of them
# yields a consequence type.
#
# @param vf the variation feature being evaluated
# @param sr the seq_region the variation lies on
# @return [Array] the computed TranscriptVariation objects (never empty)
def custom_transcript_variation(vf,sr)
  @variation_name = vf.variation_name
  @seq_region = sr

  flank_down = 5000
  flank_up = 5000
  results = [] # every calculated TranscriptVariation ends up here

  # order the variation coordinates so that var_start <= var_end
  var_start, var_end =
    if vf.seq_region_start > vf.seq_region_end
      [vf.seq_region_end, vf.seq_region_start]
    else
      [vf.seq_region_start, vf.seq_region_end]
    end

  # slice of the genomic region surrounding the variation
  coord_system_name = Ensembl::Core::CoordSystem.find(sr.coord_system_id).name
  window = Ensembl::Core::Slice.fetch_by_region(coord_system_name,
                                                sr.name,
                                                var_start - flank_up,
                                                var_end + flank_down)

  # walk through all the transcripts present in the region
  genes = window.genes(true)
  unless genes[0].nil?
    genes.each do |gene|
      gene.transcripts.each do |transcript|
        @cache = {}
        tv = TranscriptVariation.new # one object per transcript

        # is the variation intergenic for this transcript (no effects)?
        tv.consequence_types = check_intergenic(vf, transcript)

        # is the variation upstream or downstream of the transcript?
        if tv.consequence_types == ""
          tv.consequence_types = check_upstream_downstream(vf, transcript)
        end

        # check partial codon
        if tv.consequence_types == ""
          tv.consequence_types = check_partial_codon(vf, transcript)
        end

        # still nothing: the variation is inside the transcript.
        # Handle non coding genes first.
        if tv.consequence_types == "" && transcript.biotype != 'protein_coding'
          tv.consequence_types = check_non_coding(vf, transcript)
        end

        # then intron / exon boundaries
        if tv.consequence_types == ""
          tv.consequence_types = check_splice_site(vf, transcript)
        end

        # then UTRs
        if tv.consequence_types == ""
          tv.consequence_types = check_utr(vf, transcript)
        end

        # finally the variation is inside an exon: check the codon change
        if tv.consequence_types == ""
          tv.consequence_types, tv.pep_allele_string = check_aa_change(vf, transcript)
        end

        tv.feature_stable_id = transcript.stable_id

        # NOTE: an empty consequence is deliberately left empty here; the
        # per-transcript "intergenic_variant" fallback is disabled.
        results << tv
      end
    end
  end

  # no transcripts/genes within 5000 bases upstream and downstream:
  # report the variation as intergenic (no effects)
  if results.size == 0
    fallback = TranscriptVariation.new
    fallback.consequence_types = "intergenic_variant"
    results << fallback
  end

  results
end
|
198
|
+
|
199
|
+
## CONSEQUENCE CALCULATION METHODS ##
|
200
|
+
|
201
|
+
# Returns "intergenic_variant" when the variation lies more than 5000 bases
# before the transcript start or after the transcript end, nil otherwise.
def check_intergenic(vf,t)
  gap_before = t.seq_region_start - vf.seq_region_end
  gap_after = vf.seq_region_start - t.seq_region_end
  # A gap above 5000 is necessarily positive, i.e. the variation is fully
  # on that side of the transcript.
  return "intergenic_variant" if gap_before > 5000 || gap_after > 5000

  nil
end
|
209
|
+
|
210
|
+
# Classifies a variation lying outside the transcript as one of the
# 2KB/5KB upstream or 500B/5KB downstream variants (depending on t.strand
# and on the distance computed below), or as "complex_change_in_transcript"
# when it spans the transcript start or end. Returns nil otherwise.
def check_upstream_downstream(vf,t)
  v_start, v_end = vf.seq_region_start, vf.seq_region_end
  t_start, t_end = t.seq_region_start, t.seq_region_end

  # variation entirely before the transcript start
  if v_end < t_start
    gap = t_start - v_end + 1
    return "2KB_upstream_variant" if t.strand == 1 && gap <= 2000
    return "500B_downstream_variant" if t.strand == -1 && gap <= 500
    return t.strand == 1 ? "5KB_upstream_variant" : "5KB_downstream_variant"
  end

  # variation entirely after the transcript end
  if v_start > t_end
    gap = v_start - t_end + 1
    return "2KB_upstream_variant" if t.strand == -1 && gap <= 2000
    return "500B_downstream_variant" if t.strand == 1 && gap <= 500
    return t.strand == 1 ? "5KB_downstream_variant" : "5KB_upstream_variant"
  end

  # an InDel overlapping the transcript start / end
  spans_start = t_start > v_start && t_start < v_end
  return "complex_change_in_transcript" if spans_start
  spans_end = t_end > v_start && t_end < v_end
  return "complex_change_in_transcript" if spans_end

  nil
end
|
237
|
+
|
238
|
+
# Consequence for a variation inside a non protein-coding transcript,
# chosen from the transcript biotype alone (vf is not consulted).
def check_non_coding(vf,t)
  case t.biotype
  when "miRNA" then "mature_miRNA_variant"
  when "nonsense_mediated_decay" then "NMD_transcript_variant"
  else "nc_transcript_variant"
  end
end
|
247
|
+
|
248
|
+
# Returns "5_prime_UTR_variant" / "3_prime_UTR_variant" when the variation
# lies strictly between a transcript boundary and the coding region, nil
# otherwise. Which end counts as 5' depends on t.strand.
def check_utr(vf,t)
  v_start, v_end = vf.seq_region_start, vf.seq_region_end

  # between the transcript start and the genomic start of the coding region
  if v_start > t.seq_region_start && v_end < t.coding_region_genomic_start
    return t.strand == 1 ? "5_prime_UTR_variant" : "3_prime_UTR_variant"
  end

  # between the genomic end of the coding region and the transcript end
  if v_start > t.coding_region_genomic_end && v_end < t.seq_region_end
    return t.strand == 1 ? "3_prime_UTR_variant" : "5_prime_UTR_variant"
  end

  nil
end
|
256
|
+
|
257
|
+
# Classifies a variation relative to the intron / exon boundaries of a
# transcript. Exon ranges are stored in @cache[:exons] as a side effect.
#
# @param vf variation feature (seq_region_start/end/strand are read)
# @param t transcript (its exons and strand are read)
# @return [String, nil] "splice_donor_variant" / "splice_acceptor_variant"
#   (intronic, within 2 bases of an exon), "splice_region_variant"
#   (intronic within 8 bases of an exon, or exonic within 3 bases of an exon
#   edge), "intron_variant", "complex_change_in_transcript" (only one end
#   falls inside an exon), or nil for a plain exonic position
def check_splice_site(vf,t)
  @cache[:exons] = t.exons.map { |ex| Range.new(ex.seq_region_start, ex.seq_region_end) }
  exons = @cache[:exons]
  var_start, var_end = (vf.seq_region_strand == 1) ? [vf.seq_region_start, vf.seq_region_end] : [vf.seq_region_end, vf.seq_region_start]

  exon_up = check_near_exons(var_start, exons)
  exon_down = check_near_exons(var_end, exons)

  if !exon_up && !exon_down # we are inside an intron
    # checking boundaries
    near_exon_up_2bp = check_near_exons((var_start - 2)..var_start, exons)
    near_exon_down_2bp = check_near_exons(var_end..(var_end + 2), exons)
    if near_exon_up_2bp
      return (t.strand == 1) ? "splice_donor_variant" : "splice_acceptor_variant"
    elsif near_exon_down_2bp
      return (t.strand == 1) ? "splice_acceptor_variant" : "splice_donor_variant"
    end

    # The upstream window must extend 8 bases *before* the variation; it was
    # previously written var_start+8..var_start, a descending range that can
    # never overlap an exon, so upstream splice regions were missed.
    near_exon_up_8bp = check_near_exons((var_start - 8)..var_start, exons)
    near_exon_down_8bp = check_near_exons(var_end..(var_end + 8), exons)
    return "splice_region_variant" if near_exon_up_8bp || near_exon_down_8bp

    return "intron_variant"
  elsif exon_up && exon_down # the variation is inside an exon
    # check if it is a splice site
    if (var_start - exon_up.first) <= 3 || (exon_down.last - var_end) <= 3
      return "splice_region_variant"
    end
  else # a complex indel spanning intron/exon boundary
    return "complex_change_in_transcript"
  end
  nil
end
|
291
|
+
|
292
|
+
# Works out the effect of a coding variation on the translated codon.
#
# The first allele of +vf.allele_string+ is treated as the reference and the
# second as the variant. The variant is written into a copy of the CDS, the
# affected reference and mutated codons are translated with codon table 1,
# and the pair is classified.
#
# The CDS position and sequence are taken from @cache[:mutation_position] and
# @cache[:cds_sequence] when present (check_partial_codon stores them under
# those keys); otherwise they are computed from the transcript.
#
# @param vf the variation feature; reads +allele_string+, +seq_region_start+
#   and +seq_region_end+
# @param t  the transcript; reads +genomic2cds+, +cds_seq+, +strand+,
#   +seq_region_strand+, +coding_region_genomic_start+ and
#   +coding_region_genomic_end+
# @return [Array] a two element array: the consequence term and the
#   "ref/mut" peptide string (nil for "coding_sequence_variant" and
#   "frameshift_variant")
def check_aa_change(vf,t)
  alleles = vf.allele_string.split('/') # get the different alleles for this variation

  # Find the position inside the CDS (0-based, as it is used to index the sequence)
  mutation_position = @cache[:mutation_position] || t.genomic2cds(vf.seq_region_start)
  cds_sequence = @cache[:cds_sequence] || t.cds_seq

  # symbolic allele strings cannot be translated
  if vf.allele_string =~ /INSERTION|DELETION|MUTATION/
    return "coding_sequence_variant",nil
  end

  mutation_base = Bio::Sequence::NA.new(alleles[1])
  # reverse-complement the variant allele for reverse-strand transcripts
  mutation_base.reverse_complement! if t.seq_region_strand == -1

  # The rank of the codon
  target_codon = (mutation_position)/3 + 1
  mut_sequence = cds_sequence.dup

  # Replace base with the variant allele
  if alleles[1] == "-" # a deletion
    # NOTE(review): gsub! removes every match of the reference allele in the
    # CDS copy, not only the bases at mutation_position, and the match is
    # case-sensitive. Left unchanged here - confirm the intended behaviour.
    mut_sequence.gsub!(/#{alleles[0]}/,'')
  else # insertion or SNP
    mut_sequence[mutation_position] = mutation_base.seq
  end

  codon_start = target_codon*3 - 3
  mutcodon = mut_sequence[codon_start..(target_codon*3-1 + alleles[1].length-1)]
  refcodon = cds_sequence[codon_start..(target_codon*3-1 + alleles[0].length-1)]
  codontable = Bio::CodonTable[1]
  refaa = codontable[refcodon]
  mutaa = codontable[mutcodon.downcase]

  pep_string = refaa.to_s+"/"+mutaa.to_s
  transcript_start = (t.strand == 1) ? t.coding_region_genomic_start : t.coding_region_genomic_end
  # NOTE(review): a genomic distance of 0..3 spans four bases; presumably only
  # the three bases of the first codon are meant - confirm before tightening.
  if (vf.seq_region_start - transcript_start).abs <= 3
    return "initiator_codon_change",pep_string
  elsif (mutcodon.length > refcodon.length) && (mutcodon =~ /^#{refcodon}/ || mutcodon =~ /#{refcodon}$/)
    return "inframe_codon_gain",pep_string
  elsif (mutcodon.length < refcodon.length) && (refcodon =~ /^#{mutcodon}/ || refcodon =~ /#{mutcodon}$/)
    return "inframe_codon_loss",pep_string
  elsif vf.seq_region_start != vf.seq_region_end
    # if the variation is an InDel then it produces a frameshift
    return "frameshift_variant",nil
  elsif (mutaa == "*" && refaa == "*") && (refcodon != mutcodon.downcase)
    # returned as a pair like every other branch
    return "stop_retained_variant",pep_string
  elsif mutaa == "*" && refaa != "*"
    return "stop_gained",pep_string
  elsif mutaa != "*" && refaa == "*"
    return "stop_lost",pep_string
  elsif mutaa != refaa
    return "non_synonymous_codon",pep_string
  else
    return "synonymous_codon",pep_string
  end
end
|
348
|
+
|
349
|
+
# Detects a variation that hits an incomplete final codon of the CDS.
#
# As a side effect the CDS position of the variation and the CDS sequence are
# stored in @cache[:mutation_position] and @cache[:cds_sequence].
#
# Any StandardError raised while mapping the position or fetching the CDS is
# treated as "no result" and nil is returned. Other exceptions (interrupts,
# exit requests, ...) are deliberately not swallowed.
#
# @param vf the variation feature; reads +seq_region_start+
# @param t  the transcript; reads +genomic2cds+ and +cds_seq+
# @return [String, nil] "incomplete_terminal_codon_variant" when the variation
#   is within the last 3 bases of a CDS whose length is not a multiple of 3,
#   nil otherwise
def check_partial_codon(vf,t)
  mutation_position = t.genomic2cds(vf.seq_region_start)
  cds_sequence = t.cds_seq
  @cache[:mutation_position] = mutation_position
  @cache[:cds_sequence] = cds_sequence
  # check if the mutation is on the last codon and if it's a partial codon
  if (cds_sequence.length - mutation_position) <= 3
    return (cds_sequence.length % 3 == 0) ? nil : "incomplete_terminal_codon_variant"
  end
  nil
rescue StandardError
  nil
end
|
363
|
+
|
364
|
+
# Finds the first exon range touched by a position or an interval.
#
# @param feature a Range (matched when it overlaps an exon range, judged by
#   comparing +first+ / +last+ of both) or any other value (matched when an
#   exon range +include?+s it)
# @param exons_ranges the exon ranges to search, in order
# @return the first matching exon range, or false when none matches
def check_near_exons(feature,exons_ranges)
  hit =
    if feature.is_a? Range
      exons_ranges.find {|exon| feature.first <= exon.last && exon.first <= feature.last}
    else
      exons_ranges.find {|exon| exon.include? feature}
    end
  hit || false
end
|
374
|
+
|
375
|
+
|
376
|
+
end # VariationFeature
|
377
|
+
|
378
|
+
# A TranscriptVariation describes where a VariationFeature falls on an
# annotated transcript.
#
# The class is backed by ActiveRecord and reads its data from the Ensembl
# database; see the general documentation of the Ensembl module for what
# that implies and which methods are available.
#
# @example
#   vf = Variation.find_by_name('rs10111').variation_feature
#   vf.transcript_variations.each do |tv|
#     puts tv.peptide_allele_string, tv.transcript.stable_id
#   end
#
class TranscriptVariation < DBConnection
  set_primary_key "transcript_variation_id"
  belongs_to :variation_feature
  validates_inclusion_of :consequence_types,
                         :message => "Consequence type not allowed!",
                         :in => %w[
                           intergenic_variant
                           splice_acceptor_variant
                           splice_donor_variant
                           complex_change_in_transcript
                           stop_lost
                           coding_sequence_variant
                           non_synonymous_codon
                           stop_gained
                           synonymous_codon
                           frameshift_variant
                           nc_transcript_variant
                           mature_miRNA_variant
                           NMD_transcript_variant
                           5_prime_UTR_variant
                           3_prime_UTR_variant
                           incomplete_terminal_codon_variant
                           intron_variant
                           splice_region_variant
                           5KB_downstream_variant
                           500B_downstream_variant
                           5KB_upstream_variant
                           2KB_upstream_variant
                           initiator_codon_change
                           stop_retained_variant
                           inframe_codon_gain
                           inframe_codon_loss
                           miRNA_target_site_variant
                           pre_miRNA_variant
                           regulatory_region_variant
                           increased_binding_affinity
                           decreased_binding_affinity
                           binding_site_variant
                         ]

  # Returns the raw, un-typecast column value as a String.
  # Workaround: ActiveRecord does not parse MySQL SET fields.
  def consequence_types
    raw_value = attributes_before_type_cast['consequence_types']
    "#{raw_value}"
  end

  # Looks up the Core transcript whose stable id equals +feature_stable_id+.
  #
  # When the Core database is not connected yet, it is connected first using
  # the host, credentials, port, species and release reported by the
  # Variation connection.
  def transcript
    host,user,password,_db_name,port,species,release = Ensembl::Variation::DBConnection.get_info
    unless Ensembl::Core::DBConnection.connected?
      Ensembl::Core::DBConnection.connect(species,release.to_i,:username => user, :password => password,:host => host, :port => port)
    end
    Ensembl::Core::Transcript.find_by_stable_id(self.feature_stable_id)
  end

end
|
441
|
+
|
442
|
+
end
|
443
|
+
|
444
|
+
end
|