bio-ensembl 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/Gemfile +20 -0
- data/Gemfile.lock +40 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +19 -0
- data/Rakefile +71 -0
- data/VERSION +1 -0
- data/bin/ensembl +40 -0
- data/bin/variation_effect_predictor +106 -0
- data/bio-ensembl.gemspec +190 -0
- data/lib/bio-ensembl.rb +65 -0
- data/lib/bio-ensembl/core/activerecord.rb +1812 -0
- data/lib/bio-ensembl/core/collection.rb +64 -0
- data/lib/bio-ensembl/core/project.rb +262 -0
- data/lib/bio-ensembl/core/slice.rb +657 -0
- data/lib/bio-ensembl/core/transcript.rb +409 -0
- data/lib/bio-ensembl/core/transform.rb +95 -0
- data/lib/bio-ensembl/db_connection.rb +205 -0
- data/lib/bio-ensembl/variation/activerecord.rb +536 -0
- data/lib/bio-ensembl/variation/variation_feature.rb +376 -0
- data/lib/bio-ensembl/variation/variation_feature62.rb +444 -0
- data/samples/ensembl_genomes_example.rb +60 -0
- data/samples/examples_perl_tutorial.rb +125 -0
- data/samples/small_example_ruby_api.rb +34 -0
- data/samples/variation_effect_predictor_data.txt +4 -0
- data/samples/variation_example.rb +67 -0
- data/test/data/seq_c6qbl.fa +10 -0
- data/test/data/seq_cso19_coding.fa +16 -0
- data/test/data/seq_cso19_transcript.fa +28 -0
- data/test/data/seq_drd3_gene.fa +838 -0
- data/test/data/seq_drd3_transcript.fa +22 -0
- data/test/data/seq_drd4_transcript.fa +24 -0
- data/test/data/seq_forward_composite.fa +1669 -0
- data/test/data/seq_par_boundary.fa +169 -0
- data/test/data/seq_rnd3_transcript.fa +47 -0
- data/test/data/seq_ub2r1_coding.fa +13 -0
- data/test/data/seq_ub2r1_gene.fa +174 -0
- data/test/data/seq_ub2r1_transcript.fa +26 -0
- data/test/data/seq_y.fa +2 -0
- data/test/default/test_connection.rb +60 -0
- data/test/default/test_releases.rb +130 -0
- data/test/ensembl_genomes/test_collection.rb +122 -0
- data/test/ensembl_genomes/test_gene.rb +46 -0
- data/test/ensembl_genomes/test_slice.rb +65 -0
- data/test/ensembl_genomes/test_variation.rb +38 -0
- data/test/helper.rb +18 -0
- data/test/release_50/core/test_project.rb +210 -0
- data/test/release_50/core/test_project_human.rb +52 -0
- data/test/release_50/core/test_relationships.rb +72 -0
- data/test/release_50/core/test_sequence.rb +170 -0
- data/test/release_50/core/test_slice.rb +116 -0
- data/test/release_50/core/test_transcript.rb +125 -0
- data/test/release_50/core/test_transform.rb +217 -0
- data/test/release_50/variation/test_activerecord.rb +138 -0
- data/test/release_50/variation/test_variation.rb +79 -0
- data/test/release_53/core/test_gene.rb +61 -0
- data/test/release_53/core/test_project.rb +91 -0
- data/test/release_53/core/test_project_human.rb +61 -0
- data/test/release_53/core/test_slice.rb +42 -0
- data/test/release_53/core/test_transform.rb +57 -0
- data/test/release_53/variation/test_activerecord.rb +137 -0
- data/test/release_53/variation/test_variation.rb +66 -0
- data/test/release_56/core/test_gene.rb +61 -0
- data/test/release_56/core/test_project.rb +91 -0
- data/test/release_56/core/test_slice.rb +49 -0
- data/test/release_56/core/test_transform.rb +57 -0
- data/test/release_56/variation/test_activerecord.rb +141 -0
- data/test/release_56/variation/test_consequence.rb +131 -0
- data/test/release_56/variation/test_variation.rb +63 -0
- data/test/release_60/core/test_gene.rb +61 -0
- data/test/release_60/core/test_project_human.rb +34 -0
- data/test/release_60/core/test_slice.rb +42 -0
- data/test/release_60/core/test_transcript.rb +120 -0
- data/test/release_60/core/test_transform.rb +57 -0
- data/test/release_60/variation/test_activerecord.rb +216 -0
- data/test/release_60/variation/test_consequence.rb +153 -0
- data/test/release_60/variation/test_variation.rb +64 -0
- data/test/release_62/core/test_gene.rb +42 -0
- data/test/release_62/variation/test_activerecord.rb +86 -0
- data/test/release_62/variation/test_consequence.rb +191 -0
- metadata +287 -0
@@ -0,0 +1,205 @@
|
|
1
|
+
#
|
2
|
+
# = ensembl/db_connection.rb - Connection classes for Ensembl databases
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2009 Jan Aerts <http://jandot.myopenid.com>
|
5
|
+
# Francesco Strozzi <francesco.strozzi@gmail.com>
|
6
|
+
#
|
7
|
+
# License:: The Ruby License
|
8
|
+
#
|
9
|
+
|
10
|
+
|
11
|
+
require 'rubygems'
|
12
|
+
require 'active_record'
|
13
|
+
|
14
|
+
|
15
|
+
class ActiveRecord::Base
  # Lists the names of all belongs_to associations declared on the model.
  #
  # @return [Array<String>] association names, as strings
  def self.belongs_to_what
    reflect_on_all_associations(:belongs_to).map { |association| association.name.to_s }
  end

  # Lists the names of all has_one and has_many associations declared on
  # the model (has_one first), without duplicates.
  #
  # @return [Array<String>] association names, as strings
  def self.has_what
    owned = reflect_on_all_associations(:has_one) + reflect_on_all_associations(:has_many)
    owned.uniq.map { |association| association.name.to_s }
  end
end
|
25
|
+
|
26
|
+
|
27
|
+
module Ensembl
|
28
|
+
DB_ADAPTER = 'mysql'
|
29
|
+
DB_HOST = 'ensembldb.ensembl.org'
|
30
|
+
DB_USERNAME = 'anonymous'
|
31
|
+
DB_PASSWORD = ''
|
32
|
+
EG_HOST = 'mysql.ebi.ac.uk'
|
33
|
+
EG_PORT = 4157
|
34
|
+
|
35
|
+
|
36
|
+
# Generic class to perform dynamic connections to the Ensembl database and retrieve database names
class DummyDBConnection < ActiveRecord::Base
  self.abstract_class = true

  # Establishes a connection with the given options, falling back to the
  # Ensembl defaults for every option that is missing.
  #
  # The options hash is only read. Defaults are NOT written back into it:
  # storing e.g. :database => '' in the caller's hash would make a later
  # `if args[:database]` check succeed (an empty string is truthy in Ruby).
  #
  # @param [Hash] args connection options
  #   (:adapter, :host, :username, :password, :port, :database)
  # @return the value returned by establish_connection
  def self.connect(args)
    establish_connection(
      :adapter  => args[:adapter]  || Ensembl::DB_ADAPTER,
      :host     => args[:host]     || Ensembl::DB_HOST,
      :username => args[:username] || Ensembl::DB_USERNAME,
      :password => args[:password] || Ensembl::DB_PASSWORD,
      :port     => args[:port],
      :database => args[:database] || ''
    )
  end
end
|
50
|
+
|
51
|
+
module DBRegistry
|
52
|
+
# Ensembl::DBRegistry::Base is a superclass providing general methods
|
53
|
+
# to get database and connection info.
|
54
|
+
class Base < ActiveRecord::Base
|
55
|
+
self.abstract_class = true
|
56
|
+
self.pluralize_table_names = false
|
57
|
+
|
58
|
+
# Returns the parameters of the current connection together with the
# species and release parsed from the database name.
#
# Only standard Ensembl database names are understood, i.e. names matching
# <species>_<core|variation|funcgen|compara>_<release>_<suffix>.
#
# @return [Array] host, user, password, db_name, port, species, release (Integer)
# @raise [NameError] if the database name does not follow the convention
def self.get_info
  connection_options = retrieve_connection.instance_values["connection_options"]
  host, user, password, db_name, port = connection_options

  matched = db_name =~ /(\w+_\w+)_(core|variation|funcgen|compara)_(\d+)_\S+/
  unless matched
    raise NameError, "Can't get database name from #{db_name}. Are you using non conventional names?"
  end

  species = Regexp.last_match(1)
  release = Regexp.last_match(3)
  [host, user, password, db_name, port, species, release.to_i]
end
|
68
|
+
|
69
|
+
# Method to retrieve the name of a database, using species, release and connection parameters
# passed by the user.
#
# The caller's options hash is never modified: all connections are opened
# with private copies. Values interpolated into SQL are quoted through the
# connection, and values interpolated into the regexp are escaped.
#
# @param [String] db_type database type used in the name (e.g. 'core', 'variation')
# @param [String] species species name; spaces are turned into underscores
# @param [Integer] release release number used in the database name
# @param [Hash] args connection options; :ensembl_genomes enables the
#   search among collection databases
# @return [String, nil] the database name, or nil (with a warning) if none was found
def self.get_name_from_db(db_type,species,release,args)
  conn_args = args.dup # private copy, so nothing leaks back into the caller's hash
  species = species.underscore.tr(' ','_') # Always in lowercase. This keeps things simple when dealing with complex species names like in Ensembl Genomes database
  dummy_db = DummyDBConnection.connect(conn_args)
  dummy_connection = dummy_db.connection

  # check if a database exists whose name contains <species>_<db_type>_<release> (regular way)
  like_pattern = dummy_connection.quote("%#{species}_#{db_type}_#{release}%")
  db_name = dummy_connection.select_values("SHOW DATABASES LIKE #{like_pattern}").first

  # if a database is not found and we are working on Ensembl Genomes database...
  if db_name.nil? && args[:ensembl_genomes]
    first = species.split('_').first.to_s
    # ...try to find a collection database using the first name of the species passed (convention used for collection databases)
    collection_pattern = /#{Regexp.escape(first)}.*_collection_#{Regexp.escape(db_type.to_s)}_#{Regexp.escape(release.to_s)}/
    db_name = dummy_connection.select_values("SHOW DATABASES").find { |d| d =~ collection_pattern }

    # if a collection database match is found, then look inside to find the species
    if db_name
      dummy_db.disconnect! # close the generic connection with the host
      dummy_db = DummyDBConnection.connect(conn_args.merge(:database => db_name)) # open a new connection directly with the collection database
      # abbreviate only the leading genus (e.g. escherichia_coli -> e_coli), so it can match the species names stored in the collection database
      species_name = species.sub(first, first[0..0])
      Ensembl::SESSION.collection_species = species_name # set the species used for this session, so it's easier to fetch slices from the genome of that species

      # check that the species passed is present in the collection database, otherwise returns a warning
      collection_connection = dummy_db.connection
      exists = collection_connection.select_values("SELECT species_id FROM meta WHERE LOWER(meta_value) = #{collection_connection.quote(species_name)} AND meta_key = 'species.db_name'").first
      warn "WARNING: No species '#{species}' found in the database. Please check that the name is correct." unless exists
    end
  end

  warn "WARNING: No connection to database established. Check that the species is in snake_case (was: #{species})." if db_name.nil?
  db_name
end
|
99
|
+
|
100
|
+
# Connects this class to the database of the given type, species and release.
#
# Works on a private copy of the options, so defaults computed here
# (:port, :host) are never written into the caller's hash and cannot leak
# into a later connection made with the same hash.
#
# @param [String] db_type database type (e.g. 'core', 'variation')
# @param [String] species species name
# @param [Integer] release Ensembl release number
# @param [Hash] args connection options (:adapter, :host, :port, :username,
#   :password, :database, :ensembl_genomes)
# @return the value of retrieve_connection for the new connection
def self.generic_connect(db_type, species, release, args = {})
  args = args.dup

  # check which release is used and load the correct VariationFeature version
  feature_file = release < 62 ? 'variation_feature.rb' : 'variation_feature62.rb'
  require File.dirname(__FILE__) + '/variation/' + feature_file

  Ensembl::SESSION.reset
  Ensembl::SESSION.release = release

  # if the connection is established with Ensembl Genomes, set the default port and host
  if args[:ensembl_genomes]
    args[:port] = Ensembl::EG_PORT
    args[:host] = Ensembl::EG_HOST
  end
  if args[:port].nil?
    args[:port] = (release > 47) ? 5306 : 3306
  end

  # use the database given explicitly, otherwise try to find the corresponding one
  db_name = args[:database] || get_name_from_db(db_type, species, release, args)

  establish_connection(
    :adapter  => args[:adapter]  || Ensembl::DB_ADAPTER,
    :host     => args[:host]     || Ensembl::DB_HOST,
    :database => db_name,
    :username => args[:username] || Ensembl::DB_USERNAME,
    :password => args[:password] || Ensembl::DB_PASSWORD,
    :port     => args[:port]
  )

  retrieve_connection # Check if the connection is working
end
|
132
|
+
|
133
|
+
end
|
134
|
+
|
135
|
+
end
|
136
|
+
|
137
|
+
|
138
|
+
module Core
|
139
|
+
# The Ensembl::Core::DBConnection is the actual connection established
# with the Ensembl server.
class DBConnection < Ensembl::DBRegistry::Base
  self.abstract_class = true
  self.pluralize_table_names = false

  # The Ensembl::Core::DBConnection.connect method makes the connection
  # to the Ensembl core database for a given species. By default, it connects
  # to release Ensembl::ENSEMBL_RELEASE for that species. You _could_ use a
  # lower number, but some parts of the API might not work, or worse: give
  # the wrong results.
  #
  # @example
  #  # Connect to the default release of human
  #  Ensembl::Core::DBConnection.connect('homo_sapiens')
  #
  #  # Connect to release 42 of chicken
  #  Ensembl::Core::DBConnection.connect('gallus_gallus', 42)
  #
  # @param [String] species Species to connect to. Must be in snake_case
  # @param [Integer] release Release to connect to (default = Ensembl::ENSEMBL_RELEASE)
  # @param [Hash] args Optional connection settings, passed on to generic_connect
  def self.connect(species, release = Ensembl::ENSEMBL_RELEASE, args = {})
    generic_connect('core', species, release, args)
  end

  # Same as connect, but with the :ensembl_genomes option switched on in the
  # settings handed to generic_connect. The flag is set on a copy, so the
  # caller's hash can safely be reused for a regular connect afterwards.
  #
  # @param [String] species Species to connect to
  # @param [Integer] release Release to connect to (default = Ensembl::ENSEMBL_RELEASE)
  # @param [Hash] args Optional connection settings, passed on to generic_connect
  def self.ensemblgenomes_connect(species, release = Ensembl::ENSEMBL_RELEASE, args = {})
    generic_connect('core', species, release, args.merge(:ensembl_genomes => true))
  end
end # Core::DBConnection
|
169
|
+
|
170
|
+
end # Core
|
171
|
+
|
172
|
+
module Variation
|
173
|
+
# The Ensembl::Variation::DBConnection is the actual connection established
# with the Ensembl server.
class DBConnection < Ensembl::DBRegistry::Base
  self.abstract_class = true
  self.pluralize_table_names = false

  # The Ensembl::Variation::DBConnection.connect method makes the connection
  # to the Ensembl variation database for a given species. By default, it
  # connects to release Ensembl::ENSEMBL_RELEASE for that species. You _could_
  # use a lower number, but some parts of the API might not work, or worse:
  # give the wrong results.
  #
  # @example
  #  # Connect to the default release of human
  #  Ensembl::Variation::DBConnection.connect('homo_sapiens')
  #
  #  # Connect to release 42 of chicken
  #  Ensembl::Variation::DBConnection.connect('gallus_gallus', 42)
  #
  # @param [String] species Species to connect to. Must be in snake_case
  # @param [Integer] release Release to connect to (default = Ensembl::ENSEMBL_RELEASE)
  # @param [Hash] args Optional connection settings, passed on to generic_connect
  def self.connect(species, release = Ensembl::ENSEMBL_RELEASE, args = {})
    generic_connect('variation', species, release, args)
  end

  # Same as connect, but with the :ensembl_genomes option switched on in the
  # settings handed to generic_connect. The flag is set on a copy, so the
  # caller's hash can safely be reused for a regular connect afterwards.
  #
  # @param [String] species Species to connect to
  # @param [Integer] release Release to connect to (default = Ensembl::ENSEMBL_RELEASE)
  # @param [Hash] args Optional connection settings, passed on to generic_connect
  def self.ensemblgenomes_connect(species, release = Ensembl::ENSEMBL_RELEASE, args = {})
    generic_connect('variation', species, release, args.merge(:ensembl_genomes => true))
  end
end # Variation::DBConnection
|
202
|
+
|
203
|
+
end # Variation
|
204
|
+
|
205
|
+
end # Ensembl
|
@@ -0,0 +1,536 @@
|
|
1
|
+
#
|
2
|
+
# = ensembl/variation/activerecord.rb - ActiveRecord mappings to Ensembl Variation
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2008 Francesco Strozzi <francesco.strozzi@gmail.com>
|
5
|
+
# License:: The Ruby License
|
6
|
+
#
|
7
|
+
# @author Francesco Strozzi
|
8
|
+
|
9
|
+
nil
|
10
|
+
module Ensembl
|
11
|
+
# The Ensembl::Variation module covers the variation databases from
|
12
|
+
# ensembldb.ensembl.org.
|
13
|
+
module Variation
|
14
|
+
# The Allele class describes a single allele of a variation. In addition to
|
15
|
+
# the nucleotide(s) (or absence thereof) representing the allele, frequency
|
16
|
+
# and population information may be present.
|
17
|
+
#
|
18
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
19
|
+
# See the general documentation of the Ensembl module for
|
20
|
+
# more information on what this means and what methods are available.
|
21
|
+
#
|
22
|
+
# @example
|
23
|
+
# allele = Allele.find(1)
|
24
|
+
# puts allele.to_yaml
|
25
|
+
class Allele < DBConnection
|
26
|
+
set_primary_key 'allele_id'
|
27
|
+
belongs_to :sample
|
28
|
+
belongs_to :variation
|
29
|
+
belongs_to :population
|
30
|
+
belongs_to :subsnp_handle
|
31
|
+
end
|
32
|
+
|
33
|
+
# The AlleleGroup class represents a grouping of alleles that have tight
|
34
|
+
# linkage and are usually present together. This is commonly known as a
|
35
|
+
# Haplotype or Haplotype Block.
|
36
|
+
#
|
37
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
38
|
+
# See the general documentation of the Ensembl module for
|
39
|
+
# more information on what this means and what methods are available.
|
40
|
+
#
|
41
|
+
# @example
|
42
|
+
# allele_group = AlleleGroup.find(1)
|
43
|
+
# puts allele_group.to_yaml
|
44
|
+
class AlleleGroup < DBConnection
|
45
|
+
set_primary_key 'allele_group_id'
|
46
|
+
belongs_to :variation_group
|
47
|
+
belongs_to :source
|
48
|
+
belongs_to :sample
|
49
|
+
belongs_to :allele_group_allele
|
50
|
+
end
|
51
|
+
|
52
|
+
# The AlleleGroupAllele class represents a connection class between Allele and AlleleGroup.
|
53
|
+
# Should not be used directly.
|
54
|
+
#
|
55
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
56
|
+
# See the general documentation of the Ensembl module for
|
57
|
+
# more information on what this means and what methods are available.
|
58
|
+
class AlleleGroupAllele < DBConnection
|
59
|
+
belongs_to :variation
|
60
|
+
belongs_to :allele_group
|
61
|
+
end
|
62
|
+
|
63
|
+
# Store information on attributes types
|
64
|
+
#
|
65
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
66
|
+
# See the general documentation of the Ensembl module for
|
67
|
+
# more information on what this means and what methods are available.
|
68
|
+
class AttribType < DBConnection
|
69
|
+
set_primary_key "attrib_type_id"
|
70
|
+
end
|
71
|
+
|
72
|
+
# Store information on associated studies
|
73
|
+
#
|
74
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
75
|
+
# See the general documentation of the Ensembl module for
|
76
|
+
# more information on what this means and what methods are available.
|
77
|
+
class AssociateStudy < DBConnection
|
78
|
+
set_primary_key "study1_id"
|
79
|
+
belongs_to :study
|
80
|
+
end
|
81
|
+
|
82
|
+
|
83
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
84
|
+
# See the general documentation of the Ensembl module for
|
85
|
+
# more information on what this means and what methods are available.
|
86
|
+
class ConsequenceMapping < DBConnection
|
87
|
+
|
88
|
+
end
|
89
|
+
|
90
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
91
|
+
# See the general documentation of the Ensembl module for
|
92
|
+
# more information on what this means and what methods are available.
|
93
|
+
class FailedDescription < DBConnection
|
94
|
+
set_primary_key "failed_description_id"
|
95
|
+
has_many :failed_variations
|
96
|
+
end
|
97
|
+
|
98
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
99
|
+
# See the general documentation of the Ensembl module for
|
100
|
+
# more information on what this means and what methods are available.
|
101
|
+
class FailedVariation < DBConnection
|
102
|
+
set_primary_key "failed_variation_id"
|
103
|
+
belongs_to :failed_description
|
104
|
+
belongs_to :variation
|
105
|
+
end
|
106
|
+
|
107
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
108
|
+
# See the general documentation of the Ensembl module for
|
109
|
+
# more information on what this means and what methods are available.
|
110
|
+
class FeatureType < DBConnection
|
111
|
+
set_primary_key "feature_type_id"
|
112
|
+
end
|
113
|
+
|
114
|
+
class Meta < DBConnection
|
115
|
+
set_primary_key "meta_id"
|
116
|
+
end
|
117
|
+
|
118
|
+
class MetaCoord < DBConnection
|
119
|
+
|
120
|
+
end
|
121
|
+
|
122
|
+
class Phenotype < DBConnection
|
123
|
+
set_primary_key "phenotype_id"
|
124
|
+
has_many :variation_annotations
|
125
|
+
end
|
126
|
+
|
127
|
+
# The Sample class gives information about the biological samples stored in the database.
|
128
|
+
#
|
129
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
130
|
+
# See the general documentation of the Ensembl module for
|
131
|
+
# more information on what this means and what methods are available.
|
132
|
+
class Sample < DBConnection
|
133
|
+
set_primary_key "sample_id"
|
134
|
+
has_one :individual
|
135
|
+
has_one :sample_synonym
|
136
|
+
has_many :individual_genotype_multiple_bp
|
137
|
+
has_many :compressed_genotype_single_bp
|
138
|
+
has_many :read_coverage
|
139
|
+
has_one :population
|
140
|
+
has_many :tagged_variation_features
|
141
|
+
end
|
142
|
+
|
143
|
+
# The IndividualPopulation class is used to connect Individual and Population classes.
|
144
|
+
# Should not be used directly.
|
145
|
+
#
|
146
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
147
|
+
# See the general documentation of the Ensembl module for
|
148
|
+
# more information on what this means and what methods are available.
|
149
|
+
class IndividualPopulation < DBConnection
|
150
|
+
belongs_to :individual, :foreign_key => "individual_sample_id"
|
151
|
+
belongs_to :population, :foreign_key => "population_sample_id"
|
152
|
+
end
|
153
|
+
|
154
|
+
# The Individual class gives information on the single individuals used
|
155
|
+
# to retrieve one or more biological samples.
|
156
|
+
#
|
157
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
158
|
+
# See the general documentation of the Ensembl module for
|
159
|
+
# more information on what this means and what methods are available.
|
160
|
+
class Individual < DBConnection
|
161
|
+
set_primary_key "sample_id"
|
162
|
+
belongs_to :sample
|
163
|
+
has_one :individual_type
|
164
|
+
has_many :individual_populations, :foreign_key => "individual_sample_id"
|
165
|
+
has_many :populations, :through => :individual_populations
|
166
|
+
end
|
167
|
+
|
168
|
+
class IndividualGenotypeMultipleBp < DBConnection
|
169
|
+
belongs_to :sample
|
170
|
+
belongs_to :variation
|
171
|
+
belongs_to :subsnp_handle
|
172
|
+
end
|
173
|
+
|
174
|
+
# Maps the individual_type table; each record belongs to an Individual.
class IndividualType < DBConnection
  # The key was previously misspelled "invidual_type_id".
  # NOTE(review): the Ensembl variation schema names this column
  # individual_type_id - confirm against the target release.
  set_primary_key "individual_type_id"
  belongs_to :individual
end
|
178
|
+
|
179
|
+
|
180
|
+
class CompressedGenotypeSingleBp < DBConnection
|
181
|
+
belongs_to :population_genotype, :foreign_key => "sample_id"
|
182
|
+
end
|
183
|
+
|
184
|
+
class ReadCoverage < DBConnection
|
185
|
+
belongs_to :sample
|
186
|
+
end
|
187
|
+
|
188
|
+
class Population < DBConnection
|
189
|
+
belongs_to :sample
|
190
|
+
set_primary_key "sample_id"
|
191
|
+
has_many :population_genotypes, :foreign_key => "sample_id"
|
192
|
+
has_many :individual_populations, :foreign_key => "population_sample_id"
|
193
|
+
has_many :individuals, :through => :individual_populations
|
194
|
+
has_many :sample_synonyms
|
195
|
+
has_one :population_structure
|
196
|
+
has_many :tagged_variation_features
|
197
|
+
has_many :alleles
|
198
|
+
has_many :allele_groups
|
199
|
+
end
|
200
|
+
|
201
|
+
|
202
|
+
# The PopulationStructure class gives information on super and sub populations
|
203
|
+
#
|
204
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
205
|
+
# See the general documentation of the Ensembl module for
|
206
|
+
# more information on what this means and what methods are available.
|
207
|
+
class PopulationStructure < DBConnection
|
208
|
+
|
209
|
+
end
|
210
|
+
|
211
|
+
# The PopulationGenotype class gives information about alleles and allele
|
212
|
+
# frequencies for a SNP observed within a population or a group of samples.
|
213
|
+
#
|
214
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
215
|
+
# See the general documentation of the Ensembl module for
|
216
|
+
# more information on what this means and what methods are available.
|
217
|
+
class PopulationGenotype < DBConnection
|
218
|
+
set_primary_key "population_genotype_id"
|
219
|
+
belongs_to :variation
|
220
|
+
belongs_to :population
|
221
|
+
belongs_to :subsnp_handle
|
222
|
+
has_many :compressed_genotype_single_bps, :foreign_key => "sample_id"
|
223
|
+
end
|
224
|
+
|
225
|
+
# The ProteinInfo class gives information about protein translated from a given transcript.
|
226
|
+
#
|
227
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
228
|
+
# See the general documentation of the Ensembl module for
|
229
|
+
# more information on what this means and what methods are available.
|
230
|
+
class ProteinInfo < DBConnection
|
231
|
+
set_primary_key "protein_info_id"
|
232
|
+
belongs_to :transcript_variation
|
233
|
+
has_many :protein_positions
|
234
|
+
end
|
235
|
+
|
236
|
+
# The PolyphenPrediction class gives information about variations effect predictions within an aminoacidic sequence
|
237
|
+
#
|
238
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
239
|
+
# See the general documentation of the Ensembl module for
|
240
|
+
# more information on what this means and what methods are available.
|
241
|
+
class PolyphenPrediction < DBConnection
|
242
|
+
set_primary_key "polyphen_prediction_id"
|
243
|
+
belongs_to :protein_position
|
244
|
+
end
|
245
|
+
|
246
|
+
# The ProteinPosition class gives information about variations within an aminoacidic sequence.
|
247
|
+
#
|
248
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
249
|
+
# See the general documentation of the Ensembl module for
|
250
|
+
# more information on what this means and what methods are available.
|
251
|
+
class ProteinPosition < DBConnection
|
252
|
+
set_primary_key "protein_position_id"
|
253
|
+
belongs_to :protein_info
|
254
|
+
has_many :polyphen_predictions
|
255
|
+
has_many :sift_predictions
|
256
|
+
end
|
257
|
+
|
258
|
+
|
259
|
+
|
260
|
+
# The SampleSynonym class represents information about alternative names
|
261
|
+
# for sample entries.
|
262
|
+
#
|
263
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
264
|
+
# See the general documentation of the Ensembl module for
|
265
|
+
# more information on what this means and what methods are available.
|
266
|
+
class SampleSynonym < DBConnection
|
267
|
+
set_primary_key "sample_synonym_id"
|
268
|
+
belongs_to :source
|
269
|
+
belongs_to :sample
|
270
|
+
belongs_to :population
|
271
|
+
end
|
272
|
+
|
273
|
+
# The Source class gives information on the different databases and SNP
|
274
|
+
# panels used to retrieve the data
|
275
|
+
#
|
276
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
277
|
+
# See the general documentation of the Ensembl module for
|
278
|
+
# more information on what this means and what methods are available.
|
279
|
+
class Source < DBConnection
  set_primary_key "source_id"
  has_many :sample_synonyms
  has_many :allele_groups
  has_many :variations
  has_many :variation_groups
  has_many :httags
  has_many :variation_synonyms
  has_many :variation_annotations
  has_many :structural_variations

  # Returns the somatic_status column as a string built from its raw value
  # (before type casting).
  # Workaround: ActiveRecord does not parse SET fields in MySQL.
  #
  # @return [String]
  def somatic_status
    raw_status = attributes_before_type_cast['somatic_status']
    "#{raw_status}"
  end
end
|
295
|
+
|
296
|
+
# The StructuralVariation class gives information on structural variations mapped on the genome
|
297
|
+
#
|
298
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
299
|
+
# See the general documentation of the Ensembl module for
|
300
|
+
# more information on what this means and what methods are available.
|
301
|
+
class StructuralVariation < DBConnection
  set_primary_key "structural_variation_id"
  belongs_to :source
  belongs_to :seq_region
  has_many :supporting_structural_variations

  # Workaround for the 'class' field, which otherwise creates a mess for
  # ActiveRecord: report a method named 'class' as already implemented and
  # defer to the default answer for every other name.
  def self.instance_method_already_implemented?(method_name)
    method_name == 'class' || super
  end

  # Reads the value of the 'class' column (see the workaround above for why
  # it is not exposed under its own name).
  def sv_class
    attributes["class"]
  end
end
|
319
|
+
|
320
|
+
|
321
|
+
class SeqRegion < DBConnection
|
322
|
+
set_primary_key "seq_region_id"
|
323
|
+
has_many :variation_features
|
324
|
+
has_many :structural_variations
|
325
|
+
end
|
326
|
+
|
327
|
+
# The SupportingStructuralVariation class represents a supporting record that belongs to a StructuralVariation
|
328
|
+
#
|
329
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
330
|
+
# See the general documentation of the Ensembl module for
|
331
|
+
# more information on what this means and what methods are available.
|
332
|
+
class SupportingStructuralVariation < DBConnection
|
333
|
+
set_primary_key "supporting_structural_variation_id"
|
334
|
+
belongs_to :structural_variation
|
335
|
+
end
|
336
|
+
|
337
|
+
# The SubsnpHandle class gives information on SNP Submitters
|
338
|
+
#
|
339
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
340
|
+
# See the general documentation of the Ensembl module for
|
341
|
+
# more information on what this means and what methods are available.
|
342
|
+
class SubsnpHandle < DBConnection
|
343
|
+
set_primary_key "subsnp_id"
|
344
|
+
has_many :individual_genotype_multiple_bps, :foreign_key => "subsnp_id"
|
345
|
+
has_many :population_genotypes, :foreign_key => "subsnp_id"
|
346
|
+
has_many :alleles, :foreign_key => "subsnp_id"
|
347
|
+
has_many :variation_synonyms,:foreign_key => "subsnp_id"
|
348
|
+
end
|
349
|
+
|
350
|
+
# The SiftPrediction class gives information about variations effect predictions within an aminoacidic sequence
|
351
|
+
#
|
352
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
353
|
+
# See the general documentation of the Ensembl module for
|
354
|
+
# more information on what this means and what methods are available.
|
355
|
+
class SiftPrediction < DBConnection
|
356
|
+
set_primary_key "sift_prediction_id"
|
357
|
+
belongs_to :protein_position
|
358
|
+
end
|
359
|
+
|
360
|
+
# The Study class gives information about studies producing variations information
|
361
|
+
#
|
362
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
363
|
+
# See the general documentation of the Ensembl module for
|
364
|
+
# more information on what this means and what methods are available.
|
365
|
+
class Study < DBConnection
  set_primary_key "study_id"
  has_many :associate_studies, :foreign_key => "study1_id"
  has_many :structural_variations
  has_many :variation_annotations

  # Returns the study_type column as a string built from its raw value
  # (before type casting).
  #
  # @return [String]
  def study_type
    raw_type = attributes_before_type_cast['study_type']
    "#{raw_type}"
  end
end
|
376
|
+
|
377
|
+
|
378
|
+
# The Variation class represents single nucleotide polymorphisms (SNP) or variations
|
379
|
+
# and provides information like the names (IDs), the validation status and
|
380
|
+
# the allele information.
|
381
|
+
#
|
382
|
+
# *BUG*: fields like validation_status and consequence_type are created
|
383
|
+
# using SET option directly in MySQL. These fields are badly interpreted by
|
384
|
+
# ActiveRecord, returning always 0.
|
385
|
+
#
|
386
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
387
|
+
# See the general documentation of the Ensembl module for
|
388
|
+
# more information on what this means and what methods are available.
|
389
|
+
#
|
390
|
+
# @example
|
391
|
+
# v = Variation.find_by_name('rs10111')
|
392
|
+
# v.alleles.each do |a|
|
393
|
+
# puts a.allele, a.frequency
|
394
|
+
# end
|
395
|
+
#
|
396
|
+
# variations = Variation.fetch_all_by_source('dbSNP') # many records
|
397
|
+
# variations.each do |v|
|
398
|
+
# puts v.name
|
399
|
+
# end
|
400
|
+
#
|
401
|
+
class Variation < DBConnection
  set_primary_key "variation_id"
  belongs_to :source
  has_many :variation_synonyms
  has_one :flanking_sequence
  has_many :allele_group_alleles
  has_many :allele_groups, :through => :allele_group_alleles
  has_many :population_genotypes
  has_many :alleles
  has_many :variation_features
  has_many :variation_group_variations
  has_many :variation_groups, :through => :variation_group_variations
  has_many :individual_genotype_multiple_bps
  has_many :failed_variations
  has_many :failed_descriptions, :through => :failed_variations
  has_many :variation_set_variations
  has_many :variation_sets, :through => :variation_set_variations

  # Fetches all the variations attached to the source with the given name.
  #
  # @param [String] source name of the source (e.g. 'dbSNP')
  # @return the variations of that source; an empty Array when no source
  #   with that name exists (previously this raised NoMethodError on nil)
  def self.fetch_all_by_source(source)
    found = Source.find_by_name(source)
    found ? found.variations : []
  end
end
|
423
|
+
|
424
|
+
# The VariationSynonym class gives information on alternative names used
|
425
|
+
# for Variation entries.
|
426
|
+
#
|
427
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
428
|
+
# See the general documentation of the Ensembl module for
|
429
|
+
# more information on what this means and what methods are available.
|
430
|
+
class VariationSynonym < DBConnection
  # Rows are identified by the variation_synonym_id column.
  set_primary_key "variation_synonym_id"

  # Each synonym refers to one source, one subsnp handle and one variation.
  belongs_to :source
  belongs_to :subsnp_handle
  belongs_to :variation
end
|
436
|
+
|
437
|
+
# The VariationGroup class represents a group of variations (SNPs) that are
|
438
|
+
# linked and present together.
|
439
|
+
#
|
440
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
441
|
+
# See the general documentation of the Ensembl module for
|
442
|
+
# more information on what this means and what methods are available.
|
443
|
+
class VariationGroup < DBConnection
  # Rows are identified by the variation_group_id column.
  set_primary_key "variation_group_id"

  # The source this group is attached to.
  belongs_to :source

  # Single-record associations declared for a group.
  has_one :allele_group
  has_one :httag
  has_one :variation_group_feature
  has_one :variation_group_variation
end
|
451
|
+
|
452
|
+
# The VariationGroupVariation class is a connection class.
|
453
|
+
# Should not be used directly.
|
454
|
+
#
|
455
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
456
|
+
# See the general documentation of the Ensembl module for
|
457
|
+
# more information on what this means and what methods are available.
|
458
|
+
class VariationGroupVariation < DBConnection
  # Connection record: ties one variation group to one variation.
  belongs_to :variation_group
  belongs_to :variation
end
|
462
|
+
|
463
|
+
# The VariationGroupFeature class gives information on the genomic position
|
464
|
+
# of each VariationGroup.
|
465
|
+
#
|
466
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
467
|
+
# See the general documentation of the Ensembl module for
|
468
|
+
# more information on what this means and what methods are available.
|
469
|
+
class VariationGroupFeature < DBConnection
  # Rows are identified by the variation_group_feature_id column.
  set_primary_key "variation_group_feature_id"

  # The variation group this feature record belongs to.
  belongs_to :variation_group
end
|
473
|
+
|
474
|
+
class VariationAnnotation < DBConnection
  # Rows are identified by the variation_annotation_id column.
  set_primary_key "variation_annotation_id"

  # Each annotation refers to one phenotype, one source and one variation.
  belongs_to :phenotype
  belongs_to :source
  belongs_to :variation
end
|
480
|
+
|
481
|
+
# The VariationSet class gives information on variations grouped by study, method, quality measure etc.
|
482
|
+
#
|
483
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
484
|
+
# See the general documentation of the Ensembl module for
|
485
|
+
# more information on what this means and what methods are available.
|
486
|
+
class VariationSet < DBConnection
  # Rows are identified by the variation_set_id column.
  set_primary_key "variation_set_id"

  # Variations are reached through the variation_set_variations records.
  has_many :variation_set_variations
  has_many :variations, :through => :variation_set_variations
end
|
491
|
+
|
492
|
+
class VariationSetVariation < DBConnection
  # Each record ties one variation set to one variation.
  belongs_to :variation_set
  belongs_to :variation
end
|
496
|
+
|
497
|
+
# The VariationSetStructure class gives information on super and sub VariationSets.
|
498
|
+
#
|
499
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
500
|
+
# See the general documentation of the Ensembl module for
|
501
|
+
# more information on what this means and what methods are available.
|
502
|
+
class VariationSetStructure < DBConnection
  # No primary key override or associations are declared for this model.
end
|
505
|
+
|
506
|
+
|
507
|
+
|
508
|
+
# The FlankingSequence class gives information about the genomic coordinates
|
509
|
+
# of the flanking sequences, for a single VariationFeature.
|
510
|
+
#
|
511
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
512
|
+
# See the general documentation of the Ensembl module for
|
513
|
+
# more information on what this means and what methods are available.
|
514
|
+
class FlankingSequence < DBConnection
  # The variation this flanking sequence record belongs to.
  belongs_to :variation
end
|
517
|
+
|
518
|
+
# The TaggedVariationFeature class is a connection class.
|
519
|
+
# Should not be used directly.
|
520
|
+
#
|
521
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
522
|
+
# See the general documentation of the Ensembl module for
|
523
|
+
# more information on what this means and what methods are available.
|
524
|
+
class TaggedVariationFeature < DBConnection
  # Connection record: ties one sample to one variation feature.
  belongs_to :sample
  belongs_to :variation_feature
end
|
528
|
+
|
529
|
+
class Httag < DBConnection
  # Rows are identified by the httag_id column.
  set_primary_key "httag_id"

  # Each httag refers to one source and one variation group.
  belongs_to :source
  belongs_to :variation_group
end
|
534
|
+
|
535
|
+
end
|
536
|
+
end
|