bio-ensembl 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/Gemfile +20 -0
- data/Gemfile.lock +40 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +19 -0
- data/Rakefile +71 -0
- data/VERSION +1 -0
- data/bin/ensembl +40 -0
- data/bin/variation_effect_predictor +106 -0
- data/bio-ensembl.gemspec +190 -0
- data/lib/bio-ensembl.rb +65 -0
- data/lib/bio-ensembl/core/activerecord.rb +1812 -0
- data/lib/bio-ensembl/core/collection.rb +64 -0
- data/lib/bio-ensembl/core/project.rb +262 -0
- data/lib/bio-ensembl/core/slice.rb +657 -0
- data/lib/bio-ensembl/core/transcript.rb +409 -0
- data/lib/bio-ensembl/core/transform.rb +95 -0
- data/lib/bio-ensembl/db_connection.rb +205 -0
- data/lib/bio-ensembl/variation/activerecord.rb +536 -0
- data/lib/bio-ensembl/variation/variation_feature.rb +376 -0
- data/lib/bio-ensembl/variation/variation_feature62.rb +444 -0
- data/samples/ensembl_genomes_example.rb +60 -0
- data/samples/examples_perl_tutorial.rb +125 -0
- data/samples/small_example_ruby_api.rb +34 -0
- data/samples/variation_effect_predictor_data.txt +4 -0
- data/samples/variation_example.rb +67 -0
- data/test/data/seq_c6qbl.fa +10 -0
- data/test/data/seq_cso19_coding.fa +16 -0
- data/test/data/seq_cso19_transcript.fa +28 -0
- data/test/data/seq_drd3_gene.fa +838 -0
- data/test/data/seq_drd3_transcript.fa +22 -0
- data/test/data/seq_drd4_transcript.fa +24 -0
- data/test/data/seq_forward_composite.fa +1669 -0
- data/test/data/seq_par_boundary.fa +169 -0
- data/test/data/seq_rnd3_transcript.fa +47 -0
- data/test/data/seq_ub2r1_coding.fa +13 -0
- data/test/data/seq_ub2r1_gene.fa +174 -0
- data/test/data/seq_ub2r1_transcript.fa +26 -0
- data/test/data/seq_y.fa +2 -0
- data/test/default/test_connection.rb +60 -0
- data/test/default/test_releases.rb +130 -0
- data/test/ensembl_genomes/test_collection.rb +122 -0
- data/test/ensembl_genomes/test_gene.rb +46 -0
- data/test/ensembl_genomes/test_slice.rb +65 -0
- data/test/ensembl_genomes/test_variation.rb +38 -0
- data/test/helper.rb +18 -0
- data/test/release_50/core/test_project.rb +210 -0
- data/test/release_50/core/test_project_human.rb +52 -0
- data/test/release_50/core/test_relationships.rb +72 -0
- data/test/release_50/core/test_sequence.rb +170 -0
- data/test/release_50/core/test_slice.rb +116 -0
- data/test/release_50/core/test_transcript.rb +125 -0
- data/test/release_50/core/test_transform.rb +217 -0
- data/test/release_50/variation/test_activerecord.rb +138 -0
- data/test/release_50/variation/test_variation.rb +79 -0
- data/test/release_53/core/test_gene.rb +61 -0
- data/test/release_53/core/test_project.rb +91 -0
- data/test/release_53/core/test_project_human.rb +61 -0
- data/test/release_53/core/test_slice.rb +42 -0
- data/test/release_53/core/test_transform.rb +57 -0
- data/test/release_53/variation/test_activerecord.rb +137 -0
- data/test/release_53/variation/test_variation.rb +66 -0
- data/test/release_56/core/test_gene.rb +61 -0
- data/test/release_56/core/test_project.rb +91 -0
- data/test/release_56/core/test_slice.rb +49 -0
- data/test/release_56/core/test_transform.rb +57 -0
- data/test/release_56/variation/test_activerecord.rb +141 -0
- data/test/release_56/variation/test_consequence.rb +131 -0
- data/test/release_56/variation/test_variation.rb +63 -0
- data/test/release_60/core/test_gene.rb +61 -0
- data/test/release_60/core/test_project_human.rb +34 -0
- data/test/release_60/core/test_slice.rb +42 -0
- data/test/release_60/core/test_transcript.rb +120 -0
- data/test/release_60/core/test_transform.rb +57 -0
- data/test/release_60/variation/test_activerecord.rb +216 -0
- data/test/release_60/variation/test_consequence.rb +153 -0
- data/test/release_60/variation/test_variation.rb +64 -0
- data/test/release_62/core/test_gene.rb +42 -0
- data/test/release_62/variation/test_activerecord.rb +86 -0
- data/test/release_62/variation/test_consequence.rb +191 -0
- metadata +287 -0
@@ -0,0 +1,205 @@
|
|
1
|
+
#
|
2
|
+
# = ensembl/db_connection.rb - Connection classes for Ensembl databases
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2009 Jan Aerts <http://jandot.myopenid.com>
|
5
|
+
# Francesco Strozzi <francesco.strozzi@gmail.com>
|
6
|
+
#
|
7
|
+
# License:: The Ruby License
|
8
|
+
#
|
9
|
+
|
10
|
+
|
11
|
+
require 'rubygems'
|
12
|
+
require 'active_record'
|
13
|
+
|
14
|
+
|
15
|
+
class ActiveRecord::Base
|
16
|
+
# Lists the names of every +belongs_to+ association declared on this class.
#
# @return [Array<String>] association names, in the order reported by
#   reflect_on_all_associations
def self.belongs_to_what
  reflect_on_all_associations(:belongs_to).map do |association|
    association.name.to_s
  end
end
|
19
|
+
|
20
|
+
# Lists the names of every +has_one+ and +has_many+ association declared on
# this class.
#
# @return [Array<String>] association names, +has_one+ entries first,
#   duplicates removed
def self.has_what
  reflections = [
    reflect_on_all_associations(:has_one),
    reflect_on_all_associations(:has_many)
  ].flatten.uniq
  # The block parameter has its own name; reusing the name of the outer
  # local would shadow it.
  reflections.map { |association| association.name.to_s }
end
|
24
|
+
end
|
25
|
+
|
26
|
+
|
27
|
+
module Ensembl
|
28
|
+
DB_ADAPTER = 'mysql'
|
29
|
+
DB_HOST = 'ensembldb.ensembl.org'
|
30
|
+
DB_USERNAME = 'anonymous'
|
31
|
+
DB_PASSWORD = ''
|
32
|
+
EG_HOST = 'mysql.ebi.ac.uk'
|
33
|
+
EG_PORT = 4157
|
34
|
+
|
35
|
+
|
36
|
+
# Generic class to perform dynamic connections to the Ensembl database and retrieve database names
|
37
|
+
class DummyDBConnection < ActiveRecord::Base
|
38
|
+
self.abstract_class = true
|
39
|
+
# Establishes the generic connection described by +args+, falling back to
# the Ensembl::DB_* constants for any missing setting.
#
# The options hash is only read. Writing the defaults back into it would
# leave :database => '' behind, and an empty string is truthy in Ruby, so a
# caller reusing the hash would later be treated as having named a database.
#
# @param [Hash] args connection settings; recognised keys are :adapter,
#   :host, :username, :password, :port and :database
# @return whatever establish_connection returns
def self.connect(args)
  establish_connection(
    :adapter  => args[:adapter]  || Ensembl::DB_ADAPTER,
    :host     => args[:host]     || Ensembl::DB_HOST,
    :username => args[:username] || Ensembl::DB_USERNAME,
    :password => args[:password] || Ensembl::DB_PASSWORD,
    :port     => args[:port],
    :database => args[:database] || ''
  )
end
|
49
|
+
end
|
50
|
+
|
51
|
+
module DBRegistry
|
52
|
+
# The Ensembl::DBRegistry::Base is a super class providing general methods
|
53
|
+
# to get database and connection info.
|
54
|
+
class Base < ActiveRecord::Base
|
55
|
+
self.abstract_class = true
|
56
|
+
self.pluralize_table_names = false
|
57
|
+
|
58
|
+
# Reports the settings of the current connection together with the species
# and release parsed from the database name.
#
# The settings are read from the "connection_options" entry of the
# connection's instance values, unpacked as host, user, password, database
# name and port. NOTE(review): that entry is adapter-specific - confirm it
# exists for the adapter in use.
#
# @return [Array] host, user, password, db_name, port, species, release
#   (release as an Integer)
# @raise [NameError] when the database name does not follow the
#   <species>_<core|variation|funcgen|compara>_<release>_<suffix> convention
def self.get_info
  host, user, password, db_name, port = retrieve_connection.instance_values["connection_options"]
  db_name =~ /(\w+_\w+)_(core|variation|funcgen|compara)_(\d+)_\S+/
  # just works for standard Ensembl database names
  species = Regexp.last_match(1)
  release = Regexp.last_match(3)
  if species.nil? && release.nil?
    raise NameError, "Can't get database name from #{db_name}. Are you using non conventional names?"
  end
  [host, user, password, db_name, port, species, release.to_i]
end
|
68
|
+
|
69
|
+
# Retrieves the name of a database from the server, using the species,
# release and connection parameters passed by the user.
#
# A database whose name contains "<species>_<db_type>_<release>" is looked
# up first. When none is found and args[:ensembl_genomes] is set, a
# "<genus>..._collection_<db_type>_<release>" database is searched instead;
# if one exists, the abbreviated species name is stored in
# Ensembl::SESSION.collection_species and checked against the collection's
# meta table.
#
# @param [String] db_type database type (callers in this file pass 'core'
#   or 'variation')
# @param [String] species species name; spaces are converted to underscores
# @param [Integer] release release number
# @param [Hash] args connection settings forwarded to
#   DummyDBConnection.connect; this method does not write to the hash itself
# @return [String, nil] the database name, or nil (after a warning) when no
#   database matches
def self.get_name_from_db(db_type,species,release,args)
  # Always in lowercase. This keeps things simple when dealing with complex
  # species names like in Ensembl Genomes database
  species = species.underscore.tr(' ','_')
  dummy_db = DummyDBConnection.connect(args)
  dummy_connection = dummy_db.connection
  # check if a database exists with exactly the species name passed (regular way);
  # the pattern is quoted by the adapter because species comes from the caller
  like_pattern = dummy_connection.quote("%#{species}_#{db_type}_#{release}%")
  db_name = dummy_connection.select_values("SHOW DATABASES LIKE #{like_pattern}").first
  # if a database is not found and we are working on Ensembl Genomes database...
  if db_name.nil? && args[:ensembl_genomes]
    first = species.split(/_/).first.to_s
    # ...try to find a collection database using the first name of the species
    # passed (convention used for collection databases)
    collection_pattern = /#{Regexp.escape(first)}.*_collection_#{Regexp.escape(db_type.to_s)}_#{release}/
    db_name = dummy_connection.select_values("SHOW DATABASES").find { |candidate| candidate =~ collection_pattern }
    # if a collection database match is found, then look inside to find the species
    if db_name
      dummy_db.disconnect! # close the generic connection with the host
      # open a new connection directly with the collection database
      dummy_db = DummyDBConnection.connect(args.merge(:database => db_name))
      # Abbreviate only the leading genus so the name matches those stored in
      # the collection database. Replacing every occurrence would mangle
      # names that repeat the genus, e.g. mus_musculus -> m_mculus.
      species_name = species.sub(/\A#{Regexp.escape(first)}/, first[0..0])
      # set the species used for this session, so it's easier to fetch slices
      # from the genome of that species
      Ensembl::SESSION.collection_species = species_name

      # check that the species passed is present in the collection database,
      # otherwise emit a warning
      collection_connection = dummy_db.connection
      quoted_species = collection_connection.quote(species_name)
      exists = collection_connection.select_values("SELECT species_id FROM meta WHERE LOWER(meta_value) = #{quoted_species} AND meta_key = 'species.db_name'").first
      warn "WARNING: No species '#{species}' found in the database. Please check that the name is correct." if !exists
    end
  end
  warn "WARNING: No connection to database established. Check that the species is in snake_case (was: #{species})." if db_name.nil?
  db_name
end
|
99
|
+
|
100
|
+
# Establishes this class's connection to an Ensembl database.
#
# Besides connecting, this method loads the VariationFeature implementation
# matching +release+, resets Ensembl::SESSION and stores +release+ on it.
#
# @param [String] db_type database type used for the name lookup (callers in
#   this file pass 'core' or 'variation')
# @param [String] species species name, forwarded to get_name_from_db
# @param [Integer] release release number
# @param [Hash] args optional settings: :adapter, :host, :username,
#   :password, :port, :database and :ensembl_genomes. The hash is copied, so
#   the caller's object is left untouched.
# @return the connection returned by retrieve_connection
def self.generic_connect(db_type, species, release, args = {})
  # Work on a private copy: the defaults filled in below (and by the helpers
  # this hash is passed to) must not leak into a hash the caller may reuse.
  args = args.dup

  # check which release is used and load the correct VariationFeature version
  feature_file = release < 62 ? 'variation_feature.rb' : 'variation_feature62.rb'
  require File.dirname(__FILE__) + '/variation/' + feature_file
  Ensembl::SESSION.reset
  Ensembl::SESSION.release = release

  # Ensembl Genomes connections always use the EG port and host; any :port or
  # :host supplied by the caller is replaced.
  if args[:ensembl_genomes]
    args[:port] = EG_PORT
    args[:host] = EG_HOST
  end
  args[:port] = (release > 47 ? 5306 : 3306) if args[:port].nil?

  # use the database named by the caller, otherwise try to find the corresponding one
  db_name = args[:database] || get_name_from_db(db_type, species, release, args)

  establish_connection(
    :adapter  => args[:adapter]  || Ensembl::DB_ADAPTER,
    :host     => args[:host]     || Ensembl::DB_HOST,
    :database => db_name,
    :username => args[:username] || Ensembl::DB_USERNAME,
    :password => args[:password] || Ensembl::DB_PASSWORD,
    :port     => args[:port]
  )

  retrieve_connection # Check if the connection is working
end
|
132
|
+
|
133
|
+
end
|
134
|
+
|
135
|
+
end
|
136
|
+
|
137
|
+
|
138
|
+
module Core
|
139
|
+
# The Ensembl::Core::DBConnection is the actual connection established
|
140
|
+
# with the Ensembl server.
|
141
|
+
class DBConnection < Ensembl::DBRegistry::Base
|
142
|
+
self.abstract_class = true
|
143
|
+
self.pluralize_table_names = false
|
144
|
+
# The Ensembl::Core::DBConnection.connect method makes the connection
# to the Ensembl core database for a given species. When no release is
# given it uses Ensembl::ENSEMBL_RELEASE. You _could_ use a lower number, but
# some parts of the API might not work, or worse: give the wrong results.
#
# @example
#  # Connect to the default release of human
#  Ensembl::Core::DBConnection.connect('homo_sapiens')
#
#  # Connect to release 42 of chicken
#  Ensembl::Core::DBConnection.connect('gallus_gallus', 42)
#
# @param [String] species Species to connect to. Must be in snake_case
# @param [Integer] release Release to connect to (default = Ensembl::ENSEMBL_RELEASE)
# @param [Hash] args Optional connection settings, forwarded to generic_connect
def self.connect(species, release = Ensembl::ENSEMBL_RELEASE, args = {})
  db_type = 'core'
  generic_connect(db_type, species, release, args)
end
|
161
|
+
|
162
|
+
# Connects to the core database of +species+ with the :ensembl_genomes
# option switched on.
#
# The flag is added to a copy of +args+, so a hash owned by the caller is
# not modified and can be reused for a regular connection.
#
# @param [String] species Species to connect to
# @param [Integer] release Release to connect to (default = Ensembl::ENSEMBL_RELEASE)
# @param [Hash] args Optional connection settings, forwarded to generic_connect
def self.ensemblgenomes_connect(species, release = Ensembl::ENSEMBL_RELEASE, args={})
  generic_connect('core', species, release, args.merge(:ensembl_genomes => true))
end
|
166
|
+
|
167
|
+
|
168
|
+
end # Core::DBConnection
|
169
|
+
|
170
|
+
end # Core
|
171
|
+
|
172
|
+
module Variation
|
173
|
+
# The Ensembl::Variation::DBConnection is the actual connection established
|
174
|
+
# with the Ensembl server.
|
175
|
+
class DBConnection < Ensembl::DBRegistry::Base
|
176
|
+
self.abstract_class = true
|
177
|
+
self.pluralize_table_names = false
|
178
|
+
# The Ensembl::Variation::DBConnection.connect method makes the connection
# to the Ensembl variation database for a given species. When no release is
# given it uses Ensembl::ENSEMBL_RELEASE. You _could_ use a lower number, but
# some parts of the API might not work, or worse: give the wrong results.
#
# @example
#  # Connect to the default release of human
#  Ensembl::Variation::DBConnection.connect('homo_sapiens')
#
#  # Connect to release 42 of chicken
#  Ensembl::Variation::DBConnection.connect('gallus_gallus', 42)
#
# @param [String] species Species to connect to. Must be in snake_case
# @param [Integer] release Release to connect to (default = Ensembl::ENSEMBL_RELEASE)
# @param [Hash] args Optional connection settings, forwarded to generic_connect
def self.connect(species, release = Ensembl::ENSEMBL_RELEASE, args = {})
  db_type = 'variation'
  generic_connect(db_type, species, release, args)
end
|
195
|
+
|
196
|
+
# Connects to the variation database of +species+ with the :ensembl_genomes
# option switched on.
#
# The flag is added to a copy of +args+, so a hash owned by the caller is
# not modified and can be reused for a regular connection.
#
# @param [String] species Species to connect to
# @param [Integer] release Release to connect to (default = Ensembl::ENSEMBL_RELEASE)
# @param [Hash] args Optional connection settings, forwarded to generic_connect
def self.ensemblgenomes_connect(species, release = Ensembl::ENSEMBL_RELEASE, args={})
  generic_connect('variation', species, release, args.merge(:ensembl_genomes => true))
end
|
200
|
+
|
201
|
+
end # Variation::DBConnection
|
202
|
+
|
203
|
+
end # Variation
|
204
|
+
|
205
|
+
end # Ensembl
|
@@ -0,0 +1,536 @@
|
|
1
|
+
#
|
2
|
+
# = ensembl/variation/activerecord.rb - ActiveRecord mappings to Ensembl Variation
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2008 Francesco Strozzi <francesco.strozzi@gmail.com>
|
5
|
+
# License:: The Ruby License
|
6
|
+
#
|
7
|
+
# @author Francesco Strozzi
|
8
|
+
|
9
|
+
nil
|
10
|
+
module Ensembl
|
11
|
+
# The Ensembl::Variation module covers the variation databases from
|
12
|
+
# ensembldb.ensembl.org.
|
13
|
+
module Variation
|
14
|
+
# The Allele class describes a single allele of a variation. In addition to
|
15
|
+
# the nucleotide(s) (or absence of) representing the allele, frequency
|
16
|
+
# and population information may be present.
|
17
|
+
#
|
18
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
19
|
+
# See the general documentation of the Ensembl module for
|
20
|
+
# more information on what this means and what methods are available.
|
21
|
+
#
|
22
|
+
# @example
|
23
|
+
# allele = Allele.find(1)
|
24
|
+
# puts allele.to_yaml
|
25
|
+
class Allele < DBConnection
|
26
|
+
set_primary_key 'allele_id'
|
27
|
+
belongs_to :sample
|
28
|
+
belongs_to :variation
|
29
|
+
belongs_to :population
|
30
|
+
belongs_to :subsnp_handle
|
31
|
+
end
|
32
|
+
|
33
|
+
# The AlleleGroup class represents a grouping of alleles that have tight
|
34
|
+
# linkage and are usually present together. This is commonly known as a
|
35
|
+
# Haplotype or Haplotype Block.
|
36
|
+
#
|
37
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
38
|
+
# See the general documentation of the Ensembl module for
|
39
|
+
# more information on what this means and what methods are available.
|
40
|
+
#
|
41
|
+
# @example
|
42
|
+
# allele_group = AlleleGroup.find(1)
|
43
|
+
# puts allele_group.to_yaml
|
44
|
+
class AlleleGroup < DBConnection
|
45
|
+
set_primary_key 'allele_group_id'
|
46
|
+
belongs_to :variation_group
|
47
|
+
belongs_to :source
|
48
|
+
belongs_to :sample
|
49
|
+
belongs_to :allele_group_allele
|
50
|
+
end
|
51
|
+
|
52
|
+
# The AlleleGroupAllele class represents a connection class between Allele and AlleleGroup.
|
53
|
+
# Should not be used directly.
|
54
|
+
#
|
55
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
56
|
+
# See the general documentation of the Ensembl module for
|
57
|
+
# more information on what this means and what methods are available.
|
58
|
+
class AlleleGroupAllele < DBConnection
|
59
|
+
belongs_to :variation
|
60
|
+
belongs_to :allele_group
|
61
|
+
end
|
62
|
+
|
63
|
+
# Store information on attributes types
|
64
|
+
#
|
65
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
66
|
+
# See the general documentation of the Ensembl module for
|
67
|
+
# more information on what this means and what methods are available.
|
68
|
+
class AttribType < DBConnection
|
69
|
+
set_primary_key "attrib_type_id"
|
70
|
+
end
|
71
|
+
|
72
|
+
# Store information on associated studies
|
73
|
+
#
|
74
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
75
|
+
# See the general documentation of the Ensembl module for
|
76
|
+
# more information on what this means and what methods are available.
|
77
|
+
class AssociateStudy < DBConnection
|
78
|
+
set_primary_key "study1_id"
|
79
|
+
belongs_to :study
|
80
|
+
end
|
81
|
+
|
82
|
+
|
83
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
84
|
+
# See the general documentation of the Ensembl module for
|
85
|
+
# more information on what this means and what methods are available.
|
86
|
+
class ConsequenceMapping < DBConnection
|
87
|
+
|
88
|
+
end
|
89
|
+
|
90
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
91
|
+
# See the general documentation of the Ensembl module for
|
92
|
+
# more information on what this means and what methods are available.
|
93
|
+
class FailedDescription < DBConnection
|
94
|
+
set_primary_key "failed_description_id"
|
95
|
+
has_many :failed_variations
|
96
|
+
end
|
97
|
+
|
98
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
99
|
+
# See the general documentation of the Ensembl module for
|
100
|
+
# more information on what this means and what methods are available.
|
101
|
+
class FailedVariation < DBConnection
|
102
|
+
set_primary_key "failed_variation_id"
|
103
|
+
belongs_to :failed_description
|
104
|
+
belongs_to :variation
|
105
|
+
end
|
106
|
+
|
107
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
108
|
+
# See the general documentation of the Ensembl module for
|
109
|
+
# more information on what this means and what methods are available.
|
110
|
+
class FeatureType < DBConnection
|
111
|
+
set_primary_key "feature_type_id"
|
112
|
+
end
|
113
|
+
|
114
|
+
class Meta < DBConnection
|
115
|
+
set_primary_key "meta_id"
|
116
|
+
end
|
117
|
+
|
118
|
+
class MetaCoord < DBConnection
|
119
|
+
|
120
|
+
end
|
121
|
+
|
122
|
+
class Phenotype < DBConnection
|
123
|
+
set_primary_key "phenotype_id"
|
124
|
+
has_many :variation_annotations
|
125
|
+
end
|
126
|
+
|
127
|
+
# The Sample class gives information about the biological samples stored in the database.
|
128
|
+
#
|
129
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
130
|
+
# See the general documentation of the Ensembl module for
|
131
|
+
# more information on what this means and what methods are available.
|
132
|
+
class Sample < DBConnection
|
133
|
+
set_primary_key "sample_id"
|
134
|
+
has_one :individual
|
135
|
+
has_one :sample_synonym
|
136
|
+
has_many :individual_genotype_multiple_bp
|
137
|
+
has_many :compressed_genotype_single_bp
|
138
|
+
has_many :read_coverage
|
139
|
+
has_one :population
|
140
|
+
has_many :tagged_variation_features
|
141
|
+
end
|
142
|
+
|
143
|
+
# The IndividualPopulation class is used to connect Individual and Population classes.
|
144
|
+
# Should not be used directly.
|
145
|
+
#
|
146
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
147
|
+
# See the general documentation of the Ensembl module for
|
148
|
+
# more information on what this means and what methods are available.
|
149
|
+
class IndividualPopulation < DBConnection
|
150
|
+
belongs_to :individual, :foreign_key => "individual_sample_id"
|
151
|
+
belongs_to :population, :foreign_key => "population_sample_id"
|
152
|
+
end
|
153
|
+
|
154
|
+
# The Individual class gives information on the single individuals used
|
155
|
+
# to retrieve one or more biological samples.
|
156
|
+
#
|
157
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
158
|
+
# See the general documentation of the Ensembl module for
|
159
|
+
# more information on what this means and what methods are available.
|
160
|
+
class Individual < DBConnection
|
161
|
+
set_primary_key "sample_id"
|
162
|
+
belongs_to :sample
|
163
|
+
has_one :individual_type
|
164
|
+
has_many :individual_populations, :foreign_key => "individual_sample_id"
|
165
|
+
has_many :populations, :through => :individual_populations
|
166
|
+
end
|
167
|
+
|
168
|
+
class IndividualGenotypeMultipleBp < DBConnection
|
169
|
+
belongs_to :sample
|
170
|
+
belongs_to :variation
|
171
|
+
belongs_to :subsnp_handle
|
172
|
+
end
|
173
|
+
|
174
|
+
# The IndividualType class maps the individual_type table.
#
# This class uses ActiveRecord to access data in the Ensembl database.
class IndividualType < DBConnection
  # The key was previously spelled "invidual_type_id", a column name that
  # does not match the table name.
  # NOTE(review): confirm the column name against the schema of the
  # releases you target.
  set_primary_key "individual_type_id"
  belongs_to :individual
end
|
178
|
+
|
179
|
+
|
180
|
+
class CompressedGenotypeSingleBp < DBConnection
|
181
|
+
belongs_to :population_genotype, :foreign_key => "sample_id"
|
182
|
+
end
|
183
|
+
|
184
|
+
class ReadCoverage < DBConnection
|
185
|
+
belongs_to :sample
|
186
|
+
end
|
187
|
+
|
188
|
+
class Population < DBConnection
|
189
|
+
belongs_to :sample
|
190
|
+
set_primary_key "sample_id"
|
191
|
+
has_many :population_genotypes, :foreign_key => "sample_id"
|
192
|
+
has_many :individual_populations, :foreign_key => "population_sample_id"
|
193
|
+
has_many :individuals, :through => :individual_populations
|
194
|
+
has_many :sample_synonyms
|
195
|
+
has_one :population_structure
|
196
|
+
has_many :tagged_variation_features
|
197
|
+
has_many :alleles
|
198
|
+
has_many :allele_groups
|
199
|
+
end
|
200
|
+
|
201
|
+
|
202
|
+
# The PopulationStructure class gives information on super and sub populations
|
203
|
+
#
|
204
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
205
|
+
# See the general documentation of the Ensembl module for
|
206
|
+
# more information on what this means and what methods are available.
|
207
|
+
class PopulationStructure < DBConnection
|
208
|
+
|
209
|
+
end
|
210
|
+
|
211
|
+
# The PopulationGenotype class gives information about alleles and allele
|
212
|
+
# frequencies for a SNP observed within a population or a group of samples.
|
213
|
+
#
|
214
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
215
|
+
# See the general documentation of the Ensembl module for
|
216
|
+
# more information on what this means and what methods are available.
|
217
|
+
class PopulationGenotype < DBConnection
|
218
|
+
set_primary_key "population_genotype_id"
|
219
|
+
belongs_to :variation
|
220
|
+
belongs_to :population
|
221
|
+
belongs_to :subsnp_handle
|
222
|
+
has_many :compressed_genotype_single_bps, :foreign_key => "sample_id"
|
223
|
+
end
|
224
|
+
|
225
|
+
# The ProteinInfo class gives information about protein translated from a given transcript.
|
226
|
+
#
|
227
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
228
|
+
# See the general documentation of the Ensembl module for
|
229
|
+
# more information on what this means and what methods are available.
|
230
|
+
class ProteinInfo < DBConnection
|
231
|
+
set_primary_key "protein_info_id"
|
232
|
+
belongs_to :transcript_variation
|
233
|
+
has_many :protein_positions
|
234
|
+
end
|
235
|
+
|
236
|
+
# The PolyphenPrediction class gives information about variations effect predictions within an aminoacidic sequence
|
237
|
+
#
|
238
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
239
|
+
# See the general documentation of the Ensembl module for
|
240
|
+
# more information on what this means and what methods are available.
|
241
|
+
class PolyphenPrediction < DBConnection
|
242
|
+
set_primary_key "polyphen_prediction_id"
|
243
|
+
belongs_to :protein_position
|
244
|
+
end
|
245
|
+
|
246
|
+
# The ProteinPosition class gives information about variations within an aminoacidic sequence.
|
247
|
+
#
|
248
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
249
|
+
# See the general documentation of the Ensembl module for
|
250
|
+
# more information on what this means and what methods are available.
|
251
|
+
class ProteinPosition < DBConnection
|
252
|
+
set_primary_key "protein_position_id"
|
253
|
+
belongs_to :protein_info
|
254
|
+
has_many :polyphen_predictions
|
255
|
+
has_many :sift_predictions
|
256
|
+
end
|
257
|
+
|
258
|
+
|
259
|
+
|
260
|
+
# The SampleSynonym class represents information about alternative names
|
261
|
+
# for sample entries.
|
262
|
+
#
|
263
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
264
|
+
# See the general documentation of the Ensembl module for
|
265
|
+
# more information on what this means and what methods are available.
|
266
|
+
class SampleSynonym < DBConnection
|
267
|
+
set_primary_key "sample_synonym_id"
|
268
|
+
belongs_to :source
|
269
|
+
belongs_to :sample
|
270
|
+
belongs_to :population
|
271
|
+
end
|
272
|
+
|
273
|
+
# The Source class gives information on the different databases and SNP
|
274
|
+
# panels used to retrieve the data
|
275
|
+
#
|
276
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
277
|
+
# See the general documentation of the Ensembl module for
|
278
|
+
# more information on what this means and what methods are available.
|
279
|
+
class Source < DBConnection
|
280
|
+
set_primary_key "source_id"
|
281
|
+
has_many :sample_synonyms
|
282
|
+
has_many :allele_groups
|
283
|
+
has_many :variations
|
284
|
+
has_many :variation_groups
|
285
|
+
has_many :httags
|
286
|
+
has_many :variation_synonyms
|
287
|
+
has_many :variation_annotations
|
288
|
+
has_many :structural_variations
|
289
|
+
|
290
|
+
# Returns the +somatic_status+ column as a String, read from the attribute
# values before type casting.
# Workaround: ActiveRecord does not parse SET fields in MySQL.
#
# @return [String] the raw column value; empty when the value is nil
def somatic_status
  raw_status = attributes_before_type_cast['somatic_status']
  raw_status.to_s.dup
end
|
293
|
+
|
294
|
+
end
|
295
|
+
|
296
|
+
# The StructuralVariation class gives information on structural variations mapped on the genome
|
297
|
+
#
|
298
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
299
|
+
# See the general documentation of the Ensembl module for
|
300
|
+
# more information on what this means and what methods are available.
|
301
|
+
class StructuralVariation < DBConnection
|
302
|
+
set_primary_key "structural_variation_id"
|
303
|
+
belongs_to :source
|
304
|
+
belongs_to :seq_region
|
305
|
+
has_many :supporting_structural_variations
|
306
|
+
|
307
|
+
class << self
  # Workaround for the 'class' column: answering true for that name stops
  # ActiveRecord from treating it as an attribute method still to be
  # generated, which would otherwise create a mess. Every other name is
  # passed on to the inherited check.
  #
  # @param [String] method_name name of the candidate attribute method
  # @return true for 'class', otherwise the inherited answer
  def instance_method_already_implemented?(method_name)
    method_name == 'class' || super
  end
end
|
313
|
+
|
314
|
+
# Returns the value stored under the "class" attribute. The column cannot be
# read through a method called +class+, so it is exposed under this name.
#
# @return the value of the "class" attribute
def sv_class
  column_values = attributes
  column_values["class"]
end
|
317
|
+
|
318
|
+
end
|
319
|
+
|
320
|
+
|
321
|
+
class SeqRegion < DBConnection
|
322
|
+
set_primary_key "seq_region_id"
|
323
|
+
has_many :variation_features
|
324
|
+
has_many :structural_variations
|
325
|
+
end
|
326
|
+
|
327
|
+
# The SupportingStructuralVariation class maps records that belong to a StructuralVariation
|
328
|
+
#
|
329
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
330
|
+
# See the general documentation of the Ensembl module for
|
331
|
+
# more information on what this means and what methods are available.
|
332
|
+
class SupportingStructuralVariation < DBConnection
|
333
|
+
set_primary_key "supporting_structural_variation_id"
|
334
|
+
belongs_to :structural_variation
|
335
|
+
end
|
336
|
+
|
337
|
+
# The SubsnpHandle class gives information on SNP Submitters
|
338
|
+
#
|
339
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
340
|
+
# See the general documentation of the Ensembl module for
|
341
|
+
# more information on what this means and what methods are available.
|
342
|
+
class SubsnpHandle < DBConnection
|
343
|
+
set_primary_key "subsnp_id"
|
344
|
+
has_many :individual_genotype_multiple_bps, :foreign_key => "subsnp_id"
|
345
|
+
has_many :population_genotypes, :foreign_key => "subsnp_id"
|
346
|
+
has_many :alleles, :foreign_key => "subsnp_id"
|
347
|
+
has_many :variation_synonyms,:foreign_key => "subsnp_id"
|
348
|
+
end
|
349
|
+
|
350
|
+
# The SiftPrediction class gives information about variations effect predictions within an aminoacidic sequence
|
351
|
+
#
|
352
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
353
|
+
# See the general documentation of the Ensembl module for
|
354
|
+
# more information on what this means and what methods are available.
|
355
|
+
class SiftPrediction < DBConnection
|
356
|
+
set_primary_key "sift_prediction_id"
|
357
|
+
belongs_to :protein_position
|
358
|
+
end
|
359
|
+
|
360
|
+
# The Study class gives information about studies producing variations information
|
361
|
+
#
|
362
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
363
|
+
# See the general documentation of the Ensembl module for
|
364
|
+
# more information on what this means and what methods are available.
|
365
|
+
class Study < DBConnection
|
366
|
+
set_primary_key "study_id"
|
367
|
+
has_many :associate_studies, :foreign_key => "study1_id"
|
368
|
+
has_many :structural_variations
|
369
|
+
has_many :variation_annotations
|
370
|
+
|
371
|
+
# Returns the +study_type+ column as a String, read from the attribute
# values before type casting.
#
# @return [String] the raw column value; empty when the value is nil
def study_type
  raw_type = attributes_before_type_cast['study_type']
  raw_type.to_s.dup
end
|
374
|
+
|
375
|
+
end
|
376
|
+
|
377
|
+
|
378
|
+
# The Variation class represents single nucleotide polymorphisms (SNP) or variations
|
379
|
+
# and provides information like the names (IDs), the validation status and
|
380
|
+
# the allele information.
|
381
|
+
#
|
382
|
+
# *BUG*: fields like validation_status and consequence_type are created
|
383
|
+
# using SET option directly in MySQL. These fields are misinterpreted by
|
384
|
+
# ActiveRecord, returning always 0.
|
385
|
+
#
|
386
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
387
|
+
# See the general documentation of the Ensembl module for
|
388
|
+
# more information on what this means and what methods are available.
|
389
|
+
#
|
390
|
+
# @example
|
391
|
+
# v = Variation.find_by_name('rs10111')
|
392
|
+
# v.alleles.each do |a|
|
393
|
+
# puts a.allele, a.frequency
|
394
|
+
# end
|
395
|
+
#
|
396
|
+
# variations = Variation.fetch_all_by_source('dbSNP') # many records
|
397
|
+
# variations.each do |v|
|
398
|
+
# puts v.name
|
399
|
+
# end
|
400
|
+
#
|
401
|
+
class Variation < DBConnection
  # `primary_key=` is used instead of `set_primary_key`, which ActiveRecord
  # deprecated in 3.2 and removed in 4.0.
  self.primary_key = "variation_id"

  # Associations (each :through association is declared after the join
  # association it relies on).
  belongs_to :source
  has_many :variation_synonyms
  has_one :flanking_sequence
  has_many :allele_group_alleles
  has_many :allele_groups, :through => :allele_group_alleles
  has_many :population_genotypes
  has_many :alleles
  has_many :variation_features
  has_many :variation_group_variations
  has_many :variation_groups, :through => :variation_group_variations
  has_many :individual_genotype_multiple_bps
  has_many :failed_variations
  has_many :failed_descriptions, :through => :failed_variations
  has_many :variation_set_variations
  has_many :variation_sets, :through => :variation_set_variations

  # Fetches the variations attached to the Source with the given name.
  #
  # @param source [String] name of the source, e.g. 'dbSNP'
  # @return the +variations+ of the matching Source, or an empty Array when
  #   no Source with that name is found (previously this raised
  #   NoMethodError on nil)
  def self.fetch_all_by_source(source)
    matching_source = Source.find_by_name(source)
    matching_source ? matching_source.variations : []
  end
end
|
423
|
+
|
424
|
+
# The VariationSynonym class gives information on alternative names used
|
425
|
+
# for Variation entries.
|
426
|
+
#
|
427
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
428
|
+
# See the general documentation of the Ensembl module for
|
429
|
+
# more information on what this means and what methods are available.
|
430
|
+
class VariationSynonym < DBConnection
  # `primary_key=` is used instead of `set_primary_key`, which ActiveRecord
  # deprecated in 3.2 and removed in 4.0.
  self.primary_key = "variation_synonym_id"

  # Each synonym row references one variation, one source and one
  # subsnp_handle.
  belongs_to :variation
  belongs_to :source
  belongs_to :subsnp_handle
end
|
436
|
+
|
437
|
+
# The VariationGroup class represents a group of variations (SNPs) that are
|
438
|
+
# linked and present together.
|
439
|
+
#
|
440
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
441
|
+
# See the general documentation of the Ensembl module for
|
442
|
+
# more information on what this means and what methods are available.
|
443
|
+
class VariationGroup < DBConnection
  # `primary_key=` is used instead of `set_primary_key`, which ActiveRecord
  # deprecated in 3.2 and removed in 4.0.
  self.primary_key = "variation_group_id"

  belongs_to :source

  # One-to-one associations declared for a group.
  has_one :variation_group_variation
  has_one :httag
  has_one :variation_group_feature
  has_one :allele_group
end
|
451
|
+
|
452
|
+
# The VariationGroupVariation class is a connection class.
|
453
|
+
# Should not be used directly.
|
454
|
+
#
|
455
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
456
|
+
# See the general documentation of the Ensembl module for
|
457
|
+
# more information on what this means and what methods are available.
|
458
|
+
class VariationGroupVariation < DBConnection
  # Join model: each row ties one Variation to one VariationGroup.
  belongs_to(:variation)
  belongs_to(:variation_group)
end
|
462
|
+
|
463
|
+
# The VariationGroupFeature class gives information on the genomic position
|
464
|
+
# of each VariationGroup.
|
465
|
+
#
|
466
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
467
|
+
# See the general documentation of the Ensembl module for
|
468
|
+
# more information on what this means and what methods are available.
|
469
|
+
class VariationGroupFeature < DBConnection
  # `primary_key=` is used instead of `set_primary_key`, which ActiveRecord
  # deprecated in 3.2 and removed in 4.0.
  self.primary_key = "variation_group_feature_id"

  # The group this feature row belongs to.
  belongs_to :variation_group
end
|
473
|
+
|
474
|
+
# The VariationAnnotation class links a Variation to a Phenotype and to the
# Source the annotation comes from.
#
# This class uses ActiveRecord to access data in the Ensembl database.
class VariationAnnotation < DBConnection
  # `primary_key=` is used instead of `set_primary_key`, which ActiveRecord
  # deprecated in 3.2 and removed in 4.0.
  self.primary_key = "variation_annotation_id"

  belongs_to :variation
  belongs_to :phenotype
  belongs_to :source
end
|
480
|
+
|
481
|
+
# The VariationSet class gives information on variations grouped by study, method, quality measure etc.
|
482
|
+
#
|
483
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
484
|
+
# See the general documentation of the Ensembl module for
|
485
|
+
# more information on what this means and what methods are available.
|
486
|
+
class VariationSet < DBConnection
  # `primary_key=` is used instead of `set_primary_key`, which ActiveRecord
  # deprecated in 3.2 and removed in 4.0.
  self.primary_key = "variation_set_id"

  # Variations are reached through the variation_set_variations join rows.
  has_many :variation_set_variations
  has_many :variations, :through => :variation_set_variations
end
|
491
|
+
|
492
|
+
class VariationSetVariation < DBConnection
  # Join model: each row ties one Variation to one VariationSet.
  belongs_to(:variation)
  belongs_to(:variation_set)
end
|
496
|
+
|
497
|
+
# The VariationSetStructure class gives information on super and sub VariationSets.
|
498
|
+
#
|
499
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
500
|
+
# See the general documentation of the Ensembl module for
|
501
|
+
# more information on what this means and what methods are available.
|
502
|
+
class VariationSetStructure < DBConnection
  # No associations or methods are declared here; everything this class
  # offers is inherited from DBConnection.
end
|
505
|
+
|
506
|
+
|
507
|
+
|
508
|
+
# The FlankingSequence class gives information about the genomic coordinates
|
509
|
+
# of the flanking sequences, for a single VariationFeature.
|
510
|
+
#
|
511
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
512
|
+
# See the general documentation of the Ensembl module for
|
513
|
+
# more information on what this means and what methods are available.
|
514
|
+
class FlankingSequence < DBConnection
  # Each flanking sequence row references a single Variation.
  belongs_to(:variation)
end
|
517
|
+
|
518
|
+
# The TaggedVariationFeature class is a connection class.
|
519
|
+
# Should not be used directly.
|
520
|
+
#
|
521
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
522
|
+
# See the general documentation of the Ensembl module for
|
523
|
+
# more information on what this means and what methods are available.
|
524
|
+
class TaggedVariationFeature < DBConnection
  # Connection model: each row references one VariationFeature and one Sample.
  belongs_to(:variation_feature)
  belongs_to(:sample)
end
|
528
|
+
|
529
|
+
# The Httag class references the VariationGroup and the Source it is
# associated with.
#
# This class uses ActiveRecord to access data in the Ensembl database.
class Httag < DBConnection
  # `primary_key=` is used instead of `set_primary_key`, which ActiveRecord
  # deprecated in 3.2 and removed in 4.0.
  self.primary_key = "httag_id"

  belongs_to :variation_group
  belongs_to :source
end
|
534
|
+
|
535
|
+
end
|
536
|
+
end
|