bio-ensembl 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (81) hide show
  1. data/.document +5 -0
  2. data/Gemfile +20 -0
  3. data/Gemfile.lock +40 -0
  4. data/LICENSE.txt +20 -0
  5. data/README.rdoc +19 -0
  6. data/Rakefile +71 -0
  7. data/VERSION +1 -0
  8. data/bin/ensembl +40 -0
  9. data/bin/variation_effect_predictor +106 -0
  10. data/bio-ensembl.gemspec +190 -0
  11. data/lib/bio-ensembl.rb +65 -0
  12. data/lib/bio-ensembl/core/activerecord.rb +1812 -0
  13. data/lib/bio-ensembl/core/collection.rb +64 -0
  14. data/lib/bio-ensembl/core/project.rb +262 -0
  15. data/lib/bio-ensembl/core/slice.rb +657 -0
  16. data/lib/bio-ensembl/core/transcript.rb +409 -0
  17. data/lib/bio-ensembl/core/transform.rb +95 -0
  18. data/lib/bio-ensembl/db_connection.rb +205 -0
  19. data/lib/bio-ensembl/variation/activerecord.rb +536 -0
  20. data/lib/bio-ensembl/variation/variation_feature.rb +376 -0
  21. data/lib/bio-ensembl/variation/variation_feature62.rb +444 -0
  22. data/samples/ensembl_genomes_example.rb +60 -0
  23. data/samples/examples_perl_tutorial.rb +125 -0
  24. data/samples/small_example_ruby_api.rb +34 -0
  25. data/samples/variation_effect_predictor_data.txt +4 -0
  26. data/samples/variation_example.rb +67 -0
  27. data/test/data/seq_c6qbl.fa +10 -0
  28. data/test/data/seq_cso19_coding.fa +16 -0
  29. data/test/data/seq_cso19_transcript.fa +28 -0
  30. data/test/data/seq_drd3_gene.fa +838 -0
  31. data/test/data/seq_drd3_transcript.fa +22 -0
  32. data/test/data/seq_drd4_transcript.fa +24 -0
  33. data/test/data/seq_forward_composite.fa +1669 -0
  34. data/test/data/seq_par_boundary.fa +169 -0
  35. data/test/data/seq_rnd3_transcript.fa +47 -0
  36. data/test/data/seq_ub2r1_coding.fa +13 -0
  37. data/test/data/seq_ub2r1_gene.fa +174 -0
  38. data/test/data/seq_ub2r1_transcript.fa +26 -0
  39. data/test/data/seq_y.fa +2 -0
  40. data/test/default/test_connection.rb +60 -0
  41. data/test/default/test_releases.rb +130 -0
  42. data/test/ensembl_genomes/test_collection.rb +122 -0
  43. data/test/ensembl_genomes/test_gene.rb +46 -0
  44. data/test/ensembl_genomes/test_slice.rb +65 -0
  45. data/test/ensembl_genomes/test_variation.rb +38 -0
  46. data/test/helper.rb +18 -0
  47. data/test/release_50/core/test_project.rb +210 -0
  48. data/test/release_50/core/test_project_human.rb +52 -0
  49. data/test/release_50/core/test_relationships.rb +72 -0
  50. data/test/release_50/core/test_sequence.rb +170 -0
  51. data/test/release_50/core/test_slice.rb +116 -0
  52. data/test/release_50/core/test_transcript.rb +125 -0
  53. data/test/release_50/core/test_transform.rb +217 -0
  54. data/test/release_50/variation/test_activerecord.rb +138 -0
  55. data/test/release_50/variation/test_variation.rb +79 -0
  56. data/test/release_53/core/test_gene.rb +61 -0
  57. data/test/release_53/core/test_project.rb +91 -0
  58. data/test/release_53/core/test_project_human.rb +61 -0
  59. data/test/release_53/core/test_slice.rb +42 -0
  60. data/test/release_53/core/test_transform.rb +57 -0
  61. data/test/release_53/variation/test_activerecord.rb +137 -0
  62. data/test/release_53/variation/test_variation.rb +66 -0
  63. data/test/release_56/core/test_gene.rb +61 -0
  64. data/test/release_56/core/test_project.rb +91 -0
  65. data/test/release_56/core/test_slice.rb +49 -0
  66. data/test/release_56/core/test_transform.rb +57 -0
  67. data/test/release_56/variation/test_activerecord.rb +141 -0
  68. data/test/release_56/variation/test_consequence.rb +131 -0
  69. data/test/release_56/variation/test_variation.rb +63 -0
  70. data/test/release_60/core/test_gene.rb +61 -0
  71. data/test/release_60/core/test_project_human.rb +34 -0
  72. data/test/release_60/core/test_slice.rb +42 -0
  73. data/test/release_60/core/test_transcript.rb +120 -0
  74. data/test/release_60/core/test_transform.rb +57 -0
  75. data/test/release_60/variation/test_activerecord.rb +216 -0
  76. data/test/release_60/variation/test_consequence.rb +153 -0
  77. data/test/release_60/variation/test_variation.rb +64 -0
  78. data/test/release_62/core/test_gene.rb +42 -0
  79. data/test/release_62/variation/test_activerecord.rb +86 -0
  80. data/test/release_62/variation/test_consequence.rb +191 -0
  81. metadata +287 -0
@@ -0,0 +1,205 @@
1
+ #
2
+ # = ensembl/db_connection.rb - Connection classes for Ensembl databases
3
+ #
4
+ # Copyright:: Copyright (C) 2009 Jan Aerts <http://jandot.myopenid.com>
5
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
6
+ #
7
+ # License:: The Ruby License
8
+ #
9
+
10
+
11
+ require 'rubygems'
12
+ require 'active_record'
13
+
14
+
15
# Reflection helpers added to every ActiveRecord model.
class ActiveRecord::Base
  # Lists the names (as strings) of all belongs_to associations declared
  # on this model.
  def self.belongs_to_what
    reflect_on_all_associations(:belongs_to).map { |assoc| assoc.name.to_s }
  end

  # Lists the names (as strings) of all has_one and has_many associations
  # declared on this model, has_one entries first.
  def self.has_what
    owned = reflect_on_all_associations(:has_one) + reflect_on_all_associations(:has_many)
    owned.uniq.map { |assoc| assoc.name.to_s }
  end
end
25
+
26
+
27
+ module Ensembl
28
+ DB_ADAPTER = 'mysql'
29
+ DB_HOST = 'ensembldb.ensembl.org'
30
+ DB_USERNAME = 'anonymous'
31
+ DB_PASSWORD = ''
32
+ EG_HOST = 'mysql.ebi.ac.uk'
33
+ EG_PORT = 4157
34
+
35
+
36
# Generic class to perform dynamic connections to the Ensembl database and retrieve database names
class DummyDBConnection < ActiveRecord::Base
  self.abstract_class = true

  # Establishes a connection using the given options. Missing :adapter,
  # :host, :username and :password fall back to the Ensembl::DB_* constants;
  # a missing :database falls back to an empty name; :port is passed as is.
  #
  # The options hash is only read. Defaults are NOT written back into it:
  # storing :database => '' in the caller's hash would make a later connect
  # call that reuses the same hash believe a database had been chosen
  # explicitly (an empty string is truthy in Ruby).
  #
  # @param [Hash] args Connection options (:adapter, :host, :username, :password, :port, :database)
  # @return the value returned by establish_connection
  def self.connect(args)
    self.establish_connection(
      :adapter => args[:adapter] || Ensembl::DB_ADAPTER,
      :host => args[:host] || Ensembl::DB_HOST,
      :username => args[:username] || Ensembl::DB_USERNAME,
      :password => args[:password] || Ensembl::DB_PASSWORD,
      :port => args[:port],
      :database => args[:database] || ''
    )
  end
end
50
+
51
+ module DBRegistry
52
+ # The Ensembl::DBRegistry::Base is a superclass providing general methods
53
+ # to get database and connection info.
54
+ class Base < ActiveRecord::Base
55
+ self.abstract_class = true
56
+ self.pluralize_table_names = false
57
+
58
# Returns the details of the current connection together with the species
# and release parsed from the database name.
#
# Only standard Ensembl database names (<species>_<type>_<release>_<suffix>)
# can be parsed.
#
# @return [Array] host, user, password, db_name, port, species, release (Integer)
# @raise [NameError] if the database name does not follow the standard pattern
def self.get_info
  host, user, password, db_name, port = self.retrieve_connection.instance_values["connection_options"]
  parsed = /(\w+_\w+)_(core|variation|funcgen|compara)_(\d+)_\S+/.match(db_name)
  unless parsed
    raise NameError, "Can't get database name from #{db_name}. Are you using non conventional names?"
  end
  species = parsed[1]
  release = parsed[3]
  [host, user, password, db_name, port, species, release.to_i]
end
68
+
69
# Method to retrieve the name of a database, using species, release and connection parameters
# passed by the user.
#
# The species name is first looked up directly; when nothing is found and
# args[:ensembl_genomes] is set, a "collection" database whose name starts
# with the first word of the species is searched instead.
#
# @param [String] db_type Database type used in the database name (e.g. 'core', 'variation')
# @param [String] species Species name; converted to snake_case before the lookup
# @param [Integer] release Release number
# @param [Hash] args Connection options; this hash is not modified
# @return [String, nil] The database name, or nil (after a warning) when none is found
def self.get_name_from_db(db_type, species, release, args)
  # Work on a copy: :database is overwritten below for collection databases
  # and that must not leak into a hash the caller may reuse.
  args = args.dup
  species = species.underscore.tr(' ', '_') # Always in lowercase. This keeps things simple when dealing with complex species names like in Ensembl Genomes database
  dummy_db = DummyDBConnection.connect(args)
  dummy_connection = dummy_db.connection
  # check if a database exists with exactly the species name passed (regular way);
  # the pattern is quoted by the adapter rather than pasted into the statement
  like_pattern = dummy_connection.quote("%#{species}_#{db_type}_#{release}%")
  db_name = dummy_connection.select_values("SHOW DATABASES LIKE #{like_pattern}")[0]
  # if a database is not found and we are working on Ensembl Genomes database...
  if db_name.nil? && args[:ensembl_genomes]
    first = species.split(/_/).first.to_s
    # ...try to find a collection database using the first name of the species passed (convention used for collection databases)
    collection_re = /#{Regexp.escape(first)}.*_collection_#{Regexp.escape(db_type.to_s)}_#{Regexp.escape(release.to_s)}/
    db_name = dummy_connection.select_values("SHOW DATABASES").find { |d| d =~ collection_re }
    # if a collection database match is found, then look inside to find the species
    if db_name
      dummy_db.disconnect! # close the generic connection with the host
      args[:database] = db_name
      dummy_db = DummyDBConnection.connect(args) # open a new connection directly with the collection database
      # transform the species name, so it can match the species names stored in the collection database.
      # Only the leading word is abbreviated to its initial; later occurrences of the same text are kept.
      species_name = first[0, 1] + species[first.length..-1]
      Ensembl::SESSION.collection_species = species_name # set the species used for this session, so it's easier to fetch slices from the genome of that species

      # check that the species passed is present in the collection database, otherwise returns a warning
      collection_connection = dummy_db.connection
      quoted_species = collection_connection.quote(species_name)
      exists = collection_connection.select_values("SELECT species_id FROM meta WHERE LOWER(meta_value) = #{quoted_species} AND meta_key = 'species.db_name'")[0]
      warn "WARNING: No species '#{species}' found in the database. Please check that the name is correct." unless exists
    end
  end
  warn "WARNING: No connection to database established. Check that the species is in snake_case (was: #{species})." if db_name.nil?
  db_name
end
99
+
100
# Connects this class to the database of the given type, species and release.
#
# The matching VariationFeature implementation is loaded first (releases
# below 62 use variation_feature.rb, later ones variation_feature62.rb) and
# the Ensembl::SESSION is reset. When args[:database] is given it is used
# directly; otherwise the database name is looked up on the server.
#
# @param [String] db_type Database type (e.g. 'core', 'variation')
# @param [String] species Species to connect to
# @param [Integer] release Release to connect to
# @param [Hash] args Optional settings (:adapter, :host, :username, :password,
#   :port, :database, :ensembl_genomes); this hash is not modified
# @return the connection returned by retrieve_connection
def self.generic_connect(db_type, species, release, args = {})
  # Work on a copy: host, port (and, further down the call chain, database)
  # are filled in below and must not leak into a hash the caller may reuse
  # for another connection.
  args = args.dup

  # check which release is used and load the correct VariationFeature version
  feature_file = (release < 62) ? '/variation/variation_feature.rb' : '/variation/variation_feature62.rb'
  require File.dirname(__FILE__) + feature_file
  Ensembl::SESSION.reset
  Ensembl::SESSION.release = release

  # if the connection is established with Ensembl Genomes, set the default port and host
  if args[:ensembl_genomes]
    args[:port] = EG_PORT
    args[:host] = EG_HOST
  end
  # the port used here depends on the release when none was given
  args[:port] = (release > 47) ? 5306 : 3306 if args[:port].nil?

  # use the explicit database name if given, otherwise try to find the corresponding database
  db_name = args[:database] || self.get_name_from_db(db_type, species, release, args)

  establish_connection(
    :adapter => args[:adapter] || Ensembl::DB_ADAPTER,
    :host => args[:host] || Ensembl::DB_HOST,
    :database => db_name,
    :username => args[:username] || Ensembl::DB_USERNAME,
    :password => args[:password] || Ensembl::DB_PASSWORD,
    :port => args[:port]
  )

  self.retrieve_connection # Check if the connection is working
end
132
+
133
+ end
134
+
135
+ end
136
+
137
+
138
+ module Core
139
# The Ensembl::Core::DBConnection is the actual connection established
# with the Ensembl server.
class DBConnection < Ensembl::DBRegistry::Base
  self.abstract_class = true
  self.pluralize_table_names = false

  # The Ensembl::Core::DBConnection.connect method makes the connection
  # to the Ensembl core database for a given species. By default, it connects
  # to release Ensembl::ENSEMBL_RELEASE for that species. You _could_ use a
  # lower number, but some parts of the API might not work, or worse: give
  # the wrong results.
  #
  # @example
  #  # Connect to the default release of human
  #  Ensembl::Core::DBConnection.connect('homo_sapiens')
  #
  #  # Connect to release 42 of chicken
  #  Ensembl::Core::DBConnection.connect('gallus_gallus', 42)
  #
  # @param [String] species Species to connect to. Must be in snake_case
  # @param [Integer] release Release to connect to (default = Ensembl::ENSEMBL_RELEASE)
  # @param [Hash] args Optional connection settings, passed on to generic_connect
  def self.connect(species, release = Ensembl::ENSEMBL_RELEASE, args = {})
    self.generic_connect('core', species, release, args)
  end

  # Same as connect, but with the :ensembl_genomes option switched on.
  # The option is added to a copy of args, so the caller's hash is left
  # untouched and can be reused for a regular connect call.
  #
  # @param [String] species Species to connect to
  # @param [Integer] release Release to connect to (default = Ensembl::ENSEMBL_RELEASE)
  # @param [Hash] args Optional connection settings, passed on to generic_connect
  def self.ensemblgenomes_connect(species, release = Ensembl::ENSEMBL_RELEASE, args = {})
    self.generic_connect('core', species, release, args.merge(:ensembl_genomes => true))
  end
end # Core::DBConnection
169
+
170
+ end # Core
171
+
172
+ module Variation
173
# The Ensembl::Variation::DBConnection is the actual connection established
# with the Ensembl server.
class DBConnection < Ensembl::DBRegistry::Base
  self.abstract_class = true
  self.pluralize_table_names = false

  # The Ensembl::Variation::DBConnection.connect method makes the connection
  # to the Ensembl variation database for a given species. By default, it
  # connects to release Ensembl::ENSEMBL_RELEASE for that species. You
  # _could_ use a lower number, but some parts of the API might not work,
  # or worse: give the wrong results.
  #
  # @example
  #  # Connect to the default release of human
  #  Ensembl::Variation::DBConnection.connect('homo_sapiens')
  #
  #  # Connect to release 42 of chicken
  #  Ensembl::Variation::DBConnection.connect('gallus_gallus', 42)
  #
  # @param [String] species Species to connect to. Must be in snake_case
  # @param [Integer] release Release to connect to (default = Ensembl::ENSEMBL_RELEASE)
  # @param [Hash] args Optional connection settings, passed on to generic_connect
  def self.connect(species, release = Ensembl::ENSEMBL_RELEASE, args = {})
    self.generic_connect('variation', species, release, args)
  end

  # Same as connect, but with the :ensembl_genomes option switched on.
  # The option is added to a copy of args, so the caller's hash is left
  # untouched and can be reused for a regular connect call.
  #
  # @param [String] species Species to connect to
  # @param [Integer] release Release to connect to (default = Ensembl::ENSEMBL_RELEASE)
  # @param [Hash] args Optional connection settings, passed on to generic_connect
  def self.ensemblgenomes_connect(species, release = Ensembl::ENSEMBL_RELEASE, args = {})
    self.generic_connect('variation', species, release, args.merge(:ensembl_genomes => true))
  end
end # Variation::DBConnection
202
+
203
+ end # Variation
204
+
205
+ end # Ensembl
@@ -0,0 +1,536 @@
1
+ #
2
+ # = ensembl/variation/activerecord.rb - ActiveRecord mappings to Ensembl Variation
3
+ #
4
+ # Copyright:: Copyright (C) 2008 Francesco Strozzi <francesco.strozzi@gmail.com>
5
+ # License:: The Ruby License
6
+ #
7
+ # @author Francesco Strozzi
8
+
9
+ nil
10
+ module Ensembl
11
+ # The Ensembl::Variation module covers the variation databases from
12
+ # ensembldb.ensembl.org.
13
+ module Variation
14
+ # The Allele class describes a single allele of a variation. In addition to
15
+ # the nucleotide(s) (or absence thereof) representing the allele, frequency
16
+ # and population information may be present.
17
+ #
18
+ # This class uses ActiveRecord to access data in the Ensembl database.
19
+ # See the general documentation of the Ensembl module for
20
+ # more information on what this means and what methods are available.
21
+ #
22
+ # @example
23
+ # allele = Allele.find(1)
24
+ # puts allele.to_yaml
25
+ class Allele < DBConnection
26
+ set_primary_key 'allele_id'
27
+ belongs_to :sample
28
+ belongs_to :variation
29
+ belongs_to :population
30
+ belongs_to :subsnp_handle
31
+ end
32
+
33
+ # The AlleleGroup class represents a grouping of alleles that have tight
34
+ # linkage and are usually present together. This is commonly known as a
35
+ # Haplotype or Haplotype Block.
36
+ #
37
+ # This class uses ActiveRecord to access data in the Ensembl database.
38
+ # See the general documentation of the Ensembl module for
39
+ # more information on what this means and what methods are available.
40
+ #
41
+ # @example
42
+ # allele_group = AlleleGroup.find(1)
43
+ # puts allele_group.to_yaml
44
+ class AlleleGroup < DBConnection
45
+ set_primary_key 'allele_group_id'
46
+ belongs_to :variation_group
47
+ belongs_to :source
48
+ belongs_to :sample
49
+ belongs_to :allele_group_allele
50
+ end
51
+
52
+ # The AlleleGroupAllele class represents a connection class between Allele and AlleleGroup.
53
+ # Should not be used directly.
54
+ #
55
+ # This class uses ActiveRecord to access data in the Ensembl database.
56
+ # See the general documentation of the Ensembl module for
57
+ # more information on what this means and what methods are available.
58
+ class AlleleGroupAllele < DBConnection
59
+ belongs_to :variation
60
+ belongs_to :allele_group
61
+ end
62
+
63
+ # Store information on attributes types
64
+ #
65
+ # This class uses ActiveRecord to access data in the Ensembl database.
66
+ # See the general documentation of the Ensembl module for
67
+ # more information on what this means and what methods are available.
68
+ class AttribType < DBConnection
69
+ set_primary_key "attrib_type_id"
70
+ end
71
+
72
+ # Store information on associated studies
73
+ #
74
+ # This class uses ActiveRecord to access data in the Ensembl database.
75
+ # See the general documentation of the Ensembl module for
76
+ # more information on what this means and what methods are available.
77
+ class AssociateStudy < DBConnection
78
+ set_primary_key "study1_id"
79
+ belongs_to :study
80
+ end
81
+
82
+
83
+ # This class uses ActiveRecord to access data in the Ensembl database.
84
+ # See the general documentation of the Ensembl module for
85
+ # more information on what this means and what methods are available.
86
+ class ConsequenceMapping < DBConnection
87
+
88
+ end
89
+
90
+ # This class uses ActiveRecord to access data in the Ensembl database.
91
+ # See the general documentation of the Ensembl module for
92
+ # more information on what this means and what methods are available.
93
+ class FailedDescription < DBConnection
94
+ set_primary_key "failed_description_id"
95
+ has_many :failed_variations
96
+ end
97
+
98
+ # This class uses ActiveRecord to access data in the Ensembl database.
99
+ # See the general documentation of the Ensembl module for
100
+ # more information on what this means and what methods are available.
101
+ class FailedVariation < DBConnection
102
+ set_primary_key "failed_variation_id"
103
+ belongs_to :failed_description
104
+ belongs_to :variation
105
+ end
106
+
107
+ # This class uses ActiveRecord to access data in the Ensembl database.
108
+ # See the general documentation of the Ensembl module for
109
+ # more information on what this means and what methods are available.
110
+ class FeatureType < DBConnection
111
+ set_primary_key "feature_type_id"
112
+ end
113
+
114
+ class Meta < DBConnection
115
+ set_primary_key "meta_id"
116
+ end
117
+
118
+ class MetaCoord < DBConnection
119
+
120
+ end
121
+
122
+ class Phenotype < DBConnection
123
+ set_primary_key "phenotype_id"
124
+ has_many :variation_annotations
125
+ end
126
+
127
+ # The Sample class gives information about the biological samples stored in the database.
128
+ #
129
+ # This class uses ActiveRecord to access data in the Ensembl database.
130
+ # See the general documentation of the Ensembl module for
131
+ # more information on what this means and what methods are available.
132
+ class Sample < DBConnection
133
+ set_primary_key "sample_id"
134
+ has_one :individual
135
+ has_one :sample_synonym
136
+ has_many :individual_genotype_multiple_bp
137
+ has_many :compressed_genotype_single_bp
138
+ has_many :read_coverage
139
+ has_one :population
140
+ has_many :tagged_variation_features
141
+ end
142
+
143
+ # The IndividualPopulation class is used to connect Individual and Population classes.
144
+ # Should not be used directly.
145
+ #
146
+ # This class uses ActiveRecord to access data in the Ensembl database.
147
+ # See the general documentation of the Ensembl module for
148
+ # more information on what this means and what methods are available.
149
+ class IndividualPopulation < DBConnection
150
+ belongs_to :individual, :foreign_key => "individual_sample_id"
151
+ belongs_to :population, :foreign_key => "population_sample_id"
152
+ end
153
+
154
+ # The Individual class gives information on the single individuals used
155
+ # to retrieve one or more biological samples.
156
+ #
157
+ # This class uses ActiveRecord to access data in the Ensembl database.
158
+ # See the general documentation of the Ensembl module for
159
+ # more information on what this means and what methods are available.
160
+ class Individual < DBConnection
161
+ set_primary_key "sample_id"
162
+ belongs_to :sample
163
+ has_one :individual_type
164
+ has_many :individual_populations, :foreign_key => "individual_sample_id"
165
+ has_many :populations, :through => :individual_populations
166
+ end
167
+
168
+ class IndividualGenotypeMultipleBp < DBConnection
169
+ belongs_to :sample
170
+ belongs_to :variation
171
+ belongs_to :subsnp_handle
172
+ end
173
+
174
# ActiveRecord model for individual types; each record belongs to an Individual.
class IndividualType < DBConnection
  # The key was previously misspelled "invidual_type_id", which does not
  # follow the <table>_id naming used by every other model in this file.
  # NOTE(review): confirm the column name against the Ensembl variation schema.
  set_primary_key "individual_type_id"
  belongs_to :individual
end
178
+
179
+
180
+ class CompressedGenotypeSingleBp < DBConnection
181
+ belongs_to :population_genotype, :foreign_key => "sample_id"
182
+ end
183
+
184
+ class ReadCoverage < DBConnection
185
+ belongs_to :sample
186
+ end
187
+
188
+ class Population < DBConnection
189
+ belongs_to :sample
190
+ set_primary_key "sample_id"
191
+ has_many :population_genotypes, :foreign_key => "sample_id"
192
+ has_many :individual_populations, :foreign_key => "population_sample_id"
193
+ has_many :individuals, :through => :individual_populations
194
+ has_many :sample_synonyms
195
+ has_one :population_structure
196
+ has_many :tagged_variation_features
197
+ has_many :alleles
198
+ has_many :allele_groups
199
+ end
200
+
201
+
202
+ # The PopulationStructure class gives information on super and sub populations
203
+ #
204
+ # This class uses ActiveRecord to access data in the Ensembl database.
205
+ # See the general documentation of the Ensembl module for
206
+ # more information on what this means and what methods are available.
207
+ class PopulationStructure < DBConnection
208
+
209
+ end
210
+
211
+ # The PopulationGenotype class gives information about alleles and allele
212
+ # frequencies for a SNP observed within a population or a group of samples.
213
+ #
214
+ # This class uses ActiveRecord to access data in the Ensembl database.
215
+ # See the general documentation of the Ensembl module for
216
+ # more information on what this means and what methods are available.
217
+ class PopulationGenotype < DBConnection
218
+ set_primary_key "population_genotype_id"
219
+ belongs_to :variation
220
+ belongs_to :population
221
+ belongs_to :subsnp_handle
222
+ has_many :compressed_genotype_single_bps, :foreign_key => "sample_id"
223
+ end
224
+
225
+ # The ProteinInfo class gives information about protein translated from a given transcript.
226
+ #
227
+ # This class uses ActiveRecord to access data in the Ensembl database.
228
+ # See the general documentation of the Ensembl module for
229
+ # more information on what this means and what methods are available.
230
+ class ProteinInfo < DBConnection
231
+ set_primary_key "protein_info_id"
232
+ belongs_to :transcript_variation
233
+ has_many :protein_positions
234
+ end
235
+
236
+ # The PolyphenPrediction class gives information about variations effect predictions within an aminoacidic sequence
237
+ #
238
+ # This class uses ActiveRecord to access data in the Ensembl database.
239
+ # See the general documentation of the Ensembl module for
240
+ # more information on what this means and what methods are available.
241
+ class PolyphenPrediction < DBConnection
242
+ set_primary_key "polyphen_prediction_id"
243
+ belongs_to :protein_position
244
+ end
245
+
246
+ # The ProteinPosition class gives information about variations within an aminoacidic sequence.
247
+ #
248
+ # This class uses ActiveRecord to access data in the Ensembl database.
249
+ # See the general documentation of the Ensembl module for
250
+ # more information on what this means and what methods are available.
251
+ class ProteinPosition < DBConnection
252
+ set_primary_key "protein_position_id"
253
+ belongs_to :protein_info
254
+ has_many :polyphen_predictions
255
+ has_many :sift_predictions
256
+ end
257
+
258
+
259
+
260
+ # The SampleSynonym class represents information about alternative names
261
+ # for sample entries.
262
+ #
263
+ # This class uses ActiveRecord to access data in the Ensembl database.
264
+ # See the general documentation of the Ensembl module for
265
+ # more information on what this means and what methods are available.
266
+ class SampleSynonym < DBConnection
267
+ set_primary_key "sample_synonym_id"
268
+ belongs_to :source
269
+ belongs_to :sample
270
+ belongs_to :population
271
+ end
272
+
273
+ # The Source class gives information on the different databases and SNP
274
+ # panels used to retrieve the data
275
+ #
276
+ # This class uses ActiveRecord to access data in the Ensembl database.
277
+ # See the general documentation of the Ensembl module for
278
+ # more information on what this means and what methods are available.
279
class Source < DBConnection
  set_primary_key "source_id"
  has_many :sample_synonyms
  has_many :allele_groups
  has_many :variations
  has_many :variation_groups
  has_many :httags
  has_many :variation_synonyms
  has_many :variation_annotations
  has_many :structural_variations

  # Returns the raw (not type-cast) somatic_status value as a string.
  # Workaround: ActiveRecord does not parse MySQL SET fields.
  def somatic_status
    raw_status = attributes_before_type_cast['somatic_status']
    "#{raw_status}"
  end
end
295
+
296
+ # The StructuralVariation class gives information on structural variations mapped on the genome
297
+ #
298
+ # This class uses ActiveRecord to access data in the Ensembl database.
299
+ # See the general documentation of the Ensembl module for
300
+ # more information on what this means and what methods are available.
301
class StructuralVariation < DBConnection
  set_primary_key "structural_variation_id"
  belongs_to :source
  belongs_to :seq_region
  has_many :supporting_structural_variations

  # Workaround for the 'class' field: report it as already implemented so
  # that ActiveRecord does not try to handle a method with that name.
  def self.instance_method_already_implemented?(method_name)
    method_name == 'class' || super
  end

  # Returns the value of the 'class' attribute, which cannot be reached
  # through a regular accessor because of the workaround above.
  def sv_class
    attributes["class"]
  end
end
319
+
320
+
321
+ class SeqRegion < DBConnection
322
+ set_primary_key "seq_region_id"
323
+ has_many :variation_features
324
+ has_many :structural_variations
325
+ end
326
+
327
+ # The SubsnpHandle class gives information on SNP Submitters
328
+ #
329
+ # This class uses ActiveRecord to access data in the Ensembl database.
330
+ # See the general documentation of the Ensembl module for
331
+ # more information on what this means and what methods are available.
332
+ class SupportingStructuralVariation < DBConnection
333
+ set_primary_key "supporting_structural_variation_id"
334
+ belongs_to :structural_variation
335
+ end
336
+
337
+ # The SubsnpHandle class gives information on SNP Submitters
338
+ #
339
+ # This class uses ActiveRecord to access data in the Ensembl database.
340
+ # See the general documentation of the Ensembl module for
341
+ # more information on what this means and what methods are available.
342
+ class SubsnpHandle < DBConnection
343
+ set_primary_key "subsnp_id"
344
+ has_many :individual_genotype_multiple_bps, :foreign_key => "subsnp_id"
345
+ has_many :population_genotypes, :foreign_key => "subsnp_id"
346
+ has_many :alleles, :foreign_key => "subsnp_id"
347
+ has_many :variation_synonyms,:foreign_key => "subsnp_id"
348
+ end
349
+
350
+ # The SiftPrediction class gives information about variations effect predictions within an aminoacidic sequence
351
+ #
352
+ # This class uses ActiveRecord to access data in the Ensembl database.
353
+ # See the general documentation of the Ensembl module for
354
+ # more information on what this means and what methods are available.
355
+ class SiftPrediction < DBConnection
356
+ set_primary_key "sift_prediction_id"
357
+ belongs_to :protein_position
358
+ end
359
+
360
+ # The Study class gives information about studies producing variations information
361
+ #
362
+ # This class uses ActiveRecord to access data in the Ensembl database.
363
+ # See the general documentation of the Ensembl module for
364
+ # more information on what this means and what methods are available.
365
class Study < DBConnection
  set_primary_key "study_id"
  has_many :associate_studies, :foreign_key => "study1_id"
  has_many :structural_variations
  has_many :variation_annotations

  # Returns the raw (not type-cast) study_type value as a string.
  def study_type
    raw_type = attributes_before_type_cast['study_type']
    "#{raw_type}"
  end
end
376
+
377
+
378
+ # The Variation class represents single nucleotide polymorhisms (SNP) or variations
379
+ # and provides information like the names (IDs), the validation status and
380
+ # the allele information.
381
+ #
382
+ # *BUG*: fields like validation_status and consequence_type are created
383
+ # using SET option directly in MySQL. These fields are bad interpreted by
384
+ # ActiveRecord, returning always 0.
385
+ #
386
+ # This class uses ActiveRecord to access data in the Ensembl database.
387
+ # See the general documentation of the Ensembl module for
388
+ # more information on what this means and what methods are available.
389
+ #
390
+ # @example
391
+ # v = Variation.find_by_name('rs10111')
392
+ # v.alleles.each do |a|
393
+ # puts a.allele, a.frequency
394
+ # end
395
+ #
396
+ # variations = Variation.fetch_all_by_source('dbSNP') # many records
397
+ # variations.each do |v|
398
+ # puts v.name
399
+ # end
400
+ #
401
class Variation < DBConnection
  set_primary_key "variation_id"
  belongs_to :source
  has_many :variation_synonyms
  has_one :flanking_sequence
  has_many :allele_group_alleles
  has_many :allele_groups, :through => :allele_group_alleles
  has_many :population_genotypes
  has_many :alleles
  has_many :variation_features
  has_many :variation_group_variations
  has_many :variation_groups, :through => :variation_group_variations
  has_many :individual_genotype_multiple_bps
  has_many :failed_variations
  has_many :failed_descriptions, :through => :failed_variations
  has_many :variation_set_variations
  has_many :variation_sets, :through => :variation_set_variations

  # Fetches all variations coming from the source with the given name.
  #
  # @param [String] source Name of the source (e.g. 'dbSNP')
  # @return The variations of that source; an empty array when no source
  #   with that name exists (previously this case failed with a
  #   NoMethodError on nil)
  def self.fetch_all_by_source(source)
    found_source = Source.find_by_name(source)
    found_source ? found_source.variations : []
  end
end
423
+
424
+ # The VariationSynonym class gives information on alterative names used
425
+ # for Variation entries.
426
+ #
427
+ # This class uses ActiveRecord to access data in the Ensembl database.
428
+ # See the general documentation of the Ensembl module for
429
+ # more information on what this means and what methods are available.
430
+ class VariationSynonym < DBConnection
431
+ set_primary_key "variation_synonym_id"
432
+ belongs_to :variation
433
+ belongs_to :source
434
+ belongs_to :subsnp_handle
435
+ end
436
+
437
+ # The VariationGroup class represents a group of variations (SNPs) that are
438
+ # linked and present toghether.
439
+ #
440
+ # This class uses ActiveRecord to access data in the Ensembl database.
441
+ # See the general documentation of the Ensembl module for
442
+ # more information on what this means and what methods are available.
443
+ class VariationGroup < DBConnection
444
+ set_primary_key "variation_group_id"
445
+ belongs_to :source
446
+ has_one :variation_group_variation
447
+ has_one :httag
448
+ has_one :variation_group_feature
449
+ has_one :allele_group
450
+ end
451
+
452
+ # The VariationGroupVariation class is a connection class.
453
+ # Should not be used directly.
454
+ #
455
+ # This class uses ActiveRecord to access data in the Ensembl database.
456
+ # See the general documentation of the Ensembl module for
457
+ # more information on what this means and what methods are available.
458
+ class VariationGroupVariation < DBConnection
459
+ belongs_to :variation
460
+ belongs_to :variation_group
461
+ end
462
+
463
+ # The VariationGroupFeature class gives information on the genomic position
464
+ # of each VariationGroup.
465
+ #
466
+ # This class uses ActiveRecord to access data in the Ensembl database.
467
+ # See the general documentation of the Ensembl module for
468
+ # more information on what this means and what methods are available.
469
+ class VariationGroupFeature < DBConnection
470
+ set_primary_key "variation_group_feature_id"
471
+ belongs_to :variation_group
472
+ end
473
+
474
+ class VariationAnnotation < DBConnection
475
+ set_primary_key "variation_annotation_id"
476
+ belongs_to :variation
477
+ belongs_to :phenotype
478
+ belongs_to :source
479
+ end
480
+
481
+ # The VariationSet class gives information on variations grouped by study, method, quality measure etc.
482
+ #
483
+ # This class uses ActiveRecord to access data in the Ensembl database.
484
+ # See the general documentation of the Ensembl module for
485
+ # more information on what this means and what methods are available.
486
+ class VariationSet < DBConnection
487
+ set_primary_key "variation_set_id"
488
+ has_many :variation_set_variations
489
+ has_many :variations, :through => :variation_set_variations
490
+ end
491
+
492
+ class VariationSetVariation < DBConnection
493
+ belongs_to :variation
494
+ belongs_to :variation_set
495
+ end
496
+
497
+ # The VariationSet class gives information on super and sub VariationSets.
498
+ #
499
+ # This class uses ActiveRecord to access data in the Ensembl database.
500
+ # See the general documentation of the Ensembl module for
501
+ # more information on what this means and what methods are available.
502
+ class VariationSetStructure < DBConnection
503
+
504
+ end
505
+
506
+
507
+
508
+ # The FlankingSequence class gives information about the genomic coordinates
509
+ # of the flanking sequences, for a single VariationFeature.
510
+ #
511
+ # This class uses ActiveRecord to access data in the Ensembl database.
512
+ # See the general documentation of the Ensembl module for
513
+ # more information on what this means and what methods are available.
514
+ class FlankingSequence < DBConnection
515
+ belongs_to :variation
516
+ end
517
+
518
+ # The TaggedVariationFeature class is a connection class.
519
+ # Should not be used directly.
520
+ #
521
+ # This class uses ActiveRecord to access data in the Ensembl database.
522
+ # See the general documentation of the Ensembl module for
523
+ # more information on what this means and what methods are available.
524
+ class TaggedVariationFeature < DBConnection
525
+ belongs_to :variation_feature
526
+ belongs_to :sample
527
+ end
528
+
529
+ class Httag < DBConnection
530
+ set_primary_key "httag_id"
531
+ belongs_to :variation_group
532
+ belongs_to :source
533
+ end
534
+
535
+ end
536
+ end