bio-ensembl 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. data/.document +5 -0
  2. data/Gemfile +20 -0
  3. data/Gemfile.lock +40 -0
  4. data/LICENSE.txt +20 -0
  5. data/README.rdoc +19 -0
  6. data/Rakefile +71 -0
  7. data/VERSION +1 -0
  8. data/bin/ensembl +40 -0
  9. data/bin/variation_effect_predictor +106 -0
  10. data/bio-ensembl.gemspec +190 -0
  11. data/lib/bio-ensembl.rb +65 -0
  12. data/lib/bio-ensembl/core/activerecord.rb +1812 -0
  13. data/lib/bio-ensembl/core/collection.rb +64 -0
  14. data/lib/bio-ensembl/core/project.rb +262 -0
  15. data/lib/bio-ensembl/core/slice.rb +657 -0
  16. data/lib/bio-ensembl/core/transcript.rb +409 -0
  17. data/lib/bio-ensembl/core/transform.rb +95 -0
  18. data/lib/bio-ensembl/db_connection.rb +205 -0
  19. data/lib/bio-ensembl/variation/activerecord.rb +536 -0
  20. data/lib/bio-ensembl/variation/variation_feature.rb +376 -0
  21. data/lib/bio-ensembl/variation/variation_feature62.rb +444 -0
  22. data/samples/ensembl_genomes_example.rb +60 -0
  23. data/samples/examples_perl_tutorial.rb +125 -0
  24. data/samples/small_example_ruby_api.rb +34 -0
  25. data/samples/variation_effect_predictor_data.txt +4 -0
  26. data/samples/variation_example.rb +67 -0
  27. data/test/data/seq_c6qbl.fa +10 -0
  28. data/test/data/seq_cso19_coding.fa +16 -0
  29. data/test/data/seq_cso19_transcript.fa +28 -0
  30. data/test/data/seq_drd3_gene.fa +838 -0
  31. data/test/data/seq_drd3_transcript.fa +22 -0
  32. data/test/data/seq_drd4_transcript.fa +24 -0
  33. data/test/data/seq_forward_composite.fa +1669 -0
  34. data/test/data/seq_par_boundary.fa +169 -0
  35. data/test/data/seq_rnd3_transcript.fa +47 -0
  36. data/test/data/seq_ub2r1_coding.fa +13 -0
  37. data/test/data/seq_ub2r1_gene.fa +174 -0
  38. data/test/data/seq_ub2r1_transcript.fa +26 -0
  39. data/test/data/seq_y.fa +2 -0
  40. data/test/default/test_connection.rb +60 -0
  41. data/test/default/test_releases.rb +130 -0
  42. data/test/ensembl_genomes/test_collection.rb +122 -0
  43. data/test/ensembl_genomes/test_gene.rb +46 -0
  44. data/test/ensembl_genomes/test_slice.rb +65 -0
  45. data/test/ensembl_genomes/test_variation.rb +38 -0
  46. data/test/helper.rb +18 -0
  47. data/test/release_50/core/test_project.rb +210 -0
  48. data/test/release_50/core/test_project_human.rb +52 -0
  49. data/test/release_50/core/test_relationships.rb +72 -0
  50. data/test/release_50/core/test_sequence.rb +170 -0
  51. data/test/release_50/core/test_slice.rb +116 -0
  52. data/test/release_50/core/test_transcript.rb +125 -0
  53. data/test/release_50/core/test_transform.rb +217 -0
  54. data/test/release_50/variation/test_activerecord.rb +138 -0
  55. data/test/release_50/variation/test_variation.rb +79 -0
  56. data/test/release_53/core/test_gene.rb +61 -0
  57. data/test/release_53/core/test_project.rb +91 -0
  58. data/test/release_53/core/test_project_human.rb +61 -0
  59. data/test/release_53/core/test_slice.rb +42 -0
  60. data/test/release_53/core/test_transform.rb +57 -0
  61. data/test/release_53/variation/test_activerecord.rb +137 -0
  62. data/test/release_53/variation/test_variation.rb +66 -0
  63. data/test/release_56/core/test_gene.rb +61 -0
  64. data/test/release_56/core/test_project.rb +91 -0
  65. data/test/release_56/core/test_slice.rb +49 -0
  66. data/test/release_56/core/test_transform.rb +57 -0
  67. data/test/release_56/variation/test_activerecord.rb +141 -0
  68. data/test/release_56/variation/test_consequence.rb +131 -0
  69. data/test/release_56/variation/test_variation.rb +63 -0
  70. data/test/release_60/core/test_gene.rb +61 -0
  71. data/test/release_60/core/test_project_human.rb +34 -0
  72. data/test/release_60/core/test_slice.rb +42 -0
  73. data/test/release_60/core/test_transcript.rb +120 -0
  74. data/test/release_60/core/test_transform.rb +57 -0
  75. data/test/release_60/variation/test_activerecord.rb +216 -0
  76. data/test/release_60/variation/test_consequence.rb +153 -0
  77. data/test/release_60/variation/test_variation.rb +64 -0
  78. data/test/release_62/core/test_gene.rb +42 -0
  79. data/test/release_62/variation/test_activerecord.rb +86 -0
  80. data/test/release_62/variation/test_consequence.rb +191 -0
  81. metadata +287 -0
@@ -0,0 +1,205 @@
1
+ #
2
+ # = ensembl/db_connection.rb - Connection classes for Ensembl databases
3
+ #
4
+ # Copyright:: Copyright (C) 2009 Jan Aerts <http://jandot.myopenid.com>
5
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
6
+ #
7
+ # License:: The Ruby License
8
+ #
9
+
10
+
11
+ require 'rubygems'
12
+ require 'active_record'
13
+
14
+
15
+ class ActiveRecord::Base
16
# Lists the names of all +belongs_to+ associations declared on this model.
#
# @return [Array<String>] association names, converted to strings
def self.belongs_to_what
  reflect_on_all_associations(:belongs_to).map { |association| association.name.to_s }
end
19
+
20
# Lists the names of all +has_one+ and +has_many+ associations declared on
# this model (has_one names first), without duplicates.
#
# @return [Array<String>] association names, converted to strings
def self.has_what
  owned = reflect_on_all_associations(:has_one) + reflect_on_all_associations(:has_many)
  owned.uniq.map { |association| association.name.to_s }
end
24
+ end
25
+
26
+
27
+ module Ensembl
28
+ DB_ADAPTER = 'mysql'
29
+ DB_HOST = 'ensembldb.ensembl.org'
30
+ DB_USERNAME = 'anonymous'
31
+ DB_PASSWORD = ''
32
+ EG_HOST = 'mysql.ebi.ac.uk'
33
+ EG_PORT = 4157
34
+
35
+
36
+ # Generic class to perform dynamic connections to the Ensembl database and retrieve database names
37
+ class DummyDBConnection < ActiveRecord::Base
38
+ self.abstract_class = true
39
# Establishes a connection using the settings in +args+, falling back to
# the Ensembl defaults for every setting that is not supplied.
#
# The +args+ hash is only read. Defaults are NOT written back into it: an
# empty-string :database stored in the caller's hash would be truthy and
# would make a later generic_connect call with the same hash skip the
# database lookup.
#
# @param [Hash] args Connection settings (:adapter, :host, :username,
#   :password, :port, :database)
# @return whatever establish_connection returns
def self.connect(args)
  establish_connection(
    :adapter  => args[:adapter]  || Ensembl::DB_ADAPTER,
    :host     => args[:host]     || Ensembl::DB_HOST,
    :username => args[:username] || Ensembl::DB_USERNAME,
    :password => args[:password] || Ensembl::DB_PASSWORD,
    :port     => args[:port],
    :database => args[:database] || ''
  )
end
49
+ end
50
+
51
+ module DBRegistry
52
+ # The Ensembl::DBRegistry::Base is a superclass providing general methods
53
+ # to get database and connection info.
54
+ class Base < ActiveRecord::Base
55
+ self.abstract_class = true
56
+ self.pluralize_table_names = false
57
+
58
# Reads the options of the current connection and derives species and
# release from the database name.
#
# Only standard Ensembl database names (<species>_<type>_<release>_<suffix>,
# with type core, variation, funcgen or compara) can be parsed.
#
# @return [Array] host, user, password, database name, port, species and
#   release (as Integer)
# @raise [NameError] if the database name does not follow the convention
def self.get_info
  host, user, password, db_name, port = retrieve_connection.instance_values["connection_options"]
  matched = db_name =~ /(\w+_\w+)_(core|variation|funcgen|compara)_(\d+)_\S+/
  if matched.nil?
    raise NameError, "Can't get database name from #{db_name}. Are you using non conventional names?"
  end
  species = Regexp.last_match(1)
  release = Regexp.last_match(3)
  [host, user, password, db_name, port, species, release.to_i]
end
68
+
69
+ # Method to retrieve the name of a database, using species, release and connection parameters
70
+ # passed by the user.
71
# Looks up the name of the database that holds the given species/release.
#
# First a database named after the species is searched. If none exists and
# +args[:ensembl_genomes]+ is set, a collection database named after the
# first word of the species is searched instead; on a hit the abbreviated
# species name is stored in Ensembl::SESSION.collection_species and checked
# against the +meta+ table of that collection database.
#
# The +args+ hash is not modified by this method itself.
#
# NOTE(review): species, db_type and release are interpolated into the SQL
# statements unescaped, so they must come from a trusted source.
#
# @param [String] db_type Database type, e.g. 'core' or 'variation'
# @param [String] species Species name (spaces are turned into underscores)
# @param [Integer] release Ensembl release number
# @param [Hash] args Connection settings passed to DummyDBConnection.connect
# @return [String, nil] the database name, or nil (after a warning) if none was found
def self.get_name_from_db(db_type, species, release, args)
  # Always in lowercase. This keeps things simple when dealing with complex
  # species names like in Ensembl Genomes database
  species = species.underscore.tr(' ', '_')
  dummy_db = DummyDBConnection.connect(args)
  dummy_connection = dummy_db.connection

  # check if a database exists with exactly the species name passed (regular way)
  db_name = dummy_connection.select_values("SHOW DATABASES LIKE '%#{species}_#{db_type}_#{release.to_s}%'")[0]

  # if a database is not found and we are working on Ensembl Genomes database...
  if db_name.nil? && args[:ensembl_genomes]
    first = species.split(/_/).first
    # ...try to find a collection database using the first name of the
    # species passed (convention used for collection databases)
    db_name = dummy_connection.select_values("SHOW DATABASES").find do |candidate|
      candidate =~ /#{first}.*_collection_#{db_type}_#{release.to_s}/
    end

    # if a collection database match is found, then look inside to find the species
    if db_name
      dummy_db.disconnect! # close the generic connection with the host
      # open a new connection directly with the collection database
      dummy_db = DummyDBConnection.connect(args.merge(:database => db_name))
      # Abbreviate only the leading word to its initial so the name matches
      # the species names stored in the collection database. A plain gsub
      # would also rewrite later occurrences ("mus_musculus" => "m_mculus").
      species_name = species.sub(/\A#{Regexp.escape(first)}/, first[0..0])
      # set the species used for this session, so it's easier to fetch
      # slices from the genome of that species
      Ensembl::SESSION.collection_species = species_name

      # check that the species passed is present in the collection database, otherwise returns a warning
      exists = dummy_db.connection.select_values("SELECT species_id FROM meta WHERE LOWER(meta_value) = '#{species_name}' AND meta_key = 'species.db_name'")[0]
      warn "WARNING: No species '#{species}' found in the database. Please check that the name is correct." unless exists
    end
  end

  warn "WARNING: No connection to database established. Check that the species is in snake_case (was: #{species})." if db_name.nil?
  db_name
end
99
+
100
# Connects this class to the database of the given type, species and release.
#
# Loads the VariationFeature implementation matching the release, resets
# Ensembl::SESSION and records the release in it, resolves the database name
# (unless +args[:database]+ is given) and establishes the connection.
#
# The caller's +args+ hash is left untouched; defaults are applied to a
# private copy, so the same hash can safely be reused for further calls.
#
# @param [String] db_type Database type, e.g. 'core' or 'variation'
# @param [String] species Species to connect to
# @param [Integer] release Ensembl release number
# @param [Hash] args Optional settings: :adapter, :host, :username,
#   :password, :port, :database and :ensembl_genomes (forces the Ensembl
#   Genomes host and port)
# @return the result of retrieve_connection, called to check that the
#   connection is working
def self.generic_connect(db_type, species, release, args = {})
  args = args.dup # never write defaults back into the caller's hash

  # check which release is used and load the correct VariationFeature version
  variation_feature = release < 62 ? 'variation_feature.rb' : 'variation_feature62.rb'
  require File.dirname(__FILE__) + '/variation/' + variation_feature

  Ensembl::SESSION.reset
  Ensembl::SESSION.release = release

  # if the connection is established with Ensembl Genomes, set the default port and host
  if args[:ensembl_genomes]
    args[:port] = Ensembl::EG_PORT
    args[:host] = Ensembl::EG_HOST
  end
  args[:port] = (release > 47 ? 5306 : 3306) if args[:port].nil?

  # use the database named by the caller, otherwise try to find the corresponding one
  db_name = args[:database] || get_name_from_db(db_type, species, release, args)

  establish_connection(
    :adapter  => args[:adapter]  || Ensembl::DB_ADAPTER,
    :host     => args[:host]     || Ensembl::DB_HOST,
    :database => db_name,
    :username => args[:username] || Ensembl::DB_USERNAME,
    :password => args[:password] || Ensembl::DB_PASSWORD,
    :port     => args[:port]
  )

  retrieve_connection # Check if the connection is working
end
132
+
133
+ end
134
+
135
+ end
136
+
137
+
138
+ module Core
139
+ # The Ensembl::Core::DBConnection is the actual connection established
140
+ # with the Ensembl server.
141
+ class DBConnection < Ensembl::DBRegistry::Base
142
+ self.abstract_class = true
143
+ self.pluralize_table_names = false
144
# Connects to the Ensembl core database of a species. The release defaults
# to Ensembl::ENSEMBL_RELEASE. You _could_ use a lower number, but some
# parts of the API might not work, or worse: give the wrong results.
#
# @example
#   # Connect to the default release of human
#   Ensembl::Core::DBConnection.connect('homo_sapiens')
#
#   # Connect to release 42 of chicken
#   Ensembl::Core::DBConnection.connect('gallus_gallus', 42)
#
# @param [String] species Species to connect to. Must be in snake_case
# @param [Integer] release Release to connect to (default = Ensembl::ENSEMBL_RELEASE)
# @param [Hash] args Optional connection settings, handed on to generic_connect
def self.connect(species, release = Ensembl::ENSEMBL_RELEASE, args = {})
  generic_connect('core', species, release, args)
end
161
+
162
# Connects to the core database of a species hosted by Ensembl Genomes.
#
# Behaves like generic_connect for the 'core' database type with the
# :ensembl_genomes flag switched on. The flag is added to a copy of +args+,
# so the caller's hash is not modified.
#
# @param [String] species Species to connect to
# @param [Integer] release Release to connect to (default = Ensembl::ENSEMBL_RELEASE)
# @param [Hash] args Optional connection settings, handed on to generic_connect
def self.ensemblgenomes_connect(species, release = Ensembl::ENSEMBL_RELEASE, args={})
  generic_connect('core', species, release, args.merge(:ensembl_genomes => true))
end
166
+
167
+
168
+ end # Core::DBConnection
169
+
170
+ end # Core
171
+
172
+ module Variation
173
+ # The Ensembl::Variation::DBConnection is the actual connection established
174
+ # with the Ensembl server.
175
+ class DBConnection < Ensembl::DBRegistry::Base
176
+ self.abstract_class = true
177
+ self.pluralize_table_names = false
178
# Connects to the Ensembl variation database of a species. The release
# defaults to Ensembl::ENSEMBL_RELEASE. You _could_ use a lower number, but
# some parts of the API might not work, or worse: give the wrong results.
#
# @example
#   # Connect to the default release of human
#   Ensembl::Variation::DBConnection.connect('homo_sapiens')
#
#   # Connect to release 42 of chicken
#   Ensembl::Variation::DBConnection.connect('gallus_gallus', 42)
#
# @param [String] species Species to connect to. Must be in snake_case
# @param [Integer] release Release to connect to (default = Ensembl::ENSEMBL_RELEASE)
# @param [Hash] args Optional connection settings, handed on to generic_connect
def self.connect(species, release = Ensembl::ENSEMBL_RELEASE, args = {})
  generic_connect('variation', species, release, args)
end
195
+
196
# Connects to the variation database of a species hosted by Ensembl Genomes.
#
# Behaves like generic_connect for the 'variation' database type with the
# :ensembl_genomes flag switched on. The flag is added to a copy of +args+,
# so the caller's hash is not modified.
#
# @param [String] species Species to connect to
# @param [Integer] release Release to connect to (default = Ensembl::ENSEMBL_RELEASE)
# @param [Hash] args Optional connection settings, handed on to generic_connect
def self.ensemblgenomes_connect(species, release = Ensembl::ENSEMBL_RELEASE, args={})
  generic_connect('variation', species, release, args.merge(:ensembl_genomes => true))
end
200
+
201
+ end # Variation::DBConnection
202
+
203
+ end # Variation
204
+
205
+ end # Ensembl
@@ -0,0 +1,536 @@
1
+ #
2
+ # = ensembl/variation/activerecord.rb - ActiveRecord mappings to Ensembl Variation
3
+ #
4
+ # Copyright:: Copyright (C) 2008 Francesco Strozzi <francesco.strozzi@gmail.com>
5
+ # License:: The Ruby License
6
+ #
7
+ # @author Francesco Strozzi
8
+
9
+ nil
10
+ module Ensembl
11
+ # The Ensembl::Variation module covers the variation databases from
12
+ # ensembldb.ensembl.org.
13
+ module Variation
14
+ # The Allele class describes a single allele of a variation. In addition to
15
+ # the nucleotide(s) (or their absence) that represent the allele, frequency
16
+ # and population information may be present.
17
+ #
18
+ # This class uses ActiveRecord to access data in the Ensembl database.
19
+ # See the general documentation of the Ensembl module for
20
+ # more information on what this means and what methods are available.
21
+ #
22
+ # @example
23
+ # allele = Allele.find(1)
24
+ # puts allele.to_yaml
25
+ class Allele < DBConnection
26
+ set_primary_key 'allele_id'
27
+ belongs_to :sample
28
+ belongs_to :variation
29
+ belongs_to :population
30
+ belongs_to :subsnp_handle
31
+ end
32
+
33
+ # The AlleleGroup class represents a grouping of alleles that have tight
34
+ # linkage and are usually present together. This is commonly known as a
35
+ # Haplotype or Haplotype Block.
36
+ #
37
+ # This class uses ActiveRecord to access data in the Ensembl database.
38
+ # See the general documentation of the Ensembl module for
39
+ # more information on what this means and what methods are available.
40
+ #
41
+ # @example
42
+ # allele_group = AlleleGroup.find(1)
43
+ # puts allele_group.to_yaml
44
+ class AlleleGroup < DBConnection
45
+ set_primary_key 'allele_group_id'
46
+ belongs_to :variation_group
47
+ belongs_to :source
48
+ belongs_to :sample
49
+ belongs_to :allele_group_allele
50
+ end
51
+
52
+ # The AlleleGroupAllele class represents a connection class between Allele and AlleleGroup.
53
+ # Should not be used directly.
54
+ #
55
+ # This class uses ActiveRecord to access data in the Ensembl database.
56
+ # See the general documentation of the Ensembl module for
57
+ # more information on what this means and what methods are available.
58
+ class AlleleGroupAllele < DBConnection
59
+ belongs_to :variation
60
+ belongs_to :allele_group
61
+ end
62
+
63
+ # Stores information on attribute types
64
+ #
65
+ # This class uses ActiveRecord to access data in the Ensembl database.
66
+ # See the general documentation of the Ensembl module for
67
+ # more information on what this means and what methods are available.
68
+ class AttribType < DBConnection
69
+ set_primary_key "attrib_type_id"
70
+ end
71
+
72
+ # Store information on associated studies
73
+ #
74
+ # This class uses ActiveRecord to access data in the Ensembl database.
75
+ # See the general documentation of the Ensembl module for
76
+ # more information on what this means and what methods are available.
77
# Maps the table linking a study to its associated studies.
#
# The link to Study uses the +study1_id+ column, matching the foreign key
# that Study declares for its +associate_studies+ association; the default
# +study_id+ key would not match that declaration.
class AssociateStudy < DBConnection
  set_primary_key "study1_id"
  belongs_to :study, :foreign_key => "study1_id"
end
81
+
82
+
83
+ # This class uses ActiveRecord to access data in the Ensembl database.
84
+ # See the general documentation of the Ensembl module for
85
+ # more information on what this means and what methods are available.
86
+ class ConsequenceMapping < DBConnection
87
+
88
+ end
89
+
90
+ # This class uses ActiveRecord to access data in the Ensembl database.
91
+ # See the general documentation of the Ensembl module for
92
+ # more information on what this means and what methods are available.
93
+ class FailedDescription < DBConnection
94
+ set_primary_key "failed_description_id"
95
+ has_many :failed_variations
96
+ end
97
+
98
+ # This class uses ActiveRecord to access data in the Ensembl database.
99
+ # See the general documentation of the Ensembl module for
100
+ # more information on what this means and what methods are available.
101
+ class FailedVariation < DBConnection
102
+ set_primary_key "failed_variation_id"
103
+ belongs_to :failed_description
104
+ belongs_to :variation
105
+ end
106
+
107
+ # This class uses ActiveRecord to access data in the Ensembl database.
108
+ # See the general documentation of the Ensembl module for
109
+ # more information on what this means and what methods are available.
110
+ class FeatureType < DBConnection
111
+ set_primary_key "feature_type_id"
112
+ end
113
+
114
+ class Meta < DBConnection
115
+ set_primary_key "meta_id"
116
+ end
117
+
118
+ class MetaCoord < DBConnection
119
+
120
+ end
121
+
122
+ class Phenotype < DBConnection
123
+ set_primary_key "phenotype_id"
124
+ has_many :variation_annotations
125
+ end
126
+
127
+ # The Sample class gives information about the biological samples stored in the database.
128
+ #
129
+ # This class uses ActiveRecord to access data in the Ensembl database.
130
+ # See the general documentation of the Ensembl module for
131
+ # more information on what this means and what methods are available.
132
+ class Sample < DBConnection
133
+ set_primary_key "sample_id"
134
+ has_one :individual
135
+ has_one :sample_synonym
136
+ has_many :individual_genotype_multiple_bp
137
+ has_many :compressed_genotype_single_bp
138
+ has_many :read_coverage
139
+ has_one :population
140
+ has_many :tagged_variation_features
141
+ end
142
+
143
+ # The IndividualPopulation class is used to connect Individual and Population classes.
144
+ # Should not be used directly.
145
+ #
146
+ # This class uses ActiveRecord to access data in the Ensembl database.
147
+ # See the general documentation of the Ensembl module for
148
+ # more information on what this means and what methods are available.
149
+ class IndividualPopulation < DBConnection
150
+ belongs_to :individual, :foreign_key => "individual_sample_id"
151
+ belongs_to :population, :foreign_key => "population_sample_id"
152
+ end
153
+
154
+ # The Individual class gives information on the single individuals used
155
+ # to retrieve one or more biological samples.
156
+ #
157
+ # This class uses ActiveRecord to access data in the Ensembl database.
158
+ # See the general documentation of the Ensembl module for
159
+ # more information on what this means and what methods are available.
160
+ class Individual < DBConnection
161
+ set_primary_key "sample_id"
162
+ belongs_to :sample
163
+ has_one :individual_type
164
+ has_many :individual_populations, :foreign_key => "individual_sample_id"
165
+ has_many :populations, :through => :individual_populations
166
+ end
167
+
168
+ class IndividualGenotypeMultipleBp < DBConnection
169
+ belongs_to :sample
170
+ belongs_to :variation
171
+ belongs_to :subsnp_handle
172
+ end
173
+
174
# Maps the table describing the types an Individual can have.
#
# NOTE(review): presumably the individual table references this table through
# an individual_type_id column; if so, the association direction
# (belongs_to here, has_one on Individual) should be double-checked
# against the schema.
class IndividualType < DBConnection
  set_primary_key "individual_type_id" # was misspelled "invidual_type_id"
  belongs_to :individual
end
178
+
179
+
180
+ class CompressedGenotypeSingleBp < DBConnection
181
+ belongs_to :population_genotype, :foreign_key => "sample_id"
182
+ end
183
+
184
+ class ReadCoverage < DBConnection
185
+ belongs_to :sample
186
+ end
187
+
188
+ class Population < DBConnection
189
+ belongs_to :sample
190
+ set_primary_key "sample_id"
191
+ has_many :population_genotypes, :foreign_key => "sample_id"
192
+ has_many :individual_populations, :foreign_key => "population_sample_id"
193
+ has_many :individuals, :through => :individual_populations
194
+ has_many :sample_synonyms
195
+ has_one :population_structure
196
+ has_many :tagged_variation_features
197
+ has_many :alleles
198
+ has_many :allele_groups
199
+ end
200
+
201
+
202
+ # The PopulationStructure class gives information on super and sub populations
203
+ #
204
+ # This class uses ActiveRecord to access data in the Ensembl database.
205
+ # See the general documentation of the Ensembl module for
206
+ # more information on what this means and what methods are available.
207
+ class PopulationStructure < DBConnection
208
+
209
+ end
210
+
211
+ # The PopulationGenotype class gives information about alleles and allele
212
+ # frequencies for a SNP observed within a population or a group of samples.
213
+ #
214
+ # This class uses ActiveRecord to access data in the Ensembl database.
215
+ # See the general documentation of the Ensembl module for
216
+ # more information on what this means and what methods are available.
217
+ class PopulationGenotype < DBConnection
218
+ set_primary_key "population_genotype_id"
219
+ belongs_to :variation
220
+ belongs_to :population
221
+ belongs_to :subsnp_handle
222
+ has_many :compressed_genotype_single_bps, :foreign_key => "sample_id"
223
+ end
224
+
225
+ # The ProteinInfo class gives information about protein translated from a given transcript.
226
+ #
227
+ # This class uses ActiveRecord to access data in the Ensembl database.
228
+ # See the general documentation of the Ensembl module for
229
+ # more information on what this means and what methods are available.
230
+ class ProteinInfo < DBConnection
231
+ set_primary_key "protein_info_id"
232
+ belongs_to :transcript_variation
233
+ has_many :protein_positions
234
+ end
235
+
236
+ # The PolyphenPrediction class gives information about the predicted effects of variations within an amino acid sequence
237
+ #
238
+ # This class uses ActiveRecord to access data in the Ensembl database.
239
+ # See the general documentation of the Ensembl module for
240
+ # more information on what this means and what methods are available.
241
+ class PolyphenPrediction < DBConnection
242
+ set_primary_key "polyphen_prediction_id"
243
+ belongs_to :protein_position
244
+ end
245
+
246
+ # The ProteinPosition class gives information about variations within an amino acid sequence.
247
+ #
248
+ # This class uses ActiveRecord to access data in the Ensembl database.
249
+ # See the general documentation of the Ensembl module for
250
+ # more information on what this means and what methods are available.
251
+ class ProteinPosition < DBConnection
252
+ set_primary_key "protein_position_id"
253
+ belongs_to :protein_info
254
+ has_many :polyphen_predictions
255
+ has_many :sift_predictions
256
+ end
257
+
258
+
259
+
260
+ # The SampleSynonym class represents information about alternative names
261
+ # for sample entries.
262
+ #
263
+ # This class uses ActiveRecord to access data in the Ensembl database.
264
+ # See the general documentation of the Ensembl module for
265
+ # more information on what this means and what methods are available.
266
+ class SampleSynonym < DBConnection
267
+ set_primary_key "sample_synonym_id"
268
+ belongs_to :source
269
+ belongs_to :sample
270
+ belongs_to :population
271
+ end
272
+
273
+ # The Source class gives information on the different databases and SNP
274
+ # panels used to retrieve the data
275
+ #
276
+ # This class uses ActiveRecord to access data in the Ensembl database.
277
+ # See the general documentation of the Ensembl module for
278
+ # more information on what this means and what methods are available.
279
+ class Source < DBConnection
280
+ set_primary_key "source_id"
281
+ has_many :sample_synonyms
282
+ has_many :allele_groups
283
+ has_many :variations
284
+ has_many :variation_groups
285
+ has_many :httags
286
+ has_many :variation_synonyms
287
+ has_many :variation_annotations
288
+ has_many :structural_variations
289
+
290
# Returns the somatic_status column as a string, taken from the value before
# type casting (workaround, as ActiveRecord does not parse SET fields in MySQL).
#
# @return [String] the uncast column value; empty if the value is nil
def somatic_status
  raw_value = attributes_before_type_cast['somatic_status']
  "#{raw_value}"
end
293
+
294
+ end
295
+
296
+ # The StructuralVariation class gives information on structural variations mapped on the genome
297
+ #
298
+ # This class uses ActiveRecord to access data in the Ensembl database.
299
+ # See the general documentation of the Ensembl module for
300
+ # more information on what this means and what methods are available.
301
+ class StructuralVariation < DBConnection
302
+ set_primary_key "structural_variation_id"
303
+ belongs_to :source
304
+ belongs_to :seq_region
305
+ has_many :supporting_structural_variations
306
+
307
class << self
  # Workaround for the 'class' field, otherwise it creates a mess for AR:
  # reporting the method as already implemented keeps ActiveRecord from
  # generating an attribute reader that would override Object#class.
  # The name is compared as a string so that a Symbol is recognised too.
  #
  # @param [String, Symbol] method_name Name of the attribute method to check
  # @return [Boolean] true for 'class', otherwise the inherited answer
  def instance_method_already_implemented?(method_name)
    return true if method_name.to_s == 'class'
    super
  end
end
313
+
314
# Returns the value of the 'class' column, which cannot be exposed through
# a reader of the same name.
#
# @return the value stored under "class" in the record's attributes
def sv_class
  attributes["class"]
end
317
+
318
+ end
319
+
320
+
321
+ class SeqRegion < DBConnection
322
+ set_primary_key "seq_region_id"
323
+ has_many :variation_features
324
+ has_many :structural_variations
325
+ end
326
+
327
+ # The SupportingStructuralVariation class represents the supporting records that belong to a StructuralVariation
328
+ #
329
+ # This class uses ActiveRecord to access data in the Ensembl database.
330
+ # See the general documentation of the Ensembl module for
331
+ # more information on what this means and what methods are available.
332
+ class SupportingStructuralVariation < DBConnection
333
+ set_primary_key "supporting_structural_variation_id"
334
+ belongs_to :structural_variation
335
+ end
336
+
337
+ # The SubsnpHandle class gives information on SNP Submitters
338
+ #
339
+ # This class uses ActiveRecord to access data in the Ensembl database.
340
+ # See the general documentation of the Ensembl module for
341
+ # more information on what this means and what methods are available.
342
+ class SubsnpHandle < DBConnection
343
+ set_primary_key "subsnp_id"
344
+ has_many :individual_genotype_multiple_bps, :foreign_key => "subsnp_id"
345
+ has_many :population_genotypes, :foreign_key => "subsnp_id"
346
+ has_many :alleles, :foreign_key => "subsnp_id"
347
+ has_many :variation_synonyms,:foreign_key => "subsnp_id"
348
+ end
349
+
350
+ # The SiftPrediction class gives information about the predicted effects of variations within an amino acid sequence
351
+ #
352
+ # This class uses ActiveRecord to access data in the Ensembl database.
353
+ # See the general documentation of the Ensembl module for
354
+ # more information on what this means and what methods are available.
355
+ class SiftPrediction < DBConnection
356
+ set_primary_key "sift_prediction_id"
357
+ belongs_to :protein_position
358
+ end
359
+
360
+ # The Study class gives information about studies producing variations information
361
+ #
362
+ # This class uses ActiveRecord to access data in the Ensembl database.
363
+ # See the general documentation of the Ensembl module for
364
+ # more information on what this means and what methods are available.
365
+ class Study < DBConnection
366
+ set_primary_key "study_id"
367
+ has_many :associate_studies, :foreign_key => "study1_id"
368
+ has_many :structural_variations
369
+ has_many :variation_annotations
370
+
371
# Returns the study_type column as a string, taken from the value before
# ActiveRecord type casting.
#
# @return [String] the uncast column value; empty if the value is nil
def study_type
  raw_value = attributes_before_type_cast['study_type']
  "#{raw_value}"
end
374
+
375
+ end
376
+
377
+
378
+ # The Variation class represents single nucleotide polymorphisms (SNP) or variations
379
+ # and provides information like the names (IDs), the validation status and
380
+ # the allele information.
381
+ #
382
+ # *BUG*: fields like validation_status and consequence_type are created
383
+ # using the SET option directly in MySQL. These fields are badly interpreted by
384
+ # ActiveRecord, always returning 0.
385
+ #
386
+ # This class uses ActiveRecord to access data in the Ensembl database.
387
+ # See the general documentation of the Ensembl module for
388
+ # more information on what this means and what methods are available.
389
+ #
390
+ # @example
391
+ # v = Variation.find_by_name('rs10111')
392
+ # v.alleles.each do |a|
393
+ # puts a.allele, a.frequency
394
+ # end
395
+ #
396
+ # variations = Variation.fetch_all_by_source('dbSNP') # many records
397
+ # variations.each do |v|
398
+ # puts v.name
399
+ # end
400
+ #
401
+ class Variation < DBConnection
402
+ set_primary_key "variation_id"
403
+ belongs_to :source
404
+ has_many :variation_synonyms
405
+ has_one :flanking_sequence
406
+ has_many :allele_group_alleles
407
+ has_many :allele_groups, :through => :allele_group_alleles
408
+ has_many :population_genotypes
409
+ has_many :alleles
410
+ has_many :variation_features
411
+ has_many :variation_group_variations
412
+ has_many :variation_groups, :through => :variation_group_variations
413
+ has_many :individual_genotype_multiple_bps
414
+ has_many :failed_variations
415
+ has_many :failed_descriptions, :through => :failed_variations
416
+ has_many :variation_set_variations
417
+ has_many :variation_sets, :through => :variation_set_variations
418
+
419
+ def self.fetch_all_by_source(source)
420
+ variations = Source.find_by_name(source).variations
421
+ end
422
+ end
423
+
424
+ # The VariationSynonym class gives information on alternative names used
425
+ # for Variation entries.
426
+ #
427
+ # This class uses ActiveRecord to access data in the Ensembl database.
428
+ # See the general documentation of the Ensembl module for
429
+ # more information on what this means and what methods are available.
430
+ class VariationSynonym < DBConnection
431
+ set_primary_key "variation_synonym_id"
432
+ belongs_to :variation
433
+ belongs_to :source
434
+ belongs_to :subsnp_handle
435
+ end
436
+
437
+ # The VariationGroup class represents a group of variations (SNPs) that are
438
+ # linked and present together.
439
+ #
440
+ # This class uses ActiveRecord to access data in the Ensembl database.
441
+ # See the general documentation of the Ensembl module for
442
+ # more information on what this means and what methods are available.
443
+ class VariationGroup < DBConnection
444
+ set_primary_key "variation_group_id"
445
+ belongs_to :source
446
+ has_one :variation_group_variation
447
+ has_one :httag
448
+ has_one :variation_group_feature
449
+ has_one :allele_group
450
+ end
451
+
452
+ # The VariationGroupVariation class is a connection class.
453
+ # Should not be used directly.
454
+ #
455
+ # This class uses ActiveRecord to access data in the Ensembl database.
456
+ # See the general documentation of the Ensembl module for
457
+ # more information on what this means and what methods are available.
458
+ class VariationGroupVariation < DBConnection
459
+ belongs_to :variation
460
+ belongs_to :variation_group
461
+ end
462
+
463
+ # The VariationGroupFeature class gives information on the genomic position
464
+ # of each VariationGroup.
465
+ #
466
+ # This class uses ActiveRecord to access data in the Ensembl database.
467
+ # See the general documentation of the Ensembl module for
468
+ # more information on what this means and what methods are available.
469
+ class VariationGroupFeature < DBConnection
470
+ set_primary_key "variation_group_feature_id"
471
+ belongs_to :variation_group
472
+ end
473
+
474
+ class VariationAnnotation < DBConnection
475
+ set_primary_key "variation_annotation_id"
476
+ belongs_to :variation
477
+ belongs_to :phenotype
478
+ belongs_to :source
479
+ end
480
+
481
+ # The VariationSet class gives information on variations grouped by study, method, quality measure etc.
482
+ #
483
+ # This class uses ActiveRecord to access data in the Ensembl database.
484
+ # See the general documentation of the Ensembl module for
485
+ # more information on what this means and what methods are available.
486
+ class VariationSet < DBConnection
487
+ set_primary_key "variation_set_id"
488
+ has_many :variation_set_variations
489
+ has_many :variations, :through => :variation_set_variations
490
+ end
491
+
492
+ class VariationSetVariation < DBConnection
493
+ belongs_to :variation
494
+ belongs_to :variation_set
495
+ end
496
+
497
+ # The VariationSetStructure class gives information on super and sub VariationSets.
498
+ #
499
+ # This class uses ActiveRecord to access data in the Ensembl database.
500
+ # See the general documentation of the Ensembl module for
501
+ # more information on what this means and what methods are available.
502
+ class VariationSetStructure < DBConnection
503
+
504
+ end
505
+
506
+
507
+
508
+ # The FlankingSequence class gives information about the genomic coordinates
509
+ # of the flanking sequences, for a single VariationFeature.
510
+ #
511
+ # This class uses ActiveRecord to access data in the Ensembl database.
512
+ # See the general documentation of the Ensembl module for
513
+ # more information on what this means and what methods are available.
514
+ class FlankingSequence < DBConnection
515
+ belongs_to :variation
516
+ end
517
+
518
+ # The TaggedVariationFeature class is a connection class.
519
+ # Should not be used directly.
520
+ #
521
+ # This class uses ActiveRecord to access data in the Ensembl database.
522
+ # See the general documentation of the Ensembl module for
523
+ # more information on what this means and what methods are available.
524
+ class TaggedVariationFeature < DBConnection
525
+ belongs_to :variation_feature
526
+ belongs_to :sample
527
+ end
528
+
529
+ class Httag < DBConnection
530
+ set_primary_key "httag_id"
531
+ belongs_to :variation_group
532
+ belongs_to :source
533
+ end
534
+
535
+ end
536
+ end