jandot-ruby-ensembl-api 0.9.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (33) hide show
  1. data/TUTORIAL +623 -0
  2. data/bin/ensembl +39 -0
  3. data/lib/ensembl/core/activerecord.rb +1847 -0
  4. data/lib/ensembl/core/project.rb +248 -0
  5. data/lib/ensembl/core/slice.rb +627 -0
  6. data/lib/ensembl/core/transcript.rb +425 -0
  7. data/lib/ensembl/core/transform.rb +97 -0
  8. data/lib/ensembl/db_connection.rb +148 -0
  9. data/lib/ensembl/variation/activerecord.rb +308 -0
  10. data/lib/ensembl.rb +23 -0
  11. data/samples/examples_perl_tutorial.rb +120 -0
  12. data/samples/small_example_ruby_api.rb +34 -0
  13. data/test/unit/release_45/core/run_tests.rb +12 -0
  14. data/test/unit/release_45/core/test_project.rb +235 -0
  15. data/test/unit/release_45/core/test_project_human.rb +58 -0
  16. data/test/unit/release_45/core/test_relationships.rb +61 -0
  17. data/test/unit/release_45/core/test_sequence.rb +175 -0
  18. data/test/unit/release_45/core/test_slice.rb +56 -0
  19. data/test/unit/release_45/core/test_transcript.rb +94 -0
  20. data/test/unit/release_45/core/test_transform.rb +223 -0
  21. data/test/unit/release_45/variation/test_activerecord.rb +32 -0
  22. data/test/unit/release_50/core/run_tests.rb +12 -0
  23. data/test/unit/release_50/core/test_project.rb +215 -0
  24. data/test/unit/release_50/core/test_project_human.rb +58 -0
  25. data/test/unit/release_50/core/test_relationships.rb +66 -0
  26. data/test/unit/release_50/core/test_sequence.rb +175 -0
  27. data/test/unit/release_50/core/test_slice.rb +121 -0
  28. data/test/unit/release_50/core/test_transcript.rb +108 -0
  29. data/test/unit/release_50/core/test_transform.rb +223 -0
  30. data/test/unit/release_50/variation/test_activerecord.rb +136 -0
  31. data/test/unit/test_connection.rb +58 -0
  32. data/test/unit/test_releases.rb +40 -0
  33. metadata +243 -0
@@ -0,0 +1,148 @@
1
+ require 'rubygems'
2
+ require 'activerecord'
3
+
4
module Ensembl
  # Defaults for the public Ensembl MySQL server. Each of them can be
  # overridden per call through the +args+ hash of the DBConnection.connect
  # methods below.
  DB_ADAPTER = 'mysql'
  DB_HOST = 'ensembldb.ensembl.org'
  DB_USERNAME = 'anonymous'
  DB_PASSWORD = ''

  # = DESCRIPTION
  # Database-less connection to the default Ensembl server on the adapter's
  # default port. It is only used to list the available databases for
  # releases up to and including 47.
  class OldDummyDBConnection < ActiveRecord::Base
    self.abstract_class = true

    establish_connection(
      :adapter  => Ensembl::DB_ADAPTER,
      :host     => Ensembl::DB_HOST,
      :database => '',
      :username => Ensembl::DB_USERNAME,
      :password => Ensembl::DB_PASSWORD
    )
  end

  # = DESCRIPTION
  # Database-less connection to the default Ensembl server on port 5306. It is
  # only used to list the available databases for releases above 47.
  class NewDummyDBConnection < ActiveRecord::Base
    self.abstract_class = true

    establish_connection(
      :adapter  => Ensembl::DB_ADAPTER,
      :host     => Ensembl::DB_HOST,
      :database => '',
      :username => Ensembl::DB_USERNAME,
      :password => Ensembl::DB_PASSWORD,
      :port     => 5306
    )
  end

  # = DESCRIPTION
  # Shared implementation behind Ensembl::Core::DBConnection.connect and
  # Ensembl::Variation::DBConnection.connect. It is mixed into those classes
  # with +extend+ and is not meant to be called directly.
  module DBConnectionHelper #:nodoc:
    private

    # Returns the first database on the default Ensembl server whose name
    # contains "<species>_<db_type>_<release>", or nil if there is none.
    #
    # The species is regexp-escaped and the release number must be followed by
    # an underscore or the end of the name, so that release 5 cannot
    # accidentally select a release 50 database.
    #
    # NOTE(review): the lookup always queries the default server
    # (Ensembl::DB_HOST), even when the caller passes a different :host in
    # +args+ -- confirm whether that is intended.
    def lookup_ensembl_database(db_type, species, release)
      server = (release > 47) ? Ensembl::NewDummyDBConnection : Ensembl::OldDummyDBConnection
      pattern = /#{Regexp.escape(species.to_s)}_#{db_type}_#{release}(?:_|\z)/
      server.connection.select_values('show databases').find { |name| name =~ pattern }
    end

    # Establishes the connection of the receiving class to the +db_type+
    # ('core' or 'variation') database of +species+ for +release+.
    #
    # Returns the retrieved connection, or nil (after printing a warning) when
    # no matching database could be found.
    def connect_to_ensembl(db_type, species, release, args)
      # Only ask the server for its database list when the caller did not
      # name a database explicitly; this avoids a needless remote connection.
      db_name = args[:database] || lookup_ensembl_database(db_type, species, release)

      if db_name.nil?
        warn "WARNING: No connection to database established. Check that the species is in snake_case (was: #{species})."
        return nil
      end

      # Releases above 47 are served on port 5306; older ones use the
      # adapter's default port (nil).
      default_port = (release > 47) ? 5306 : nil

      establish_connection(
        :adapter  => args[:adapter]  || Ensembl::DB_ADAPTER,
        :host     => args[:host]     || Ensembl::DB_HOST,
        :database => db_name,
        :username => args[:username] || Ensembl::DB_USERNAME,
        :password => args[:password] || Ensembl::DB_PASSWORD,
        :port     => args[:port]     || default_port
      )
      retrieve_connection
    end
  end

  module Core
    # = DESCRIPTION
    # The Ensembl::Core::DBConnection is the actual connection established
    # with the Ensembl server.
    class DBConnection < ActiveRecord::Base
      extend Ensembl::DBConnectionHelper

      self.abstract_class = true
      self.pluralize_table_names = false

      # = DESCRIPTION
      # The Ensembl::Core::DBConnection#connect method makes the connection
      # to the Ensembl core database for a given species. By default, it
      # connects to release Ensembl::ENSEMBL_RELEASE for that species. You
      # _could_ use a lower number, but some parts of the API might not work,
      # or worse: give the wrong results.
      #
      # = USAGE
      #  # Connect to the default release of human
      #  Ensembl::Core::DBConnection.connect('homo_sapiens')
      #
      #  # Connect to release 42 of chicken
      #  Ensembl::Core::DBConnection.connect('gallus_gallus', 42)
      #
      #  # Connect to a named database on another server
      #  Ensembl::Core::DBConnection.connect('homo_sapiens', 50,
      #    :host => 'localhost', :database => 'homo_sapiens_core_50_36l')
      #
      # ---
      # *Arguments*:
      # * species:: species to connect to. Arguments should be in snake_case
      # * release:: the release of the database to connect to
      #   (default = Ensembl::ENSEMBL_RELEASE)
      # * args:: optional hash overriding connection settings; recognised
      #   keys are :adapter, :host, :database, :username, :password and :port
      # *Returns*:: the database connection, or nil (with a warning) if no
      #   matching database was found
      def self.connect(species, release = Ensembl::ENSEMBL_RELEASE, args = {})
        connect_to_ensembl('core', species, release, args)
      end
    end
  end

  module Variation
    # = DESCRIPTION
    # The Ensembl::Variation::DBConnection is the actual connection established
    # with the Ensembl server.
    class DBConnection < ActiveRecord::Base
      extend Ensembl::DBConnectionHelper

      self.abstract_class = true
      self.pluralize_table_names = false

      # = DESCRIPTION
      # The Ensembl::Variation::DBConnection#connect method makes the
      # connection to the Ensembl variation database for a given species. By
      # default, it connects to release Ensembl::ENSEMBL_RELEASE for that
      # species. You _could_ use a lower number, but some parts of the API
      # might not work, or worse: give the wrong results.
      #
      # = USAGE
      #  # Connect to the default release of human
      #  Ensembl::Variation::DBConnection.connect('homo_sapiens')
      #
      #  # Connect to release 42 of chicken
      #  Ensembl::Variation::DBConnection.connect('gallus_gallus', 42)
      #
      # ---
      # *Arguments*:
      # * species:: species to connect to. Arguments should be in snake_case
      # * release:: the release of the database to connect to
      #   (default = Ensembl::ENSEMBL_RELEASE)
      # * args:: optional hash overriding connection settings; recognised
      #   keys are :adapter, :host, :database, :username, :password and :port
      # *Returns*:: the database connection, or nil (with a warning) if no
      #   matching database was found
      def self.connect(species, release = Ensembl::ENSEMBL_RELEASE, args = {})
        connect_to_ensembl('variation', species, release, args)
      end
    end
  end
end
@@ -0,0 +1,308 @@
1
+ #
2
+ # = ensembl/variation/activerecord.rb - ActiveRecord mappings to Ensembl variation
3
+ #
4
+ # Copyright:: Copyright (C) 2008 Jan Aerts <http://jandot.myopenid.com>
5
+ # License:: The Ruby License
6
+ #
7
+
8
+ nil
9
module Ensembl
  # = DESCRIPTION
  # The Ensembl::Variation module covers the variation databases from
  # ensembldb.ensembl.org.
  module Variation
    # = DESCRIPTION
    # The Allele class describes a single allele of a variation. In addition
    # to the nucleotide(s) (or absence thereof) representing the allele,
    # frequency and population information may be present.
    #
    # This class uses ActiveRecord to access data in the Ensembl database.
    # See the general documentation of the Ensembl module for
    # more information on what this means and what methods are available.
    #
    # = USAGE
    #  allele = Allele.find(1)
    #  puts allele.to_yaml
    class Allele < DBConnection
      set_primary_key 'allele_id'
      belongs_to :sample
      belongs_to :variation
      belongs_to :population
    end

    # = DESCRIPTION
    # The AlleleGroup class represents a grouping of alleles that have tight
    # linkage and are usually present together. This is commonly known as a
    # Haplotype or Haplotype Block.
    #
    # This class uses ActiveRecord to access data in the Ensembl database.
    # See the general documentation of the Ensembl module for
    # more information on what this means and what methods are available.
    #
    # = USAGE
    #  allele_group = AlleleGroup.find(1)
    #  puts allele_group.to_yaml
    class AlleleGroup < DBConnection
      set_primary_key 'allele_group_id'
      belongs_to :variation_group
      belongs_to :source
      belongs_to :sample
      # NOTE(review): AlleleGroupAllele itself declares belongs_to
      # :allele_group, so this direction may need to be has_many -- verify
      # against the database schema.
      belongs_to :allele_group_allele
    end

    # = DESCRIPTION
    # The AlleleGroupAllele class is the join model linking a Variation to an
    # AlleleGroup. Should not be used directly.
    #
    # This class uses ActiveRecord to access data in the Ensembl database.
    # See the general documentation of the Ensembl module for
    # more information on what this means and what methods are available.
    class AlleleGroupAllele < DBConnection
      belongs_to :variation
      belongs_to :allele_group
    end

    # = DESCRIPTION
    # The Sample class gives information about the biological samples stored
    # in the database.
    #
    # This class uses ActiveRecord to access data in the Ensembl database.
    # See the general documentation of the Ensembl module for
    # more information on what this means and what methods are available.
    class Sample < DBConnection
      set_primary_key 'sample_id'
      has_one :individual
      has_one :sample_synonym
      has_many :individual_genotype_multiple_bp
      has_many :compressed_genotype_single_bp
      has_many :read_coverage
      has_one :population
      has_many :tagged_variation_features
    end

    # = DESCRIPTION
    # The IndividualPopulation class is the join model connecting the
    # Individual and Population classes. Should not be used directly.
    #
    # This class uses ActiveRecord to access data in the Ensembl database.
    # See the general documentation of the Ensembl module for
    # more information on what this means and what methods are available.
    class IndividualPopulation < DBConnection
      belongs_to :individual
      belongs_to :population
    end

    # = DESCRIPTION
    # The Individual class gives information on the single individuals used
    # to retrieve one or more biological samples.
    #
    # This class uses ActiveRecord to access data in the Ensembl database.
    # See the general documentation of the Ensembl module for
    # more information on what this means and what methods are available.
    class Individual < DBConnection
      belongs_to :sample
      # TODO: mapping is incomplete -- some table fields could not be
      # figured out.
    end

    # = DESCRIPTION
    # ActiveRecord mapping for individual_genotype_multiple_bp records; each
    # record belongs to a Sample and to a Variation.
    class IndividualGenotypeMultipleBp < DBConnection
      belongs_to :sample
      belongs_to :variation
    end

    # = DESCRIPTION
    # ActiveRecord mapping for compressed_genotype_single_bp records; each
    # record belongs to a Sample.
    class CompressedGenotypeSingleBp < DBConnection
      belongs_to :sample
    end

    # = DESCRIPTION
    # ActiveRecord mapping for read_coverage records; each record belongs to
    # a Sample.
    class ReadCoverage < DBConnection
      belongs_to :sample
    end

    # = DESCRIPTION
    # ActiveRecord mapping for population records; each record belongs to a
    # Sample.
    class Population < DBConnection
      belongs_to :sample
    end

    # = DESCRIPTION
    # ActiveRecord mapping for population_structure records. No associations
    # are declared.
    class PopulationStructure < DBConnection
      # TODO: mapping is incomplete -- some table fields could not be
      # figured out.
    end

    # = DESCRIPTION
    # The PopulationGenotype class gives information about alleles and allele
    # frequencies for a SNP observed within a population or a group of
    # samples.
    #
    # This class uses ActiveRecord to access data in the Ensembl database.
    # See the general documentation of the Ensembl module for
    # more information on what this means and what methods are available.
    class PopulationGenotype < DBConnection
      set_primary_key 'population_genotype_id'
      belongs_to :variation
      belongs_to :population
    end

    # = DESCRIPTION
    # The SampleSynonym class represents information about alternative names
    # for sample entries.
    #
    # This class uses ActiveRecord to access data in the Ensembl database.
    # See the general documentation of the Ensembl module for
    # more information on what this means and what methods are available.
    class SampleSynonym < DBConnection
      set_primary_key 'sample_synonym_id'
      belongs_to :source
      belongs_to :sample
      belongs_to :population
    end

    # = DESCRIPTION
    # The Source class gives information on the different databases and SNP
    # panels used to retrieve the data.
    #
    # This class uses ActiveRecord to access data in the Ensembl database.
    # See the general documentation of the Ensembl module for
    # more information on what this means and what methods are available.
    class Source < DBConnection
      set_primary_key 'source_id'
      has_many :sample_synonyms
      has_many :allele_groups
      has_many :variations
      has_many :variation_groups
      has_many :httags
      has_many :variation_synonyms
    end

    # = DESCRIPTION
    # The VariationSynonym class gives information on alternative names used
    # for Variation entries.
    #
    # This class uses ActiveRecord to access data in the Ensembl database.
    # See the general documentation of the Ensembl module for
    # more information on what this means and what methods are available.
    class VariationSynonym < DBConnection
      set_primary_key 'variation_synonym_id'
      belongs_to :variation
      belongs_to :source
    end

    # = DESCRIPTION
    # The Variation class represents single nucleotide polymorphisms (SNPs)
    # or other variations and provides information like the names (IDs), the
    # validation status and the allele information.
    #
    # *BUG*: fields like validation_status and consequence_type are created
    # using the SET option directly in MySQL. These fields are misinterpreted
    # by ActiveRecord, which always returns 0 for them.
    #
    # This class uses ActiveRecord to access data in the Ensembl database.
    # See the general documentation of the Ensembl module for
    # more information on what this means and what methods are available.
    class Variation < DBConnection
      set_primary_key 'variation_id'
      belongs_to :source
      has_one :variation_synonym
      has_one :flanking_sequence
      has_many :allele_group_alleles
      has_many :allele_groups, :through => :allele_group_alleles
      has_many :population_genotypes
      has_many :alleles
      has_one :variation_feature
      has_many :variation_group_variations
      has_many :variation_groups, :through => :variation_group_variations
      has_many :individual_genotype_multiple_bps
    end

    # = DESCRIPTION
    # The VariationGroup class represents a group of variations (SNPs) that
    # are linked and present together.
    #
    # This class uses ActiveRecord to access data in the Ensembl database.
    # See the general documentation of the Ensembl module for
    # more information on what this means and what methods are available.
    class VariationGroup < DBConnection
      set_primary_key 'variation_group_id'
      belongs_to :source
      has_one :variation_group_variation
      has_one :httag
      has_one :variation_group_feature
      has_one :allele_group
    end

    # = DESCRIPTION
    # The VariationGroupVariation class is the join model linking a Variation
    # to a VariationGroup. Should not be used directly.
    #
    # This class uses ActiveRecord to access data in the Ensembl database.
    # See the general documentation of the Ensembl module for
    # more information on what this means and what methods are available.
    class VariationGroupVariation < DBConnection
      belongs_to :variation
      belongs_to :variation_group
    end

    # = DESCRIPTION
    # The VariationFeature class gives information about the genomic position
    # of each Variation, including validation status and consequence type.
    #
    # This class uses ActiveRecord to access data in the Ensembl database.
    # See the general documentation of the Ensembl module for
    # more information on what this means and what methods are available.
    class VariationFeature < DBConnection
      set_primary_key 'variation_feature_id'
      belongs_to :variation
      has_many :tagged_variation_features
      has_many :samples, :through => :tagged_variation_features
      has_many :transcript_variations
    end

    # = DESCRIPTION
    # The VariationGroupFeature class gives information on the genomic
    # position of each VariationGroup.
    #
    # This class uses ActiveRecord to access data in the Ensembl database.
    # See the general documentation of the Ensembl module for
    # more information on what this means and what methods are available.
    class VariationGroupFeature < DBConnection
      set_primary_key 'variation_group_feature_id'
      belongs_to :variation_group
    end

    # = DESCRIPTION
    # The TranscriptVariation class gives information about the position of
    # a VariationFeature, mapped on an annotated transcript.
    #
    # This class uses ActiveRecord to access data in the Ensembl database.
    # See the general documentation of the Ensembl module for
    # more information on what this means and what methods are available.
    class TranscriptVariation < DBConnection
      set_primary_key 'transcript_variation_id'
      belongs_to :variation_feature
    end

    # = DESCRIPTION
    # The FlankingSequence class gives information about the genomic
    # coordinates of the flanking sequences of a single Variation.
    #
    # This class uses ActiveRecord to access data in the Ensembl database.
    # See the general documentation of the Ensembl module for
    # more information on what this means and what methods are available.
    class FlankingSequence < DBConnection
      belongs_to :variation
    end

    # = DESCRIPTION
    # The TaggedVariationFeature class is the join model linking a
    # VariationFeature to a Sample. Should not be used directly.
    #
    # This class uses ActiveRecord to access data in the Ensembl database.
    # See the general documentation of the Ensembl module for
    # more information on what this means and what methods are available.
    class TaggedVariationFeature < DBConnection
      belongs_to :variation_feature
      belongs_to :sample
    end

    # = DESCRIPTION
    # ActiveRecord mapping for httag records; each record belongs to a
    # VariationGroup and to a Source.
    class Httag < DBConnection
      set_primary_key 'httag_id'
      belongs_to :variation_group
      belongs_to :source
    end
  end
end