jandot-ruby-ensembl-api 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. data/TUTORIAL +623 -0
  2. data/bin/ensembl +39 -0
  3. data/lib/ensembl/core/activerecord.rb +1847 -0
  4. data/lib/ensembl/core/project.rb +248 -0
  5. data/lib/ensembl/core/slice.rb +627 -0
  6. data/lib/ensembl/core/transcript.rb +425 -0
  7. data/lib/ensembl/core/transform.rb +97 -0
  8. data/lib/ensembl/db_connection.rb +148 -0
  9. data/lib/ensembl/variation/activerecord.rb +308 -0
  10. data/lib/ensembl.rb +23 -0
  11. data/samples/examples_perl_tutorial.rb +120 -0
  12. data/samples/small_example_ruby_api.rb +34 -0
  13. data/test/unit/release_45/core/run_tests.rb +12 -0
  14. data/test/unit/release_45/core/test_project.rb +235 -0
  15. data/test/unit/release_45/core/test_project_human.rb +58 -0
  16. data/test/unit/release_45/core/test_relationships.rb +61 -0
  17. data/test/unit/release_45/core/test_sequence.rb +175 -0
  18. data/test/unit/release_45/core/test_slice.rb +56 -0
  19. data/test/unit/release_45/core/test_transcript.rb +94 -0
  20. data/test/unit/release_45/core/test_transform.rb +223 -0
  21. data/test/unit/release_45/variation/test_activerecord.rb +32 -0
  22. data/test/unit/release_50/core/run_tests.rb +12 -0
  23. data/test/unit/release_50/core/test_project.rb +215 -0
  24. data/test/unit/release_50/core/test_project_human.rb +58 -0
  25. data/test/unit/release_50/core/test_relationships.rb +66 -0
  26. data/test/unit/release_50/core/test_sequence.rb +175 -0
  27. data/test/unit/release_50/core/test_slice.rb +121 -0
  28. data/test/unit/release_50/core/test_transcript.rb +108 -0
  29. data/test/unit/release_50/core/test_transform.rb +223 -0
  30. data/test/unit/release_50/variation/test_activerecord.rb +136 -0
  31. data/test/unit/test_connection.rb +58 -0
  32. data/test/unit/test_releases.rb +40 -0
  33. metadata +243 -0
@@ -0,0 +1,148 @@
1
+ require 'rubygems'
2
+ require 'activerecord'
3
+
4
+ module Ensembl
5
+ DB_ADAPTER = 'mysql'
6
+ DB_HOST = 'ensembldb.ensembl.org'
7
+ DB_USERNAME = 'anonymous'
8
+ DB_PASSWORD = ''
9
+
10
+ class OldDummyDBConnection < ActiveRecord::Base
11
+ self.abstract_class = true
12
+
13
+ establish_connection(
14
+ :adapter => Ensembl::DB_ADAPTER,
15
+ :host => Ensembl::DB_HOST,
16
+ :database => '',
17
+ :username => Ensembl::DB_USERNAME,
18
+ :password => Ensembl::DB_PASSWORD
19
+ )
20
+ end
21
+
22
+ class NewDummyDBConnection < ActiveRecord::Base
23
+ self.abstract_class = true
24
+
25
+ establish_connection(
26
+ :adapter => Ensembl::DB_ADAPTER,
27
+ :host => Ensembl::DB_HOST,
28
+ :database => '',
29
+ :username => Ensembl::DB_USERNAME,
30
+ :password => Ensembl::DB_PASSWORD,
31
+ :port => 5306
32
+ )
33
+ end
34
+
35
+
36
+ module Core
37
+ # = DESCRIPTION
38
+ # The Ensembl::Core::DBConnection is the actual connection established
39
+ # with the Ensembl server.
40
+ class DBConnection < ActiveRecord::Base
41
+ self.abstract_class = true
42
+ self.pluralize_table_names = false
43
+
44
+ # = DESCRIPTION
45
+ # The Ensembl::Core::DBConnection#connect method makes the connection
46
+ # to the Ensembl core database for a given species. By default, it connects
47
+ # to release 50 for that species. You _could_ use a lower number, but
48
+ # some parts of the API might not work, or worse: give the wrong results.
49
+ #
50
+ # = USAGE
51
+ # # Connect to release 50 of human
52
+ # Ensembl::Core::DBConnection.connect('homo_sapiens')
53
+ #
54
+ # # Connect to release 42 of chicken
55
+ # Ensembl::Core::DBConnection.connect('gallus_gallus')
56
+ #
57
+ # ---
58
+ # *Arguments*:
59
+ # * species:: species to connect to. Arguments should be in snake_case
60
+ # * ensembl_release:: the release of the database to connect to
61
+ # (default = 50)
62
+ def self.connect(species, release = Ensembl::ENSEMBL_RELEASE, args = {})
63
+ dummy_dbconnection = ( release > 47 ) ? Ensembl::NewDummyDBConnection.connection : Ensembl::OldDummyDBConnection.connection
64
+ db_name = nil
65
+
66
+ if args[:database]
67
+ db_name = args[:database]
68
+ else
69
+ db_name = dummy_dbconnection.select_values('show databases').select{|v| v =~ /#{species}_core_#{release.to_s}/}[0]
70
+ end
71
+
72
+ if db_name.nil?
73
+ warn "WARNING: No connection to database established. Check that the species is in snake_case (was: #{species})."
74
+ else
75
+ port = ( release > 47 ) ? 5306 : nil
76
+ establish_connection(
77
+ :adapter => args[:adapter] || Ensembl::DB_ADAPTER,
78
+ :host => args[:host] || Ensembl::DB_HOST,
79
+ :database => args[:database] || db_name,
80
+ :username => args[:username] || Ensembl::DB_USERNAME,
81
+ :password => args[:password] || Ensembl::DB_PASSWORD,
82
+ :port => args[:port] || port
83
+ )
84
+ self.retrieve_connection
85
+ end
86
+
87
+ end
88
+
89
+ end
90
+
91
+ end
92
+
93
+ module Variation
94
+ # = DESCRIPTION
95
+ # The Ensembl::Variation::DBConnection is the actual connection established
96
+ # with the Ensembl server.
97
+ class DBConnection < ActiveRecord::Base
98
+ self.abstract_class = true
99
+ self.pluralize_table_names = false
100
+
101
+ # = DESCRIPTION
102
+ # The Ensembl::Variation::DBConnection#connect method makes the connection
103
+ # to the Ensembl variation database for a given species. By default, it connects
104
+ # to release 50 for that species. You _could_ use a lower number, but
105
+ # some parts of the API might not work, or worse: give the wrong results.
106
+ #
107
+ # = USAGE
108
+ # # Connect to release 50 of human
109
+ # Ensembl::Variation::DBConnection.connect('homo_sapiens')
110
+ #
111
+ # # Connect to release 42 of chicken
112
+ # Ensembl::Variation::DBConnection.connect('gallus_gallus')
113
+ #
114
+ # ---
115
+ # *Arguments*:
116
+ # * species:: species to connect to. Arguments should be in snake_case
117
+ # * ensembl_release:: the release of the database to connect to
118
+ # (default = 50)
119
+ def self.connect(species, release = Ensembl::ENSEMBL_RELEASE, args = {})
120
+ dummy_dbconnection = ( release > 47 ) ? Ensembl::NewDummyDBConnection.connection : Ensembl::OldDummyDBConnection.connection
121
+ db_name = nil
122
+ if args[:database]
123
+ db_name = args[:database]
124
+ else
125
+ db_name = dummy_dbconnection.select_values('show databases').select{|v| v =~ /#{species}_variation_#{release.to_s}/}[0]
126
+ end
127
+
128
+ if db_name.nil?
129
+ warn "WARNING: No connection to database established. Check that the species is in snake_case (was: #{species})."
130
+ else
131
+ port = ( release > 47 ) ? 5306 : nil
132
+ establish_connection(
133
+ :adapter => Ensembl::DB_ADAPTER,
134
+ :host => args[:host] || Ensembl::DB_HOST,
135
+ :database => db_name,
136
+ :username => args[:username] || Ensembl::DB_USERNAME,
137
+ :password => args[:password] || Ensembl::DB_PASSWORD,
138
+ :port => args[:port] || port
139
+ )
140
+ self.retrieve_connection
141
+ end
142
+
143
+ end
144
+
145
+ end
146
+
147
+ end
148
+ end
@@ -0,0 +1,308 @@
1
+ #
2
+ # = ensembl/variation/activerecord.rb - ActiveRecord mappings to Ensembl variation
3
+ #
4
+ # Copyright:: Copyright (C) 2008 Jan Aerts <http://jandot.myopenid.com>
5
+ # License:: The Ruby License
6
+ #
7
+
8
+ nil
9
+ module Ensembl
10
+ # = DESCRIPTION
11
+ # The Ensembl::Variation module covers the variation databases from
12
+ # ensembldb.ensembl.org.
13
+ module Variation
14
+ # = DESCRIPTION
15
+ # The Allele class describes a single allele of a variation. In addition to
16
+ # the nucleotide(s) (or absence thereof) that represent the allele, frequency
17
+ # and population information may be present.
18
+ #
19
+ # This class uses ActiveRecord to access data in the Ensembl database.
20
+ # See the general documentation of the Ensembl module for
21
+ # more information on what this means and what methods are available.
22
+ #
23
+ # = USAGE
24
+ # allele = Allele.find(1)
25
+ # puts allele.to_yaml
26
+ class Allele < DBConnection
27
+ set_primary_key 'allele_id'
28
+ belongs_to :sample
29
+ belongs_to :variation
30
+ belongs_to :population
31
+ end
32
+
33
+ # = DESCRIPTION
34
+ # The AlleleGroup class represents a grouping of alleles that have tight
35
+ # linkage and are usually present together. This is commonly known as a
36
+ # Haplotype or Haplotype Block.
37
+ #
38
+ # This class uses ActiveRecord to access data in the Ensembl database.
39
+ # See the general documentation of the Ensembl module for
40
+ # more information on what this means and what methods are available.
41
+ #
42
+ # = USAGE
43
+ # allele_group = AlleleGroup.find(1)
44
+ # puts allele_group.to_yaml
45
+ class AlleleGroup < DBConnection
46
+ set_primary_key 'allele_group_id'
47
+ belongs_to :variation_group
48
+ belongs_to :source
49
+ belongs_to :sample
50
+ belongs_to :allele_group_allele
51
+ end
52
+
53
+ # = DESCRIPTION
54
+ # The AlleleGroupAllele class represents a connection class between Allele and AlleleGroup.
55
+ # Should not be used directly.
56
+ #
57
+ # This class uses ActiveRecord to access data in the Ensembl database.
58
+ # See the general documentation of the Ensembl module for
59
+ # more information on what this means and what methods are available.
60
+ class AlleleGroupAllele < DBConnection
61
+ belongs_to :variation
62
+ belongs_to :allele_group
63
+ end
64
+
65
+ # = DESCRIPTION
66
+ # The Sample class gives information about the biological samples stored in the database.
67
+ #
68
+ # This class uses ActiveRecord to access data in the Ensembl database.
69
+ # See the general documentation of the Ensembl module for
70
+ # more information on what this means and what methods are available.
71
+ class Sample < DBConnection
72
+ set_primary_key "sample_id"
73
+ has_one :individual
74
+ has_one :sample_synonym
75
+ has_many :individual_genotype_multiple_bp
76
+ has_many :compressed_genotype_single_bp
77
+ has_many :read_coverage
78
+ has_one :population
79
+ has_many :tagged_variation_features
80
+ end
81
+
82
+ # = DESCRIPTION
83
+ # The IndividualPopulation class is used to connect Individual and Population classes.
84
+ # Should not be used directly.
85
+ #
86
+ # This class uses ActiveRecord to access data in the Ensembl database.
87
+ # See the general documentation of the Ensembl module for
88
+ # more information on what this means and what methods are available.
89
+ class IndividualPopulation < DBConnection
90
+ belongs_to :individual
91
+ belongs_to :population
92
+ end
93
+
94
+ # = DESCRIPTION
95
+ # The Individual class gives information on the single individuals used
96
+ # to retrieve one or more biological samples.
97
+ #
98
+ # This class uses ActiveRecord to access data in the Ensembl database.
99
+ # See the general documentation of the Ensembl module for
100
+ # more information on what this means and what methods are available.
101
+ class Individual < DBConnection
102
+ belongs_to :sample
103
+ # CAN'T FIGURE OUT SOME TABLE FIELDS
104
+ end
105
+
106
+ class IndividualGenotypeMultipleBp < DBConnection # ActiveRecord mapping; each record points at one sample and one variation
107
+ belongs_to :sample
108
+ belongs_to :variation
109
+ end
110
+
111
+ class CompressedGenotypeSingleBp < DBConnection # ActiveRecord mapping; each record points at one sample
112
+ belongs_to :sample
113
+ end
114
+
115
+ class ReadCoverage < DBConnection # ActiveRecord mapping; each record points at one sample
116
+ belongs_to :sample
117
+ end
118
+
119
+ class Population < DBConnection # ActiveRecord mapping; counterpart of Sample's has_one :population
120
+ belongs_to :sample
121
+ end
122
+
123
+ class PopulationStructure < DBConnection # ActiveRecord mapping without any declared associations
124
+ # TODO: no associations are mapped yet because some table fields could not be identified
125
+ end
126
+
127
+ # = DESCRIPTION
128
+ # The PopulationGenotype class gives information about alleles and allele
129
+ # frequencies for a SNP observed within a population or a group of samples.
130
+ #
131
+ # This class uses ActiveRecord to access data in the Ensembl database.
132
+ # See the general documentation of the Ensembl module for
133
+ # more information on what this means and what methods are available.
134
+ class PopulationGenotype < DBConnection
135
+ set_primary_key "population_genotype_id"
136
+ belongs_to :variation
137
+ belongs_to :population
138
+ end
139
+
140
+ # = DESCRIPTION
141
+ # The SampleSynonym class represents information about alternative names
142
+ # for sample entries.
143
+ #
144
+ # This class uses ActiveRecord to access data in the Ensembl database.
145
+ # See the general documentation of the Ensembl module for
146
+ # more information on what this means and what methods are available.
147
+ class SampleSynonym < DBConnection
148
+ set_primary_key "sample_synonym_id"
149
+ belongs_to :source
150
+ belongs_to :sample
151
+ belongs_to :population
152
+ end
153
+
154
+ # = DESCRIPTION
155
+ # The Source class gives information on the different databases and SNP
156
+ # panels used to retrieve the data
157
+ #
158
+ # This class uses ActiveRecord to access data in the Ensembl database.
159
+ # See the general documentation of the Ensembl module for
160
+ # more information on what this means and what methods are available.
161
+ class Source < DBConnection
162
+ set_primary_key "source_id"
163
+ has_many :sample_synonyms
164
+ has_many :allele_groups
165
+ has_many :variations
166
+ has_many :variation_groups
167
+ has_many :httags
168
+ has_many :variation_synonyms
169
+ end
170
+
171
+ # = DESCRIPTION
172
+ # The VariationSynonym class gives information on alternative names used
173
+ # for Variation entries.
174
+ #
175
+ # This class uses ActiveRecord to access data in the Ensembl database.
176
+ # See the general documentation of the Ensembl module for
177
+ # more information on what this means and what methods are available.
178
+ class VariationSynonym < DBConnection
179
+ set_primary_key "variation_synonym_id"
180
+ belongs_to :variation
181
+ belongs_to :source
182
+ end
183
+
184
+ # = DESCRIPTION
185
+ # The Variation class represents single nucleotide polymorphisms (SNP) or variations
186
+ # and provides information like the names (IDs), the validation status and
187
+ # the allele information.
188
+ #
189
+ # *BUG*: fields like validation_status and consequence_type are created
190
+ # using the SET option directly in MySQL. These fields are misinterpreted by
191
+ # ActiveRecord, which always returns 0 for them.
192
+ #
193
+ # This class uses ActiveRecord to access data in the Ensembl database.
194
+ # See the general documentation of the Ensembl module for
195
+ # more information on what this means and what methods are available.
196
+ class Variation < DBConnection
197
+ set_primary_key "variation_id"
198
+ belongs_to :source
199
+ has_one :variation_synonym
200
+ has_one :flanking_sequence
201
+ has_many :allele_group_alleles
202
+ has_many :allele_groups, :through => :allele_group_alleles
203
+ has_many :population_genotypes
204
+ has_many :alleles
205
+ has_one :variation_feature
206
+ has_many :variation_group_variations
207
+ has_many :variation_groups, :through => :variation_group_variations
208
+ has_many :individual_genotype_multiple_bps
209
+ end
210
+
211
+ # = DESCRIPTION
212
+ # The VariationGroup class represents a group of variations (SNPs) that are
213
+ # linked and present together.
214
+ #
215
+ # This class uses ActiveRecord to access data in the Ensembl database.
216
+ # See the general documentation of the Ensembl module for
217
+ # more information on what this means and what methods are available.
218
+ class VariationGroup < DBConnection
219
+ set_primary_key "variation_group_id"
220
+ belongs_to :source
221
+ has_one :variation_group_variation
222
+ has_one :httag
223
+ has_one :variation_group_feature
224
+ has_one :allele_group
225
+ end
226
+
227
+ # = DESCRIPTION
228
+ # The VariationGroupVariation class is a connection class.
229
+ # Should not be used directly.
230
+ #
231
+ # This class uses ActiveRecord to access data in the Ensembl database.
232
+ # See the general documentation of the Ensembl module for
233
+ # more information on what this means and what methods are available.
234
+ class VariationGroupVariation < DBConnection
235
+ belongs_to :variation
236
+ belongs_to :variation_group
237
+ end
238
+
239
+ # = DESCRIPTION
240
+ # The VariationFeature class gives information about the genomic position of
241
+ # each Variation, including also validation status and consequence type.
242
+ #
243
+ # This class uses ActiveRecord to access data in the Ensembl database.
244
+ # See the general documentation of the Ensembl module for
245
+ # more information on what this means and what methods are available.
246
+ class VariationFeature < DBConnection
247
+ set_primary_key "variation_feature_id"
248
+ belongs_to :variation
249
+ has_many :tagged_variation_features
250
+ has_many :samples, :through => :tagged_variation_features
251
+ has_many :transcript_variations
252
+ end
253
+
254
+ # = DESCRIPTION
255
+ # The VariationGroupFeature class gives information on the genomic position
256
+ # of each VariationGroup.
257
+ #
258
+ # This class uses ActiveRecord to access data in the Ensembl database.
259
+ # See the general documentation of the Ensembl module for
260
+ # more information on what this means and what methods are available.
261
+ class VariationGroupFeature < DBConnection
262
+ set_primary_key "variation_group_feature_id"
263
+ belongs_to :variation_group
264
+ end
265
+
266
+ # = DESCRIPTION
267
+ # The TranscriptVariation class gives information about the position of
268
+ # a VariationFeature, mapped on an annotated transcript.
269
+ #
270
+ # This class uses ActiveRecord to access data in the Ensembl database.
271
+ # See the general documentation of the Ensembl module for
272
+ # more information on what this means and what methods are available.
273
+ class TranscriptVariation < DBConnection
274
+ set_primary_key "transcript_variation_id"
275
+ belongs_to :variation_feature
276
+ end
277
+
278
+ # = DESCRIPTION
279
+ # The FlankingSequence class gives information about the genomic coordinates
280
+ # of the flanking sequences, for a single VariationFeature.
281
+ #
282
+ # This class uses ActiveRecord to access data in the Ensembl database.
283
+ # See the general documentation of the Ensembl module for
284
+ # more information on what this means and what methods are available.
285
+ class FlankingSequence < DBConnection
286
+ belongs_to :variation
287
+ end
288
+
289
+ # = DESCRIPTION
290
+ # The TaggedVariationFeature class is a connection class.
291
+ # Should not be used directly.
292
+ #
293
+ # This class uses ActiveRecord to access data in the Ensembl database.
294
+ # See the general documentation of the Ensembl module for
295
+ # more information on what this means and what methods are available.
296
+ class TaggedVariationFeature < DBConnection
297
+ belongs_to :variation_feature
298
+ belongs_to :sample
299
+ end
300
+
301
+ class Httag < DBConnection # ActiveRecord mapping; each record points at one variation group and one source
302
+ set_primary_key "httag_id" # primary key column is httag_id rather than ActiveRecord's default id
303
+ belongs_to :variation_group
304
+ belongs_to :source
305
+ end
306
+
307
+ end
308
+ end