jandot-ruby-ensembl-api 0.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/TUTORIAL +623 -0
- data/bin/ensembl +39 -0
- data/lib/ensembl/core/activerecord.rb +1847 -0
- data/lib/ensembl/core/project.rb +248 -0
- data/lib/ensembl/core/slice.rb +627 -0
- data/lib/ensembl/core/transcript.rb +425 -0
- data/lib/ensembl/core/transform.rb +97 -0
- data/lib/ensembl/db_connection.rb +148 -0
- data/lib/ensembl/variation/activerecord.rb +308 -0
- data/lib/ensembl.rb +23 -0
- data/samples/examples_perl_tutorial.rb +120 -0
- data/samples/small_example_ruby_api.rb +34 -0
- data/test/unit/release_45/core/run_tests.rb +12 -0
- data/test/unit/release_45/core/test_project.rb +235 -0
- data/test/unit/release_45/core/test_project_human.rb +58 -0
- data/test/unit/release_45/core/test_relationships.rb +61 -0
- data/test/unit/release_45/core/test_sequence.rb +175 -0
- data/test/unit/release_45/core/test_slice.rb +56 -0
- data/test/unit/release_45/core/test_transcript.rb +94 -0
- data/test/unit/release_45/core/test_transform.rb +223 -0
- data/test/unit/release_45/variation/test_activerecord.rb +32 -0
- data/test/unit/release_50/core/run_tests.rb +12 -0
- data/test/unit/release_50/core/test_project.rb +215 -0
- data/test/unit/release_50/core/test_project_human.rb +58 -0
- data/test/unit/release_50/core/test_relationships.rb +66 -0
- data/test/unit/release_50/core/test_sequence.rb +175 -0
- data/test/unit/release_50/core/test_slice.rb +121 -0
- data/test/unit/release_50/core/test_transcript.rb +108 -0
- data/test/unit/release_50/core/test_transform.rb +223 -0
- data/test/unit/release_50/variation/test_activerecord.rb +136 -0
- data/test/unit/test_connection.rb +58 -0
- data/test/unit/test_releases.rb +40 -0
- metadata +243 -0
@@ -0,0 +1,148 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'activerecord'
|
3
|
+
|
4
|
+
module Ensembl
|
5
|
+
# ActiveRecord adapter name used for every connection unless overridden.
DB_ADAPTER = 'mysql'
|
6
|
+
# Host that is contacted when no :host override is supplied.
DB_HOST = 'ensembldb.ensembl.org'
|
7
|
+
# Account name used when no :username override is supplied.
DB_USERNAME = 'anonymous'
|
8
|
+
# Password (empty) used when no :password override is supplied.
DB_PASSWORD = ''
|
9
|
+
|
10
|
+
class OldDummyDBConnection < ActiveRecord::Base
  # Connection holder only; it is not backed by a table of its own.
  self.abstract_class = true

  # Server-level settings: the database name is left empty and no :port is
  # given. The DBConnection.connect methods use this connection to list the
  # available databases for releases up to and including 47.
  server_settings = {
    :adapter  => Ensembl::DB_ADAPTER,
    :host     => Ensembl::DB_HOST,
    :database => '',
    :username => Ensembl::DB_USERNAME,
    :password => Ensembl::DB_PASSWORD
  }

  establish_connection(server_settings)
end
|
21
|
+
|
22
|
+
class NewDummyDBConnection < ActiveRecord::Base
  # Connection holder only; it is not backed by a table of its own.
  self.abstract_class = true

  # Server-level settings: the database name is left empty and port 5306 is
  # requested explicitly. The DBConnection.connect methods use this
  # connection to list the available databases for releases above 47.
  server_settings = {
    :adapter  => Ensembl::DB_ADAPTER,
    :host     => Ensembl::DB_HOST,
    :database => '',
    :username => Ensembl::DB_USERNAME,
    :password => Ensembl::DB_PASSWORD,
    :port     => 5306
  }

  establish_connection(server_settings)
end
|
34
|
+
|
35
|
+
|
36
|
+
module Core
|
37
|
+
# = DESCRIPTION
|
38
|
+
# The Ensembl::Core::DBConnection is the actual connection established
|
39
|
+
# with the Ensembl server.
|
40
|
+
class DBConnection < ActiveRecord::Base
  self.abstract_class = true
  self.pluralize_table_names = false

  # = DESCRIPTION
  # The Ensembl::Core::DBConnection#connect method makes the connection
  # to the Ensembl core database for a given species. By default, it connects
  # to release Ensembl::ENSEMBL_RELEASE for that species. You _could_ use a
  # lower number, but some parts of the API might not work, or worse: give
  # the wrong results.
  #
  # Unless a :database is passed in +args+, the database name is discovered
  # by running 'show databases' and taking the first name that matches
  # <species>_core_<release>. NOTE(review): that look-up always goes through
  # the Ensembl::*DummyDBConnection classes, i.e. it queries Ensembl::DB_HOST
  # even when a different :host is given in +args+.
  #
  # = USAGE
  #  # Connect to the default release of human
  #  Ensembl::Core::DBConnection.connect('homo_sapiens')
  #
  #  # Connect to release 42 of chicken
  #  Ensembl::Core::DBConnection.connect('gallus_gallus', 42)
  #
  #  # Connect to an explicitly named database on another server
  #  Ensembl::Core::DBConnection.connect('homo_sapiens', 50,
  #    :host => 'localhost', :database => 'homo_sapiens_core_50_36l')
  #
  # ---
  # *Arguments*:
  # * species:: species to connect to. Arguments should be in snake_case
  # * release:: the release of the database to connect to
  #   (default = Ensembl::ENSEMBL_RELEASE). Must be comparable with an
  #   Integer, because it is tested against 47 to choose the port.
  # * args:: optional hash that overrides connection settings. Recognised
  #   keys: :adapter, :host, :database, :username, :password, :port
  # *Returns*:: the connection returned by retrieve_connection, or nil
  #   (after printing a warning) when no matching database was found
  def self.connect(species, release = Ensembl::ENSEMBL_RELEASE, args = {})
    # Releases above 47 are reached on port 5306; for older releases no port
    # is set unless the caller supplies one.
    uses_new_server = release > 47

    db_name = args[:database]
    unless db_name
      # Only open the look-up connection when the name really has to be
      # discovered, so an explicit :database never touches the public server.
      lookup = uses_new_server ? Ensembl::NewDummyDBConnection.connection : Ensembl::OldDummyDBConnection.connection

      # The species is escaped so it is matched literally, and the release
      # must be followed by '_' or the end of the name so that, for example,
      # release 4 cannot pick up a "..._core_45_..." database.
      pattern = /#{Regexp.escape(species.to_s)}_core_#{release}(_|\z)/
      db_name = lookup.select_values('show databases').find { |name| name =~ pattern }
    end

    if db_name.nil?
      warn "WARNING: No connection to database established. Check that the species is in snake_case (was: #{species})."
      return nil
    end

    establish_connection(
      :adapter  => args[:adapter]  || Ensembl::DB_ADAPTER,
      :host     => args[:host]     || Ensembl::DB_HOST,
      :database => db_name,
      :username => args[:username] || Ensembl::DB_USERNAME,
      :password => args[:password] || Ensembl::DB_PASSWORD,
      :port     => args[:port]     || (uses_new_server ? 5306 : nil)
    )
    retrieve_connection
  end
end
|
90
|
+
|
91
|
+
end
|
92
|
+
|
93
|
+
module Variation
|
94
|
+
# = DESCRIPTION
|
95
|
+
# The Ensembl::Variation::DBConnection is the actual connection established
|
96
|
+
# with the Ensembl server.
|
97
|
+
class DBConnection < ActiveRecord::Base
  self.abstract_class = true
  self.pluralize_table_names = false

  # = DESCRIPTION
  # The Ensembl::Variation::DBConnection#connect method makes the connection
  # to the Ensembl variation database for a given species. By default, it
  # connects to release Ensembl::ENSEMBL_RELEASE for that species. You
  # _could_ use a lower number, but some parts of the API might not work, or
  # worse: give the wrong results.
  #
  # Unless a :database is passed in +args+, the database name is discovered
  # by running 'show databases' and taking the first name that matches
  # <species>_variation_<release>. NOTE(review): that look-up always goes
  # through the Ensembl::*DummyDBConnection classes, i.e. it queries
  # Ensembl::DB_HOST even when a different :host is given in +args+.
  #
  # = USAGE
  #  # Connect to the default release of human
  #  Ensembl::Variation::DBConnection.connect('homo_sapiens')
  #
  #  # Connect to release 42 of chicken
  #  Ensembl::Variation::DBConnection.connect('gallus_gallus', 42)
  #
  # ---
  # *Arguments*:
  # * species:: species to connect to. Arguments should be in snake_case
  # * release:: the release of the database to connect to
  #   (default = Ensembl::ENSEMBL_RELEASE). Must be comparable with an
  #   Integer, because it is tested against 47 to choose the port.
  # * args:: optional hash that overrides connection settings. Recognised
  #   keys: :adapter, :host, :database, :username, :password, :port
  # *Returns*:: the connection returned by retrieve_connection, or nil
  #   (after printing a warning) when no matching database was found
  def self.connect(species, release = Ensembl::ENSEMBL_RELEASE, args = {})
    # Releases above 47 are reached on port 5306; for older releases no port
    # is set unless the caller supplies one.
    uses_new_server = release > 47

    db_name = args[:database]
    unless db_name
      # Only open the look-up connection when the name really has to be
      # discovered, so an explicit :database never touches the public server.
      lookup = uses_new_server ? Ensembl::NewDummyDBConnection.connection : Ensembl::OldDummyDBConnection.connection

      # The species is escaped so it is matched literally, and the release
      # must be followed by '_' or the end of the name so that, for example,
      # release 4 cannot pick up a "..._variation_45_..." database.
      pattern = /#{Regexp.escape(species.to_s)}_variation_#{release}(_|\z)/
      db_name = lookup.select_values('show databases').find { |name| name =~ pattern }
    end

    if db_name.nil?
      warn "WARNING: No connection to database established. Check that the species is in snake_case (was: #{species})."
      return nil
    end

    establish_connection(
      # :adapter can now be overridden, matching Ensembl::Core::DBConnection.
      :adapter  => args[:adapter]  || Ensembl::DB_ADAPTER,
      :host     => args[:host]     || Ensembl::DB_HOST,
      :database => db_name,
      :username => args[:username] || Ensembl::DB_USERNAME,
      :password => args[:password] || Ensembl::DB_PASSWORD,
      :port     => args[:port]     || (uses_new_server ? 5306 : nil)
    )
    retrieve_connection
  end
end
|
146
|
+
|
147
|
+
end
|
148
|
+
end
|
@@ -0,0 +1,308 @@
|
|
1
|
+
#
|
2
|
+
# = ensembl/variation/activerecord.rb - ActiveRecord mappings to Ensembl variation
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2008 Jan Aerts <http://jandot.myopenid.com>
|
5
|
+
# License:: The Ruby License
|
6
|
+
#
|
7
|
+
|
8
|
+
nil
|
9
|
+
module Ensembl
|
10
|
+
# = DESCRIPTION
|
11
|
+
# The Ensembl::Variation module covers the variation databases from
|
12
|
+
# ensembldb.ensembl.org.
|
13
|
+
module Variation
|
14
|
+
# = DESCRIPTION
|
15
|
+
# The Allele class describes a single allele of a variation. In addition to
|
16
|
+
# the nucleotide(s) (or absence thereof) that represent the allele, frequency
|
17
|
+
# and population information may be present.
|
18
|
+
#
|
19
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
20
|
+
# See the general documentation of the Ensembl module for
|
21
|
+
# more information on what this means and what methods are available.
|
22
|
+
#
|
23
|
+
# = USAGE
|
24
|
+
# allele = Allele.find(1)
|
25
|
+
# puts allele.to_yaml
|
26
|
+
class Allele < DBConnection
  # Rows are identified by the allele_id column.
  set_primary_key('allele_id')

  # An allele points at one sample, one variation and one population.
  # No :foreign_key option is given, so ActiveRecord's default naming applies.
  [:sample, :variation, :population].each do |parent|
    belongs_to parent
  end
end
|
32
|
+
|
33
|
+
# = DESCRIPTION
|
34
|
+
# The AlleleGroup class represents a grouping of alleles that have tight
|
35
|
+
# linkage and are usually present together. This is commonly known as a
|
36
|
+
# Haplotype or Haplotype Block.
|
37
|
+
#
|
38
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
39
|
+
# See the general documentation of the Ensembl module for
|
40
|
+
# more information on what this means and what methods are available.
|
41
|
+
#
|
42
|
+
# = USAGE
|
43
|
+
# allele_group = AlleleGroup.find(1)
|
44
|
+
# puts allele_group.to_yaml
|
45
|
+
class AlleleGroup < DBConnection
  # Rows are identified by the allele_group_id column.
  set_primary_key 'allele_group_id'

  # Parent records referenced by this allele group.
  belongs_to :variation_group
  belongs_to :source
  belongs_to :sample

  # NOTE(review): kept for backward compatibility. As a belongs_to this
  # expects a foreign key to the join table on the allele group itself, which
  # looks like the wrong direction -- confirm against the schema.
  belongs_to :allele_group_allele

  # Join rows that refer to this group. This is the inverse of
  # AlleleGroupAllele's "belongs_to :allele_group", so the join records can
  # be reached from the group.
  has_many :allele_group_alleles
end
|
52
|
+
|
53
|
+
# = DESCRIPTION
|
54
|
+
# The AlleleGroupAllele class represents a connection class between Allele and AlleleGroup.
|
55
|
+
# Should not be used directly.
|
56
|
+
#
|
57
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
58
|
+
# See the general documentation of the Ensembl module for
|
59
|
+
# more information on what this means and what methods are available.
|
60
|
+
class AlleleGroupAllele < DBConnection
  # Join record: each row refers to one variation and one allele group.
  [:variation, :allele_group].each { |parent| belongs_to parent }
end
|
64
|
+
|
65
|
+
# = DESCRIPTION
|
66
|
+
# The Sample class gives information about the biological samples stored in the database.
|
67
|
+
#
|
68
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
69
|
+
# See the general documentation of the Ensembl module for
|
70
|
+
# more information on what this means and what methods are available.
|
71
|
+
class Sample < DBConnection
  # Rows are identified by the sample_id column.
  set_primary_key('sample_id')

  # Declarations are issued in the same order as before; only the grouping
  # of consecutive macros of the same kind is new.
  [:individual, :sample_synonym].each { |child| has_one child }

  [
    :individual_genotype_multiple_bp,
    :compressed_genotype_single_bp,
    :read_coverage
  ].each { |children| has_many children }

  has_one(:population)
  has_many(:tagged_variation_features)
end
|
81
|
+
|
82
|
+
# = DESCRIPTION
|
83
|
+
# The IndividualPopulation class is used to connect Individual and Population classes.
|
84
|
+
# Should not be used directly.
|
85
|
+
#
|
86
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
87
|
+
# See the general documentation of the Ensembl module for
|
88
|
+
# more information on what this means and what methods are available.
|
89
|
+
class IndividualPopulation < DBConnection
  # Join record: each row refers to one individual and one population.
  [:individual, :population].each { |parent| belongs_to parent }
end
|
93
|
+
|
94
|
+
# = DESCRIPTION
|
95
|
+
# The Individual class gives information on the single individuals used
|
96
|
+
# to retrieve one or more biological samples.
|
97
|
+
#
|
98
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
99
|
+
# See the general documentation of the Ensembl module for
|
100
|
+
# more information on what this means and what methods are available.
|
101
|
+
class Individual < DBConnection
  # Every individual refers back to one sample.
  belongs_to(:sample)

  # TODO: further associations are missing because the meaning of some
  # table fields could not be figured out.
end
|
105
|
+
|
106
|
+
class IndividualGenotypeMultipleBp < DBConnection
  # Each genotype row refers to one sample and one variation.
  [:sample, :variation].each { |parent| belongs_to parent }
end
|
110
|
+
|
111
|
+
class CompressedGenotypeSingleBp < DBConnection
  # Each compressed genotype row refers to one sample.
  belongs_to(:sample)
end
|
114
|
+
|
115
|
+
class ReadCoverage < DBConnection
  # Each read-coverage row refers to one sample.
  belongs_to(:sample)
end
|
118
|
+
|
119
|
+
class Population < DBConnection
  # Each population refers to one sample.
  belongs_to(:sample)
end
|
122
|
+
|
123
|
+
class PopulationStructure < DBConnection
|
124
|
+
# TODO: no associations are declared yet because the meaning of some
# table fields could not be figured out.
|
125
|
+
end
|
126
|
+
|
127
|
+
# = DESCRIPTION
|
128
|
+
# The PopulationGenotype class gives information about alleles and allele
|
129
|
+
# frequencies for a SNP observed within a population or a group of samples.
|
130
|
+
#
|
131
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
132
|
+
# See the general documentation of the Ensembl module for
|
133
|
+
# more information on what this means and what methods are available.
|
134
|
+
class PopulationGenotype < DBConnection
  # Rows are identified by the population_genotype_id column.
  set_primary_key('population_genotype_id')

  # Each genotype row refers to one variation and one population.
  [:variation, :population].each { |parent| belongs_to parent }
end
|
139
|
+
|
140
|
+
# = DESCRIPTION
|
141
|
+
# The SampleSynonym class represents information about alternative names
|
142
|
+
# for sample entries.
|
143
|
+
#
|
144
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
145
|
+
# See the general documentation of the Ensembl module for
|
146
|
+
# more information on what this means and what methods are available.
|
147
|
+
class SampleSynonym < DBConnection
  # Rows are identified by the sample_synonym_id column.
  set_primary_key('sample_synonym_id')

  # Each synonym refers to one source, one sample and one population.
  [:source, :sample, :population].each do |parent|
    belongs_to parent
  end
end
|
153
|
+
|
154
|
+
# = DESCRIPTION
|
155
|
+
# The Source class gives information on the different databases and SNP
|
156
|
+
# panels used to retrieve the data
|
157
|
+
#
|
158
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
159
|
+
# See the general documentation of the Ensembl module for
|
160
|
+
# more information on what this means and what methods are available.
|
161
|
+
class Source < DBConnection
  # Rows are identified by the source_id column.
  set_primary_key('source_id')

  # Collections of records that refer to this source, declared in the
  # original order.
  [
    :sample_synonyms,
    :allele_groups,
    :variations,
    :variation_groups,
    :httags,
    :variation_synonyms
  ].each { |children| has_many children }
end
|
170
|
+
|
171
|
+
# = DESCRIPTION
|
172
|
+
# The VariationSynonym class gives information on alternative names used
|
173
|
+
# for Variation entries.
|
174
|
+
#
|
175
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
176
|
+
# See the general documentation of the Ensembl module for
|
177
|
+
# more information on what this means and what methods are available.
|
178
|
+
class VariationSynonym < DBConnection
  # Rows are identified by the variation_synonym_id column.
  set_primary_key('variation_synonym_id')

  # Each synonym refers to one variation and one source.
  [:variation, :source].each { |parent| belongs_to parent }
end
|
183
|
+
|
184
|
+
# = DESCRIPTION
|
185
|
+
# The Variation class represents single nucleotide polymorphisms (SNPs) or variations
|
186
|
+
# and provides information like the names (IDs), the validation status and
|
187
|
+
# the allele information.
|
188
|
+
#
|
189
|
+
# *BUG*: fields like validation_status and consequence_type are created
|
190
|
+
# using the SET option directly in MySQL. These fields are interpreted incorrectly by
|
191
|
+
# ActiveRecord, which always returns 0.
|
192
|
+
#
|
193
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
194
|
+
# See the general documentation of the Ensembl module for
|
195
|
+
# more information on what this means and what methods are available.
|
196
|
+
class Variation < DBConnection
  # Rows are identified by the variation_id column.
  set_primary_key('variation_id')

  # The declarations below are kept in their original order.
  belongs_to(:source)

  [:variation_synonym, :flanking_sequence].each { |child| has_one child }

  # Allele groups are reached through the allele_group_alleles join records.
  has_many(:allele_group_alleles)
  has_many(:allele_groups, :through => :allele_group_alleles)

  [:population_genotypes, :alleles].each { |children| has_many children }

  has_one(:variation_feature)

  # Variation groups are reached through the variation_group_variations
  # join records.
  has_many(:variation_group_variations)
  has_many(:variation_groups, :through => :variation_group_variations)

  has_many(:individual_genotype_multiple_bps)
end
|
210
|
+
|
211
|
+
# = DESCRIPTION
|
212
|
+
# The VariationGroup class represents a group of variations (SNPs) that are
|
213
|
+
# linked and present together.
|
214
|
+
#
|
215
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
216
|
+
# See the general documentation of the Ensembl module for
|
217
|
+
# more information on what this means and what methods are available.
|
218
|
+
class VariationGroup < DBConnection
  # Rows are identified by the variation_group_id column.
  set_primary_key('variation_group_id')

  # Each variation group refers to one source.
  belongs_to(:source)

  # Single dependent records, declared in the original order.
  [
    :variation_group_variation,
    :httag,
    :variation_group_feature,
    :allele_group
  ].each { |child| has_one child }
end
|
226
|
+
|
227
|
+
# = DESCRIPTION
|
228
|
+
# The VariationGroupVariation class is a connection class.
|
229
|
+
# Should not be used directly.
|
230
|
+
#
|
231
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
232
|
+
# See the general documentation of the Ensembl module for
|
233
|
+
# more information on what this means and what methods are available.
|
234
|
+
class VariationGroupVariation < DBConnection
  # Join record: each row refers to one variation and one variation group.
  [:variation, :variation_group].each { |parent| belongs_to parent }
end
|
238
|
+
|
239
|
+
# = DESCRIPTION
|
240
|
+
# The VariationFeature class gives information about the genomic position of
|
241
|
+
# each Variation, including also validation status and consequence type.
|
242
|
+
#
|
243
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
244
|
+
# See the general documentation of the Ensembl module for
|
245
|
+
# more information on what this means and what methods are available.
|
246
|
+
class VariationFeature < DBConnection
  # Rows are identified by the variation_feature_id column.
  set_primary_key('variation_feature_id')

  # Each feature refers to one variation.
  belongs_to(:variation)

  # Samples are reached through the tagged_variation_features join records.
  has_many(:tagged_variation_features)
  has_many(:samples, :through => :tagged_variation_features)

  has_many(:transcript_variations)
end
|
253
|
+
|
254
|
+
# = DESCRIPTION
|
255
|
+
# The VariationGroupFeature class gives information on the genomic position
|
256
|
+
# of each VariationGroup.
|
257
|
+
#
|
258
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
259
|
+
# See the general documentation of the Ensembl module for
|
260
|
+
# more information on what this means and what methods are available.
|
261
|
+
class VariationGroupFeature < DBConnection
  # Rows are identified by the variation_group_feature_id column.
  set_primary_key('variation_group_feature_id')

  # Each feature refers to one variation group.
  belongs_to(:variation_group)
end
|
265
|
+
|
266
|
+
# = DESCRIPTION
|
267
|
+
# The TranscriptVariation class gives information about the position of
|
268
|
+
# a VariationFeature, mapped on an annotated transcript.
|
269
|
+
#
|
270
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
271
|
+
# See the general documentation of the Ensembl module for
|
272
|
+
# more information on what this means and what methods are available.
|
273
|
+
class TranscriptVariation < DBConnection
  # Rows are identified by the transcript_variation_id column.
  set_primary_key('transcript_variation_id')

  # Each transcript variation refers to one variation feature.
  belongs_to(:variation_feature)
end
|
277
|
+
|
278
|
+
# = DESCRIPTION
|
279
|
+
# The FlankingSequence class gives information about the genomic coordinates
|
280
|
+
# of the flanking sequences, for a single VariationFeature.
|
281
|
+
#
|
282
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
283
|
+
# See the general documentation of the Ensembl module for
|
284
|
+
# more information on what this means and what methods are available.
|
285
|
+
class FlankingSequence < DBConnection
  # Each flanking sequence refers to one variation.
  belongs_to(:variation)
end
|
288
|
+
|
289
|
+
# = DESCRIPTION
|
290
|
+
# The TaggedVariationFeature class is a connection class.
|
291
|
+
# Should not be used directly.
|
292
|
+
#
|
293
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
294
|
+
# See the general documentation of the Ensembl module for
|
295
|
+
# more information on what this means and what methods are available.
|
296
|
+
class TaggedVariationFeature < DBConnection
  # Join record: each row refers to one variation feature and one sample.
  [:variation_feature, :sample].each { |parent| belongs_to parent }
end
|
300
|
+
|
301
|
+
class Httag < DBConnection
  # Rows are identified by the httag_id column.
  set_primary_key('httag_id')

  # Each httag refers to one variation group and one source.
  [:variation_group, :source].each { |parent| belongs_to parent }
end
|
306
|
+
|
307
|
+
end
|
308
|
+
end
|