jandot-ruby-ensembl-api 0.9.2
Sign up to get free protection for your applications and to get access to all the features.
- data/TUTORIAL +623 -0
- data/bin/ensembl +39 -0
- data/lib/ensembl/core/activerecord.rb +1847 -0
- data/lib/ensembl/core/project.rb +248 -0
- data/lib/ensembl/core/slice.rb +627 -0
- data/lib/ensembl/core/transcript.rb +425 -0
- data/lib/ensembl/core/transform.rb +97 -0
- data/lib/ensembl/db_connection.rb +148 -0
- data/lib/ensembl/variation/activerecord.rb +308 -0
- data/lib/ensembl.rb +23 -0
- data/samples/examples_perl_tutorial.rb +120 -0
- data/samples/small_example_ruby_api.rb +34 -0
- data/test/unit/release_45/core/run_tests.rb +12 -0
- data/test/unit/release_45/core/test_project.rb +235 -0
- data/test/unit/release_45/core/test_project_human.rb +58 -0
- data/test/unit/release_45/core/test_relationships.rb +61 -0
- data/test/unit/release_45/core/test_sequence.rb +175 -0
- data/test/unit/release_45/core/test_slice.rb +56 -0
- data/test/unit/release_45/core/test_transcript.rb +94 -0
- data/test/unit/release_45/core/test_transform.rb +223 -0
- data/test/unit/release_45/variation/test_activerecord.rb +32 -0
- data/test/unit/release_50/core/run_tests.rb +12 -0
- data/test/unit/release_50/core/test_project.rb +215 -0
- data/test/unit/release_50/core/test_project_human.rb +58 -0
- data/test/unit/release_50/core/test_relationships.rb +66 -0
- data/test/unit/release_50/core/test_sequence.rb +175 -0
- data/test/unit/release_50/core/test_slice.rb +121 -0
- data/test/unit/release_50/core/test_transcript.rb +108 -0
- data/test/unit/release_50/core/test_transform.rb +223 -0
- data/test/unit/release_50/variation/test_activerecord.rb +136 -0
- data/test/unit/test_connection.rb +58 -0
- data/test/unit/test_releases.rb +40 -0
- metadata +243 -0
@@ -0,0 +1,148 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'activerecord'
|
3
|
+
|
4
|
+
module Ensembl
|
5
|
+
DB_ADAPTER = 'mysql'
|
6
|
+
DB_HOST = 'ensembldb.ensembl.org'
|
7
|
+
DB_USERNAME = 'anonymous'
|
8
|
+
DB_PASSWORD = ''
|
9
|
+
|
10
|
+
# Abstract ActiveRecord class connected to the server at Ensembl::DB_HOST
# with an empty database name and no explicit port. The DBConnection.connect
# methods use its connection to run 'show databases' for releases up to 47.
class OldDummyDBConnection < ActiveRecord::Base
  self.abstract_class = true

  # The connection is configured as soon as this class body is evaluated.
  server_settings = {
    :adapter  => Ensembl::DB_ADAPTER,
    :host     => Ensembl::DB_HOST,
    :database => '',
    :username => Ensembl::DB_USERNAME,
    :password => Ensembl::DB_PASSWORD
  }
  establish_connection(server_settings)
end
|
21
|
+
|
22
|
+
# Abstract ActiveRecord class connected to the server at Ensembl::DB_HOST
# on port 5306 with an empty database name. The DBConnection.connect methods
# use its connection to run 'show databases' for releases above 47.
class NewDummyDBConnection < ActiveRecord::Base
  self.abstract_class = true

  # The connection is configured as soon as this class body is evaluated.
  server_settings = {
    :adapter  => Ensembl::DB_ADAPTER,
    :host     => Ensembl::DB_HOST,
    :database => '',
    :username => Ensembl::DB_USERNAME,
    :password => Ensembl::DB_PASSWORD,
    :port     => 5306
  }
  establish_connection(server_settings)
end
|
34
|
+
|
35
|
+
|
36
|
+
module Core
|
37
|
+
# = DESCRIPTION
|
38
|
+
# The Ensembl::Core::DBConnection is the actual connection established
|
39
|
+
# with the Ensembl server.
|
40
|
+
class DBConnection < ActiveRecord::Base
  self.abstract_class = true
  self.pluralize_table_names = false

  # = DESCRIPTION
  # The Ensembl::Core::DBConnection.connect method makes the connection
  # to the Ensembl core database for a given species. By default, it connects
  # to release Ensembl::ENSEMBL_RELEASE for that species. You _could_ use a
  # lower number, but some parts of the API might not work, or worse: give
  # the wrong results.
  #
  # Unless args[:database] is given, the database name is looked up by
  # running 'show databases' through Ensembl::NewDummyDBConnection
  # (release > 47) or Ensembl::OldDummyDBConnection (older releases).
  #
  # = USAGE
  #  # Connect to the default release of human
  #  Ensembl::Core::DBConnection.connect('homo_sapiens')
  #
  #  # Connect to release 42 of chicken
  #  Ensembl::Core::DBConnection.connect('gallus_gallus', 42)
  #
  #  # Connect to a named database on another server
  #  Ensembl::Core::DBConnection.connect('homo_sapiens', 50,
  #    :host => 'localhost', :database => 'homo_sapiens_core_50_36l')
  #
  # ---
  # *Arguments*:
  # * species:: species to connect to. Arguments should be in snake_case
  # * release:: the release of the database to connect to
  #   (default = Ensembl::ENSEMBL_RELEASE)
  # * args:: optional hash overriding the connection settings; recognised
  #   keys are :adapter, :host, :database, :username, :password and :port
  # *Returns*:: the result of retrieve_connection, or nil (after printing a
  #   warning) when no matching database was found
  def self.connect(species, release = Ensembl::ENSEMBL_RELEASE, args = {})
    db_name = args[:database]

    unless db_name
      # Only contact the lookup server when the caller did not name a database.
      # NOTE(review): the lookup always queries the server configured in the
      # dummy connection classes, even when args[:host] points elsewhere.
      lookup_class = (release > 47) ? Ensembl::NewDummyDBConnection : Ensembl::OldDummyDBConnection

      # Anchor and escape the pattern so that e.g. release 4 cannot match
      # "..._core_45_..." and regexp metacharacters in species are literal.
      name_pattern = /\A#{Regexp.escape(species.to_s)}_core_#{Regexp.escape(release.to_s)}(?:_|\z)/
      db_name = lookup_class.connection.select_values('show databases').find { |name| name =~ name_pattern }
    end

    if db_name.nil?
      warn "WARNING: No connection to database established. Check that the species is in snake_case (was: #{species})."
      return nil
    end

    # Releases above 47 are served on port 5306; older ones use the adapter default.
    default_port = (release > 47) ? 5306 : nil

    establish_connection(
      :adapter  => args[:adapter]  || Ensembl::DB_ADAPTER,
      :host     => args[:host]     || Ensembl::DB_HOST,
      :database => db_name,
      :username => args[:username] || Ensembl::DB_USERNAME,
      :password => args[:password] || Ensembl::DB_PASSWORD,
      :port     => args[:port]     || default_port
    )
    retrieve_connection
  end
end
|
90
|
+
|
91
|
+
end
|
92
|
+
|
93
|
+
module Variation
|
94
|
+
# = DESCRIPTION
|
95
|
+
# The Ensembl::Variation::DBConnection is the actual connection established
|
96
|
+
# with the Ensembl server.
|
97
|
+
class DBConnection < ActiveRecord::Base
  self.abstract_class = true
  self.pluralize_table_names = false

  # = DESCRIPTION
  # The Ensembl::Variation::DBConnection.connect method makes the connection
  # to the Ensembl variation database for a given species. By default, it
  # connects to release Ensembl::ENSEMBL_RELEASE for that species. You _could_
  # use a lower number, but some parts of the API might not work, or worse:
  # give the wrong results.
  #
  # Unless args[:database] is given, the database name is looked up by
  # running 'show databases' through Ensembl::NewDummyDBConnection
  # (release > 47) or Ensembl::OldDummyDBConnection (older releases).
  #
  # = USAGE
  #  # Connect to the default release of human
  #  Ensembl::Variation::DBConnection.connect('homo_sapiens')
  #
  #  # Connect to release 42 of chicken
  #  Ensembl::Variation::DBConnection.connect('gallus_gallus', 42)
  #
  # ---
  # *Arguments*:
  # * species:: species to connect to. Arguments should be in snake_case
  # * release:: the release of the database to connect to
  #   (default = Ensembl::ENSEMBL_RELEASE)
  # * args:: optional hash overriding the connection settings; recognised
  #   keys are :adapter, :host, :database, :username, :password and :port
  # *Returns*:: the result of retrieve_connection, or nil (after printing a
  #   warning) when no matching database was found
  def self.connect(species, release = Ensembl::ENSEMBL_RELEASE, args = {})
    db_name = args[:database]

    unless db_name
      # Only contact the lookup server when the caller did not name a database.
      # NOTE(review): the lookup always queries the server configured in the
      # dummy connection classes, even when args[:host] points elsewhere.
      lookup_class = (release > 47) ? Ensembl::NewDummyDBConnection : Ensembl::OldDummyDBConnection

      # Anchor and escape the pattern so that e.g. release 4 cannot match
      # "..._variation_45_..." and regexp metacharacters in species are literal.
      name_pattern = /\A#{Regexp.escape(species.to_s)}_variation_#{Regexp.escape(release.to_s)}(?:_|\z)/
      db_name = lookup_class.connection.select_values('show databases').find { |name| name =~ name_pattern }
    end

    if db_name.nil?
      warn "WARNING: No connection to database established. Check that the species is in snake_case (was: #{species})."
      return nil
    end

    # Releases above 47 are served on port 5306; older ones use the adapter default.
    default_port = (release > 47) ? 5306 : nil

    establish_connection(
      # args[:adapter] is honoured here for parity with Ensembl::Core::DBConnection.
      :adapter  => args[:adapter]  || Ensembl::DB_ADAPTER,
      :host     => args[:host]     || Ensembl::DB_HOST,
      :database => db_name,
      :username => args[:username] || Ensembl::DB_USERNAME,
      :password => args[:password] || Ensembl::DB_PASSWORD,
      :port     => args[:port]     || default_port
    )
    retrieve_connection
  end
end
|
146
|
+
|
147
|
+
end
|
148
|
+
end
|
@@ -0,0 +1,308 @@
|
|
1
|
+
#
|
2
|
+
# = ensembl/variation/activerecord.rb - ActiveRecord mappings to Ensembl variation
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2008 Jan Aerts <http://jandot.myopenid.com>
|
5
|
+
# License:: The Ruby License
|
6
|
+
#
|
7
|
+
|
8
|
+
nil
|
9
|
+
module Ensembl
|
10
|
+
# = DESCRIPTION
|
11
|
+
# The Ensembl::Variation module covers the variation databases from
|
12
|
+
# ensembldb.ensembl.org.
|
13
|
+
module Variation
|
14
|
+
# = DESCRIPTION
|
15
|
+
# The Allele class describes a single allele of a variation. In addition to
|
16
|
+
# the nucleotide(s) (or absence thereof) representing the allele, frequency
|
17
|
+
# and population information may be present.
|
18
|
+
#
|
19
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
20
|
+
# See the general documentation of the Ensembl module for
|
21
|
+
# more information on what this means and what methods are available.
|
22
|
+
#
|
23
|
+
# = USAGE
|
24
|
+
# allele = Allele.find(1)
|
25
|
+
# puts allele.to_yaml
|
26
|
+
class Allele < DBConnection
  # Rows are keyed on allele_id instead of ActiveRecord's default id column.
  set_primary_key 'allele_id'

  # An allele references one sample, one variation and one population.
  [:sample, :variation, :population].each { |parent| belongs_to parent }
end
|
32
|
+
|
33
|
+
# = DESCRIPTION
|
34
|
+
# The AlleleGroup class represents a grouping of alleles that have tight
|
35
|
+
# linkage and are usually present together. This is commonly known as a
|
36
|
+
# Haplotype or Haplotype Block.
|
37
|
+
#
|
38
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
39
|
+
# See the general documentation of the Ensembl module for
|
40
|
+
# more information on what this means and what methods are available.
|
41
|
+
#
|
42
|
+
# = USAGE
|
43
|
+
# allele_group = AlleleGroup.find(1)
|
44
|
+
# puts allele_group.to_yaml
|
45
|
+
class AlleleGroup < DBConnection
  # Rows are keyed on allele_group_id instead of ActiveRecord's default id column.
  set_primary_key 'allele_group_id'

  # Parent records referenced by an allele group.
  [:variation_group, :source, :sample, :allele_group_allele].each do |parent|
    belongs_to parent
  end
end
|
52
|
+
|
53
|
+
# = DESCRIPTION
|
54
|
+
# The AlleleGroupAllele class represents a connection class between Allele and AlleleGroup.
|
55
|
+
# Should not be used directly.
|
56
|
+
#
|
57
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
58
|
+
# See the general documentation of the Ensembl module for
|
59
|
+
# more information on what this means and what methods are available.
|
60
|
+
class AlleleGroupAllele < DBConnection
  # Join record: each row references one variation and one allele group.
  [:variation, :allele_group].each { |parent| belongs_to parent }
end
|
64
|
+
|
65
|
+
# = DESCRIPTION
|
66
|
+
# The Sample class gives information about the biological samples stored in the database.
|
67
|
+
#
|
68
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
69
|
+
# See the general documentation of the Ensembl module for
|
70
|
+
# more information on what this means and what methods are available.
|
71
|
+
class Sample < DBConnection
  # Rows are keyed on sample_id instead of ActiveRecord's default id column.
  set_primary_key "sample_id"

  [:individual, :sample_synonym].each { |child| has_one child }

  # NOTE(review): these has_many names are singular, whereas Variation
  # declares the plural :individual_genotype_multiple_bps -- confirm intent.
  [:individual_genotype_multiple_bp, :compressed_genotype_single_bp, :read_coverage].each do |children|
    has_many children
  end

  has_one :population
  has_many :tagged_variation_features
end
|
81
|
+
|
82
|
+
# = DESCRIPTION
|
83
|
+
# The IndividualPopulation class is used to connect Individual and Population classes.
|
84
|
+
# Should not be used directly.
|
85
|
+
#
|
86
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
87
|
+
# See the general documentation of the Ensembl module for
|
88
|
+
# more information on what this means and what methods are available.
|
89
|
+
class IndividualPopulation < DBConnection
  # Join record: each row references one individual and one population.
  [:individual, :population].each { |parent| belongs_to parent }
end
|
93
|
+
|
94
|
+
# = DESCRIPTION
|
95
|
+
# The Individual class gives information on the single individuals used
|
96
|
+
# to retrieve one or more biological samples.
|
97
|
+
#
|
98
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
99
|
+
# See the general documentation of the Ensembl module for
|
100
|
+
# more information on what this means and what methods are available.
|
101
|
+
class Individual < DBConnection
  # Every individual references the sample record it belongs to.
  belongs_to :sample

  # TODO: further associations are missing; the original author could not
  # work out the meaning of some of the table fields.
end
|
105
|
+
|
106
|
+
# Genotype record that references one sample and one variation.
class IndividualGenotypeMultipleBp < DBConnection
  [:sample, :variation].each { |parent| belongs_to parent }
end
|
110
|
+
|
111
|
+
# Genotype record that references the sample it was observed in.
class CompressedGenotypeSingleBp < DBConnection
  belongs_to(:sample)
end
|
114
|
+
|
115
|
+
# Read-coverage record that references the sample it belongs to.
class ReadCoverage < DBConnection
  belongs_to(:sample)
end
|
118
|
+
|
119
|
+
# Population record; each one references a sample record.
class Population < DBConnection
  belongs_to(:sample)
end
|
122
|
+
|
123
|
+
# Model class with no associations declared yet.
class PopulationStructure < DBConnection
  # TODO: associations are missing; the original author could not work out
  # the meaning of some of the table fields.
end
|
126
|
+
|
127
|
+
# = DESCRIPTION
|
128
|
+
# The PopulationGenotype class gives information about alleles and allele
|
129
|
+
# frequencies for a SNP observed within a population or a group of samples.
|
130
|
+
#
|
131
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
132
|
+
# See the general documentation of the Ensembl module for
|
133
|
+
# more information on what this means and what methods are available.
|
134
|
+
class PopulationGenotype < DBConnection
  # Rows are keyed on population_genotype_id instead of the default id column.
  set_primary_key "population_genotype_id"

  # Each genotype row references one variation and one population.
  [:variation, :population].each { |parent| belongs_to parent }
end
|
139
|
+
|
140
|
+
# = DESCRIPTION
|
141
|
+
# The SampleSynonym class represents information about alternative names
|
142
|
+
# for sample entries.
|
143
|
+
#
|
144
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
145
|
+
# See the general documentation of the Ensembl module for
|
146
|
+
# more information on what this means and what methods are available.
|
147
|
+
class SampleSynonym < DBConnection
  # Rows are keyed on sample_synonym_id instead of the default id column.
  set_primary_key "sample_synonym_id"

  # Parent records referenced by a sample synonym.
  [:source, :sample, :population].each { |parent| belongs_to parent }
end
|
153
|
+
|
154
|
+
# = DESCRIPTION
|
155
|
+
# The Source class gives information on the different databases and SNP
|
156
|
+
# panels used to retrieve the data
|
157
|
+
#
|
158
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
159
|
+
# See the general documentation of the Ensembl module for
|
160
|
+
# more information on what this means and what methods are available.
|
161
|
+
class Source < DBConnection
  # Rows are keyed on source_id instead of ActiveRecord's default id column.
  set_primary_key "source_id"

  # Collections of records that reference this source.
  [
    :sample_synonyms,
    :allele_groups,
    :variations,
    :variation_groups,
    :httags,
    :variation_synonyms
  ].each { |children| has_many children }
end
|
170
|
+
|
171
|
+
# = DESCRIPTION
|
172
|
+
# The VariationSynonym class gives information on alternative names used
|
173
|
+
# for Variation entries.
|
174
|
+
#
|
175
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
176
|
+
# See the general documentation of the Ensembl module for
|
177
|
+
# more information on what this means and what methods are available.
|
178
|
+
class VariationSynonym < DBConnection
  # Rows are keyed on variation_synonym_id instead of the default id column.
  set_primary_key "variation_synonym_id"

  # Each synonym references one variation and one source.
  [:variation, :source].each { |parent| belongs_to parent }
end
|
183
|
+
|
184
|
+
# = DESCRIPTION
|
185
|
+
# The Variation class represents single nucleotide polymorphisms (SNP) or variations
|
186
|
+
# and provides information like the names (IDs), the validation status and
|
187
|
+
# the allele information.
|
188
|
+
#
|
189
|
+
# *BUG*: fields like validation_status and consequence_type are created
|
190
|
+
# using the SET option directly in MySQL. These fields are misinterpreted by
|
191
|
+
# ActiveRecord, which always returns 0.
|
192
|
+
#
|
193
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
194
|
+
# See the general documentation of the Ensembl module for
|
195
|
+
# more information on what this means and what methods are available.
|
196
|
+
class Variation < DBConnection
  # Rows are keyed on variation_id instead of ActiveRecord's default id column.
  set_primary_key "variation_id"

  belongs_to :source
  [:variation_synonym, :flanking_sequence].each { |child| has_one child }

  # Allele groups are reached through the allele_group_alleles join records.
  has_many :allele_group_alleles
  has_many :allele_groups, :through => :allele_group_alleles

  [:population_genotypes, :alleles].each { |children| has_many children }
  has_one :variation_feature

  # Variation groups are reached through the variation_group_variations join records.
  has_many :variation_group_variations
  has_many :variation_groups, :through => :variation_group_variations

  has_many :individual_genotype_multiple_bps
end
|
210
|
+
|
211
|
+
# = DESCRIPTION
|
212
|
+
# The VariationGroup class represents a group of variations (SNPs) that are
|
213
|
+
# linked and present together.
|
214
|
+
#
|
215
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
216
|
+
# See the general documentation of the Ensembl module for
|
217
|
+
# more information on what this means and what methods are available.
|
218
|
+
class VariationGroup < DBConnection
  # Rows are keyed on variation_group_id instead of the default id column.
  set_primary_key "variation_group_id"

  belongs_to :source

  # Single child records that reference this variation group.
  [:variation_group_variation, :httag, :variation_group_feature, :allele_group].each do |child|
    has_one child
  end
end
|
226
|
+
|
227
|
+
# = DESCRIPTION
|
228
|
+
# The VariationGroupVariation class is a connection class.
|
229
|
+
# Should not be used directly.
|
230
|
+
#
|
231
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
232
|
+
# See the general documentation of the Ensembl module for
|
233
|
+
# more information on what this means and what methods are available.
|
234
|
+
class VariationGroupVariation < DBConnection
  # Join record: each row references one variation and one variation group.
  [:variation, :variation_group].each { |parent| belongs_to parent }
end
|
238
|
+
|
239
|
+
# = DESCRIPTION
|
240
|
+
# The VariationFeature class gives information about the genomic position of
|
241
|
+
# each Variation, including also validation status and consequence type.
|
242
|
+
#
|
243
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
244
|
+
# See the general documentation of the Ensembl module for
|
245
|
+
# more information on what this means and what methods are available.
|
246
|
+
class VariationFeature < DBConnection
  # Rows are keyed on variation_feature_id instead of the default id column.
  set_primary_key "variation_feature_id"

  belongs_to :variation

  # Samples are reached through the tagged_variation_features join records.
  has_many :tagged_variation_features
  has_many :samples, :through => :tagged_variation_features

  has_many :transcript_variations
end
|
253
|
+
|
254
|
+
# = DESCRIPTION
|
255
|
+
# The VariationGroupFeature class gives information on the genomic position
|
256
|
+
# of each VariationGroup.
|
257
|
+
#
|
258
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
259
|
+
# See the general documentation of the Ensembl module for
|
260
|
+
# more information on what this means and what methods are available.
|
261
|
+
class VariationGroupFeature < DBConnection
  # Rows are keyed on variation_group_feature_id instead of the default id column.
  set_primary_key "variation_group_feature_id"

  # Each feature references the variation group it locates.
  belongs_to(:variation_group)
end
|
265
|
+
|
266
|
+
# = DESCRIPTION
|
267
|
+
# The TranscriptVariation class gives information about the position of
|
268
|
+
# a VariationFeature, mapped on an annotated transcript.
|
269
|
+
#
|
270
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
271
|
+
# See the general documentation of the Ensembl module for
|
272
|
+
# more information on what this means and what methods are available.
|
273
|
+
class TranscriptVariation < DBConnection
  # Rows are keyed on transcript_variation_id instead of the default id column.
  set_primary_key "transcript_variation_id"

  # Each row references the variation feature it was derived from.
  belongs_to(:variation_feature)
end
|
277
|
+
|
278
|
+
# = DESCRIPTION
|
279
|
+
# The FlankingSequence class gives information about the genomic coordinates
|
280
|
+
# of the flanking sequences, for a single VariationFeature.
|
281
|
+
#
|
282
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
283
|
+
# See the general documentation of the Ensembl module for
|
284
|
+
# more information on what this means and what methods are available.
|
285
|
+
class FlankingSequence < DBConnection
  # Each flanking sequence references the variation it surrounds.
  belongs_to(:variation)
end
|
288
|
+
|
289
|
+
# = DESCRIPTION
|
290
|
+
# The TaggedVariationFeature class is a connection class.
|
291
|
+
# Should not be used directly.
|
292
|
+
#
|
293
|
+
# This class uses ActiveRecord to access data in the Ensembl database.
|
294
|
+
# See the general documentation of the Ensembl module for
|
295
|
+
# more information on what this means and what methods are available.
|
296
|
+
class TaggedVariationFeature < DBConnection
  # Join record: each row references one variation feature and one sample.
  [:variation_feature, :sample].each { |parent| belongs_to parent }
end
|
300
|
+
|
301
|
+
# Record keyed on httag_id that references a variation group and a source.
class Httag < DBConnection
  set_primary_key "httag_id"

  [:variation_group, :source].each { |parent| belongs_to parent }
end
|
306
|
+
|
307
|
+
end
|
308
|
+
end
|