ruby-ensembl-api 0.9.6 → 1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/TUTORIAL.rdoc +1 -1
- data/bin/variation_effect_predictor +106 -0
- data/lib/ensembl.rb +2 -2
- data/lib/ensembl/core/activerecord.rb +119 -225
- data/lib/ensembl/core/collection.rb +14 -10
- data/lib/ensembl/core/project.rb +6 -8
- data/lib/ensembl/core/slice.rb +87 -123
- data/lib/ensembl/core/transcript.rb +49 -65
- data/lib/ensembl/core/transform.rb +6 -8
- data/lib/ensembl/db_connection.rb +56 -72
- data/lib/ensembl/variation/activerecord.rb +138 -8
- data/lib/ensembl/variation/variation.rb +284 -46
- data/samples/ensembl_genomes_example.rb +60 -0
- data/samples/examples_perl_tutorial.rb +125 -0
- data/samples/small_example_ruby_api.rb +34 -0
- data/samples/variation_example.rb +67 -0
- data/test/unit/{release_56 → release_60}/core/test_gene.rb +6 -6
- data/test/unit/release_60/core/test_project_human.rb +38 -0
- data/test/unit/{release_56 → release_60}/core/test_slice.rb +1 -8
- data/test/unit/release_60/core/test_transcript.rb +126 -0
- data/test/unit/{release_53 → release_60}/core/test_transform.rb +21 -21
- data/test/unit/release_60/variation/test_activerecord.rb +213 -0
- data/test/unit/release_60/variation/test_consequence.rb +158 -0
- data/test/unit/{release_56 → release_60}/variation/test_variation.rb +18 -17
- data/test/unit/test_connection.rb +2 -2
- data/test/unit/test_releases.rb +8 -8
- metadata +27 -43
- data/test/unit/data/seq_c6qbl.fa +0 -10
- data/test/unit/data/seq_cso19_coding.fa +0 -16
- data/test/unit/data/seq_cso19_transcript.fa +0 -28
- data/test/unit/data/seq_drd3_gene.fa +0 -838
- data/test/unit/data/seq_drd3_transcript.fa +0 -22
- data/test/unit/data/seq_drd4_transcript.fa +0 -24
- data/test/unit/data/seq_forward_composite.fa +0 -1669
- data/test/unit/data/seq_par_boundary.fa +0 -169
- data/test/unit/data/seq_rnd3_transcript.fa +0 -47
- data/test/unit/data/seq_ub2r1_coding.fa +0 -13
- data/test/unit/data/seq_ub2r1_gene.fa +0 -174
- data/test/unit/data/seq_ub2r1_transcript.fa +0 -26
- data/test/unit/data/seq_y.fa +0 -2
- data/test/unit/ensembl_genomes/test_collection.rb +0 -51
- data/test/unit/ensembl_genomes/test_gene.rb +0 -52
- data/test/unit/ensembl_genomes/test_slice.rb +0 -71
- data/test/unit/ensembl_genomes/test_variation.rb +0 -17
- data/test/unit/release_50/core/test_project.rb +0 -215
- data/test/unit/release_50/core/test_project_human.rb +0 -58
- data/test/unit/release_50/core/test_relationships.rb +0 -66
- data/test/unit/release_50/core/test_sequence.rb +0 -175
- data/test/unit/release_50/core/test_slice.rb +0 -121
- data/test/unit/release_50/core/test_transcript.rb +0 -108
- data/test/unit/release_50/core/test_transform.rb +0 -223
- data/test/unit/release_50/variation/test_activerecord.rb +0 -143
- data/test/unit/release_50/variation/test_variation.rb +0 -84
- data/test/unit/release_53/core/test_gene.rb +0 -66
- data/test/unit/release_53/core/test_project.rb +0 -96
- data/test/unit/release_53/core/test_project_human.rb +0 -65
- data/test/unit/release_53/core/test_slice.rb +0 -47
- data/test/unit/release_53/variation/test_activerecord.rb +0 -145
- data/test/unit/release_53/variation/test_variation.rb +0 -71
- data/test/unit/release_56/core/test_project.rb +0 -96
- data/test/unit/release_56/core/test_transform.rb +0 -63
- data/test/unit/release_56/variation/test_activerecord.rb +0 -142
@@ -4,11 +4,11 @@
|
|
4
4
|
# Copyright:: Copyright (C) 2007 Jan Aerts <http://jandot.myopenid.com>
|
5
5
|
# License:: The Ruby License
|
6
6
|
#
|
7
|
+
# @author Jan Aerts
|
7
8
|
nil
|
8
9
|
module Ensembl
|
9
10
|
nil
|
10
11
|
module Core
|
11
|
-
# = DESCRIPTION
|
12
12
|
# The Intron class describes an intron.
|
13
13
|
#
|
14
14
|
# This class does _not_ use ActiveRecord and is only defined within the API.
|
@@ -18,7 +18,7 @@ module Ensembl
|
|
18
18
|
# to a SeqRegion object and a Slice can be created for objects of this
|
19
19
|
# class. See Sliceable and Slice for more information.
|
20
20
|
#
|
21
|
-
#
|
21
|
+
# @example
|
22
22
|
# exon1 = Ensembl::Core::Exon.find(292811)
|
23
23
|
# exon2 = Ensembl::Core::Exon.find(292894)
|
24
24
|
# intron = Ensembl::Core::Intron.new(exon1,exon2)
|
@@ -55,7 +55,6 @@ module Ensembl
|
|
55
55
|
|
56
56
|
end
|
57
57
|
|
58
|
-
# = DESCRIPTION
|
59
58
|
# The Transcript class provides an interface to the transcript
|
60
59
|
# table. This table contains mappings of transcripts for a Gene to a
|
61
60
|
# SeqRegion.
|
@@ -68,7 +67,7 @@ module Ensembl
|
|
68
67
|
# to a SeqRegion object and a Slice can be created for objects of this
|
69
68
|
# class. See Sliceable and Slice for more information.
|
70
69
|
#
|
71
|
-
#
|
70
|
+
# @example
|
72
71
|
# #TODO
|
73
72
|
class Transcript < DBConnection
|
74
73
|
include Sliceable
|
@@ -97,9 +96,8 @@ module Ensembl
|
|
97
96
|
|
98
97
|
# The Transcript#exons method returns the exons for this transcript in
|
99
98
|
# the order of their ranks in the exon_transcript table.
|
100
|
-
#
|
101
|
-
#
|
102
|
-
# *Returns*:: sorted array of Exon objects
|
99
|
+
#
|
100
|
+
# @return [Array<Exon>] Sorted array of Exon objects
|
103
101
|
def exons
|
104
102
|
if @exons.nil?
|
105
103
|
@exons = self.exon_transcripts(:include => [:exons]).sort_by{|et| et.rank.to_i}.collect{|et| et.exon}
|
@@ -108,9 +106,8 @@ module Ensembl
|
|
108
106
|
end
|
109
107
|
|
110
108
|
# The Transcript#introns methods returns the introns for this transcript
|
111
|
-
#
|
112
|
-
#
|
113
|
-
# *Returns*:: sorted array of Intron objects
|
109
|
+
#
|
110
|
+
# @return [Array<Intron>] Sorted array of Intron objects
|
114
111
|
def introns
|
115
112
|
if @introns.nil?
|
116
113
|
@introns = Array.new
|
@@ -125,14 +122,12 @@ module Ensembl
|
|
125
122
|
end
|
126
123
|
|
127
124
|
# The Transcript#stable_id method returns the stable ID of the transcript.
|
128
|
-
#
|
129
|
-
#
|
130
|
-
# *Returns*:: String
|
125
|
+
#
|
126
|
+
# @return [String] Ensembl stable ID of the transcript.
|
131
127
|
def stable_id
|
132
128
|
return self.transcript_stable_id.stable_id
|
133
129
|
end
|
134
130
|
|
135
|
-
# = DESCRIPTION
|
136
131
|
# The Transcript#display_label method returns the default name of the transcript.
|
137
132
|
def display_label
|
138
133
|
return Xref.find(self.display_xref_id).display_label
|
@@ -141,7 +136,6 @@ module Ensembl
|
|
141
136
|
alias :label :display_label
|
142
137
|
alias :name :display_label
|
143
138
|
|
144
|
-
# = DESCRIPTION
|
145
139
|
# The Transcript#find_all_by_stable_id class method returns an array of
|
146
140
|
# transcripts with the given stable_id. If none were found, an empty
|
147
141
|
# array is returned.
|
@@ -155,7 +149,6 @@ module Ensembl
|
|
155
149
|
return answer
|
156
150
|
end
|
157
151
|
|
158
|
-
# = DESCRIPTION
|
159
152
|
# The Transcript#find_by_stable_id class method returns a
|
160
153
|
# transcript with the given stable_id. If none was found, nil is returned.
|
161
154
|
def self.find_by_stable_id(stable_id)
|
@@ -167,7 +160,6 @@ module Ensembl
|
|
167
160
|
end
|
168
161
|
end
|
169
162
|
|
170
|
-
# = DESCRIPTION
|
171
163
|
# The Transcript#find_by_stable_id class method fetches a Transcript object based on
|
172
164
|
# its stable ID (i.e. the "ENST" accession number). If the name is
|
173
165
|
# not found, it returns nil.
|
@@ -180,7 +172,6 @@ module Ensembl
|
|
180
172
|
end
|
181
173
|
end
|
182
174
|
|
183
|
-
# = DESCRIPTION
|
184
175
|
# The Transcript#seq method returns the full sequence of all concatenated
|
185
176
|
# exons.
|
186
177
|
def seq
|
@@ -193,7 +184,6 @@ module Ensembl
|
|
193
184
|
return @seq
|
194
185
|
end
|
195
186
|
|
196
|
-
# = DESCRIPTION
|
197
187
|
# The Transcript#cds_seq method returns the coding sequence of the transcript,
|
198
188
|
# i.e. the concatenated sequence of all exons minus the UTRs.
|
199
189
|
def cds_seq
|
@@ -202,21 +192,18 @@ module Ensembl
|
|
202
192
|
return self.seq[(self.coding_region_cdna_start - 1), cds_length]
|
203
193
|
end
|
204
194
|
|
205
|
-
# = DESCRIPTION
|
206
195
|
# The Transcript#five_prime_utr_seq method returns the sequence of the
|
207
196
|
# 5'UTR of the transcript.
|
208
197
|
def five_prime_utr_seq
|
209
198
|
return self.seq[0, self.coding_region_cdna_start - 1]
|
210
199
|
end
|
211
200
|
|
212
|
-
# = DESCRIPTION
|
213
201
|
# The Transcript#three_prime_utr_seq method returns the sequence of the
|
214
202
|
# 3'UTR of the transcript.
|
215
203
|
def three_prime_utr_seq
|
216
204
|
return self.seq[self.coding_region_cdna_end..-1]
|
217
205
|
end
|
218
206
|
|
219
|
-
# = DESCRIPTION
|
220
207
|
# The Transcript#protein_seq method returns the sequence of the
|
221
208
|
# protein of the transcript.
|
222
209
|
def protein_seq
|
@@ -224,7 +211,6 @@ module Ensembl
|
|
224
211
|
end
|
225
212
|
|
226
213
|
|
227
|
-
# = DESCRIPTION
|
228
214
|
# The Transcript#coding_region_genomic_start returns the start position
|
229
215
|
# of the CDS in genomic coordinates. Note that, in contrast to
|
230
216
|
# Transcript#coding_region_cdna_start, the CDS start position is _always_
|
@@ -240,7 +226,6 @@ module Ensembl
|
|
240
226
|
end
|
241
227
|
end
|
242
228
|
|
243
|
-
# = DESCRIPTION
|
244
229
|
# The Transcript#coding_region_genomic_end returns the stop position
|
245
230
|
# of the CDS in genomic coordinates. Note that, in contrast to
|
246
231
|
# Transcript#coding_region_cdna_end, the CDS stop position is _always_
|
@@ -256,7 +241,6 @@ module Ensembl
|
|
256
241
|
end
|
257
242
|
end
|
258
243
|
|
259
|
-
# = DESCRIPTION
|
260
244
|
# The Transcript#coding_region_cdna_start returns the start position
|
261
245
|
# of the CDS in cDNA coordinates. Note that, in contrast to the
|
262
246
|
# Transcript#coding_region_genomic_start, the CDS start position is
|
@@ -277,7 +261,6 @@ module Ensembl
|
|
277
261
|
|
278
262
|
end
|
279
263
|
|
280
|
-
# = DESCRIPTION
|
281
264
|
# The Transcript#coding_region_cdna_end returns the stop position
|
282
265
|
# of the CDS in cDNA coordinates. Note that, in contrast to the
|
283
266
|
# Transcript#coding_region_genomic_end, the CDS stop position is
|
@@ -298,11 +281,10 @@ module Ensembl
|
|
298
281
|
end
|
299
282
|
|
300
283
|
|
301
|
-
# = DESCRIPTION
|
302
284
|
# The Transcript#exon_for_genomic_position identifies the exon that covers a given
|
303
285
|
# genomic position. Returns the exon object, or nil if in intron.
|
304
286
|
def exon_for_genomic_position(pos)
|
305
|
-
if pos <
|
287
|
+
if pos < self.seq_region_start or pos > self.seq_region_end
|
306
288
|
raise RuntimeError, "Position has to be within transcript"
|
307
289
|
end
|
308
290
|
self.exons.each do |exon|
|
@@ -313,7 +295,6 @@ module Ensembl
|
|
313
295
|
return nil
|
314
296
|
end
|
315
297
|
|
316
|
-
# = DESCRIPTION
|
317
298
|
# The Transcript#exon_for_cdna_position identifies the exon that covers a given
|
318
299
|
# position of the cDNA.
|
319
300
|
def exon_for_cdna_position(pos)
|
@@ -329,90 +310,93 @@ module Ensembl
|
|
329
310
|
raise RuntimeError, "Position outside of cDNA scope"
|
330
311
|
end
|
331
312
|
|
332
|
-
# = DESCRIPTION
|
333
313
|
# The Transcript#cdna2genomic method converts cDNA coordinates to
|
334
314
|
# genomic coordinates for this transcript.
|
335
|
-
#
|
336
|
-
#
|
337
|
-
#
|
338
|
-
# *Returns*:: integer
|
315
|
+
#
|
316
|
+
# @param [Integer] pos Position on the cDNA
|
317
|
+
# @return [Integer] Position on the genomic DNA
|
339
318
|
def cdna2genomic(pos)
|
340
319
|
#FIXME: Still have to check for when pos is outside of scope of cDNA.
|
341
320
|
# Identify the exon we're looking at.
|
342
321
|
exon_with_target = self.exon_for_cdna_position(pos)
|
343
322
|
|
344
323
|
accumulated_position = 0
|
345
|
-
self.exons.
|
324
|
+
ex = self.exons.sort_by {|e| e.seq_region_start}
|
325
|
+
ex.reverse! if self.strand == -1
|
326
|
+
ex.each do |exon|
|
346
327
|
if exon == exon_with_target
|
347
|
-
|
348
|
-
|
328
|
+
length_to_be_taken_from_exon = pos - (accumulated_position + 1)
|
329
|
+
if self.strand == -1
|
330
|
+
return exon.seq_region_end - length_to_be_taken_from_exon
|
331
|
+
else
|
332
|
+
return exon.seq_region_start + length_to_be_taken_from_exon
|
333
|
+
end
|
349
334
|
else
|
350
|
-
accumulated_position += exon.length
|
335
|
+
accumulated_position += exon.length
|
351
336
|
end
|
352
337
|
end
|
353
338
|
end
|
354
339
|
|
355
|
-
# = DESCRIPTION
|
356
340
|
# The Transcript#cds2genomic method converts CDS coordinates to
|
357
341
|
# genomic coordinates for this transcript.
|
358
|
-
#
|
359
|
-
#
|
360
|
-
#
|
361
|
-
# *Returns*::
|
342
|
+
#
|
343
|
+
# @param [Integer] pos Position on the CDS
|
344
|
+
# @return [Integer] Position on the genomic DNA
|
362
345
|
def cds2genomic(pos)
|
363
346
|
return self.cdna2genomic(pos + self.coding_region_cdna_start)
|
364
347
|
end
|
365
348
|
|
366
|
-
# = DESCRIPTION
|
367
349
|
# The Transcript#pep2genomic method converts peptide coordinates to
|
368
350
|
# genomic coordinates for this transcript.
|
369
|
-
#
|
370
|
-
#
|
371
|
-
#
|
372
|
-
# *Returns*::
|
351
|
+
#
|
352
|
+
# @param [Integer] pos Amino acid position on the protein
|
353
|
+
# @return [Integer] Position on the genomic DNA
|
373
354
|
def pep2genomic(pos)
|
374
355
|
raise NotImplementedError
|
375
356
|
end
|
376
357
|
|
377
|
-
# = DESCRIPTION
|
378
358
|
# The Transcript#genomic2cdna method converts genomic coordinates to
|
379
359
|
# cDNA coordinates for this transcript.
|
380
|
-
#
|
381
|
-
#
|
382
|
-
#
|
383
|
-
# *Returns*::
|
360
|
+
#
|
361
|
+
# @param [Integer] pos Position on the genomic DNA
|
362
|
+
# @return [Integer] Position on the cDNA
|
384
363
|
def genomic2cdna(pos)
|
385
364
|
#FIXME: Still have to check for when pos is outside of scope of cDNA.
|
386
365
|
# Identify the exon we're looking at.
|
387
366
|
exon_with_target = self.exon_for_genomic_position(pos)
|
388
367
|
|
389
368
|
accumulated_position = 0
|
390
|
-
self.exons.
|
391
|
-
|
392
|
-
|
369
|
+
ex = self.exons.sort_by {|e| e.seq_region_start}
|
370
|
+
ex.reverse! if self.strand == -1
|
371
|
+
ex.each do |exon|
|
372
|
+
if exon.stable_id == exon_with_target.stable_id
|
373
|
+
if self.strand == 1
|
374
|
+
accumulated_position += ( pos - exon.start) +1
|
375
|
+
else
|
376
|
+
accumulated_position += ( exon.stop - pos ) +1
|
377
|
+
end
|
393
378
|
return accumulated_position
|
394
379
|
else
|
395
|
-
|
380
|
+
accumulated_position += exon.length
|
396
381
|
end
|
397
382
|
end
|
398
383
|
return RuntimeError, "Position outside of cDNA scope"
|
399
384
|
end
|
400
385
|
|
401
|
-
# = DESCRIPTION
|
402
386
|
# The Transcript#genomic2cds method converts genomic coordinates to
|
403
387
|
# CDS coordinates for this transcript.
|
404
|
-
#
|
405
|
-
#
|
406
|
-
#
|
407
|
-
# *Returns*::
|
388
|
+
#
|
389
|
+
# @param [Integer] pos Position on the genomic DNA
|
390
|
+
# @return [Integer] Position on the CDS
|
408
391
|
def genomic2cds(pos)
|
409
392
|
return self.genomic2cdna(pos) - self.coding_region_cdna_start
|
410
393
|
end
|
411
394
|
|
412
|
-
# = DESCRIPTION
|
413
395
|
# The Transcript#genomic2pep method converts genomic coordinates to
|
414
396
|
# peptide coordinates for this transcript.
|
415
|
-
#
|
397
|
+
#
|
398
|
+
# @param [Integer] pos Base position on the genomic DNA
|
399
|
+
# @return [Integer] Amino acid position in the protein
|
416
400
|
# *Arguments*:
|
417
401
|
# * pos:: position on the chromosome (required)
|
418
402
|
# *Returns*::
|
@@ -422,4 +406,4 @@ module Ensembl
|
|
422
406
|
|
423
407
|
end
|
424
408
|
end
|
425
|
-
end
|
409
|
+
end
|
@@ -4,13 +4,13 @@
|
|
4
4
|
# Copyright:: Copyright (C) 2007 Jan Aerts <http://jandot.myopenid.com>
|
5
5
|
# License:: The Ruby License
|
6
6
|
#
|
7
|
+
# @author Jan Aerts
|
7
8
|
nil
|
8
9
|
module Ensembl
|
9
10
|
nil
|
10
11
|
module Core
|
11
12
|
nil
|
12
13
|
module Sliceable
|
13
|
-
# = DESCRIPTION
|
14
14
|
# The #transform method is used to transfer coordinates for a feature
|
15
15
|
# from one coordinate system to another. It basically creates a clone of
|
16
16
|
# the original feature and changes the seq_region, start position, stop
|
@@ -42,8 +42,7 @@ module Ensembl
|
|
42
42
|
# At the moment, transformations can only be done if the two coordinate
|
43
43
|
# systems are linked directly in the 'assembly' table.
|
44
44
|
#
|
45
|
-
#
|
46
|
-
#
|
45
|
+
# @example
|
47
46
|
# # Get a gene in cow and transform to scaffold level
|
48
47
|
# # (i.e. going from a high rank coord system to a lower rank coord
|
49
48
|
# # system)
|
@@ -60,11 +59,10 @@ module Ensembl
|
|
60
59
|
# puts target_gene.seq_region_end #--> 1982868
|
61
60
|
# puts target_gene.seq_region_strand #--> 1
|
62
61
|
#
|
63
|
-
#
|
64
|
-
#
|
65
|
-
#
|
66
|
-
#
|
67
|
-
# *Returns*:: nil or an object of the same class as self
|
62
|
+
# @param [String] coord_system_name Name of the coordinate system to
|
63
|
+
# transform the coordinates to
|
64
|
+
# @return Nil or an object of the same class
|
65
|
+
# as self
|
68
66
|
def transform(coord_system_name)
|
69
67
|
#-
|
70
68
|
# There are two things I can do:
|
@@ -9,7 +9,7 @@
|
|
9
9
|
|
10
10
|
|
11
11
|
require 'rubygems'
|
12
|
-
require '
|
12
|
+
require 'active_record'
|
13
13
|
|
14
14
|
module Ensembl
|
15
15
|
DB_ADAPTER = 'mysql'
|
@@ -39,43 +39,43 @@ module Ensembl
|
|
39
39
|
|
40
40
|
module DBRegistry
|
41
41
|
# = DESCRIPTION
|
42
|
-
# The Ensembl::Registry::Base is a
|
42
|
+
# The Ensembl::DBRegistry::Base is a superclass providing general methods
|
43
43
|
# to get database and connection info.
|
44
|
-
#
|
45
44
|
class Base < ActiveRecord::Base
|
45
|
+
|
46
46
|
self.abstract_class = true
|
47
47
|
self.pluralize_table_names = false
|
48
48
|
def self.get_info
|
49
49
|
host,user,password,db_name,port = self.retrieve_connection.instance_values["connection_options"]
|
50
|
+
db_name =~/(\w+_\w+)_(core|variation|funcgen|compara)_(\d+)_\S+/
|
51
|
+
species,release = $1,$3 # just works for standard Ensembl database names
|
52
|
+
if species.nil? and release.nil? then
|
53
|
+
raise NameError, "Can't get database name from #{db_name}. Are you using non conventional names?"
|
54
|
+
else
|
55
|
+
return host,user,password,db_name,port,species,release.to_i
|
56
|
+
end
|
50
57
|
end
|
51
58
|
# = DESCRIPTION
|
52
|
-
#
|
53
|
-
# passed by the user.
|
54
|
-
|
55
|
-
def self.get_name_from_db(match,species,release,args)
|
59
|
+
# Method to retrieve the name of a database, using species, release and connection parameters
|
60
|
+
# passed by the user.
|
61
|
+
def self.get_name_from_db(db_type,species,release,args)
|
56
62
|
species = species.underscore # Always in lowercase. This keeps things simple when dealing with complex species names like in Ensembl Genomes database
|
57
|
-
dummy_db = DummyDBConnection.connect(args)
|
63
|
+
dummy_db = DummyDBConnection.connect(args)
|
58
64
|
dummy_connection = dummy_db.connection
|
59
|
-
|
60
65
|
# check if a database exists with exactly the species name passed (regular way)
|
61
|
-
db_name = dummy_connection.select_values("SHOW DATABASES LIKE '%#{species}_#{
|
62
|
-
|
66
|
+
db_name = dummy_connection.select_values("SHOW DATABASES LIKE '%#{species}_#{db_type}_#{release.to_s}%'")[0]
|
63
67
|
# if a database is not found and we are working on Ensembl Genomes database...
|
64
68
|
if db_name.nil? and args[:ensembl_genomes] then
|
65
69
|
words = species.split(/_/)
|
66
70
|
first = words.shift
|
67
71
|
# ...try to find a collection database using the first name of the species passed (convention used for collection databases)
|
68
|
-
db_name = dummy_connection.select_values("SHOW DATABASES").select {|d| d=~/#{first}.*_collection_#{
|
72
|
+
db_name = dummy_connection.select_values("SHOW DATABASES").select {|d| d=~/#{first}.*_collection_#{db_type}_#{release.to_s}/}[0]
|
69
73
|
# if a collection database match is found, then look inside to find the species
|
70
74
|
if db_name != nil then
|
71
75
|
dummy_db.disconnect! # close the generic connection with the host
|
72
76
|
args[:database] = db_name
|
73
77
|
dummy_db = DummyDBConnection.connect(args) # open a new connection directly with the collection database
|
74
|
-
|
75
|
-
words.each do |w|
|
76
|
-
others << " #{w}"
|
77
|
-
end
|
78
|
-
species_name = "#{first}#{others}" # transform the species name, so it can match the species names stored in the collection database
|
78
|
+
species_name = species.gsub(first,first[0..0]) # transform the species name, so it can match the species names stored in the collection database
|
79
79
|
Ensembl::SESSION.collection_species = species_name # set the species used for this session, so it's easier to fetch slices from the genome of that species
|
80
80
|
|
81
81
|
# check that the species passed is present in the collection database, otherwise returns a warning
|
@@ -87,6 +87,34 @@ module Ensembl
|
|
87
87
|
return db_name
|
88
88
|
end
|
89
89
|
|
90
|
+
def self.generic_connect(db_type, species, release, args = {})
|
91
|
+
Ensembl::SESSION.reset
|
92
|
+
db_name = nil
|
93
|
+
# if the connection is established with Ensembl Genomes, set the default port and host
|
94
|
+
if args[:ensembl_genomes] then
|
95
|
+
args[:port] = EG_PORT
|
96
|
+
args[:host] = EG_HOST
|
97
|
+
end
|
98
|
+
if args[:port].nil? then
|
99
|
+
args[:port] = ( release > 47 ) ? 5306 : 3306
|
100
|
+
end
|
101
|
+
if args[:database]
|
102
|
+
db_name = args[:database]
|
103
|
+
else
|
104
|
+
db_name = self.get_name_from_db(db_type,species,release,args) # try to find the corresponding database
|
105
|
+
end
|
106
|
+
establish_connection(
|
107
|
+
:adapter => args[:adapter] || Ensembl::DB_ADAPTER,
|
108
|
+
:host => args[:host] || Ensembl::DB_HOST,
|
109
|
+
:database => db_name,
|
110
|
+
:username => args[:username] || Ensembl::DB_USERNAME,
|
111
|
+
:password => args[:password] || Ensembl::DB_PASSWORD,
|
112
|
+
:port => args[:port]
|
113
|
+
)
|
114
|
+
|
115
|
+
self.retrieve_connection # Check if the connection is working
|
116
|
+
end
|
117
|
+
|
90
118
|
end
|
91
119
|
|
92
120
|
end
|
@@ -116,44 +144,16 @@ module Ensembl
|
|
116
144
|
# *Arguments*:
|
117
145
|
# * species:: species to connect to. Arguments should be in snake_case
|
118
146
|
# * ensembl_release:: the release of the database to connect to
|
119
|
-
# (default = 50)
|
147
|
+
# (default = 50)
|
120
148
|
def self.connect(species, release = Ensembl::ENSEMBL_RELEASE, args = {})
|
121
|
-
|
122
|
-
db_name = nil
|
123
|
-
# if the connection is established with Ensembl Genomes, set the default port and host
|
124
|
-
if args[:ensembl_genomes]
|
125
|
-
args[:port] = EG_PORT
|
126
|
-
args[:host] = EG_HOST
|
127
|
-
end
|
128
|
-
if args[:port].nil? then
|
129
|
-
args[:port] = ( release > 47 ) ? 5306 : 3306
|
130
|
-
end
|
131
|
-
if args[:database]
|
132
|
-
db_name = args[:database]
|
133
|
-
else
|
134
|
-
db_name = self.get_name_from_db('core',species,release,args) # try to find the corresponding core database
|
135
|
-
end
|
136
|
-
establish_connection(
|
137
|
-
:adapter => args[:adapter] || Ensembl::DB_ADAPTER,
|
138
|
-
:host => args[:host] || Ensembl::DB_HOST,
|
139
|
-
:database => db_name,
|
140
|
-
:username => args[:username] || Ensembl::DB_USERNAME,
|
141
|
-
:password => args[:password] || Ensembl::DB_PASSWORD,
|
142
|
-
:port => args[:port]
|
143
|
-
)
|
144
|
-
|
145
|
-
self.retrieve_connection # Checkout that the connection is working
|
149
|
+
self.generic_connect('core',species, release,args)
|
146
150
|
end
|
147
151
|
|
148
|
-
|
149
|
-
# = DESCRIPTION
|
150
|
-
# Simple wrapper for the normal DBConnection.connect() method. This is used to set the connection directly
|
151
|
-
# with the Ensembl Genomes database host
|
152
|
-
#
|
153
|
-
def self.ensemblgenomes_connect(species, release = Ensembl::ENSEMBL_RELEASE, args = {})
|
152
|
+
def self.ensemblgenomes_connect(species, release = Ensembl::ENSEMBL_RELEASE, args={})
|
154
153
|
args[:ensembl_genomes] = true
|
155
|
-
self.
|
154
|
+
self.generic_connect('core',species,release,args)
|
156
155
|
end
|
156
|
+
|
157
157
|
|
158
158
|
end # Core::DBConnection
|
159
159
|
|
@@ -185,29 +185,13 @@ module Ensembl
|
|
185
185
|
# * ensembl_release:: the release of the database to connect to
|
186
186
|
# (default = 50)
|
187
187
|
def self.connect(species, release = Ensembl::ENSEMBL_RELEASE, args = {})
|
188
|
-
|
189
|
-
args[:species] = species
|
190
|
-
if args[:port].nil? then
|
191
|
-
args[:port] = ( release > 47 ) ? 5306 : 3306
|
192
|
-
end
|
193
|
-
db_name = nil
|
194
|
-
if args[:database]
|
195
|
-
db_name = args[:database]
|
196
|
-
else
|
197
|
-
db_name = self.get_name_from_db('variation',species,release,args) # try to find the corresponding variation database
|
198
|
-
end
|
199
|
-
establish_connection(
|
200
|
-
:adapter => args[:adapter] || Ensembl::DB_ADAPTER,
|
201
|
-
:host => args[:host] || Ensembl::DB_HOST,
|
202
|
-
:database => db_name,
|
203
|
-
:username => args[:username] || Ensembl::DB_USERNAME,
|
204
|
-
:password => args[:password] || Ensembl::DB_PASSWORD,
|
205
|
-
:port => args[:port]
|
206
|
-
)
|
207
|
-
|
208
|
-
self.retrieve_connection # Checkout that the connection is working
|
209
|
-
|
188
|
+
self.generic_connect('variation',species, release, args)
|
210
189
|
end
|
190
|
+
|
191
|
+
def self.ensemblgenomes_connect(species, release = Ensembl::ENSEMBL_RELEASE, args={})
|
192
|
+
args[:ensembl_genomes] = true
|
193
|
+
self.generic_connect('variation',species,release,args)
|
194
|
+
end
|
211
195
|
|
212
196
|
end # Variation::DBConnection
|
213
197
|
|