ruby-ensembl-api 0.9.6 → 1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/TUTORIAL.rdoc +1 -1
- data/bin/variation_effect_predictor +106 -0
- data/lib/ensembl.rb +2 -2
- data/lib/ensembl/core/activerecord.rb +119 -225
- data/lib/ensembl/core/collection.rb +14 -10
- data/lib/ensembl/core/project.rb +6 -8
- data/lib/ensembl/core/slice.rb +87 -123
- data/lib/ensembl/core/transcript.rb +49 -65
- data/lib/ensembl/core/transform.rb +6 -8
- data/lib/ensembl/db_connection.rb +56 -72
- data/lib/ensembl/variation/activerecord.rb +138 -8
- data/lib/ensembl/variation/variation.rb +284 -46
- data/samples/ensembl_genomes_example.rb +60 -0
- data/samples/examples_perl_tutorial.rb +125 -0
- data/samples/small_example_ruby_api.rb +34 -0
- data/samples/variation_example.rb +67 -0
- data/test/unit/{release_56 → release_60}/core/test_gene.rb +6 -6
- data/test/unit/release_60/core/test_project_human.rb +38 -0
- data/test/unit/{release_56 → release_60}/core/test_slice.rb +1 -8
- data/test/unit/release_60/core/test_transcript.rb +126 -0
- data/test/unit/{release_53 → release_60}/core/test_transform.rb +21 -21
- data/test/unit/release_60/variation/test_activerecord.rb +213 -0
- data/test/unit/release_60/variation/test_consequence.rb +158 -0
- data/test/unit/{release_56 → release_60}/variation/test_variation.rb +18 -17
- data/test/unit/test_connection.rb +2 -2
- data/test/unit/test_releases.rb +8 -8
- metadata +27 -43
- data/test/unit/data/seq_c6qbl.fa +0 -10
- data/test/unit/data/seq_cso19_coding.fa +0 -16
- data/test/unit/data/seq_cso19_transcript.fa +0 -28
- data/test/unit/data/seq_drd3_gene.fa +0 -838
- data/test/unit/data/seq_drd3_transcript.fa +0 -22
- data/test/unit/data/seq_drd4_transcript.fa +0 -24
- data/test/unit/data/seq_forward_composite.fa +0 -1669
- data/test/unit/data/seq_par_boundary.fa +0 -169
- data/test/unit/data/seq_rnd3_transcript.fa +0 -47
- data/test/unit/data/seq_ub2r1_coding.fa +0 -13
- data/test/unit/data/seq_ub2r1_gene.fa +0 -174
- data/test/unit/data/seq_ub2r1_transcript.fa +0 -26
- data/test/unit/data/seq_y.fa +0 -2
- data/test/unit/ensembl_genomes/test_collection.rb +0 -51
- data/test/unit/ensembl_genomes/test_gene.rb +0 -52
- data/test/unit/ensembl_genomes/test_slice.rb +0 -71
- data/test/unit/ensembl_genomes/test_variation.rb +0 -17
- data/test/unit/release_50/core/test_project.rb +0 -215
- data/test/unit/release_50/core/test_project_human.rb +0 -58
- data/test/unit/release_50/core/test_relationships.rb +0 -66
- data/test/unit/release_50/core/test_sequence.rb +0 -175
- data/test/unit/release_50/core/test_slice.rb +0 -121
- data/test/unit/release_50/core/test_transcript.rb +0 -108
- data/test/unit/release_50/core/test_transform.rb +0 -223
- data/test/unit/release_50/variation/test_activerecord.rb +0 -143
- data/test/unit/release_50/variation/test_variation.rb +0 -84
- data/test/unit/release_53/core/test_gene.rb +0 -66
- data/test/unit/release_53/core/test_project.rb +0 -96
- data/test/unit/release_53/core/test_project_human.rb +0 -65
- data/test/unit/release_53/core/test_slice.rb +0 -47
- data/test/unit/release_53/variation/test_activerecord.rb +0 -145
- data/test/unit/release_53/variation/test_variation.rb +0 -71
- data/test/unit/release_56/core/test_project.rb +0 -96
- data/test/unit/release_56/core/test_transform.rb +0 -63
- data/test/unit/release_56/variation/test_activerecord.rb +0 -142
@@ -4,11 +4,11 @@
|
|
4
4
|
# Copyright:: Copyright (C) 2007 Jan Aerts <http://jandot.myopenid.com>
|
5
5
|
# License:: The Ruby License
|
6
6
|
#
|
7
|
+
# @author Jan Aerts
|
7
8
|
nil
|
8
9
|
module Ensembl
|
9
10
|
nil
|
10
11
|
module Core
|
11
|
-
# = DESCRIPTION
|
12
12
|
# The Intron class describes an intron.
|
13
13
|
#
|
14
14
|
# This class does _not_ use ActiveRecord and is only defined within the API.
|
@@ -18,7 +18,7 @@ module Ensembl
|
|
18
18
|
# to a SeqRegion object and a Slice can be created for objects of this
|
19
19
|
# class. See Sliceable and Slice for more information.
|
20
20
|
#
|
21
|
-
#
|
21
|
+
# @example
|
22
22
|
# exon1 = Ensembl::Core::Exon.find(292811)
|
23
23
|
# exon2 = Ensembl::Core::Exon.find(292894)
|
24
24
|
# intron = Ensembl::Core::Intron.new(exon1,exon2)
|
@@ -55,7 +55,6 @@ module Ensembl
|
|
55
55
|
|
56
56
|
end
|
57
57
|
|
58
|
-
# = DESCRIPTION
|
59
58
|
# The Transcript class provides an interface to the transcript
|
60
59
|
# table. This table contains mappings of transcripts for a Gene to a
|
61
60
|
# SeqRegion.
|
@@ -68,7 +67,7 @@ module Ensembl
|
|
68
67
|
# to a SeqRegion object and a Slice can be created for objects of this
|
69
68
|
# class. See Sliceable and Slice for more information.
|
70
69
|
#
|
71
|
-
#
|
70
|
+
# @example
|
72
71
|
# #TODO
|
73
72
|
class Transcript < DBConnection
|
74
73
|
include Sliceable
|
@@ -97,9 +96,8 @@ module Ensembl
|
|
97
96
|
|
98
97
|
# The Transcript#exons method returns the exons for this transcript in
|
99
98
|
# the order of their ranks in the exon_transcript table.
|
100
|
-
#
|
101
|
-
#
|
102
|
-
# *Returns*:: sorted array of Exon objects
|
99
|
+
#
|
100
|
+
# @return [Array<Exon>] Sorted array of Exon objects
|
103
101
|
def exons
|
104
102
|
if @exons.nil?
|
105
103
|
@exons = self.exon_transcripts(:include => [:exons]).sort_by{|et| et.rank.to_i}.collect{|et| et.exon}
|
@@ -108,9 +106,8 @@ module Ensembl
|
|
108
106
|
end
|
109
107
|
|
110
108
|
# The Transcript#introns method returns the introns for this transcript
|
111
|
-
#
|
112
|
-
#
|
113
|
-
# *Returns*:: sorted array of Intron objects
|
109
|
+
#
|
110
|
+
# @return [Array<Intron>] Sorted array of Intron objects
|
114
111
|
def introns
|
115
112
|
if @introns.nil?
|
116
113
|
@introns = Array.new
|
@@ -125,14 +122,12 @@ module Ensembl
|
|
125
122
|
end
|
126
123
|
|
127
124
|
# The Transcript#stable_id method returns the stable ID of the transcript.
|
128
|
-
#
|
129
|
-
#
|
130
|
-
# *Returns*:: String
|
125
|
+
#
|
126
|
+
# @return [String] Ensembl stable ID of the transcript.
|
131
127
|
def stable_id
|
132
128
|
return self.transcript_stable_id.stable_id
|
133
129
|
end
|
134
130
|
|
135
|
-
# = DESCRIPTION
|
136
131
|
# The Transcript#display_label method returns the default name of the transcript.
|
137
132
|
def display_label
|
138
133
|
return Xref.find(self.display_xref_id).display_label
|
@@ -141,7 +136,6 @@ module Ensembl
|
|
141
136
|
alias :label :display_label
|
142
137
|
alias :name :display_label
|
143
138
|
|
144
|
-
# = DESCRIPTION
|
145
139
|
# The Transcript#find_all_by_stable_id class method returns an array of
|
146
140
|
# transcripts with the given stable_id. If none were found, an empty
|
147
141
|
# array is returned.
|
@@ -155,7 +149,6 @@ module Ensembl
|
|
155
149
|
return answer
|
156
150
|
end
|
157
151
|
|
158
|
-
# = DESCRIPTION
|
159
152
|
# The Transcript#find_by_stable_id class method returns a
|
160
153
|
# transcript with the given stable_id. If none was found, nil is returned.
|
161
154
|
def self.find_by_stable_id(stable_id)
|
@@ -167,7 +160,6 @@ module Ensembl
|
|
167
160
|
end
|
168
161
|
end
|
169
162
|
|
170
|
-
# = DESCRIPTION
|
171
163
|
# The Transcript#find_by_stable_id class method fetches a Transcript object based on
|
172
164
|
# its stable ID (i.e. the "ENST" accession number). If the name is
|
173
165
|
# not found, it returns nil.
|
@@ -180,7 +172,6 @@ module Ensembl
|
|
180
172
|
end
|
181
173
|
end
|
182
174
|
|
183
|
-
# = DESCRIPTION
|
184
175
|
# The Transcript#seq method returns the full sequence of all concatenated
|
185
176
|
# exons.
|
186
177
|
def seq
|
@@ -193,7 +184,6 @@ module Ensembl
|
|
193
184
|
return @seq
|
194
185
|
end
|
195
186
|
|
196
|
-
# = DESCRIPTION
|
197
187
|
# The Transcript#cds_seq method returns the coding sequence of the transcript,
|
198
188
|
# i.e. the concatenated sequence of all exons minus the UTRs.
|
199
189
|
def cds_seq
|
@@ -202,21 +192,18 @@ module Ensembl
|
|
202
192
|
return self.seq[(self.coding_region_cdna_start - 1), cds_length]
|
203
193
|
end
|
204
194
|
|
205
|
-
# = DESCRIPTION
|
206
195
|
# The Transcript#five_prime_utr_seq method returns the sequence of the
|
207
196
|
# 5'UTR of the transcript.
|
208
197
|
def five_prime_utr_seq
|
209
198
|
return self.seq[0, self.coding_region_cdna_start - 1]
|
210
199
|
end
|
211
200
|
|
212
|
-
# = DESCRIPTION
|
213
201
|
# The Transcript#three_prime_utr_seq method returns the sequence of the
|
214
202
|
# 3'UTR of the transcript.
|
215
203
|
def three_prime_utr_seq
|
216
204
|
return self.seq[self.coding_region_cdna_end..-1]
|
217
205
|
end
|
218
206
|
|
219
|
-
# = DESCRIPTION
|
220
207
|
# The Transcript#protein_seq method returns the sequence of the
|
221
208
|
# protein of the transcript.
|
222
209
|
def protein_seq
|
@@ -224,7 +211,6 @@ module Ensembl
|
|
224
211
|
end
|
225
212
|
|
226
213
|
|
227
|
-
# = DESCRIPTION
|
228
214
|
# The Transcript#coding_region_genomic_start returns the start position
|
229
215
|
# of the CDS in genomic coordinates. Note that, in contrast to
|
230
216
|
# Transcript#coding_region_cdna_start, the CDS start position is _always_
|
@@ -240,7 +226,6 @@ module Ensembl
|
|
240
226
|
end
|
241
227
|
end
|
242
228
|
|
243
|
-
# = DESCRIPTION
|
244
229
|
# The Transcript#coding_region_genomic_end returns the stop position
|
245
230
|
# of the CDS in genomic coordinates. Note that, in contrast to
|
246
231
|
# Transcript#coding_region_cdna_end, the CDS stop position is _always_
|
@@ -256,7 +241,6 @@ module Ensembl
|
|
256
241
|
end
|
257
242
|
end
|
258
243
|
|
259
|
-
# = DESCRIPTION
|
260
244
|
# The Transcript#coding_region_cdna_start returns the start position
|
261
245
|
# of the CDS in cDNA coordinates. Note that, in contrast to the
|
262
246
|
# Transcript#coding_region_genomic_start, the CDS start position is
|
@@ -277,7 +261,6 @@ module Ensembl
|
|
277
261
|
|
278
262
|
end
|
279
263
|
|
280
|
-
# = DESCRIPTION
|
281
264
|
# The Transcript#coding_region_cdna_end returns the stop position
|
282
265
|
# of the CDS in cDNA coordinates. Note that, in contrast to the
|
283
266
|
# Transcript#coding_region_genomic_end, the CDS stop position is
|
@@ -298,11 +281,10 @@ module Ensembl
|
|
298
281
|
end
|
299
282
|
|
300
283
|
|
301
|
-
# = DESCRIPTION
|
302
284
|
# The Transcript#exon_for_genomic_position method identifies the exon that covers a given
|
303
285
|
# genomic position. Returns the exon object, or nil if in intron.
|
304
286
|
def exon_for_genomic_position(pos)
|
305
|
-
if pos <
|
287
|
+
if pos < self.seq_region_start or pos > self.seq_region_end
|
306
288
|
raise RuntimeError, "Position has to be within transcript"
|
307
289
|
end
|
308
290
|
self.exons.each do |exon|
|
@@ -313,7 +295,6 @@ module Ensembl
|
|
313
295
|
return nil
|
314
296
|
end
|
315
297
|
|
316
|
-
# = DESCRIPTION
|
317
298
|
# The Transcript#exon_for_cdna_position method identifies the exon that covers a given
|
318
299
|
# position of the cDNA.
|
319
300
|
def exon_for_cdna_position(pos)
|
@@ -329,90 +310,93 @@ module Ensembl
|
|
329
310
|
raise RuntimeError, "Position outside of cDNA scope"
|
330
311
|
end
|
331
312
|
|
332
|
-
# = DESCRIPTION
|
333
313
|
# The Transcript#cdna2genomic method converts cDNA coordinates to
|
334
314
|
# genomic coordinates for this transcript.
|
335
|
-
#
|
336
|
-
#
|
337
|
-
#
|
338
|
-
# *Returns*:: integer
|
315
|
+
#
|
316
|
+
# @param [Integer] pos Position on the cDNA
|
317
|
+
# @return [Integer] Position on the genomic DNA
|
339
318
|
def cdna2genomic(pos)
|
340
319
|
#FIXME: Still have to check for when pos is outside of scope of cDNA.
|
341
320
|
# Identify the exon we're looking at.
|
342
321
|
exon_with_target = self.exon_for_cdna_position(pos)
|
343
322
|
|
344
323
|
accumulated_position = 0
|
345
|
-
self.exons.
|
324
|
+
ex = self.exons.sort_by {|e| e.seq_region_start}
|
325
|
+
ex.reverse! if self.strand == -1
|
326
|
+
ex.each do |exon|
|
346
327
|
if exon == exon_with_target
|
347
|
-
|
348
|
-
|
328
|
+
length_to_be_taken_from_exon = pos - (accumulated_position + 1)
|
329
|
+
if self.strand == -1
|
330
|
+
return exon.seq_region_end - length_to_be_taken_from_exon
|
331
|
+
else
|
332
|
+
return exon.seq_region_start + length_to_be_taken_from_exon
|
333
|
+
end
|
349
334
|
else
|
350
|
-
accumulated_position += exon.length
|
335
|
+
accumulated_position += exon.length
|
351
336
|
end
|
352
337
|
end
|
353
338
|
end
|
354
339
|
|
355
|
-
# = DESCRIPTION
|
356
340
|
# The Transcript#cds2genomic method converts CDS coordinates to
|
357
341
|
# genomic coordinates for this transcript.
|
358
|
-
#
|
359
|
-
#
|
360
|
-
#
|
361
|
-
# *Returns*::
|
342
|
+
#
|
343
|
+
# @param [Integer] pos Position on the CDS
|
344
|
+
# @return [Integer] Position on the genomic DNA
|
362
345
|
def cds2genomic(pos)
|
363
346
|
return self.cdna2genomic(pos + self.coding_region_cdna_start)
|
364
347
|
end
|
365
348
|
|
366
|
-
# = DESCRIPTION
|
367
349
|
# The Transcript#pep2genomic method converts peptide coordinates to
|
368
350
|
# genomic coordinates for this transcript.
|
369
|
-
#
|
370
|
-
#
|
371
|
-
#
|
372
|
-
# *Returns*::
|
351
|
+
#
|
352
|
+
# @param [Integer] pos Aminoacid position on the protein
|
353
|
+
# @return [Integer] Position on the genomic DNA
|
373
354
|
def pep2genomic(pos)
|
374
355
|
raise NotImplementedError
|
375
356
|
end
|
376
357
|
|
377
|
-
# = DESCRIPTION
|
378
358
|
# The Transcript#genomic2cdna method converts genomic coordinates to
|
379
359
|
# cDNA coordinates for this transcript.
|
380
|
-
#
|
381
|
-
#
|
382
|
-
#
|
383
|
-
# *Returns*::
|
360
|
+
#
|
361
|
+
# @param [Integer] pos Position on the genomic DNA
|
362
|
+
# @return [Integer] Position on the cDNA
|
384
363
|
def genomic2cdna(pos)
|
385
364
|
#FIXME: Still have to check for when pos is outside of scope of cDNA.
|
386
365
|
# Identify the exon we're looking at.
|
387
366
|
exon_with_target = self.exon_for_genomic_position(pos)
|
388
367
|
|
389
368
|
accumulated_position = 0
|
390
|
-
self.exons.
|
391
|
-
|
392
|
-
|
369
|
+
ex = self.exons.sort_by {|e| e.seq_region_start}
|
370
|
+
ex.reverse! if self.strand == -1
|
371
|
+
ex.each do |exon|
|
372
|
+
if exon.stable_id == exon_with_target.stable_id
|
373
|
+
if self.strand == 1
|
374
|
+
accumulated_position += ( pos - exon.start) +1
|
375
|
+
else
|
376
|
+
accumulated_position += ( exon.stop - pos ) +1
|
377
|
+
end
|
393
378
|
return accumulated_position
|
394
379
|
else
|
395
|
-
|
380
|
+
accumulated_position += exon.length
|
396
381
|
end
|
397
382
|
end
|
398
383
|
return RuntimeError, "Position outside of cDNA scope"
|
399
384
|
end
|
400
385
|
|
401
|
-
# = DESCRIPTION
|
402
386
|
# The Transcript#genomic2cds method converts genomic coordinates to
|
403
387
|
# CDS coordinates for this transcript.
|
404
|
-
#
|
405
|
-
#
|
406
|
-
#
|
407
|
-
# *Returns*::
|
388
|
+
#
|
389
|
+
# @param [Integer] pos Position on the genomic DNA
|
390
|
+
# @return [Integer] Position on the CDS
|
408
391
|
def genomic2cds(pos)
|
409
392
|
return self.genomic2cdna(pos) - self.coding_region_cdna_start
|
410
393
|
end
|
411
394
|
|
412
|
-
# = DESCRIPTION
|
413
395
|
# The Transcript#genomic2pep method converts genomic coordinates to
|
414
396
|
# peptide coordinates for this transcript.
|
415
|
-
#
|
397
|
+
#
|
398
|
+
# @param [Integer] pos Base position on the genomic DNA
|
399
|
+
# @return [Integer] Aminoacid position in the protein
|
416
400
|
# *Arguments*:
|
417
401
|
# * pos:: position on the chromosome (required)
|
418
402
|
# *Returns*::
|
@@ -422,4 +406,4 @@ module Ensembl
|
|
422
406
|
|
423
407
|
end
|
424
408
|
end
|
425
|
-
end
|
409
|
+
end
|
@@ -4,13 +4,13 @@
|
|
4
4
|
# Copyright:: Copyright (C) 2007 Jan Aerts <http://jandot.myopenid.com>
|
5
5
|
# License:: The Ruby License
|
6
6
|
#
|
7
|
+
# @author Jan Aerts
|
7
8
|
nil
|
8
9
|
module Ensembl
|
9
10
|
nil
|
10
11
|
module Core
|
11
12
|
nil
|
12
13
|
module Sliceable
|
13
|
-
# = DESCRIPTION
|
14
14
|
# The #transform method is used to transfer coordinates for a feature
|
15
15
|
# from one coordinate system to another. It basically creates a clone of
|
16
16
|
# the original feature and changes the seq_region, start position, stop
|
@@ -42,8 +42,7 @@ module Ensembl
|
|
42
42
|
# At the moment, transformations can only be done if the two coordinate
|
43
43
|
# systems are linked directly in the 'assembly' table.
|
44
44
|
#
|
45
|
-
#
|
46
|
-
#
|
45
|
+
# @example
|
47
46
|
# # Get a gene in cow and transform to scaffold level
|
48
47
|
# # (i.e. going from a high rank coord system to a lower rank coord
|
49
48
|
# # system)
|
@@ -60,11 +59,10 @@ module Ensembl
|
|
60
59
|
# puts target_gene.seq_region_end #--> 1982868
|
61
60
|
# puts target_gene.seq_region_strand #--> 1
|
62
61
|
#
|
63
|
-
#
|
64
|
-
#
|
65
|
-
#
|
66
|
-
#
|
67
|
-
# *Returns*:: nil or an object of the same class as self
|
62
|
+
# @param [String] coord_system_name Name of the coordinate system to
|
63
|
+
# transform the coordinates to
|
64
|
+
# @return Nil or an object of the same class
|
65
|
+
# as self
|
68
66
|
def transform(coord_system_name)
|
69
67
|
#-
|
70
68
|
# There are two things I can do:
|
@@ -9,7 +9,7 @@
|
|
9
9
|
|
10
10
|
|
11
11
|
require 'rubygems'
|
12
|
-
require '
|
12
|
+
require 'active_record'
|
13
13
|
|
14
14
|
module Ensembl
|
15
15
|
DB_ADAPTER = 'mysql'
|
@@ -39,43 +39,43 @@ module Ensembl
|
|
39
39
|
|
40
40
|
module DBRegistry
|
41
41
|
# = DESCRIPTION
|
42
|
-
# The Ensembl::Registry::Base is a
|
42
|
+
# The Ensembl::Registry::Base is a super class providing general methods
|
43
43
|
# to get database and connection info.
|
44
|
-
#
|
45
44
|
class Base < ActiveRecord::Base
|
45
|
+
|
46
46
|
self.abstract_class = true
|
47
47
|
self.pluralize_table_names = false
|
48
48
|
def self.get_info
|
49
49
|
host,user,password,db_name,port = self.retrieve_connection.instance_values["connection_options"]
|
50
|
+
db_name =~/(\w+_\w+)_(core|variation|funcgen|compara)_(\d+)_\S+/
|
51
|
+
species,release = $1,$3 # just works for standard Ensembl database names
|
52
|
+
if species.nil? and release.nil? then
|
53
|
+
raise NameError, "Can't get database name from #{db_name}. Are you using non conventional names?"
|
54
|
+
else
|
55
|
+
return host,user,password,db_name,port,species,release.to_i
|
56
|
+
end
|
50
57
|
end
|
51
58
|
# = DESCRIPTION
|
52
|
-
#
|
53
|
-
# passed by the user.
|
54
|
-
|
55
|
-
def self.get_name_from_db(match,species,release,args)
|
59
|
+
# Method to retrieve the name of a database, using species, release and connection parameters
|
60
|
+
# passed by the user.
|
61
|
+
def self.get_name_from_db(db_type,species,release,args)
|
56
62
|
species = species.underscore # Always in lowercase. This keeps things simple when dealing with complex species names like in Ensembl Genomes database
|
57
|
-
dummy_db = DummyDBConnection.connect(args)
|
63
|
+
dummy_db = DummyDBConnection.connect(args)
|
58
64
|
dummy_connection = dummy_db.connection
|
59
|
-
|
60
65
|
# check if a database exists with exactly the species name passed (regular way)
|
61
|
-
db_name = dummy_connection.select_values("SHOW DATABASES LIKE '%#{species}_#{
|
62
|
-
|
66
|
+
db_name = dummy_connection.select_values("SHOW DATABASES LIKE '%#{species}_#{db_type}_#{release.to_s}%'")[0]
|
63
67
|
# if a database is not found and we are working on Ensembl Genomes database...
|
64
68
|
if db_name.nil? and args[:ensembl_genomes] then
|
65
69
|
words = species.split(/_/)
|
66
70
|
first = words.shift
|
67
71
|
# ...try to find a collection database using the first name of the species passed (convention used for collection databases)
|
68
|
-
db_name = dummy_connection.select_values("SHOW DATABASES").select {|d| d=~/#{first}.*_collection_#{
|
72
|
+
db_name = dummy_connection.select_values("SHOW DATABASES").select {|d| d=~/#{first}.*_collection_#{db_type}_#{release.to_s}/}[0]
|
69
73
|
# if a collection database match is found, then look inside to find the species
|
70
74
|
if db_name != nil then
|
71
75
|
dummy_db.disconnect! # close the generic connection with the host
|
72
76
|
args[:database] = db_name
|
73
77
|
dummy_db = DummyDBConnection.connect(args) # open a new connection directly with the collection database
|
74
|
-
|
75
|
-
words.each do |w|
|
76
|
-
others << " #{w}"
|
77
|
-
end
|
78
|
-
species_name = "#{first}#{others}" # transform the species name, so it can match the species names stored in the collection database
|
78
|
+
species_name = species.gsub(first,first[0..0]) # transform the species name, so it can match the species names stored in the collection database
|
79
79
|
Ensembl::SESSION.collection_species = species_name # set the species used for this session, so it's easier to fetch slices from the genome of that species
|
80
80
|
|
81
81
|
# check that the species passed is present in the collection database, otherwise returns a warning
|
@@ -87,6 +87,34 @@ module Ensembl
|
|
87
87
|
return db_name
|
88
88
|
end
|
89
89
|
|
90
|
+
def self.generic_connect(db_type, species, release, args = {})
|
91
|
+
Ensembl::SESSION.reset
|
92
|
+
db_name = nil
|
93
|
+
# if the connection is established with Ensembl Genomes, set the default port and host
|
94
|
+
if args[:ensembl_genomes] then
|
95
|
+
args[:port] = EG_PORT
|
96
|
+
args[:host] = EG_HOST
|
97
|
+
end
|
98
|
+
if args[:port].nil? then
|
99
|
+
args[:port] = ( release > 47 ) ? 5306 : 3306
|
100
|
+
end
|
101
|
+
if args[:database]
|
102
|
+
db_name = args[:database]
|
103
|
+
else
|
104
|
+
db_name = self.get_name_from_db(db_type,species,release,args) # try to find the corresponding database
|
105
|
+
end
|
106
|
+
establish_connection(
|
107
|
+
:adapter => args[:adapter] || Ensembl::DB_ADAPTER,
|
108
|
+
:host => args[:host] || Ensembl::DB_HOST,
|
109
|
+
:database => db_name,
|
110
|
+
:username => args[:username] || Ensembl::DB_USERNAME,
|
111
|
+
:password => args[:password] || Ensembl::DB_PASSWORD,
|
112
|
+
:port => args[:port]
|
113
|
+
)
|
114
|
+
|
115
|
+
self.retrieve_connection # Check if the connection is working
|
116
|
+
end
|
117
|
+
|
90
118
|
end
|
91
119
|
|
92
120
|
end
|
@@ -116,44 +144,16 @@ module Ensembl
|
|
116
144
|
# *Arguments*:
|
117
145
|
# * species:: species to connect to. Arguments should be in snake_case
|
118
146
|
# * ensembl_release:: the release of the database to connect to
|
119
|
-
# (default = 50)
|
147
|
+
# (default = 50)
|
120
148
|
def self.connect(species, release = Ensembl::ENSEMBL_RELEASE, args = {})
|
121
|
-
|
122
|
-
db_name = nil
|
123
|
-
# if the connection is established with Ensembl Genomes, set the default port and host
|
124
|
-
if args[:ensembl_genomes]
|
125
|
-
args[:port] = EG_PORT
|
126
|
-
args[:host] = EG_HOST
|
127
|
-
end
|
128
|
-
if args[:port].nil? then
|
129
|
-
args[:port] = ( release > 47 ) ? 5306 : 3306
|
130
|
-
end
|
131
|
-
if args[:database]
|
132
|
-
db_name = args[:database]
|
133
|
-
else
|
134
|
-
db_name = self.get_name_from_db('core',species,release,args) # try to find the corresponding core database
|
135
|
-
end
|
136
|
-
establish_connection(
|
137
|
-
:adapter => args[:adapter] || Ensembl::DB_ADAPTER,
|
138
|
-
:host => args[:host] || Ensembl::DB_HOST,
|
139
|
-
:database => db_name,
|
140
|
-
:username => args[:username] || Ensembl::DB_USERNAME,
|
141
|
-
:password => args[:password] || Ensembl::DB_PASSWORD,
|
142
|
-
:port => args[:port]
|
143
|
-
)
|
144
|
-
|
145
|
-
self.retrieve_connection # Checkout that the connection is working
|
149
|
+
self.generic_connect('core',species, release,args)
|
146
150
|
end
|
147
151
|
|
148
|
-
|
149
|
-
# = DESCRIPTION
|
150
|
-
# Simple wrapper for the normal DBConnection.connect() method. This is used to set the connection directly
|
151
|
-
# with the Ensembl Genomes database host
|
152
|
-
#
|
153
|
-
def self.ensemblgenomes_connect(species, release = Ensembl::ENSEMBL_RELEASE, args = {})
|
152
|
+
def self.ensemblgenomes_connect(species, release = Ensembl::ENSEMBL_RELEASE, args={})
|
154
153
|
args[:ensembl_genomes] = true
|
155
|
-
self.
|
154
|
+
self.generic_connect('core',species,release,args)
|
156
155
|
end
|
156
|
+
|
157
157
|
|
158
158
|
end # Core::DBConnection
|
159
159
|
|
@@ -185,29 +185,13 @@ module Ensembl
|
|
185
185
|
# * ensembl_release:: the release of the database to connect to
|
186
186
|
# (default = 50)
|
187
187
|
def self.connect(species, release = Ensembl::ENSEMBL_RELEASE, args = {})
|
188
|
-
|
189
|
-
args[:species] = species
|
190
|
-
if args[:port].nil? then
|
191
|
-
args[:port] = ( release > 47 ) ? 5306 : 3306
|
192
|
-
end
|
193
|
-
db_name = nil
|
194
|
-
if args[:database]
|
195
|
-
db_name = args[:database]
|
196
|
-
else
|
197
|
-
db_name = self.get_name_from_db('variation',species,release,args) # try to find the corresponding variation database
|
198
|
-
end
|
199
|
-
establish_connection(
|
200
|
-
:adapter => args[:adapter] || Ensembl::DB_ADAPTER,
|
201
|
-
:host => args[:host] || Ensembl::DB_HOST,
|
202
|
-
:database => db_name,
|
203
|
-
:username => args[:username] || Ensembl::DB_USERNAME,
|
204
|
-
:password => args[:password] || Ensembl::DB_PASSWORD,
|
205
|
-
:port => args[:port]
|
206
|
-
)
|
207
|
-
|
208
|
-
self.retrieve_connection # Checkout that the connection is working
|
209
|
-
|
188
|
+
self.generic_connect('variation',species, release, args)
|
210
189
|
end
|
190
|
+
|
191
|
+
def self.ensemblgenomes_connect(species, release = Ensembl::ENSEMBL_RELEASE, args={})
|
192
|
+
args[:ensembl_genomes] = true
|
193
|
+
self.generic_connect('variation',species,release,args)
|
194
|
+
end
|
211
195
|
|
212
196
|
end # Variation::DBConnection
|
213
197
|
|