ruby-ensembl-api 0.9.6 → 1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/TUTORIAL.rdoc +1 -1
- data/bin/variation_effect_predictor +106 -0
- data/lib/ensembl.rb +2 -2
- data/lib/ensembl/core/activerecord.rb +119 -225
- data/lib/ensembl/core/collection.rb +14 -10
- data/lib/ensembl/core/project.rb +6 -8
- data/lib/ensembl/core/slice.rb +87 -123
- data/lib/ensembl/core/transcript.rb +49 -65
- data/lib/ensembl/core/transform.rb +6 -8
- data/lib/ensembl/db_connection.rb +56 -72
- data/lib/ensembl/variation/activerecord.rb +138 -8
- data/lib/ensembl/variation/variation.rb +284 -46
- data/samples/ensembl_genomes_example.rb +60 -0
- data/samples/examples_perl_tutorial.rb +125 -0
- data/samples/small_example_ruby_api.rb +34 -0
- data/samples/variation_example.rb +67 -0
- data/test/unit/{release_56 → release_60}/core/test_gene.rb +6 -6
- data/test/unit/release_60/core/test_project_human.rb +38 -0
- data/test/unit/{release_56 → release_60}/core/test_slice.rb +1 -8
- data/test/unit/release_60/core/test_transcript.rb +126 -0
- data/test/unit/{release_53 → release_60}/core/test_transform.rb +21 -21
- data/test/unit/release_60/variation/test_activerecord.rb +213 -0
- data/test/unit/release_60/variation/test_consequence.rb +158 -0
- data/test/unit/{release_56 → release_60}/variation/test_variation.rb +18 -17
- data/test/unit/test_connection.rb +2 -2
- data/test/unit/test_releases.rb +8 -8
- metadata +27 -43
- data/test/unit/data/seq_c6qbl.fa +0 -10
- data/test/unit/data/seq_cso19_coding.fa +0 -16
- data/test/unit/data/seq_cso19_transcript.fa +0 -28
- data/test/unit/data/seq_drd3_gene.fa +0 -838
- data/test/unit/data/seq_drd3_transcript.fa +0 -22
- data/test/unit/data/seq_drd4_transcript.fa +0 -24
- data/test/unit/data/seq_forward_composite.fa +0 -1669
- data/test/unit/data/seq_par_boundary.fa +0 -169
- data/test/unit/data/seq_rnd3_transcript.fa +0 -47
- data/test/unit/data/seq_ub2r1_coding.fa +0 -13
- data/test/unit/data/seq_ub2r1_gene.fa +0 -174
- data/test/unit/data/seq_ub2r1_transcript.fa +0 -26
- data/test/unit/data/seq_y.fa +0 -2
- data/test/unit/ensembl_genomes/test_collection.rb +0 -51
- data/test/unit/ensembl_genomes/test_gene.rb +0 -52
- data/test/unit/ensembl_genomes/test_slice.rb +0 -71
- data/test/unit/ensembl_genomes/test_variation.rb +0 -17
- data/test/unit/release_50/core/test_project.rb +0 -215
- data/test/unit/release_50/core/test_project_human.rb +0 -58
- data/test/unit/release_50/core/test_relationships.rb +0 -66
- data/test/unit/release_50/core/test_sequence.rb +0 -175
- data/test/unit/release_50/core/test_slice.rb +0 -121
- data/test/unit/release_50/core/test_transcript.rb +0 -108
- data/test/unit/release_50/core/test_transform.rb +0 -223
- data/test/unit/release_50/variation/test_activerecord.rb +0 -143
- data/test/unit/release_50/variation/test_variation.rb +0 -84
- data/test/unit/release_53/core/test_gene.rb +0 -66
- data/test/unit/release_53/core/test_project.rb +0 -96
- data/test/unit/release_53/core/test_project_human.rb +0 -65
- data/test/unit/release_53/core/test_slice.rb +0 -47
- data/test/unit/release_53/variation/test_activerecord.rb +0 -145
- data/test/unit/release_53/variation/test_variation.rb +0 -71
- data/test/unit/release_56/core/test_project.rb +0 -96
- data/test/unit/release_56/core/test_transform.rb +0 -63
- data/test/unit/release_56/variation/test_activerecord.rb +0 -142
@@ -4,18 +4,19 @@
|
|
4
4
|
# Copyright:: Copyright (C) 2009 Francesco Strozzi <francesco.strozzi@gmail.com>
|
5
5
|
#
|
6
6
|
# License:: The Ruby License
|
7
|
+
#
|
8
|
+
# @author Francesco Strozzi
|
7
9
|
|
8
10
|
module Ensembl
|
9
11
|
nil
|
10
12
|
module Core
|
11
|
-
# = DESCRIPTION
|
12
13
|
# Class to describe and handle multi-species databases
|
13
14
|
#
|
14
15
|
class Collection
|
15
|
-
# = DESCRIPTION
|
16
16
|
# Method to check if the current core database is a multi-species db.
|
17
17
|
# Returns a boolean value.
|
18
18
|
#
|
19
|
+
# @return [Boolean] True if current db is multi-species db; otherwise false.
|
19
20
|
def self.check()
|
20
21
|
host,user,password,db_name,port = Ensembl::Core::DBConnection.get_info
|
21
22
|
if db_name =~/(\w+)_collection_core_.*/
|
@@ -24,16 +25,17 @@ module Ensembl
|
|
24
25
|
return false
|
25
26
|
end
|
26
27
|
|
27
|
-
# = DESCRIPTION
|
28
28
|
# Returns an array with all the Species present in a collection database.
|
29
|
-
#
|
29
|
+
#
|
30
|
+
# @return [Array<String>] Array containing species names in collection
|
30
31
|
def self.species()
|
31
32
|
return Meta.find_all_by_meta_key("species.db_name").collect {|m| m.meta_value}
|
32
33
|
end
|
33
34
|
|
34
|
-
#
|
35
|
-
#
|
36
|
-
#
|
35
|
+
# Returns the species_id of a particular species present in the database.
|
36
|
+
#
|
37
|
+
# @param [String] species Name of species
|
38
|
+
# @return [Integer] Species ID in the database.
|
37
39
|
def self.get_species_id(species)
|
38
40
|
species = species.downcase
|
39
41
|
meta = Meta.find_by_sql("SELECT * FROM meta WHERE LOWER(meta_value) = '#{species}'")[0]
|
@@ -44,10 +46,12 @@ module Ensembl
|
|
44
46
|
end
|
45
47
|
end
|
46
48
|
|
47
|
-
#
|
48
|
-
# Returns an array with all the coord_system_id associated with a particular specie and a table_name.
|
49
|
+
# Returns an array with all the coord_system_id associated with a particular species and a table_name.
|
49
50
|
# Used inside Slice#method_missing to filter the coord_system_id using a particular species_id.
|
50
51
|
#
|
52
|
+
# @param [String] table_name Table name
|
53
|
+
# @param [Integer] species_id ID of species in the database
|
54
|
+
# @return [Array<Integer>] Array containing coord_system IDs.
|
51
55
|
def self.find_all_coord_by_table_name(table_name,species_id)
|
52
56
|
all_ids = CoordSystem.find_all_by_species_id(species_id)
|
53
57
|
return MetaCoord.find_all_by_coord_system_id_and_table_name(all_ids,table_name)
|
@@ -57,4 +61,4 @@ module Ensembl
|
|
57
61
|
|
58
62
|
|
59
63
|
end
|
60
|
-
end
|
64
|
+
end
|
data/lib/ensembl/core/project.rb
CHANGED
@@ -6,10 +6,11 @@
|
|
6
6
|
#
|
7
7
|
# License:: The Ruby License
|
8
8
|
#
|
9
|
+
# @author Jan Aerts
|
10
|
+
# @author Francesco Strozzi
|
9
11
|
module Ensembl
|
10
12
|
module Core
|
11
13
|
class Slice
|
12
|
-
# = DESCRIPTION
|
13
14
|
# The Slice#project method is used to transfer coordinates from one
|
14
15
|
# coordinate system to another. Suppose you have a slice on a
|
15
16
|
# contig in human (let's say on contig AC000031.6.1.38703) and you
|
@@ -24,8 +25,7 @@ module Ensembl
|
|
24
25
|
# At the moment, projections can only be done if the two coordinate
|
25
26
|
# systems are linked directly in the 'assembly' table.
|
26
27
|
#
|
27
|
-
#
|
28
|
-
#
|
28
|
+
# @example
|
29
29
|
# # Get a contig slice in cow and project to scaffold level
|
30
30
|
# # (i.e. going from a high rank coord system to a lower rank coord
|
31
31
|
# # system)
|
@@ -47,11 +47,9 @@ module Ensembl
|
|
47
47
|
# puts second_bit.class #--> Gap
|
48
48
|
# puts third_bit.display_name #--> scaffold:Btau_3.1:Chr4.003.106:1:738311:1
|
49
49
|
#
|
50
|
-
#
|
51
|
-
#
|
52
|
-
#
|
53
|
-
# coordinates to
|
54
|
-
# *Returns*:: an array consisting of Slices and, if necessary, Gaps
|
50
|
+
# @param [String] coord_system_name Name of coordinate system to project
|
51
|
+
# coordinates to
|
52
|
+
# @return [Array<Slice, Gap>] Array of Slices and, if necessary, Gaps
|
55
53
|
def project(coord_system_name)
|
56
54
|
answer = Array.new # an array of slices
|
57
55
|
unless Ensembl::SESSION.coord_systems.has_key?(self.seq_region.coord_system_id)
|
data/lib/ensembl/core/slice.rb
CHANGED
@@ -6,12 +6,13 @@
|
|
6
6
|
#
|
7
7
|
# License:: The Ruby License
|
8
8
|
#
|
9
|
+
# @author Jan Aerts
|
10
|
+
# @author Francesco Strozzi
|
9
11
|
nil
|
10
12
|
module Ensembl
|
11
13
|
nil
|
12
14
|
module Core
|
13
15
|
|
14
|
-
# = DESCRIPTION
|
15
16
|
# From the perl API tutorial
|
16
17
|
# (http://www.ensembl.org/info/software/core/core_tutorial.html): "A
|
17
18
|
# Slice object represents a continuous region of a genome. Slices can be
|
@@ -21,7 +22,7 @@ module Ensembl
|
|
21
22
|
# In contrast to almost all other classes of Ensembl::Core,
|
22
23
|
# the Slice class is not based on ActiveRecord.
|
23
24
|
#
|
24
|
-
#
|
25
|
+
# @example
|
25
26
|
# chr4 = SeqRegion.find_by_name('4')
|
26
27
|
# my_slice = Slice.new(chr4, 95000, 98000, -1)
|
27
28
|
# puts my_slice.display_name #--> 'chromosome:4:Btau_3.1:95000:98000:1'
|
@@ -32,20 +33,17 @@ module Ensembl
|
|
32
33
|
## CREATE A SLICE
|
33
34
|
#################
|
34
35
|
|
35
|
-
# = DESCRIPTION
|
36
36
|
# Create a new Slice object from scratch.
|
37
37
|
#
|
38
|
-
#
|
38
|
+
# @example
|
39
39
|
# chr4 = SeqRegion.find_by_name('4')
|
40
40
|
# my_slice = Slice.new(chr4, 95000, 98000, -1)
|
41
|
-
#
|
42
|
-
#
|
43
|
-
#
|
44
|
-
#
|
45
|
-
#
|
46
|
-
#
|
47
|
-
# * strand: strand of the Slice relative to the SeqRegion (default = 1)
|
48
|
-
# *Returns*:: Slice object
|
41
|
+
#
|
42
|
+
# @param [SeqRegion] seq_region SeqRegion object
|
43
|
+
# @param [Integer] start Start position of the slice on the seq_region
|
44
|
+
# @param [Integer] stop Stop position of the slice on the seq_region
|
45
|
+
# @param [Integer] strand Strand that the slice should be
|
46
|
+
# @return [Slice] Slice object
|
49
47
|
def initialize(seq_region, start = 1, stop = seq_region.length, strand = 1)
|
50
48
|
if start.nil?
|
51
49
|
start = 1
|
@@ -60,29 +58,28 @@ module Ensembl
|
|
60
58
|
@seq = nil
|
61
59
|
end
|
62
60
|
|
63
|
-
# = DESCRIPTION
|
64
61
|
# Create a Slice without first creating the SeqRegion object.
|
65
62
|
#
|
66
|
-
#
|
63
|
+
# @example
|
67
64
|
# my_slice_1 = Slice.fetch_by_region('chromosome','4',95000,98000,1)
|
68
65
|
#
|
69
|
-
#
|
70
|
-
#
|
71
|
-
#
|
72
|
-
#
|
73
|
-
#
|
74
|
-
#
|
75
|
-
#
|
76
|
-
#
|
66
|
+
# @param [String] coord_system_name Name of coordinate system
|
67
|
+
# @param [String] seq_region_name name of the seq_region
|
68
|
+
# @param [Integer] start Start position of the slice on the seq_region
|
69
|
+
# @param [Integer] stop Stop position of the slice on the seq_region
|
70
|
+
# @param [Integer] strand Strand that the slice should be
|
71
|
+
# @param [String] species Name of species in case of multi-species database
|
72
|
+
# @param [Integer] version Version number of the coordinate system
|
73
|
+
# @return [Slice] Slice object
|
77
74
|
def self.fetch_by_region(coord_system_name, seq_region_name, start = nil, stop = nil, strand = 1, species = Ensembl::SESSION.collection_species ,version = nil)
|
78
75
|
all_coord_systems = nil
|
79
76
|
if Collection.check
|
80
77
|
species = species.downcase
|
81
78
|
if species.nil?
|
82
|
-
raise ArgumentError, "When using multi-species db, you must pass a
|
79
|
+
raise ArgumentError, "When using multi-species db, you must pass a species name to get the correct Slice"
|
83
80
|
else
|
84
81
|
species_id = Collection.get_species_id(species)
|
85
|
-
raise ArgumentError, "No
|
82
|
+
raise ArgumentError, "No species found in the database with this name: #{species}" if species_id.nil?
|
86
83
|
all_coord_systems = Ensembl::Core::CoordSystem.find_all_by_name_and_species_id(coord_system_name,species_id)
|
87
84
|
end
|
88
85
|
else
|
@@ -111,16 +108,14 @@ module Ensembl
|
|
111
108
|
return Ensembl::Core::Slice.new(seq_region, start, stop, strand)
|
112
109
|
end
|
113
110
|
|
114
|
-
# = DESCRIPTION
|
115
111
|
# Create a Slice based on a Gene
|
116
112
|
#
|
117
|
-
#
|
113
|
+
# @example
|
118
114
|
# my_slice = Slice.fetch_by_gene_stable_id('ENSG00000184895')
|
119
115
|
#
|
120
|
-
#
|
121
|
-
#
|
122
|
-
#
|
123
|
-
# *Returns*:: Ensembl::Core::Slice object
|
116
|
+
# @param [String] gene_stable_id Ensembl gene stable ID
|
117
|
+
# @param [Integer] flanking_seq_length Length of the flanking sequence
|
118
|
+
# @return [Slice] Slice object
|
124
119
|
def self.fetch_by_gene_stable_id(gene_stable_id, flanking_seq_length = 0)
|
125
120
|
gene_stable_id = Ensembl::Core::GeneStableId.find_by_stable_id(gene_stable_id)
|
126
121
|
gene = gene_stable_id.gene
|
@@ -129,16 +124,14 @@ module Ensembl
|
|
129
124
|
return Ensembl::Core::Slice.new(seq_region, gene.seq_region_start - flanking_seq_length, gene.seq_region_end + flanking_seq_length, gene.seq_region_strand)
|
130
125
|
end
|
131
126
|
|
132
|
-
# = DESCRIPTION
|
133
127
|
# Create a Slice based on a Transcript
|
134
128
|
#
|
135
|
-
#
|
129
|
+
# @example
|
136
130
|
# my_slice = Slice.fetch_by_transcript_stable_id('ENST00000383673')
|
137
131
|
#
|
138
|
-
#
|
139
|
-
#
|
140
|
-
#
|
141
|
-
# *Returns*:: Ensembl::Core::Slice object
|
132
|
+
# @param [String] transcript_stable_id Ensembl transcript stable ID
|
133
|
+
# @param [Integer] flanking_seq_length Length of the flanking sequence
|
134
|
+
# @return [Slice] Slice object
|
142
135
|
def self.fetch_by_transcript_stable_id(transcript_stable_id, flanking_seq_length = 0)
|
143
136
|
transcript_stable_id = Ensembl::Core::TranscriptStableId.find_by_stable_id(transcript_stable_id)
|
144
137
|
transcript = transcript_stable_id.transcript
|
@@ -147,17 +140,15 @@ module Ensembl
|
|
147
140
|
return Ensembl::Core::Slice.new(seq_region, transcript.seq_region_start - flanking_seq_length, transcript.seq_region_end + flanking_seq_length, transcript.seq_region_strand)
|
148
141
|
end
|
149
142
|
|
150
|
-
# = DESCRIPTION
|
151
143
|
# Create an array of all Slices for a given coordinate system.
|
152
144
|
#
|
153
|
-
#
|
145
|
+
# @example
|
154
146
|
# slices = Slice.fetch_all('chromosome')
|
155
147
|
#
|
156
|
-
#
|
157
|
-
#
|
158
|
-
#
|
159
|
-
#
|
160
|
-
# *Returns*:: an array of Ensembl::Core::Slice objects
|
148
|
+
# @param [String] coord_system_name Name of coordinate system
|
149
|
+
# @param [String] species Name of species
|
150
|
+
# @param [Integer] version Version of coordinate system
|
151
|
+
# @return [Array<Slice>] Array of Slice objects
|
161
152
|
def self.fetch_all(coord_system_name = 'chromosome',species = Ensembl::SESSION.collection_species ,version = nil)
|
162
153
|
answer = Array.new
|
163
154
|
coord_system = nil
|
@@ -187,50 +178,45 @@ module Ensembl
|
|
187
178
|
## GENERAL METHODS
|
188
179
|
##################
|
189
180
|
|
190
|
-
# = DESCRIPTION
|
191
181
|
# Get the length of a slice
|
192
182
|
#
|
193
|
-
#
|
183
|
+
# @example
|
194
184
|
# chr4 = SeqRegion.find_by_name('4')
|
195
185
|
# my_slice = Slice.new(chr4, 95000, 98000, -1)
|
196
186
|
# puts my_slice.length
|
197
|
-
#
|
198
|
-
#
|
199
|
-
# *Returns*:: Integer
|
187
|
+
#
|
188
|
+
# @return [Integer] Length of the slice
|
200
189
|
def length
|
201
190
|
return self.stop - self.start + 1
|
202
191
|
end
|
203
192
|
|
204
|
-
# = DESCRIPTION
|
205
193
|
# The display_name method returns a full name of this slice, containing
|
206
194
|
# the name of the coordinate system, the sequence region, start and
|
207
195
|
# stop positions on that sequence region and the strand. E.g. for a slice
|
208
196
|
# of bovine chromosome 4 from position 95000 to 98000 on the reverse strand,
|
209
197
|
# the display_name would look like: chromosome:4:Btau_3.1:95000:98000:-1
|
210
198
|
#
|
211
|
-
#
|
199
|
+
# @example
|
212
200
|
# puts my_slice.display_name
|
213
|
-
#
|
214
|
-
#
|
215
|
-
# *Result*:: String
|
201
|
+
#
|
202
|
+
# @return [String] Nicely formatted name of the Slice
|
216
203
|
def display_name
|
217
204
|
return [self.seq_region.coord_system.name, self.seq_region.coord_system.version, self.seq_region.name, self.start.to_s, self.stop.to_s, self.strand.to_s].join(':')
|
218
205
|
end
|
219
206
|
alias to_s display_name
|
220
207
|
|
221
|
-
# = DESCRIPTION
|
222
208
|
# The Slice#overlaps? method checks if this slice overlaps another one.
|
223
209
|
# The other slice has to be on the same coordinate system
|
224
210
|
#
|
225
|
-
#
|
211
|
+
# @example
|
226
212
|
# slice_a = Slice.fetch_by_region('chromosome','X',1,1000)
|
227
213
|
# slice_b = Slice.fetch_by_region('chromosome','X',900,1500)
|
228
214
|
# if slice_a.overlaps?(slice_b)
|
229
215
|
# puts "These slices overlap"
|
230
216
|
# end
|
231
|
-
#
|
232
|
-
#
|
233
|
-
#
|
217
|
+
#
|
218
|
+
# @param [Slice] other_slice Another slice
|
219
|
+
# @return [Boolean] True if slices overlap, otherwise false
|
234
220
|
def overlaps?(other_slice)
|
235
221
|
if ! other_slice.class == Slice
|
236
222
|
raise RuntimeError, "The Slice#overlaps? method takes a Slice object as its arguments."
|
@@ -249,19 +235,18 @@ module Ensembl
|
|
249
235
|
end
|
250
236
|
end
|
251
237
|
|
252
|
-
# = DESCRIPTION
|
253
238
|
# The Slice#within? method checks if this slice is contained within another one.
|
254
239
|
# The other slice has to be on the same coordinate system
|
255
240
|
#
|
256
|
-
#
|
241
|
+
# @example
|
257
242
|
# slice_a = Slice.fetch_by_region('chromosome','X',1,1000)
|
258
243
|
# slice_b = Slice.fetch_by_region('chromosome','X',900,950)
|
259
244
|
# if slice_b.within?(slice_a)
|
260
245
|
# puts "Slice b is within slice a"
|
261
246
|
# end
|
262
|
-
#
|
263
|
-
#
|
264
|
-
#
|
247
|
+
#
|
248
|
+
# @param [Slice] other_slice Another slice
|
249
|
+
# @return [Boolean] True if this slice is within other_slice, otherwise false
|
265
250
|
def within?(other_slice)
|
266
251
|
if ! other_slice.class == Slice
|
267
252
|
raise RuntimeError, "The Slice#overlaps? method takes a Slice object as its arguments."
|
@@ -280,11 +265,10 @@ module Ensembl
|
|
280
265
|
end
|
281
266
|
end
|
282
267
|
|
283
|
-
# = DESCRIPTION
|
284
268
|
# The Slice#excise method removes a bit of a slice and returns the
|
285
269
|
# remainder as separate slices.
|
286
270
|
#
|
287
|
-
#
|
271
|
+
# @example
|
288
272
|
# original_slice = Slice.fetch_by_region('chromosome','X',1,10000)
|
289
273
|
# new_slices = original_slice.excise([500..750, 1050..1075])
|
290
274
|
# new_slices.each do |s|
|
@@ -295,10 +279,9 @@ module Ensembl
|
|
295
279
|
# # chromosome:X:1:499:1
|
296
280
|
# # chromosome:X:751:1049:1
|
297
281
|
# # chromosome:X:1076:10000:1
|
298
|
-
#
|
299
|
-
#
|
300
|
-
#
|
301
|
-
# *Returns*:: array of Slice objects
|
282
|
+
#
|
283
|
+
# @param [Array<Range>] ranges Array of ranges to excise
|
284
|
+
# @return [Array<Slice>] Array of slices
|
302
285
|
def excise(ranges)
|
303
286
|
if ranges.class != Array
|
304
287
|
raise RuntimeError, "Argument should be an array of ranges"
|
@@ -326,7 +309,6 @@ module Ensembl
|
|
326
309
|
return answer
|
327
310
|
end
|
328
311
|
|
329
|
-
# = DESCRIPTION
|
330
312
|
# Get the sequence of the Slice as a Bio::Sequence::NA object.
|
331
313
|
#
|
332
314
|
# If the Slice is on a CoordSystem that is not seq_level, it will try
|
@@ -338,12 +320,10 @@ module Ensembl
|
|
338
320
|
# Caution: Bio::Sequence::NA makes the sequence
|
339
321
|
# downcase!!
|
340
322
|
#
|
341
|
-
#
|
323
|
+
# @example
|
342
324
|
# my_slice.seq.seq.to_s
|
343
325
|
#
|
344
|
-
#
|
345
|
-
# *Arguments*:: none
|
346
|
-
# *Returns*:: Bio::Sequence::NA object
|
326
|
+
# @return [Bio::Sequence::NA] Slice sequence as a Bio::Sequence::NA object
|
347
327
|
def seq
|
348
328
|
# If we already accessed the sequence, we can just
|
349
329
|
# call the instance variable. Otherwise, we'll have
|
@@ -381,34 +361,28 @@ module Ensembl
|
|
381
361
|
raise NotImplementedError
|
382
362
|
end
|
383
363
|
|
384
|
-
# = DESCRIPTION
|
385
364
|
# Take a sub_slice from an existing one.
|
386
365
|
#
|
387
|
-
#
|
366
|
+
# @example
|
388
367
|
# my_sub_slice = my_slice.sub_slice(400,500)
|
389
368
|
#
|
390
|
-
#
|
391
|
-
#
|
392
|
-
#
|
393
|
-
# * stop: stop of subslice relative to slice (default: stop of slice)
|
394
|
-
# *Returns*:: Ensembl::Core::Slice object
|
369
|
+
# @param [Integer] start Start of subslice relative to slice
|
370
|
+
# @param [Integer] stop Stop of subslice relative to slice
|
371
|
+
# @return [Slice] Slice object
|
395
372
|
def sub_slice(start = self.start, stop = self.stop)
|
396
373
|
return self.class.new(self.seq_region, start, stop, self.strand)
|
397
374
|
end
|
398
375
|
|
399
|
-
# = DESCRIPTION
|
400
376
|
# Creates overlapping subslices for a given Slice.
|
401
377
|
#
|
402
|
-
#
|
378
|
+
# @example
|
403
379
|
# my_slice.split(50000, 250).each do |sub_slice|
|
404
380
|
# puts sub_slice.display_name
|
405
381
|
# end
|
406
382
|
#
|
407
|
-
#
|
408
|
-
#
|
409
|
-
#
|
410
|
-
# * overlap: overlap in bp between consecutive subslices (default: 0)
|
411
|
-
# *Returns*:: array of Ensembl::Core::Slice objects
|
383
|
+
# @param [Integer] max_size Maximal size of subslices
|
384
|
+
# @param [Integer] overlap Overlap in bp between consecutive subslices
|
385
|
+
# @return [Array<Slice>] Array of Slice objects
|
412
386
|
def split(max_size = 100000, overlap = 0)
|
413
387
|
sub_slices = Array.new
|
414
388
|
i = 0
|
@@ -532,7 +506,6 @@ SQL
|
|
532
506
|
end
|
533
507
|
|
534
508
|
|
535
|
-
# = DESCRIPTION
|
536
509
|
# Get all MiscFeatures that are located on a Slice for a given MiscSet.
|
537
510
|
#
|
538
511
|
# Pitfall: just looks at the CoordSystem that the Slice is located on.
|
@@ -540,14 +513,13 @@ SQL
|
|
540
513
|
# CoordSystem, but all misc_features are annotated on SeqRegions of
|
541
514
|
# the 'scaffold' CoordSystem, this method will return an empty array.
|
542
515
|
#
|
543
|
-
#
|
516
|
+
# @example
|
544
517
|
# my_slice.misc_features('encode').each do |feature|
|
545
518
|
# puts feature.to_yaml
|
546
519
|
# end
|
547
|
-
#
|
548
|
-
#
|
549
|
-
#
|
550
|
-
# *Returns*:: array of MiscFeature objects
|
520
|
+
#
|
521
|
+
# @param [String] code Code of MiscSet
|
522
|
+
# @return [Array<MiscFeature>] Array of MiscFeature objects
|
551
523
|
def misc_features(code)
|
552
524
|
answer = Array.new
|
553
525
|
if code.nil?
|
@@ -568,7 +540,6 @@ SQL
|
|
568
540
|
return answer
|
569
541
|
end
|
570
542
|
|
571
|
-
# = DESCRIPTION
|
572
543
|
# Get all DnaAlignFeatures that are located on a Slice for a given Analysis.
|
573
544
|
#
|
574
545
|
# Pitfall: just looks at the CoordSystem that the Slice is located on.
|
@@ -576,14 +547,13 @@ SQL
|
|
576
547
|
# CoordSystem, but all dna_align_features are annotated on SeqRegions of
|
577
548
|
# the 'scaffold' CoordSystem, this method will return an empty array.
|
578
549
|
#
|
579
|
-
#
|
550
|
+
# @example
|
580
551
|
# my_slice.dna_align_features('Vertrna').each do |feature|
|
581
552
|
# puts feature.to_yaml
|
582
553
|
# end
|
583
|
-
#
|
584
|
-
#
|
585
|
-
#
|
586
|
-
# *Returns*:: array of DnaAlignFeature objects
|
554
|
+
#
|
555
|
+
# @param [String] analysis_name Name of analysis
|
556
|
+
# @return [Array<DnaAlignFeature>] Array of DnaAlignFeature objects
|
587
557
|
def dna_align_features(analysis_name = nil)
|
588
558
|
if analysis_name.nil?
|
589
559
|
return DnaAlignFeature.find_by_sql('SELECT * FROM dna_align_feature WHERE seq_region_id = ' + self.seq_region.id.to_s + ' AND seq_region_start >= ' + self.start.to_s + ' AND seq_region_end <= ' + self.stop.to_s)
|
@@ -593,7 +563,6 @@ SQL
|
|
593
563
|
end
|
594
564
|
end
|
595
565
|
|
596
|
-
# = DESCRIPTION
|
597
566
|
# Get all ProteinAlignFeatures that are located on a Slice for a given Analysis.
|
598
567
|
#
|
599
568
|
# Pitfall: just looks at the CoordSystem that the Slice is located on.
|
@@ -601,14 +570,13 @@ SQL
|
|
601
570
|
# CoordSystem, but all protein_align_features are annotated on SeqRegions of
|
602
571
|
# the 'scaffold' CoordSystem, this method will return an empty array.
|
603
572
|
#
|
604
|
-
#
|
573
|
+
# @example
|
605
574
|
# my_slice.protein_align_features('Uniprot').each do |feature|
|
606
575
|
# puts feature.to_yaml
|
607
576
|
# end
|
608
|
-
#
|
609
|
-
#
|
610
|
-
#
|
611
|
-
# *Returns*:: array of ProteinAlignFeature objects
|
577
|
+
#
|
578
|
+
# @param [String] analysis_name Name of analysis
|
579
|
+
# @return [Array<ProteinAlignFeature>] Array of ProteinAlignFeature objects
|
612
580
|
def protein_align_features(analysis_name)
|
613
581
|
if analysis_name.nil?
|
614
582
|
return ProteinAlignFeature.find_by_sql('SELECT * FROM protein_align_feature WHERE seq_region_id = ' + self.seq_region.id.to_s + ' AND seq_region_start >= ' + self.start.to_s + ' AND seq_region_end <= ' + self.stop.to_s)
|
@@ -623,9 +591,8 @@ SQL
|
|
623
591
|
############################
|
624
592
|
|
625
593
|
|
626
|
-
#= DESCRIPTION
|
627
594
|
# Method to retrieve Variation features from Ensembl::Core::Slice objects
|
628
|
-
|
595
|
+
# @example
|
629
596
|
# slice = Slice.fetch_by_region('chromosome',1,50000,51000)
|
630
597
|
# variations = slice.get_variation_features
|
631
598
|
# variations.each do |vf|
|
@@ -642,17 +609,17 @@ SQL
|
|
642
609
|
Ensembl::Variation::VariationFeature.find(:all,:conditions => ["flags = 'genotyped' AND seq_region_id = ? AND seq_region_start >= ? AND seq_region_end <= ?",self.seq_region.seq_region_id,self.start,self.stop])
|
643
610
|
end
|
644
611
|
|
612
|
+
def get_structural_variations
|
613
|
+
variation_connection()
|
614
|
+
Ensembl::Variation::StructuralVariation.find(:all,:conditions => ["seq_region_id = ? AND seq_region_start >= ? AND seq_region_end <= ?",self.seq_region.seq_region_id,self.start,self.stop])
|
615
|
+
end
|
616
|
+
|
645
617
|
private
|
646
618
|
|
647
619
|
def variation_connection()
|
648
620
|
if !Ensembl::Variation::DBConnection.connected?
|
649
|
-
host,user,password,db_name,port = Ensembl::Core::DBConnection.get_info
|
650
|
-
|
651
|
-
species,release = $1,$2
|
652
|
-
Ensembl::Variation::DBConnection.connect(species,release.to_i,:username => user, :password => password,:host => host, :port => port)
|
653
|
-
else
|
654
|
-
raise NameError, "Can't get Variation Database name from #{db_name}. Are you using non conventional names?"
|
655
|
-
end
|
621
|
+
host,user,password,db_name,port,species,release = Ensembl::Core::DBConnection.get_info
|
622
|
+
Ensembl::Variation::DBConnection.connect(species,release.to_i,:username => user, :password => password,:host => host, :port => port)
|
656
623
|
end
|
657
624
|
|
658
625
|
end
|
@@ -660,25 +627,22 @@ SQL
|
|
660
627
|
|
661
628
|
end #Slice
|
662
629
|
|
663
|
-
# = DESCRIPTION
|
664
630
|
# The Gap class is similar to the Slice object, but describes a gap and
|
665
631
|
# therefore can easily be described by coordinate system and size.
|
666
632
|
#
|
667
633
|
class Gap
|
668
634
|
attr_accessor :coord_system, :size
|
669
635
|
|
670
|
-
# = DESCRIPTION
|
671
636
|
# Create a new Gap object from scratch.
|
672
637
|
#
|
673
|
-
#
|
638
|
+
# @example
|
674
639
|
# my_coord_system = CoordSystem.find_by_name('chromosome')
|
675
640
|
# # Create a gap of 10kb.
|
676
641
|
# gap = Gap.new(my_coord_system, 10000)
|
677
|
-
#
|
678
|
-
#
|
679
|
-
#
|
680
|
-
#
|
681
|
-
# *Returns*:: Gap object
|
642
|
+
#
|
643
|
+
# @param [CoordSystem] coord_system Coordinate system object
|
644
|
+
# @param [Integer] size Length of the gap
|
645
|
+
# @return [Gap] Gap object
|
682
646
|
def initialize(coord_system, size)
|
683
647
|
@coord_system, @size = coord_system, size
|
684
648
|
end
|