ruby-ensembl-api 0.9.6 → 1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/TUTORIAL.rdoc +1 -1
- data/bin/variation_effect_predictor +106 -0
- data/lib/ensembl.rb +2 -2
- data/lib/ensembl/core/activerecord.rb +119 -225
- data/lib/ensembl/core/collection.rb +14 -10
- data/lib/ensembl/core/project.rb +6 -8
- data/lib/ensembl/core/slice.rb +87 -123
- data/lib/ensembl/core/transcript.rb +49 -65
- data/lib/ensembl/core/transform.rb +6 -8
- data/lib/ensembl/db_connection.rb +56 -72
- data/lib/ensembl/variation/activerecord.rb +138 -8
- data/lib/ensembl/variation/variation.rb +284 -46
- data/samples/ensembl_genomes_example.rb +60 -0
- data/samples/examples_perl_tutorial.rb +125 -0
- data/samples/small_example_ruby_api.rb +34 -0
- data/samples/variation_example.rb +67 -0
- data/test/unit/{release_56 → release_60}/core/test_gene.rb +6 -6
- data/test/unit/release_60/core/test_project_human.rb +38 -0
- data/test/unit/{release_56 → release_60}/core/test_slice.rb +1 -8
- data/test/unit/release_60/core/test_transcript.rb +126 -0
- data/test/unit/{release_53 → release_60}/core/test_transform.rb +21 -21
- data/test/unit/release_60/variation/test_activerecord.rb +213 -0
- data/test/unit/release_60/variation/test_consequence.rb +158 -0
- data/test/unit/{release_56 → release_60}/variation/test_variation.rb +18 -17
- data/test/unit/test_connection.rb +2 -2
- data/test/unit/test_releases.rb +8 -8
- metadata +27 -43
- data/test/unit/data/seq_c6qbl.fa +0 -10
- data/test/unit/data/seq_cso19_coding.fa +0 -16
- data/test/unit/data/seq_cso19_transcript.fa +0 -28
- data/test/unit/data/seq_drd3_gene.fa +0 -838
- data/test/unit/data/seq_drd3_transcript.fa +0 -22
- data/test/unit/data/seq_drd4_transcript.fa +0 -24
- data/test/unit/data/seq_forward_composite.fa +0 -1669
- data/test/unit/data/seq_par_boundary.fa +0 -169
- data/test/unit/data/seq_rnd3_transcript.fa +0 -47
- data/test/unit/data/seq_ub2r1_coding.fa +0 -13
- data/test/unit/data/seq_ub2r1_gene.fa +0 -174
- data/test/unit/data/seq_ub2r1_transcript.fa +0 -26
- data/test/unit/data/seq_y.fa +0 -2
- data/test/unit/ensembl_genomes/test_collection.rb +0 -51
- data/test/unit/ensembl_genomes/test_gene.rb +0 -52
- data/test/unit/ensembl_genomes/test_slice.rb +0 -71
- data/test/unit/ensembl_genomes/test_variation.rb +0 -17
- data/test/unit/release_50/core/test_project.rb +0 -215
- data/test/unit/release_50/core/test_project_human.rb +0 -58
- data/test/unit/release_50/core/test_relationships.rb +0 -66
- data/test/unit/release_50/core/test_sequence.rb +0 -175
- data/test/unit/release_50/core/test_slice.rb +0 -121
- data/test/unit/release_50/core/test_transcript.rb +0 -108
- data/test/unit/release_50/core/test_transform.rb +0 -223
- data/test/unit/release_50/variation/test_activerecord.rb +0 -143
- data/test/unit/release_50/variation/test_variation.rb +0 -84
- data/test/unit/release_53/core/test_gene.rb +0 -66
- data/test/unit/release_53/core/test_project.rb +0 -96
- data/test/unit/release_53/core/test_project_human.rb +0 -65
- data/test/unit/release_53/core/test_slice.rb +0 -47
- data/test/unit/release_53/variation/test_activerecord.rb +0 -145
- data/test/unit/release_53/variation/test_variation.rb +0 -71
- data/test/unit/release_56/core/test_project.rb +0 -96
- data/test/unit/release_56/core/test_transform.rb +0 -63
- data/test/unit/release_56/variation/test_activerecord.rb +0 -142
@@ -4,18 +4,19 @@
|
|
4
4
|
# Copyright:: Copyright (C) 2009 Francesco Strozzi <francesco.strozzi@gmail.com>
|
5
5
|
#
|
6
6
|
# License:: The Ruby License
|
7
|
+
#
|
8
|
+
# @author Francesco Strozzi
|
7
9
|
|
8
10
|
module Ensembl
|
9
11
|
nil
|
10
12
|
module Core
|
11
|
-
# = DESCRIPTION
|
12
13
|
# Class to describe and handle multi-species databases
|
13
14
|
#
|
14
15
|
class Collection
|
15
|
-
# = DESCRIPTION
|
16
16
|
# Method to check if the current core database is a multi-species db.
|
17
17
|
# Returns a boolean value.
|
18
18
|
#
|
19
|
+
# @return [Boolean] True if current db is multi-species db; otherwise false.
|
19
20
|
def self.check()
|
20
21
|
host,user,password,db_name,port = Ensembl::Core::DBConnection.get_info
|
21
22
|
if db_name =~/(\w+)_collection_core_.*/
|
@@ -24,16 +25,17 @@ module Ensembl
|
|
24
25
|
return false
|
25
26
|
end
|
26
27
|
|
27
|
-
# = DESCRIPTION
|
28
28
|
# Returns an array with all the Species present in a collection database.
|
29
|
-
#
|
29
|
+
#
|
30
|
+
# @return [Array<String>] Array containing species names in collection
|
30
31
|
def self.species()
|
31
32
|
return Meta.find_all_by_meta_key("species.db_name").collect {|m| m.meta_value}
|
32
33
|
end
|
33
34
|
|
34
|
-
#
|
35
|
-
#
|
36
|
-
#
|
35
|
+
# Returns the species_id of a particular species present in the database.
|
36
|
+
#
|
37
|
+
# @param [String] species Name of species
|
38
|
+
# @return [Integer] Species ID in the database.
|
37
39
|
def self.get_species_id(species)
|
38
40
|
species = species.downcase
|
39
41
|
meta = Meta.find_by_sql("SELECT * FROM meta WHERE LOWER(meta_value) = '#{species}'")[0]
|
@@ -44,10 +46,12 @@ module Ensembl
|
|
44
46
|
end
|
45
47
|
end
|
46
48
|
|
47
|
-
#
|
48
|
-
# Returns an array with all the coord_system_id associated with a particular specie and a table_name.
|
49
|
+
# Returns an array with all the coord_system_id associated with a particular species and a table_name.
|
49
50
|
# Used inside Slice#method_missing to filter the coord_system_id using a particular species_id.
|
50
51
|
#
|
52
|
+
# @param [String] table_name Table name
|
53
|
+
# @param [Integer] species_id ID of species in the database
|
54
|
+
# @return [Array<Integer>] Array containing coord_system IDs.
|
51
55
|
def self.find_all_coord_by_table_name(table_name,species_id)
|
52
56
|
all_ids = CoordSystem.find_all_by_species_id(species_id)
|
53
57
|
return MetaCoord.find_all_by_coord_system_id_and_table_name(all_ids,table_name)
|
@@ -57,4 +61,4 @@ module Ensembl
|
|
57
61
|
|
58
62
|
|
59
63
|
end
|
60
|
-
end
|
64
|
+
end
|
data/lib/ensembl/core/project.rb
CHANGED
@@ -6,10 +6,11 @@
|
|
6
6
|
#
|
7
7
|
# License:: The Ruby License
|
8
8
|
#
|
9
|
+
# @author Jan Aerts
|
10
|
+
# @author Francesco Strozzi
|
9
11
|
module Ensembl
|
10
12
|
module Core
|
11
13
|
class Slice
|
12
|
-
# = DESCRIPTION
|
13
14
|
# The Slice#project method is used to transfer coordinates from one
|
14
15
|
# coordinate system to another. Suppose you have a slice on a
|
15
16
|
# contig in human (let's say on contig AC000031.6.1.38703) and you
|
@@ -24,8 +25,7 @@ module Ensembl
|
|
24
25
|
# At the moment, projections can only be done if the two coordinate
|
25
26
|
# systems are linked directly in the 'assembly' table.
|
26
27
|
#
|
27
|
-
#
|
28
|
-
#
|
28
|
+
# @example
|
29
29
|
# # Get a contig slice in cow and project to scaffold level
|
30
30
|
# # (i.e. going from a high rank coord system to a lower rank coord
|
31
31
|
# # system)
|
@@ -47,11 +47,9 @@ module Ensembl
|
|
47
47
|
# puts second_bit.class #--> Gap
|
48
48
|
# puts third_bit.display_name #--> scaffold:Btau_3.1:Chr4.003.106:1:738311:1
|
49
49
|
#
|
50
|
-
#
|
51
|
-
#
|
52
|
-
#
|
53
|
-
# coordinates to
|
54
|
-
# *Returns*:: an array consisting of Slices and, if necessary, Gaps
|
50
|
+
# @param [String] coord_system_name Name of coordinate system to project
|
51
|
+
# coordinates to
|
52
|
+
# @return [Array<Slice, Gap>] Array of Slices and, if necessary, Gaps
|
55
53
|
def project(coord_system_name)
|
56
54
|
answer = Array.new # an array of slices
|
57
55
|
unless Ensembl::SESSION.coord_systems.has_key?(self.seq_region.coord_system_id)
|
data/lib/ensembl/core/slice.rb
CHANGED
@@ -6,12 +6,13 @@
|
|
6
6
|
#
|
7
7
|
# License:: The Ruby License
|
8
8
|
#
|
9
|
+
# @author Jan Aerts
|
10
|
+
# @author Francesco Strozzi
|
9
11
|
nil
|
10
12
|
module Ensembl
|
11
13
|
nil
|
12
14
|
module Core
|
13
15
|
|
14
|
-
# = DESCRIPTION
|
15
16
|
# From the perl API tutorial
|
16
17
|
# (http://www.ensembl.org/info/software/core/core_tutorial.html): "A
|
17
18
|
# Slice object represents a continuous region of a genome. Slices can be
|
@@ -21,7 +22,7 @@ module Ensembl
|
|
21
22
|
# In contrast to almost all other classes of Ensembl::Core,
|
22
23
|
# the Slice class is not based on ActiveRecord.
|
23
24
|
#
|
24
|
-
#
|
25
|
+
# @example
|
25
26
|
# chr4 = SeqRegion.find_by_name('4')
|
26
27
|
# my_slice = Slice.new(chr4, 95000, 98000, -1)
|
27
28
|
# puts my_slice.display_name #--> 'chromosome:4:Btau_3.1:95000:98000:1'
|
@@ -32,20 +33,17 @@ module Ensembl
|
|
32
33
|
## CREATE A SLICE
|
33
34
|
#################
|
34
35
|
|
35
|
-
# = DESCRIPTION
|
36
36
|
# Create a new Slice object from scratch.
|
37
37
|
#
|
38
|
-
#
|
38
|
+
# @example
|
39
39
|
# chr4 = SeqRegion.find_by_name('4')
|
40
40
|
# my_slice = Slice.new(chr4, 95000, 98000, -1)
|
41
|
-
#
|
42
|
-
#
|
43
|
-
#
|
44
|
-
#
|
45
|
-
#
|
46
|
-
#
|
47
|
-
# * strand: strand of the Slice relative to the SeqRegion (default = 1)
|
48
|
-
# *Returns*:: Slice object
|
41
|
+
#
|
42
|
+
# @param [SeqRegion] seq_region SeqRegion object
|
43
|
+
# @param [Integer] start Start position of the slice on the seq_region
|
44
|
+
# @param [Integer] stop Stop position of the slice on the seq_region
|
45
|
+
# @param [Integer] strand Strand that the slice should be
|
46
|
+
# @return [Slice] Slice object
|
49
47
|
def initialize(seq_region, start = 1, stop = seq_region.length, strand = 1)
|
50
48
|
if start.nil?
|
51
49
|
start = 1
|
@@ -60,29 +58,28 @@ module Ensembl
|
|
60
58
|
@seq = nil
|
61
59
|
end
|
62
60
|
|
63
|
-
# = DESCRIPTION
|
64
61
|
# Create a Slice without first creating the SeqRegion object.
|
65
62
|
#
|
66
|
-
#
|
63
|
+
# @example
|
67
64
|
# my_slice_1 = Slice.fetch_by_region('chromosome','4',95000,98000,1)
|
68
65
|
#
|
69
|
-
#
|
70
|
-
#
|
71
|
-
#
|
72
|
-
#
|
73
|
-
#
|
74
|
-
#
|
75
|
-
#
|
76
|
-
#
|
66
|
+
# @param [String] coord_system_name Name of coordinate system
|
67
|
+
# @param [String] seq_region_name name of the seq_region
|
68
|
+
# @param [Integer] start Start position of the slice on the seq_region
|
69
|
+
# @param [Integer] stop Stop position of the slice on the seq_region
|
70
|
+
# @param [Integer] strand Strand that the slice should be
|
71
|
+
# @param [String] species Name of species in case of multi-species database
|
72
|
+
# @param [Integer] version Version number of the coordinate system
|
73
|
+
# @return [Slice] Slice object
|
77
74
|
def self.fetch_by_region(coord_system_name, seq_region_name, start = nil, stop = nil, strand = 1, species = Ensembl::SESSION.collection_species ,version = nil)
|
78
75
|
all_coord_systems = nil
|
79
76
|
if Collection.check
|
80
77
|
species = species.downcase
|
81
78
|
if species.nil?
|
82
|
-
raise ArgumentError, "When using multi-species db, you must pass a
|
79
|
+
raise ArgumentError, "When using multi-species db, you must pass a species name to get the correct Slice"
|
83
80
|
else
|
84
81
|
species_id = Collection.get_species_id(species)
|
85
|
-
raise ArgumentError, "No
|
82
|
+
raise ArgumentError, "No species found in the database with this name: #{species}" if species_id.nil?
|
86
83
|
all_coord_systems = Ensembl::Core::CoordSystem.find_all_by_name_and_species_id(coord_system_name,species_id)
|
87
84
|
end
|
88
85
|
else
|
@@ -111,16 +108,14 @@ module Ensembl
|
|
111
108
|
return Ensembl::Core::Slice.new(seq_region, start, stop, strand)
|
112
109
|
end
|
113
110
|
|
114
|
-
# = DESCRIPTION
|
115
111
|
# Create a Slice based on a Gene
|
116
112
|
#
|
117
|
-
#
|
113
|
+
# @example
|
118
114
|
# my_slice = Slice.fetch_by_gene_stable_id('ENSG00000184895')
|
119
115
|
#
|
120
|
-
#
|
121
|
-
#
|
122
|
-
#
|
123
|
-
# *Returns*:: Ensembl::Core::Slice object
|
116
|
+
# @param [String] gene_stable_id Ensembl gene stable ID
|
117
|
+
# @param [Integer] flanking_seq_length Length of the flanking sequence
|
118
|
+
# @return [Slice] Slice object
|
124
119
|
def self.fetch_by_gene_stable_id(gene_stable_id, flanking_seq_length = 0)
|
125
120
|
gene_stable_id = Ensembl::Core::GeneStableId.find_by_stable_id(gene_stable_id)
|
126
121
|
gene = gene_stable_id.gene
|
@@ -129,16 +124,14 @@ module Ensembl
|
|
129
124
|
return Ensembl::Core::Slice.new(seq_region, gene.seq_region_start - flanking_seq_length, gene.seq_region_end + flanking_seq_length, gene.seq_region_strand)
|
130
125
|
end
|
131
126
|
|
132
|
-
# = DESCRIPTION
|
133
127
|
# Create a Slice based on a Transcript
|
134
128
|
#
|
135
|
-
#
|
129
|
+
# @example
|
136
130
|
# my_slice = Slice.fetch_by_transcript_stable_id('ENST00000383673')
|
137
131
|
#
|
138
|
-
#
|
139
|
-
#
|
140
|
-
#
|
141
|
-
# *Returns*:: Ensembl::Core::Slice object
|
132
|
+
# @param [String] transcript_stable_id Ensembl transcript stable ID
|
133
|
+
# @param [Integer] flanking_seq_length Length of the flanking sequence
|
134
|
+
# @return [Slice] Slice object
|
142
135
|
def self.fetch_by_transcript_stable_id(transcript_stable_id, flanking_seq_length = 0)
|
143
136
|
transcript_stable_id = Ensembl::Core::TranscriptStableId.find_by_stable_id(transcript_stable_id)
|
144
137
|
transcript = transcript_stable_id.transcript
|
@@ -147,17 +140,15 @@ module Ensembl
|
|
147
140
|
return Ensembl::Core::Slice.new(seq_region, transcript.seq_region_start - flanking_seq_length, transcript.seq_region_end + flanking_seq_length, transcript.seq_region_strand)
|
148
141
|
end
|
149
142
|
|
150
|
-
# = DESCRIPTION
|
151
143
|
# Create an array of all Slices for a given coordinate system.
|
152
144
|
#
|
153
|
-
#
|
145
|
+
# @example
|
154
146
|
# slices = Slice.fetch_all('chromosome')
|
155
147
|
#
|
156
|
-
#
|
157
|
-
#
|
158
|
-
#
|
159
|
-
#
|
160
|
-
# *Returns*:: an array of Ensembl::Core::Slice objects
|
148
|
+
# @param [String] coord_system_name Name of coordinate system
|
149
|
+
# @param [String] species Name of species
|
150
|
+
# @param [Integer] version Version of coordinate system
|
151
|
+
# @return [Array<Slice>] Array of Slice objects
|
161
152
|
def self.fetch_all(coord_system_name = 'chromosome',species = Ensembl::SESSION.collection_species ,version = nil)
|
162
153
|
answer = Array.new
|
163
154
|
coord_system = nil
|
@@ -187,50 +178,45 @@ module Ensembl
|
|
187
178
|
## GENERAL METHODS
|
188
179
|
##################
|
189
180
|
|
190
|
-
# = DESCRIPTION
|
191
181
|
# Get the length of a slice
|
192
182
|
#
|
193
|
-
#
|
183
|
+
# @example
|
194
184
|
# chr4 = SeqRegion.find_by_name('4')
|
195
185
|
# my_slice = Slice.new(chr4, 95000, 98000, -1)
|
196
186
|
# puts my_slice.length
|
197
|
-
#
|
198
|
-
#
|
199
|
-
# *Returns*:: Integer
|
187
|
+
#
|
188
|
+
# @return [Integer] Length of the slice
|
200
189
|
def length
|
201
190
|
return self.stop - self.start + 1
|
202
191
|
end
|
203
192
|
|
204
|
-
# = DESCRIPTION
|
205
193
|
# The display_name method returns a full name of this slice, containing
|
206
194
|
# the name of the coordinate system, the sequence region, start and
|
207
195
|
# stop positions on that sequence region and the strand. E.g. for a slice
|
208
196
|
# of bovine chromosome 4 from position 95000 to 98000 on the reverse strand,
|
209
197
|
# the display_name would look like: chromosome:Btau_3.1:4:95000:98000:-1
|
210
198
|
#
|
211
|
-
#
|
199
|
+
# @example
|
212
200
|
# puts my_slice.display_name
|
213
|
-
#
|
214
|
-
#
|
215
|
-
# *Result*:: String
|
201
|
+
#
|
202
|
+
# @return [String] Nicely formatted name of the Slice
|
216
203
|
def display_name
|
217
204
|
return [self.seq_region.coord_system.name, self.seq_region.coord_system.version, self.seq_region.name, self.start.to_s, self.stop.to_s, self.strand.to_s].join(':')
|
218
205
|
end
|
219
206
|
alias to_s display_name
|
220
207
|
|
221
|
-
# = DESCRIPTION
|
222
208
|
# The Slice#overlaps? method checks if this slice overlaps another one.
|
223
209
|
# The other slice has to be on the same coordinate system
|
224
210
|
#
|
225
|
-
#
|
211
|
+
# @example
|
226
212
|
# slice_a = Slice.fetch_by_region('chromosome','X',1,1000)
|
227
213
|
# slice_b = Slice.fetch_by_region('chromosome','X',900,1500)
|
228
214
|
# if slice_a.overlaps?(slice_b)
|
229
215
|
# puts "These slices overlap"
|
230
216
|
# end
|
231
|
-
#
|
232
|
-
#
|
233
|
-
#
|
217
|
+
#
|
218
|
+
# @param [Slice] other_slice Another slice
|
219
|
+
# @return [Boolean] True if slices overlap, otherwise false
|
234
220
|
def overlaps?(other_slice)
|
235
221
|
if ! other_slice.class == Slice
|
236
222
|
raise RuntimeError, "The Slice#overlaps? method takes a Slice object as its arguments."
|
@@ -249,19 +235,18 @@ module Ensembl
|
|
249
235
|
end
|
250
236
|
end
|
251
237
|
|
252
|
-
# = DESCRIPTION
|
253
238
|
# The Slice#within? method checks if this slice is contained within another one.
|
254
239
|
# The other slice has to be on the same coordinate system
|
255
240
|
#
|
256
|
-
#
|
241
|
+
# @example
|
257
242
|
# slice_a = Slice.fetch_by_region('chromosome','X',1,1000)
|
258
243
|
# slice_b = Slice.fetch_by_region('chromosome','X',900,950)
|
259
244
|
# if slice_b.within?(slice_a)
|
260
245
|
# puts "Slice b is within slice a"
|
261
246
|
# end
|
262
|
-
#
|
263
|
-
#
|
264
|
-
#
|
247
|
+
#
|
248
|
+
# @param [Slice] other_slice Another slice
|
249
|
+
# @return [Boolean] True if this slice is within other_slice, otherwise false
|
265
250
|
def within?(other_slice)
|
266
251
|
if ! other_slice.class == Slice
|
267
252
|
raise RuntimeError, "The Slice#overlaps? method takes a Slice object as its arguments."
|
@@ -280,11 +265,10 @@ module Ensembl
|
|
280
265
|
end
|
281
266
|
end
|
282
267
|
|
283
|
-
# = DESCRIPTION
|
284
268
|
# The Slice#excise method removes a bit of a slice and returns the
|
285
269
|
# remainder as separate slices.
|
286
270
|
#
|
287
|
-
#
|
271
|
+
# @example
|
288
272
|
# original_slice = Slice.fetch_by_region('chromosome','X',1,10000)
|
289
273
|
# new_slices = original_slice.excise([500..750, 1050..1075])
|
290
274
|
# new_slices.each do |s|
|
@@ -295,10 +279,9 @@ module Ensembl
|
|
295
279
|
# # chromosome:X:1:499:1
|
296
280
|
# # chromosome:X:751:1049:1
|
297
281
|
# # chromosome:X:1076:10000:1
|
298
|
-
#
|
299
|
-
#
|
300
|
-
#
|
301
|
-
# *Returns*:: array of Slice objects
|
282
|
+
#
|
283
|
+
# @param [Array<Range>] ranges Array of ranges to excise
|
284
|
+
# @return [Array<Slice>] Array of slices
|
302
285
|
def excise(ranges)
|
303
286
|
if ranges.class != Array
|
304
287
|
raise RuntimeError, "Argument should be an array of ranges"
|
@@ -326,7 +309,6 @@ module Ensembl
|
|
326
309
|
return answer
|
327
310
|
end
|
328
311
|
|
329
|
-
# = DESCRIPTION
|
330
312
|
# Get the sequence of the Slice as a Bio::Sequence::NA object.
|
331
313
|
#
|
332
314
|
# If the Slice is on a CoordSystem that is not seq_level, it will try
|
@@ -338,12 +320,10 @@ module Ensembl
|
|
338
320
|
# Caution: Bio::Sequence::NA makes the sequence
|
339
321
|
# downcase!!
|
340
322
|
#
|
341
|
-
#
|
323
|
+
# @example
|
342
324
|
# my_slice.seq.seq.to_s
|
343
325
|
#
|
344
|
-
#
|
345
|
-
# *Arguments*:: none
|
346
|
-
# *Returns*:: Bio::Sequence::NA object
|
326
|
+
# @return [Bio::Sequence::NA] Slice sequence as a Bio::Sequence::NA object
|
347
327
|
def seq
|
348
328
|
# If we already accessed the sequence, we can just
|
349
329
|
# call the instance variable. Otherwise, we'll have
|
@@ -381,34 +361,28 @@ module Ensembl
|
|
381
361
|
raise NotImplementedError
|
382
362
|
end
|
383
363
|
|
384
|
-
# = DESCRIPTION
|
385
364
|
# Take a sub_slice from an existing one.
|
386
365
|
#
|
387
|
-
#
|
366
|
+
# @example
|
388
367
|
# my_sub_slice = my_slice.sub_slice(400,500)
|
389
368
|
#
|
390
|
-
#
|
391
|
-
#
|
392
|
-
#
|
393
|
-
# * stop: stop of subslice relative to slice (default: stop of slice)
|
394
|
-
# *Returns*:: Ensembl::Core::Slice object
|
369
|
+
# @param [Integer] start Start of subslice relative to slice
|
370
|
+
# @param [Integer] stop Stop of subslice relative to slice
|
371
|
+
# @return [Slice] Slice object
|
395
372
|
def sub_slice(start = self.start, stop = self.stop)
|
396
373
|
return self.class.new(self.seq_region, start, stop, self.strand)
|
397
374
|
end
|
398
375
|
|
399
|
-
# = DESCRIPTION
|
400
376
|
# Creates overlapping subslices for a given Slice.
|
401
377
|
#
|
402
|
-
#
|
378
|
+
# @example
|
403
379
|
# my_slice.split(50000, 250).each do |sub_slice|
|
404
380
|
# puts sub_slice.display_name
|
405
381
|
# end
|
406
382
|
#
|
407
|
-
#
|
408
|
-
#
|
409
|
-
#
|
410
|
-
# * overlap: overlap in bp between consecutive subslices (default: 0)
|
411
|
-
# *Returns*:: array of Ensembl::Core::Slice objects
|
383
|
+
# @param [Integer] max_size Maximal size of subslices
|
384
|
+
# @param [Integer] overlap Overlap in bp between consecutive subslices
|
385
|
+
# @return [Array<Slice>] Array of Slice objects
|
412
386
|
def split(max_size = 100000, overlap = 0)
|
413
387
|
sub_slices = Array.new
|
414
388
|
i = 0
|
@@ -532,7 +506,6 @@ SQL
|
|
532
506
|
end
|
533
507
|
|
534
508
|
|
535
|
-
# = DESCRIPTION
|
536
509
|
# Get all MiscFeatures that are located on a Slice for a given MiscSet.
|
537
510
|
#
|
538
511
|
# Pitfall: just looks at the CoordSystem that the Slice is located on.
|
@@ -540,14 +513,13 @@ SQL
|
|
540
513
|
# CoordSystem, but all misc_features are annotated on SeqRegions of
|
541
514
|
# the 'scaffold' CoordSystem, this method will return an empty array.
|
542
515
|
#
|
543
|
-
#
|
516
|
+
# @example
|
544
517
|
# my_slice.misc_features('encode').each do |feature|
|
545
518
|
# puts feature.to_yaml
|
546
519
|
# end
|
547
|
-
#
|
548
|
-
#
|
549
|
-
#
|
550
|
-
# *Returns*:: array of MiscFeature objects
|
520
|
+
#
|
521
|
+
# @param [String] code Code of MiscSet
|
522
|
+
# @return [Array<MiscFeature>] Array of MiscFeature objects
|
551
523
|
def misc_features(code)
|
552
524
|
answer = Array.new
|
553
525
|
if code.nil?
|
@@ -568,7 +540,6 @@ SQL
|
|
568
540
|
return answer
|
569
541
|
end
|
570
542
|
|
571
|
-
# = DESCRIPTION
|
572
543
|
# Get all DnaAlignFeatures that are located on a Slice for a given Analysis.
|
573
544
|
#
|
574
545
|
# Pitfall: just looks at the CoordSystem that the Slice is located on.
|
@@ -576,14 +547,13 @@ SQL
|
|
576
547
|
# CoordSystem, but all dna_align_features are annotated on SeqRegions of
|
577
548
|
# the 'scaffold' CoordSystem, this method will return an empty array.
|
578
549
|
#
|
579
|
-
#
|
550
|
+
# @example
|
580
551
|
# my_slice.dna_align_features('Vertrna').each do |feature|
|
581
552
|
# puts feature.to_yaml
|
582
553
|
# end
|
583
|
-
#
|
584
|
-
#
|
585
|
-
#
|
586
|
-
# *Returns*:: array of DnaAlignFeature objects
|
554
|
+
#
|
555
|
+
# @param [String] analysis_name Name of analysis
|
556
|
+
# @return [Array<DnaAlignFeature>] Array of DnaAlignFeature objects
|
587
557
|
def dna_align_features(analysis_name = nil)
|
588
558
|
if analysis_name.nil?
|
589
559
|
return DnaAlignFeature.find_by_sql('SELECT * FROM dna_align_feature WHERE seq_region_id = ' + self.seq_region.id.to_s + ' AND seq_region_start >= ' + self.start.to_s + ' AND seq_region_end <= ' + self.stop.to_s)
|
@@ -593,7 +563,6 @@ SQL
|
|
593
563
|
end
|
594
564
|
end
|
595
565
|
|
596
|
-
# = DESCRIPTION
|
597
566
|
# Get all ProteinAlignFeatures that are located on a Slice for a given Analysis.
|
598
567
|
#
|
599
568
|
# Pitfall: just looks at the CoordSystem that the Slice is located on.
|
@@ -601,14 +570,13 @@ SQL
|
|
601
570
|
# CoordSystem, but all protein_align_features are annotated on SeqRegions of
|
602
571
|
# the 'scaffold' CoordSystem, this method will return an empty array.
|
603
572
|
#
|
604
|
-
#
|
573
|
+
# @example
|
605
574
|
# my_slice.protein_align_features('Uniprot').each do |feature|
|
606
575
|
# puts feature.to_yaml
|
607
576
|
# end
|
608
|
-
#
|
609
|
-
#
|
610
|
-
#
|
611
|
-
# *Returns*:: array of ProteinAlignFeature objects
|
577
|
+
#
|
578
|
+
# @param [String] analysis_name Name of analysis
|
579
|
+
# @return [Array<ProteinAlignFeature>] Array of ProteinAlignFeature objects
|
612
580
|
def protein_align_features(analysis_name)
|
613
581
|
if analysis_name.nil?
|
614
582
|
return ProteinAlignFeature.find_by_sql('SELECT * FROM protein_align_feature WHERE seq_region_id = ' + self.seq_region.id.to_s + ' AND seq_region_start >= ' + self.start.to_s + ' AND seq_region_end <= ' + self.stop.to_s)
|
@@ -623,9 +591,8 @@ SQL
|
|
623
591
|
############################
|
624
592
|
|
625
593
|
|
626
|
-
#= DESCRIPTION
|
627
594
|
# Method to retrieve Variation features from Ensembl::Core::Slice objects
|
628
|
-
|
595
|
+
# @example
|
629
596
|
# slice = Slice.fetch_by_region('chromosome',1,50000,51000)
|
630
597
|
# variations = slice.get_variation_features
|
631
598
|
# variations.each do |vf|
|
@@ -642,17 +609,17 @@ SQL
|
|
642
609
|
Ensembl::Variation::VariationFeature.find(:all,:conditions => ["flags = 'genotyped' AND seq_region_id = ? AND seq_region_start >= ? AND seq_region_end <= ?",self.seq_region.seq_region_id,self.start,self.stop])
|
643
610
|
end
|
644
611
|
|
612
|
+
def get_structural_variations
|
613
|
+
variation_connection()
|
614
|
+
Ensembl::Variation::StructuralVariation.find(:all,:conditions => ["seq_region_id = ? AND seq_region_start >= ? AND seq_region_end <= ?",self.seq_region.seq_region_id,self.start,self.stop])
|
615
|
+
end
|
616
|
+
|
645
617
|
private
|
646
618
|
|
647
619
|
def variation_connection()
|
648
620
|
if !Ensembl::Variation::DBConnection.connected?
|
649
|
-
host,user,password,db_name,port = Ensembl::Core::DBConnection.get_info
|
650
|
-
|
651
|
-
species,release = $1,$2
|
652
|
-
Ensembl::Variation::DBConnection.connect(species,release.to_i,:username => user, :password => password,:host => host, :port => port)
|
653
|
-
else
|
654
|
-
raise NameError, "Can't get Variation Database name from #{db_name}. Are you using non conventional names?"
|
655
|
-
end
|
621
|
+
host,user,password,db_name,port,species,release = Ensembl::Core::DBConnection.get_info
|
622
|
+
Ensembl::Variation::DBConnection.connect(species,release.to_i,:username => user, :password => password,:host => host, :port => port)
|
656
623
|
end
|
657
624
|
|
658
625
|
end
|
@@ -660,25 +627,22 @@ SQL
|
|
660
627
|
|
661
628
|
end #Slice
|
662
629
|
|
663
|
-
# = DESCRIPTION
|
664
630
|
# The Gap class is similar to the Slice object, but describes a gap and
|
665
631
|
# therefore can easily be described by coordinate system and size.
|
666
632
|
#
|
667
633
|
class Gap
|
668
634
|
attr_accessor :coord_system, :size
|
669
635
|
|
670
|
-
# = DESCRIPTION
|
671
636
|
# Create a new Gap object from scratch.
|
672
637
|
#
|
673
|
-
#
|
638
|
+
# @example
|
674
639
|
# my_coord_system = CoordSystem.find_by_name('chromosome')
|
675
640
|
# # Create a gap of 10kb.
|
676
641
|
# gap = Gap.new(my_coord_system, 10000)
|
677
|
-
#
|
678
|
-
#
|
679
|
-
#
|
680
|
-
#
|
681
|
-
# *Returns*:: Gap object
|
642
|
+
#
|
643
|
+
# @param [CoordSystem] coord_system Coordinate system object
|
644
|
+
# @param [Integer] size Length of the gap
|
645
|
+
# @return [Gap] Gap object
|
682
646
|
def initialize(coord_system, size)
|
683
647
|
@coord_system, @size = coord_system, size
|
684
648
|
end
|