bio-ensembl 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. data/.document +5 -0
  2. data/Gemfile +20 -0
  3. data/Gemfile.lock +40 -0
  4. data/LICENSE.txt +20 -0
  5. data/README.rdoc +19 -0
  6. data/Rakefile +71 -0
  7. data/VERSION +1 -0
  8. data/bin/ensembl +40 -0
  9. data/bin/variation_effect_predictor +106 -0
  10. data/bio-ensembl.gemspec +190 -0
  11. data/lib/bio-ensembl.rb +65 -0
  12. data/lib/bio-ensembl/core/activerecord.rb +1812 -0
  13. data/lib/bio-ensembl/core/collection.rb +64 -0
  14. data/lib/bio-ensembl/core/project.rb +262 -0
  15. data/lib/bio-ensembl/core/slice.rb +657 -0
  16. data/lib/bio-ensembl/core/transcript.rb +409 -0
  17. data/lib/bio-ensembl/core/transform.rb +95 -0
  18. data/lib/bio-ensembl/db_connection.rb +205 -0
  19. data/lib/bio-ensembl/variation/activerecord.rb +536 -0
  20. data/lib/bio-ensembl/variation/variation_feature.rb +376 -0
  21. data/lib/bio-ensembl/variation/variation_feature62.rb +444 -0
  22. data/samples/ensembl_genomes_example.rb +60 -0
  23. data/samples/examples_perl_tutorial.rb +125 -0
  24. data/samples/small_example_ruby_api.rb +34 -0
  25. data/samples/variation_effect_predictor_data.txt +4 -0
  26. data/samples/variation_example.rb +67 -0
  27. data/test/data/seq_c6qbl.fa +10 -0
  28. data/test/data/seq_cso19_coding.fa +16 -0
  29. data/test/data/seq_cso19_transcript.fa +28 -0
  30. data/test/data/seq_drd3_gene.fa +838 -0
  31. data/test/data/seq_drd3_transcript.fa +22 -0
  32. data/test/data/seq_drd4_transcript.fa +24 -0
  33. data/test/data/seq_forward_composite.fa +1669 -0
  34. data/test/data/seq_par_boundary.fa +169 -0
  35. data/test/data/seq_rnd3_transcript.fa +47 -0
  36. data/test/data/seq_ub2r1_coding.fa +13 -0
  37. data/test/data/seq_ub2r1_gene.fa +174 -0
  38. data/test/data/seq_ub2r1_transcript.fa +26 -0
  39. data/test/data/seq_y.fa +2 -0
  40. data/test/default/test_connection.rb +60 -0
  41. data/test/default/test_releases.rb +130 -0
  42. data/test/ensembl_genomes/test_collection.rb +122 -0
  43. data/test/ensembl_genomes/test_gene.rb +46 -0
  44. data/test/ensembl_genomes/test_slice.rb +65 -0
  45. data/test/ensembl_genomes/test_variation.rb +38 -0
  46. data/test/helper.rb +18 -0
  47. data/test/release_50/core/test_project.rb +210 -0
  48. data/test/release_50/core/test_project_human.rb +52 -0
  49. data/test/release_50/core/test_relationships.rb +72 -0
  50. data/test/release_50/core/test_sequence.rb +170 -0
  51. data/test/release_50/core/test_slice.rb +116 -0
  52. data/test/release_50/core/test_transcript.rb +125 -0
  53. data/test/release_50/core/test_transform.rb +217 -0
  54. data/test/release_50/variation/test_activerecord.rb +138 -0
  55. data/test/release_50/variation/test_variation.rb +79 -0
  56. data/test/release_53/core/test_gene.rb +61 -0
  57. data/test/release_53/core/test_project.rb +91 -0
  58. data/test/release_53/core/test_project_human.rb +61 -0
  59. data/test/release_53/core/test_slice.rb +42 -0
  60. data/test/release_53/core/test_transform.rb +57 -0
  61. data/test/release_53/variation/test_activerecord.rb +137 -0
  62. data/test/release_53/variation/test_variation.rb +66 -0
  63. data/test/release_56/core/test_gene.rb +61 -0
  64. data/test/release_56/core/test_project.rb +91 -0
  65. data/test/release_56/core/test_slice.rb +49 -0
  66. data/test/release_56/core/test_transform.rb +57 -0
  67. data/test/release_56/variation/test_activerecord.rb +141 -0
  68. data/test/release_56/variation/test_consequence.rb +131 -0
  69. data/test/release_56/variation/test_variation.rb +63 -0
  70. data/test/release_60/core/test_gene.rb +61 -0
  71. data/test/release_60/core/test_project_human.rb +34 -0
  72. data/test/release_60/core/test_slice.rb +42 -0
  73. data/test/release_60/core/test_transcript.rb +120 -0
  74. data/test/release_60/core/test_transform.rb +57 -0
  75. data/test/release_60/variation/test_activerecord.rb +216 -0
  76. data/test/release_60/variation/test_consequence.rb +153 -0
  77. data/test/release_60/variation/test_variation.rb +64 -0
  78. data/test/release_62/core/test_gene.rb +42 -0
  79. data/test/release_62/variation/test_activerecord.rb +86 -0
  80. data/test/release_62/variation/test_consequence.rb +191 -0
  81. metadata +287 -0
@@ -0,0 +1,64 @@
1
+ #
2
+ # = ensembl/core/collection.rb
3
+ #
4
+ # Copyright:: Copyright (C) 2009 Francesco Strozzi <francesco.strozzi@gmail.com>
5
+ #
6
+ # License:: The Ruby License
7
+ #
8
+ # @author Francesco Strozzi
9
+
10
+ module Ensembl
11
+ nil
12
+ module Core
13
# Class to describe and handle multi-species ("collection") databases.
#
class Collection
  # Check whether the current core database is a multi-species database.
  # The decision is based purely on the database name reported by
  # Ensembl::Core::DBConnection.get_info: it has to look like
  # "<prefix>_collection_core_<suffix>".
  #
  # @return [Boolean] True if current db is a multi-species db; otherwise false.
  def self.check()
    # Only the database name is needed from the connection info.
    _host, _user, _password, db_name, _port = Ensembl::Core::DBConnection.get_info
    if db_name =~ /(\w+)_collection_core_.*/
      return true
    end
    return false
  end

  # Returns an array with all the species present in a collection database,
  # i.e. the meta_value of every meta record whose key is "species.db_name".
  #
  # @return [Array<String>] Array containing species names in the collection
  def self.species()
    return Meta.find_all_by_meta_key("species.db_name").map { |m| m.meta_value }
  end

  # Returns the species_id of a particular species present in the database.
  # The lookup is case-insensitive.
  #
  # @param [String] species Name of species
  # @return [Integer, nil] Species ID in the database, or nil if no meta
  #   record matches the name.
  def self.get_species_id(species)
    species = species.downcase
    # Pass the name as a bind parameter instead of interpolating it into
    # the statement: names containing a quote used to break the query and
    # allowed SQL injection.
    meta = Meta.find_by_sql(["SELECT * FROM meta WHERE LOWER(meta_value) = ?", species])[0]
    return nil if meta.nil?
    return meta.species_id
  end

  # Looks up the meta_coord entries for a table, restricted to the
  # coordinate systems of one species.
  # Used inside Slice#method_missing to filter the coord_system_id using a
  # particular species_id.
  #
  # @param [String] table_name Table name
  # @param [Integer] species_id ID of species in the database
  # @return [Array] Whatever MetaCoord.find_all_by_coord_system_id_and_table_name
  #   returns. NOTE(review): presumably MetaCoord records rather than bare
  #   coord_system IDs - confirm against the caller.
  def self.find_all_coord_by_table_name(table_name,species_id)
    all_ids = CoordSystem.find_all_by_species_id(species_id)
    return MetaCoord.find_all_by_coord_system_id_and_table_name(all_ids,table_name)
  end

end
61
+
62
+
63
+ end
64
+ end
@@ -0,0 +1,262 @@
1
+ #
2
+ # = ensembl/core/project.rb - project calculations for Ensembl Slice
3
+ #
4
+ # Copyright:: Copyright (C) 2009 Jan Aerts <http://jandot.myopenid.com>
5
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
6
+ #
7
+ # License:: The Ruby License
8
+ #
9
+ # @author Jan Aerts
10
+ # @author Francesco Strozzi
11
+ module Ensembl
12
+ module Core
13
class Slice
  # The Slice#project method is used to transfer coordinates from one
  # coordinate system to another. Suppose you have a slice on a
  # contig in human (let's say on contig AC000031.6.1.38703) and you
  # want to know the coordinates on the chromosome. This is a
  # projection of coordinates from a higher ranked coordinate system to
  # a lower ranked coordinate system. Projections can also be done
  # from a chromosome to the contig level. However, it might be possible
  # that more than one contig has to be included and that there exist
  # gaps between the contigs. The output of this method therefore is
  # an _array_ of Slice and Gap objects.
  #
  # At the moment, projections can only be done if the two coordinate
  # systems are linked directly in the 'assembly' table.
  #
  # Coordinate systems (and, for the first component, seq_regions) that
  # are looked up along the way are cached in Ensembl::SESSION.
  #
  # @example
  #  # Get a contig slice in cow and project to scaffold level
  #  # (i.e. going from a high rank coord system to a lower rank coord
  #  # system)
  #  source_slice = Slice.fetch_by_region('contig', 'AAFC03020247', 42, 2007)
  #  target_slices = source_slice.project('scaffold')
  #  puts target_slices.length       #--> 1
  #  puts target_slices[0].display_name  #--> scaffold:ChrUn.003.3522:6570:8535:1
  #
  #  # Get a chromosome slice in cow and project to scaffold level
  #  # (i.e. going from a low rank coord system to a higher rank coord
  #  # system)
  #  # The region 96652152..98000000 on BTA4 is covered by 2 scaffolds
  #  # that are separated by a gap.
  #  source_slice = Slice.fetch_by_region('chromosome','4', 96652152, 98000000)
  #  target_slices = source_slice.project('scaffold')
  #  puts target_slices.length       #--> 3
  #  first_bit, second_bit, third_bit = target_slices
  #  puts first_bit.display_name     #--> scaffold:Btau_3.1:Chr4.003.105:42:599579:1
  #  puts second_bit.class           #--> Gap
  #  puts third_bit.display_name     #--> scaffold:Btau_3.1:Chr4.003.106:1:738311:1
  #
  # @param [String] coord_system_name Name of coordinate system to project
  #   coordinates to ('toplevel' and 'seqlevel' are also accepted)
  # @return [Array<Slice, Gap>] Array of Slices and, if necessary, Gaps;
  #   empty if no assembly link covers the slice
  # @raise [NotImplementedError] if the slice overlaps a haplotype ('HAP')
  #   assembly exception
  def project(coord_system_name)
    answer = Array.new # an array of slices

    # Make sure the source coordinate system is in the session cache.
    unless Ensembl::SESSION.coord_systems.has_key?(self.seq_region.coord_system_id)
      Ensembl::SESSION.coord_systems[self.seq_region.coord_system_id] = self.seq_region.coord_system
      Ensembl::SESSION.coord_system_ids[Ensembl::SESSION.coord_systems[self.seq_region.coord_system_id].name] = self.seq_region.coord_system_id
    end
    source_coord_system = Ensembl::SESSION.coord_systems[self.seq_region.coord_system_id]

    # Resolve the target coordinate system (cached by name for plain names).
    target_coord_system = nil
    if coord_system_name == 'toplevel'
      target_coord_system = source_coord_system.find_toplevel
    elsif coord_system_name == 'seqlevel'
      target_coord_system = source_coord_system.find_seqlevel
    else
      unless Ensembl::SESSION.coord_system_ids.has_key?(coord_system_name)
        cs = source_coord_system.find_level(coord_system_name)
        Ensembl::SESSION.coord_systems[cs.id] = cs
        Ensembl::SESSION.coord_system_ids[cs.name] = cs.id
      end
      target_coord_system = Ensembl::SESSION.coord_systems[Ensembl::SESSION.coord_system_ids[coord_system_name]]
    end

    if target_coord_system.rank < source_coord_system.rank
      # We're going from component to assembly, which is easy.
      assembly_links = self.seq_region.assembly_links_as_component(source_coord_system)

      if assembly_links.length == 0
        return []
      else
        assembly_links.each do |assembly_link|
          target_seq_region = assembly_link.asm_seq_region
          target_start = self.start + assembly_link.asm_start - assembly_link.cmp_start
          target_stop = self.stop + assembly_link.asm_start - assembly_link.cmp_start
          target_strand = self.strand * assembly_link.ori # 1x1=>1, 1x-1=>-1, -1x-1=>1

          answer.push(Slice.new(target_seq_region, target_start, target_stop, target_strand))
        end
      end

    else
      # If we're going from assembly to component, the answer of the target method
      # is an array consisting of Slices intermitted with Gaps.

      # ASSEMBLY_EXCEPTIONS
      # CAUTION: there are exceptions to the assembly (stored in the assembly_exception)
      # table which make things a little bit more difficult... For example,
      # in human, the assembly data for the pseudo-autosomal region (PAR) of
      # Y is *not* stored in the assembly table. Instead, there is a record
      # in the assembly_exception table that says: "For chr Y positions 1
      # to 2709520, use chr X:1-2709520 for the assembly data."
      # As a solution, what we'll do here, is split the assembly up in blocks:
      # if a slice covers both the PAR and the allosomal region, we'll make
      # two subslices (let's call them blocks not to intercede with the
      # Slice#subslices method) and project these independently.
      assembly_exceptions = AssemblyException.find_all_by_seq_region_id(self.seq_region.id)
      if assembly_exceptions.length > 0
        # Check if this bit of the original slice is covered in the
        # assembly_exception table.
        overlapping_exceptions = Array.new
        assembly_exceptions.each do |ae|
          if Slice.new(self.seq_region, ae.seq_region_start, ae.seq_region_end).overlaps?(self)
            if ae.exc_type == 'HAP'
              raise NotImplementedError, "The haplotype exceptions are not implemented (yet). You can't project this slice."
            end
            overlapping_exceptions.push(ae)
          end
        end

        if overlapping_exceptions.length > 0
          # First get all assembly blocks from chromosome Y
          source_assembly_blocks = self.excise(overlapping_exceptions.collect{|e| e.seq_region_start .. e.seq_region_end})
          # And insert the blocks of chromosome X
          all_assembly_blocks = Array.new #both for chr X and Y
          # First do all exceptions between the first and last block
          previous_block = nil
          source_assembly_blocks.sort_by{|b| b.start}.each do |b|
            if previous_block.nil?
              all_assembly_blocks.push(b)
              previous_block = b
              next
            end
            # Find the exception record that ends right before this block
            exception = nil
            assembly_exceptions.each do |ae|
              if ae.seq_region_end == b.start - 1
                exception = ae
                break
              end
            end

            # NOTE(review): unlike the start/end cases further down, these
            # offsets use previous_block.stop and b.start without the +1/-1
            # adjustment - confirm whether that is intended.
            new_slice_start = exception.exc_seq_region_start + ( previous_block.stop - exception.seq_region_start )
            new_slice_stop = exception.exc_seq_region_start + ( b.start - exception.seq_region_start )
            new_slice_strand = self.strand * exception.ori
            new_slice = Slice.fetch_by_region(self.seq_region.coord_system.name, SeqRegion.find(exception.exc_seq_region_id).name, new_slice_start, new_slice_stop, new_slice_strand)

            all_assembly_blocks.push(new_slice)
            all_assembly_blocks.push(b)
            previous_block = b
          end

          # And then see if we have to add an additional one at the start or end
          first_block = source_assembly_blocks.sort_by{|b| b.start}[0]
          if first_block.start > self.start
            exception = assembly_exceptions.sort_by{|ae| ae.seq_region_start}[0]
            new_slice_start = exception.exc_seq_region_start + ( self.start - exception.seq_region_start )
            new_slice_stop = exception.exc_seq_region_start + ( first_block.start - 1 - exception.seq_region_start )
            new_slice_strand = self.strand * exception.ori
            new_slice = Slice.fetch_by_region(self.seq_region.coord_system.name, SeqRegion.find(exception.exc_seq_region_id).name, new_slice_start, new_slice_stop, new_slice_strand)

            all_assembly_blocks.unshift(new_slice)
          end

          last_block = source_assembly_blocks.sort_by{|b| b.start}[-1]
          if last_block.stop < self.stop
            exception = assembly_exceptions.sort_by{|ae| ae.seq_region_start}[-1]
            new_slice_start = exception.exc_seq_region_start + ( last_block.stop + 1 - exception.seq_region_start )
            new_slice_stop = exception.exc_seq_region_start + ( self.stop - exception.seq_region_start )
            new_slice_strand = self.strand * exception.ori
            new_slice = Slice.fetch_by_region(self.seq_region.coord_system.name, SeqRegion.find(exception.exc_seq_region_id).name, new_slice_start, new_slice_stop, new_slice_strand)

            # The trailing block belongs at the END of the list. (This used
            # to call Array#shift with the slice as argument, which raises
            # a TypeError instead of adding anything.)
            all_assembly_blocks.push(new_slice)
          end

          # Project every block on its own and concatenate the results.
          answer = Array.new
          all_assembly_blocks.each do |b|
            answer.push(b.project(coord_system_name))
          end
          answer.flatten!

          return answer
        end

      end
      # END OF ASSEMBLY_EXCEPTIONS

      # Get all AssemblyLinks starting from this assembly and for which
      # the cmp_seq_region.coord_system is what we want.
      assembly_links = self.seq_region.assembly_links_as_assembly(target_coord_system)

      # Now reject all the components that lie _before_ the source, then
      # reject all the components that lie _after_ the source.
      # Then sort based on their positions.
      sorted_overlapping_assembly_links = assembly_links.reject{|al| al.asm_end < self.start}.reject{|al| al.asm_start > self.stop}.sort_by{|al| al.asm_start}
      if sorted_overlapping_assembly_links.length == 0
        return []
      end

      # What we'll do, is create slices for all the underlying components,
      # including the first and the last one. At first, the first and last
      # components are added in their entirety and will only be cropped afterwards.
      previous_stop = nil
      sorted_overlapping_assembly_links.each_index do |i|
        this_link = sorted_overlapping_assembly_links[i]
        if i == 0
          # The first component's seq_region goes through the session cache.
          cmp_seq_region = nil
          if Ensembl::SESSION.seq_regions.has_key?(this_link.cmp_seq_region_id)
            cmp_seq_region = Ensembl::SESSION.seq_regions[this_link.cmp_seq_region_id]
          else
            cmp_seq_region = this_link.cmp_seq_region
            Ensembl::SESSION.seq_regions[cmp_seq_region.id] = cmp_seq_region
          end
          answer.push(Slice.new(cmp_seq_region, this_link.cmp_start, this_link.cmp_end, this_link.ori))
          next
        end
        previous_link = sorted_overlapping_assembly_links[i-1]

        # If there is a gap with the previous link: add a gap
        if this_link.asm_start > ( previous_link.asm_end + 1 )
          gap_size = this_link.asm_start - previous_link.asm_end - 1
          answer.push(Gap.new(target_coord_system, gap_size))
        end

        # And add the component itself as a Slice
        answer.push(Slice.new(this_link.cmp_seq_region, this_link.cmp_start, this_link.cmp_end, this_link.ori))
      end

      # Now see if we have to crop the first and/or last slice
      first_link = sorted_overlapping_assembly_links[0]
      if self.start > first_link.asm_start
        if first_link.ori == -1
          answer[0].stop = first_link.cmp_start + ( first_link.asm_end - self.start )
        else
          answer[0].start = first_link.cmp_start + ( self.start - first_link.asm_start )
        end
      end

      last_link = sorted_overlapping_assembly_links[-1]
      if self.stop < last_link.asm_end
        if last_link.ori == -1
          answer[-1].start = last_link.cmp_start + ( last_link.asm_end - self.stop)
        else
          answer[-1].stop = last_link.cmp_start + ( self.stop - last_link.asm_start )
        end
      end

      # And check if we have to add Ns at the front and/or back
      if self.start < first_link.asm_start
        gap_size = first_link.asm_start - self.start
        answer.unshift(Gap.new(target_coord_system, gap_size))
      end
      if self.stop > last_link.asm_end
        gap_size = self.stop - last_link.asm_end
        answer.push(Gap.new(target_coord_system, gap_size))
      end
    end
    return answer

  end
end
261
+ end
262
+ end
@@ -0,0 +1,657 @@
1
+ #
2
+ # = ensembl/core/slice.rb - General methods for Ensembl Slice
3
+ #
4
+ # Copyright:: Copyright (C) 2009 Jan Aerts <http://jandot.myopenid.com>
5
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
6
+ #
7
+ # License:: The Ruby License
8
+ #
9
+ # @author Jan Aerts
10
+ # @author Francesco Strozzi
11
+ nil
12
+ module Ensembl
13
+ nil
14
+ module Core
15
+
16
+ # From the perl API tutorial
17
+ # (http://www.ensembl.org/info/software/core/core_tutorial.html): "A
18
+ # Slice object represents a continuous region of a genome. Slices can be
19
+ # used to obtain sequence, features or other information from a
20
+ # particular region of interest."
21
+ #
22
+ # In contrast to almost all other classes of Ensembl::Core,
23
+ # the Slice class is not based on ActiveRecord.
24
+ #
25
+ # @example
26
+ # chr4 = SeqRegion.find_by_name('4')
27
+ # my_slice = Slice.new(chr4, 95000, 98000, -1)
28
+ # puts my_slice.display_name #--> 'chromosome:Btau_3.1:4:95000:98000:-1'
29
+ class Slice
30
+ attr_accessor :seq_region, :start, :stop, :strand, :seq
31
+
32
+ #################
33
+ ## CREATE A SLICE
34
+ #################
35
+
36
# Create a new Slice object from scratch.
#
# @example
#  chr4 = SeqRegion.find_by_name('4')
#  my_slice = Slice.new(chr4, 95000, 98000, -1)
#
# @param [SeqRegion] seq_region SeqRegion object
# @param [Integer, nil] start Start position of the slice on the seq_region
#   (nil means 1)
# @param [Integer, nil] stop Stop position of the slice on the seq_region
#   (nil or omitted means the length of the seq_region)
# @param [Integer] strand Strand that the slice should be
# @return [Slice] Slice object
# @raise [RuntimeError] if seq_region is not an Ensembl::Core::SeqRegion
def initialize(seq_region, start = 1, stop = nil, strand = 1)
  # Validate first. The stop default used to be `seq_region.length` in the
  # signature, which was evaluated before this check and turned a wrong
  # argument into a NoMethodError instead of the message below.
  unless seq_region.class == Ensembl::Core::SeqRegion
    raise 'First argument has to be a Ensembl::Core::SeqRegion object'
  end
  start = 1 if start.nil?
  stop = seq_region.length if stop.nil?
  @seq_region, @start, @stop, @strand = seq_region, start, stop, strand
  @seq = nil # sequence is fetched lazily
end
60
+
61
# Create a Slice without first creating the SeqRegion object.
#
# When no version is given and several coordinate systems share the
# name, the one with the lowest rank is used.
#
# @example
#  my_slice_1 = Slice.fetch_by_region('chromosome','4',95000,98000,1)
#
# @param [String] coord_system_name Name of coordinate system
# @param [String] seq_region_name name of the seq_region
# @param [Integer] start Start position of the slice on the seq_region
# @param [Integer] stop Stop position of the slice on the seq_region
# @param [Integer] strand Strand that the slice should be
# @param [String] species Name of species in case of multi-species database
# @param [Integer] version Version number of the coordinate system
# @return [Slice] Slice object
# @raise [ArgumentError] in a multi-species database, if no species is
#   given or the species is unknown
# @raise [RuntimeError] if the coordinate system or seq_region is not found
def self.fetch_by_region(coord_system_name, seq_region_name, start = nil, stop = nil, strand = 1, species = Ensembl::SESSION.collection_species ,version = nil)
  all_coord_systems = nil
  if Collection.check
    # The nil check has to come before downcase; it used to come after,
    # so a missing species surfaced as a NoMethodError.
    if species.nil?
      raise ArgumentError, "When using multi-species db, you must pass a species name to get the correct Slice"
    end
    species = species.downcase
    species_id = Collection.get_species_id(species)
    raise ArgumentError, "No species found in the database with this name: #{species}" if species_id.nil?
    all_coord_systems = Ensembl::Core::CoordSystem.find_all_by_name_and_species_id(coord_system_name,species_id)
  else
    all_coord_systems = Ensembl::Core::CoordSystem.find_all_by_name(coord_system_name)
  end

  coord_system = if version.nil? # Take the version with the lower rank
    all_coord_systems.min_by { |cs| cs.rank }
  else
    all_coord_systems.select { |cs| cs.version == version }[0]
  end
  unless coord_system.class == Ensembl::Core::CoordSystem
    message = "Couldn't find a Ensembl::Core::CoordSystem object with name '#{coord_system_name}'"
    # Interpolate: version may be an Integer, and String + Integer raises.
    message += " and version '#{version}'" unless version.nil?
    raise message
  end

  seq_region = Ensembl::Core::SeqRegion.find_by_name_and_coord_system_id(seq_region_name, coord_system.id)
  unless seq_region.class == Ensembl::Core::SeqRegion
    raise "Couldn't find a Ensembl::Core::SeqRegion object with the name '#{seq_region_name}'"
  end

  return Ensembl::Core::Slice.new(seq_region, start, stop, strand)
end
110
+
111
# Build the Slice that spans a gene, optionally widened on both sides.
#
# @example
#  my_slice = Slice.fetch_by_gene_stable_id('ENSG00000184895')
#
# @param [String] gene_stable_id Ensembl gene stable ID
# @param [Integer] flanking_seq_length Number of positions added before the
#   gene start and after the gene end
# @return [Slice] Slice on the gene's seq_region and strand
def self.fetch_by_gene_stable_id(gene_stable_id, flanking_seq_length = 0)
  stable_id_record = Ensembl::Core::GeneStableId.find_by_stable_id(gene_stable_id)
  gene = stable_id_record.gene
  region = gene.seq_region

  flanked_start = gene.seq_region_start - flanking_seq_length
  flanked_stop = gene.seq_region_end + flanking_seq_length
  Ensembl::Core::Slice.new(region, flanked_start, flanked_stop, gene.seq_region_strand)
end
126
+
127
# Build the Slice that spans a transcript, optionally widened on both sides.
#
# @example
#  my_slice = Slice.fetch_by_transcript_stable_id('ENST00000383673')
#
# @param [String] transcript_stable_id Ensembl transcript stable ID
# @param [Integer] flanking_seq_length Number of positions added before the
#   transcript start and after the transcript end
# @return [Slice] Slice on the transcript's seq_region and strand
def self.fetch_by_transcript_stable_id(transcript_stable_id, flanking_seq_length = 0)
  stable_id_record = Ensembl::Core::TranscriptStableId.find_by_stable_id(transcript_stable_id)
  transcript = stable_id_record.transcript
  region = transcript.seq_region

  flanked_start = transcript.seq_region_start - flanking_seq_length
  flanked_stop = transcript.seq_region_end + flanking_seq_length
  Ensembl::Core::Slice.new(region, flanked_start, flanked_stop, transcript.seq_region_strand)
end
142
+
143
# Create an array of all Slices for a given coordinate system: one Slice
# per seq_region, each covering the whole seq_region.
#
# @example
#  slices = Slice.fetch_all('chromosome')
#
# @param [String] coord_system_name Name of coordinate system
# @param [String] species Name of species (required for multi-species
#   databases)
# @param [Integer] version Version of coordinate system
# @return [Array<Slice>] Array of Slice objects
# @raise [ArgumentError] in a multi-species database, if no species is
#   given or the species is unknown
# @raise [RuntimeError] if the coordinate system is not found
def self.fetch_all(coord_system_name = 'chromosome',species = Ensembl::SESSION.collection_species ,version = nil)
  coord_system = nil
  if Collection.check
    # Fail with a clear message instead of a NoMethodError on nil.downcase.
    if species.nil?
      raise ArgumentError, "When using multi-species db, you must pass a species name to get the correct Slices"
    end
    species = species.downcase
    species_id = Collection.get_species_id(species)
    raise ArgumentError, "No specie found in the database with this name: #{species}" if species_id.nil?
    if version.nil?
      coord_system = Ensembl::Core::CoordSystem.find_by_name_and_species_id(coord_system_name,species_id)
    else
      coord_system = Ensembl::Core::CoordSystem.find_by_name_and_species_id_and_version(coord_system_name, species_id, version)
    end
  else
    if version.nil?
      coord_system = Ensembl::Core::CoordSystem.find_by_name(coord_system_name)
    else
      coord_system = Ensembl::Core::CoordSystem.find_by_name_and_version(coord_system_name, version)
    end
  end

  # An unknown coordinate system used to end in a NoMethodError on nil.
  if coord_system.nil?
    raise "Couldn't find a Ensembl::Core::CoordSystem object with name '#{coord_system_name}'"
  end

  return coord_system.seq_regions.map { |seq_region| Ensembl::Core::Slice.new(seq_region) }
end
176
+
177
+ ##################
178
+ ## GENERAL METHODS
179
+ ##################
180
+
181
# Number of positions covered by the slice; both the start and the stop
# position are counted.
#
# @example
#  chr4 = SeqRegion.find_by_name('4')
#  my_slice = Slice.new(chr4, 95000, 98000, -1)
#  puts my_slice.length   #--> 3001
#
# @return [Integer] Length of the slice
def length
  last_position = self.stop
  first_position = self.start
  last_position - first_position + 1
end
192
+
193
# The display_name method returns a full name of this slice: the name and
# version of the coordinate system, the name of the sequence region, the
# start and stop positions on that sequence region and the strand, joined
# with colons. E.g. for a slice of bovine chromosome 4 from position 95000
# to 98000 on the reverse strand, the display_name would look like:
# chromosome:Btau_3.1:4:95000:98000:-1
#
# @example
#  puts my_slice.display_name
#
# @return [String] Nicely formatted name of the Slice
def display_name
  region = self.seq_region
  coord_system = region.coord_system
  parts = [
    coord_system.name,
    coord_system.version,
    region.name,
    self.start.to_s,
    self.stop.to_s,
    self.strand.to_s
  ]
  parts.join(':')
end
206
+ alias to_s display_name
207
+
208
# The Slice#overlaps? method checks if this slice overlaps another one.
# The other slice has to be on the same coordinate system.
#
# @example
#  slice_a = Slice.fetch_by_region('chromosome','X',1,1000)
#  slice_b = Slice.fetch_by_region('chromosome','X',900,1500)
#  if slice_a.overlaps?(slice_b)
#    puts "The slices overlap"
#  end
#
# @param [Slice] other_slice Another slice
# @return [Boolean] True if slices overlap, otherwise false
# @raise [RuntimeError] if other_slice is not a Slice, or is on a
#   different coordinate system
def overlaps?(other_slice)
  # This check used to read `! other_slice.class == Slice`, which Ruby
  # parses as `(!other_slice.class) == Slice` and is therefore never true.
  unless other_slice.is_a?(Slice)
    raise RuntimeError, "The Slice#overlaps? method takes a Slice object as its arguments."
  end
  if self.seq_region.coord_system != other_slice.seq_region.coord_system
    raise RuntimeError, "The argument slice of Slice#overlaps? has to be in the same coordinate system, but were " + self.seq_region.coord_system.name + " and " + other_slice.seq_region.coord_system.name
  end

  self_range = self.start .. self.stop
  other_range = other_slice.start .. other_slice.stop

  # Two ranges overlap exactly when one of them contains the other's start.
  return self_range.include?(other_slice.start) || other_range.include?(self.start)
end
237
+
238
# The Slice#within? method checks if this slice is contained within
# another one. The other slice has to be on the same coordinate system.
#
# @example
#  slice_a = Slice.fetch_by_region('chromosome','X',1,1000)
#  slice_b = Slice.fetch_by_region('chromosome','X',900,950)
#  if slice_b.within?(slice_a)
#    puts "Slice b is within slice a"
#  end
#
# @param [Slice] other_slice Another slice
# @return [Boolean] True if this slice is within other_slice, otherwise false
# @raise [RuntimeError] if other_slice is not a Slice, or is on a
#   different coordinate system
def within?(other_slice)
  # This check used to read `! other_slice.class == Slice`, which Ruby
  # parses as `(!other_slice.class) == Slice` and is therefore never true.
  # The messages also named Slice#overlaps? instead of this method.
  unless other_slice.is_a?(Slice)
    raise RuntimeError, "The Slice#within? method takes a Slice object as its argument."
  end
  if self.seq_region.coord_system != other_slice.seq_region.coord_system
    raise RuntimeError, "The argument slice of Slice#within? has to be in the same coordinate system, but were " + self.seq_region.coord_system.name + " and " + other_slice.seq_region.coord_system.name
  end

  other_range = other_slice.start .. other_slice.stop

  # Contained means both ends of this slice fall inside the other slice.
  return other_range.include?(self.start) && other_range.include?(self.stop)
end
267
+
268
# The Slice#excise method removes a bit of a slice and returns the
# remainder as separate slices. The returned slices are created without
# an explicit strand.
#
# @example
#  original_slice = Slice.fetch_by_region('chromosome','X',1,10000)
#  new_slices = original_slice.excise([500..750, 1050..1075])
#  new_slices.each do |s|
#    puts s.display_name
#  end
#
#  # result:
#  #   chromosome:X:1:499:1
#  #   chromosome:X:751:1049:1
#  #   chromosome:X:1076:10000:1
#
# @param [Array<Range>] ranges Array of ranges to excise
# @return [Array<Slice>] Array of slices
# @raise [RuntimeError] if the argument is not an Array of Range objects
def excise(ranges)
  unless ranges.class == Array && ranges.all? { |r| r.class == Range }
    raise RuntimeError, "Argument should be an array of ranges"
  end

  answer = Array.new
  previous_excised_stop = self.start - 1
  ranges.sort_by { |r| r.first }.each do |r|
    subslice_start = previous_excised_stop + 1
    # Never let a kept piece run past the end of this slice.
    subslice_stop = [r.first - 1, self.stop].min
    if subslice_start <= subslice_stop
      answer.push(Slice.new(self.seq_region, subslice_start, subslice_stop))
    end
    # Only ever move forward: a range that lies before the slice, or inside
    # an earlier range, used to pull the cursor back and made the next
    # piece include positions that were excised or outside the slice.
    previous_excised_stop = [previous_excised_stop, r.last].max
    # Everything up to the end of the slice has been excised.
    return answer if previous_excised_stop > self.stop
  end

  # NOTE(review): when a range ends exactly at self.stop this appends an
  # empty slice (start == stop + 1). Left as is on purpose: Slice#project
  # takes the first/last element of this result without a nil check.
  answer.push(Slice.new(self.seq_region, previous_excised_stop + 1, self.stop))
  return answer
end
311
+
312
+ # Get the sequence of the Slice as a Bio::Sequence::NA object.
313
+ #
314
+ # If the Slice is on a CoordSystem that is not seq_level, it will try
315
+ # to project it coordinates to the CoordSystem that does. At this
316
+ # moment, this is only done if there is a direct link between the
317
+ # two coordinate systems. (The perl API allows for following an
318
+ # indirect link as well.)
319
+ #
320
+ # Caution: Bio::Sequence::NA makes the sequence
321
+ # downcase!!
322
+ #
323
+ # @example
324
+ # my_slice.seq.seq.to_s
325
+ #
326
+ # @return [Bio::Sequence::NA] Slice sequence as a Bio::Sequence::NA object
327
def seq
  # The sequence is built once and cached in @seq; later calls return the
  # cached object untouched.
  return @seq unless @seq.nil?

  seqlevel_id = Ensembl::SESSION.seqlevel_id
  on_seqlevel = !seqlevel_id.nil? && self.seq_region.coord_system_id == seqlevel_id

  if on_seqlevel
    # Sequence is stored at this level: read it straight off the seq_region.
    raw_bases = self.seq_region.subseq(self.start, self.stop)
    @seq = Bio::Sequence::NA.new(raw_bases)
  else
    # Otherwise project onto 'seqlevel' and stitch the pieces together.
    @target_slices = self.project('seqlevel')
    assembled = @target_slices.inject(String.new) do |so_far, component|
      piece = if component.class == Slice
                component.seq # recursive: each projected slice builds its own sequence
              else
                'N' * (component.length) # anything else is a gap, padded with Ns
              end
      so_far + piece
    end
    @seq = Bio::Sequence::NA.new(assembled)
  end

  @seq.reverse_complement! if self.strand == -1
  return @seq
end
359
+
360
# Placeholder: no repeat-masked sequence is available here; every call
# raises NotImplementedError.
def repeatmasked_seq
  raise(NotImplementedError)
end
363
+
364
+ # Take a sub_slice from an existing one.
365
+ #
366
+ # @example
367
+ # my_sub_slice = my_slice.sub_slice(400,500)
368
+ #
369
+ # @param [Integer] start Start of subslice, in seq_region coordinates (defaults to the start of this slice)
370
+ # @param [Integer] stop Stop of subslice, in seq_region coordinates (defaults to the stop of this slice)
371
+ # @return [Slice] Slice object
372
def sub_slice(start = self.start, stop = self.stop)
  # Same seq_region and strand as the receiver; only the coordinates change.
  # Using self.class keeps the result the same class as the receiver.
  region = self.seq_region
  orientation = self.strand
  self.class.new(region, start, stop, orientation)
end
375
+
376
+ # Creates overlapping subslices for a given Slice.
377
+ #
378
+ # @example
379
+ # my_slice.split(50000, 250).each do |sub_slice|
380
+ # puts sub_slice.display_name
381
+ # end
382
+ #
383
+ # @param [Integer] max_size Maximal size of subslices
384
+ # @param [Integer] overlap Overlap in bp between consecutive subslices
385
+ # @return [Array<Slice>] Array of Slice objects
386
def split(max_size = 100000, overlap = 0)
  # Each sub-slice starts (max_size - overlap) bases after the previous one,
  # so consecutive sub-slices share exactly +overlap+ bases.
  # Raises ArgumentError when the overlap leaves no room to advance.
  stride = max_size - overlap
  if stride <= 0
    raise ArgumentError, "overlap (#{overlap}) must be smaller than max_size (#{max_size})"
  end

  sub_slices = []
  chunk_start = self.start
  loop do
    # Clamp to the end of the slice so the last piece never runs past it.
    chunk_stop = [chunk_start + max_size - 1, self.stop].min
    sub_slices.push(self.sub_slice(chunk_start, chunk_stop))
    # Stop as soon as a piece reaches the end of the slice; this also
    # guarantees exactly one piece when the slice is shorter than max_size.
    break if chunk_stop >= self.stop
    chunk_start += stride
  end
  sub_slices
end
396
+
397
+ ############################
398
+ ## GET ELEMENTS WITHIN SLICE
399
+ ############################
400
+
401
+ #--
402
+ # As there should be 'getters' for a lot of classes, we'll implement
403
+ # this with method_missing. For some of the original methods, see the end
404
+ # of this file.
405
+ #
406
+ # The optional argument is either 'true' or 'false' (default = false).
407
+ # False if the features have to be completely contained within the slice;
408
+ # true if a partial overlap is sufficient.
409
+ #++
410
+ # Don't use this method yourself.
411
+ def method_missing(method_name, *args)
412
+ table_name = method_name.to_s.singularize
413
+ class_name = table_name.camelcase
414
+
415
+ # Convert to the class object
416
+ target_class = nil
417
+ ObjectSpace.each_object(Class) do |o|
418
+ if o.name =~ /^Ensembl::Core::#{class_name}$/
419
+ target_class = o
420
+ end
421
+ end
422
+
423
+ # If it exists, see if it implements Sliceable
424
+ if ! target_class.nil? and target_class.include?(Sliceable)
425
+ inclusive = false
426
+ if [TrueClass, FalseClass].include?(args[0].class)
427
+ inclusive = args[0]
428
+ end
429
+ return self.get_objects(target_class, table_name, inclusive)
430
+ end
431
+
432
+ raise NoMethodError
433
+
434
+ end
435
+
436
+ # Don't use this method yourself.
437
def get_objects(target_class, table_name, inclusive = false)
  # Collects the rows of +table_name+ (as +target_class+ objects) that lie on
  # this slice, both on its own coordinate system and on every other
  # coordinate system that carries this feature type and to which the slice
  # can be projected. With inclusive = false a feature must lie completely
  # inside the slice; with true any overlap is enough.
  answer = []

  # Which coordinate systems have features of this type on them?
  if Collection.check
    meta_coords = Collection.find_all_coord_by_table_name(table_name, self.seq_region.coord_system.species_id)
  else
    meta_coords = MetaCoord.find_all_by_table_name(table_name)
  end
  coord_system_ids_with_features = meta_coords.collect { |mc| mc.coord_system_id }

  # Features annotated directly on the slice's own coordinate system.
  own_coord_system_id = self.seq_region.coord_system_id
  if coord_system_ids_with_features.include?(own_coord_system_id)
    sql = feature_query_for_region(table_name, self.seq_region.id, self.start, self.stop, inclusive)
    answer.push(target_class.find_by_sql(sql))
    # Already handled; do not project the slice onto itself below.
    coord_system_ids_with_features.delete(own_coord_system_id)
  end

  # Transform the original slice to the other coord systems and get those
  # features as well. Only 'direct' projections can be made at the moment.
  coord_system_ids_with_features.each do |target_coord_system_id|
    target_slices = self.project(CoordSystem.find(target_coord_system_id).name)
    target_slices.each do |slice|
      # A projection can also contain non-Slice components (gaps); those
      # are not queried.
      next unless slice.class == Slice
      sql = feature_query_for_region(table_name, slice.seq_region.id, slice.start, slice.stop, inclusive)
      answer.push(target_class.find_by_sql(sql))
    end
  end

  # Each find_by_sql call contributed an array; merge and de-duplicate.
  answer.flatten!
  answer.uniq!

  return answer
end

# Builds the SELECT statement that fetches the rows of +table_name+ on one
# seq_region between +start+ and +stop+ (overlapping when +inclusive+ is
# true, fully contained otherwise).
def feature_query_for_region(table_name, seq_region_id, start, stop, inclusive)
  if inclusive
    <<-SQL
      SELECT * FROM #{table_name}
      WHERE seq_region_id = #{seq_region_id}
      AND (( seq_region_start BETWEEN #{start} AND #{stop} )
      OR ( seq_region_end BETWEEN #{start} AND #{stop} )
      OR ( seq_region_start <= #{start} AND seq_region_end >= #{stop} )
      )
    SQL
  else
    <<-SQL
      SELECT * FROM #{table_name}
      WHERE seq_region_id = #{seq_region_id}
      AND seq_region_start >= #{start}
      AND seq_region_end <= #{stop}
    SQL
  end
end
private :feature_query_for_region
507
+
508
+
509
+ # Get all MiscFeatures that are located on a Slice for a given MiscSet.
510
+ #
511
+ # Pitfall: just looks at the CoordSystem that the Slice is located on.
512
+ # For example, if a Slice is located on a SeqRegion on the 'chromosome'
513
+ # CoordSystem, but all misc_features are annotated on SeqRegions of
514
+ # the 'scaffold' CoordSystem, this method will return an empty array.
515
+ #
516
+ # @example
517
+ # my_slice.misc_features('encode').each do |feature|
518
+ # puts feature.to_yaml
519
+ # end
520
+ #
521
+ # @param [String] code Code of MiscSet
522
+ # @return [Array<MiscFeature>] Array of MiscFeature objects
523
def misc_features(code = nil)
  # With no code (nil) every MiscFeature on the slice is returned; with a
  # code only those belonging to a MiscSet with that code.
  self.seq_region.misc_features.select do |mf|
    # A feature counts as "on the slice" when it lies entirely within it,
    # boundaries included (same rule as the SQL-based getters in this class).
    next false unless mf.seq_region_start >= self.start && mf.seq_region_end <= self.stop

    # A feature can belong to several MiscSets (or none), so look at all of
    # them rather than only the first.
    code.nil? || mf.misc_sets.any? { |misc_set| misc_set.code == code }
  end
end
542
+
543
+ # Get all DnaAlignFeatures that are located on a Slice for a given Analysis.
544
+ #
545
+ # Pitfall: just looks at the CoordSystem that the Slice is located on.
546
+ # For example, if a Slice is located on a SeqRegion on the 'chromosome'
547
+ # CoordSystem, but all dna_align_features are annotated on SeqRegions of
548
+ # the 'scaffold' CoordSystem, this method will return an empty array.
549
+ #
550
+ # @example
551
+ # my_slice.dna_align_features('Vertrna').each do |feature|
552
+ # puts feature.to_yaml
553
+ # end
554
+ #
555
+ # @param [String] analysis_name Name of analysis
556
+ # @return [Array<DnaAlignFeature>] Array of DnaAlignFeature objects
557
def dna_align_features(analysis_name = nil)
  # Features that lie completely within the slice on its own seq_region.
  sql = "SELECT * FROM dna_align_feature WHERE seq_region_id = #{self.seq_region.id}" \
        " AND seq_region_start >= #{self.start} AND seq_region_end <= #{self.stop}"

  unless analysis_name.nil?
    analysis = Analysis.find_by_logic_name(analysis_name)
    # An analysis that does not exist cannot have produced any features;
    # return an empty result instead of failing on nil.
    return [] if analysis.nil?
    sql += " AND analysis_id = #{analysis.id}"
  end

  DnaAlignFeature.find_by_sql(sql)
end
565
+
566
+ # Get all ProteinAlignFeatures that are located on a Slice for a given Analysis.
567
+ #
568
+ # Pitfall: just looks at the CoordSystem that the Slice is located on.
569
+ # For example, if a Slice is located on a SeqRegion on the 'chromosome'
570
+ # CoordSystem, but all protein_align_features are annotated on SeqRegions of
571
+ # the 'scaffold' CoordSystem, this method will return an empty array.
572
+ #
573
+ # @example
574
+ # my_slice.protein_align_features('Uniprot').each do |feature|
575
+ # puts feature.to_yaml
576
+ # end
577
+ #
578
+ # @param [String] analysis_name Name of analysis
579
+ # @return [Array<ProteinAlignFeature>] Array of ProteinAlignFeature objects
580
def protein_align_features(analysis_name = nil)
  # analysis_name is optional (nil means "any analysis"), matching
  # dna_align_features.
  # Features that lie completely within the slice on its own seq_region.
  sql = "SELECT * FROM protein_align_feature WHERE seq_region_id = #{self.seq_region.id}" \
        " AND seq_region_start >= #{self.start} AND seq_region_end <= #{self.stop}"

  unless analysis_name.nil?
    analysis = Analysis.find_by_logic_name(analysis_name)
    # An analysis that does not exist cannot have produced any features;
    # return an empty result instead of failing on nil.
    return [] if analysis.nil?
    sql += " AND analysis_id = #{analysis.id}"
  end

  ProteinAlignFeature.find_by_sql(sql)
end
588
+
589
+ ############################
590
+ ## VARIATION METHODS
591
+ ############################
592
+
593
+
594
+ # Method to retrieve Variation features from Ensembl::Core::Slice objects
595
+ # @example
596
+ # slice = Slice.fetch_by_region('chromosome',1,50000,51000)
597
+ # variations = slice.get_variation_features
598
+ # variations.each do |vf|
599
+ # puts vf.variation_name, vf.allele_string
600
+ # puts vf.variation.ancestral_allele
601
+ # end
602
def get_variation_features
  # Make sure the variation database is connected before querying it.
  variation_connection()
  region_id = self.seq_region.seq_region_id
  # Only features lying completely within the slice are selected.
  conditions = ["seq_region_id = ? AND seq_region_start >= ? AND seq_region_end <= ?", region_id, self.start, self.stop]
  Ensembl::Variation::VariationFeature.find(:all, :conditions => conditions)
end
606
+
607
# Fetches the VariationFeatures whose flags column equals 'genotyped' and
# that lie completely within the slice.
def get_genotyped_variation_features
  # Make sure the variation database is connected before querying it.
  variation_connection()
  region_id = self.seq_region.seq_region_id
  conditions = ["flags = 'genotyped' AND seq_region_id = ? AND seq_region_start >= ? AND seq_region_end <= ?", region_id, self.start, self.stop]
  Ensembl::Variation::VariationFeature.find(:all, :conditions => conditions)
end
611
+
612
# Fetches the StructuralVariation records that lie completely within the
# slice.
def get_structural_variations
  # Make sure the variation database is connected before querying it.
  variation_connection()
  region_id = self.seq_region.seq_region_id
  conditions = ["seq_region_id = ? AND seq_region_start >= ? AND seq_region_end <= ?", region_id, self.start, self.stop]
  Ensembl::Variation::StructuralVariation.find(:all, :conditions => conditions)
end
616
+
617
+ private
618
+
619
# Connects to the variation database unless a connection already exists,
# reusing the host, credentials, port, species and release reported by the
# core connection.
def variation_connection()
  return if Ensembl::Variation::DBConnection.connected?

  # get_info also reports the core database name, which is not needed here.
  host, user, password, _db_name, port, species, release = Ensembl::Core::DBConnection.get_info
  Ensembl::Variation::DBConnection.connect(species, release.to_i, :username => user, :password => password, :host => host, :port => port)
end
626
+
627
+
628
+ end #Slice
629
+
630
+ # The Gap class is similar to the Slice object, but describes a gap and
631
+ # therefore can easily be described by coordinate system and size.
632
+ #
633
class Gap
  attr_accessor :coord_system, :size

  # Builds a gap of +size+ positions on +coord_system+.
  #
  # @example
  #  my_coord_system = CoordSystem.find_by_name('chromosome')
  #  # Create a gap of 10kb.
  #  gap = Gap.new(my_coord_system, 10000)
  #
  # @param [CoordSystem] coord_system Coordinate system object
  # @param [Integer] size Length of the gap
  # @return [Gap] Gap object
  def initialize(coord_system, size)
    @coord_system = coord_system
    @size = size
  end

  # +length+ is another name for the +size+ reader.
  alias_method :length, :size

  # Label of the form "<coord system name>:gap:<size>".
  #
  # @return [String] Display name of the gap
  def display_name
    @coord_system.name + ":gap:#{@size}"
  end
end #Gap
655
+
656
+ end #Core
657
+ end #Ensembl