bio-ensembl 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (81) hide show
  1. data/.document +5 -0
  2. data/Gemfile +20 -0
  3. data/Gemfile.lock +40 -0
  4. data/LICENSE.txt +20 -0
  5. data/README.rdoc +19 -0
  6. data/Rakefile +71 -0
  7. data/VERSION +1 -0
  8. data/bin/ensembl +40 -0
  9. data/bin/variation_effect_predictor +106 -0
  10. data/bio-ensembl.gemspec +190 -0
  11. data/lib/bio-ensembl.rb +65 -0
  12. data/lib/bio-ensembl/core/activerecord.rb +1812 -0
  13. data/lib/bio-ensembl/core/collection.rb +64 -0
  14. data/lib/bio-ensembl/core/project.rb +262 -0
  15. data/lib/bio-ensembl/core/slice.rb +657 -0
  16. data/lib/bio-ensembl/core/transcript.rb +409 -0
  17. data/lib/bio-ensembl/core/transform.rb +95 -0
  18. data/lib/bio-ensembl/db_connection.rb +205 -0
  19. data/lib/bio-ensembl/variation/activerecord.rb +536 -0
  20. data/lib/bio-ensembl/variation/variation_feature.rb +376 -0
  21. data/lib/bio-ensembl/variation/variation_feature62.rb +444 -0
  22. data/samples/ensembl_genomes_example.rb +60 -0
  23. data/samples/examples_perl_tutorial.rb +125 -0
  24. data/samples/small_example_ruby_api.rb +34 -0
  25. data/samples/variation_effect_predictor_data.txt +4 -0
  26. data/samples/variation_example.rb +67 -0
  27. data/test/data/seq_c6qbl.fa +10 -0
  28. data/test/data/seq_cso19_coding.fa +16 -0
  29. data/test/data/seq_cso19_transcript.fa +28 -0
  30. data/test/data/seq_drd3_gene.fa +838 -0
  31. data/test/data/seq_drd3_transcript.fa +22 -0
  32. data/test/data/seq_drd4_transcript.fa +24 -0
  33. data/test/data/seq_forward_composite.fa +1669 -0
  34. data/test/data/seq_par_boundary.fa +169 -0
  35. data/test/data/seq_rnd3_transcript.fa +47 -0
  36. data/test/data/seq_ub2r1_coding.fa +13 -0
  37. data/test/data/seq_ub2r1_gene.fa +174 -0
  38. data/test/data/seq_ub2r1_transcript.fa +26 -0
  39. data/test/data/seq_y.fa +2 -0
  40. data/test/default/test_connection.rb +60 -0
  41. data/test/default/test_releases.rb +130 -0
  42. data/test/ensembl_genomes/test_collection.rb +122 -0
  43. data/test/ensembl_genomes/test_gene.rb +46 -0
  44. data/test/ensembl_genomes/test_slice.rb +65 -0
  45. data/test/ensembl_genomes/test_variation.rb +38 -0
  46. data/test/helper.rb +18 -0
  47. data/test/release_50/core/test_project.rb +210 -0
  48. data/test/release_50/core/test_project_human.rb +52 -0
  49. data/test/release_50/core/test_relationships.rb +72 -0
  50. data/test/release_50/core/test_sequence.rb +170 -0
  51. data/test/release_50/core/test_slice.rb +116 -0
  52. data/test/release_50/core/test_transcript.rb +125 -0
  53. data/test/release_50/core/test_transform.rb +217 -0
  54. data/test/release_50/variation/test_activerecord.rb +138 -0
  55. data/test/release_50/variation/test_variation.rb +79 -0
  56. data/test/release_53/core/test_gene.rb +61 -0
  57. data/test/release_53/core/test_project.rb +91 -0
  58. data/test/release_53/core/test_project_human.rb +61 -0
  59. data/test/release_53/core/test_slice.rb +42 -0
  60. data/test/release_53/core/test_transform.rb +57 -0
  61. data/test/release_53/variation/test_activerecord.rb +137 -0
  62. data/test/release_53/variation/test_variation.rb +66 -0
  63. data/test/release_56/core/test_gene.rb +61 -0
  64. data/test/release_56/core/test_project.rb +91 -0
  65. data/test/release_56/core/test_slice.rb +49 -0
  66. data/test/release_56/core/test_transform.rb +57 -0
  67. data/test/release_56/variation/test_activerecord.rb +141 -0
  68. data/test/release_56/variation/test_consequence.rb +131 -0
  69. data/test/release_56/variation/test_variation.rb +63 -0
  70. data/test/release_60/core/test_gene.rb +61 -0
  71. data/test/release_60/core/test_project_human.rb +34 -0
  72. data/test/release_60/core/test_slice.rb +42 -0
  73. data/test/release_60/core/test_transcript.rb +120 -0
  74. data/test/release_60/core/test_transform.rb +57 -0
  75. data/test/release_60/variation/test_activerecord.rb +216 -0
  76. data/test/release_60/variation/test_consequence.rb +153 -0
  77. data/test/release_60/variation/test_variation.rb +64 -0
  78. data/test/release_62/core/test_gene.rb +42 -0
  79. data/test/release_62/variation/test_activerecord.rb +86 -0
  80. data/test/release_62/variation/test_consequence.rb +191 -0
  81. metadata +287 -0
@@ -0,0 +1,64 @@
1
+ #
2
+ # = ensembl/core/collection.rb
3
+ #
4
+ # Copyright:: Copyright (C) 2009 Francesco Strozzi <francesco.strozzi@gmail.com>
5
+ #
6
+ # License:: The Ruby License
7
+ #
8
+ # @author Francesco Strozzi
9
+
10
+ module Ensembl
11
+ nil
12
+ module Core
13
+ # Class to describe and handle multi-species databases
14
+ #
15
class Collection
  # Checks whether the current core database is a multi-species database,
  # i.e. whether its name matches "<prefix>_collection_core_<suffix>".
  #
  # @return [Boolean] True if current db is multi-species db; otherwise false.
  def self.check()
    # Only the database name is needed from the connection info.
    _host, _user, _password, db_name, _port = Ensembl::Core::DBConnection.get_info
    if db_name =~ /(\w+)_collection_core_.*/
      return true
    end
    return false
  end

  # Returns an array with all the species present in a collection database
  # (the meta_value of every meta row whose key is "species.db_name").
  #
  # @return [Array<String>] Array containing species names in collection
  def self.species()
    return Meta.find_all_by_meta_key("species.db_name").collect {|m| m.meta_value}
  end

  # Returns the species_id of a particular species present in the database.
  # The lookup is case-insensitive on meta.meta_value.
  #
  # @param [String] species Name of species
  # @return [Integer, nil] Species ID in the database, or nil if no meta row matches.
  def self.get_species_id(species)
    # Bind the value instead of interpolating it into the SQL string, so that
    # names containing quotes neither break the query nor allow SQL injection.
    meta = Meta.find_by_sql(["SELECT * FROM meta WHERE LOWER(meta_value) = ?", species.downcase])[0]
    if meta.nil?
      return nil
    else
      return meta.species_id
    end
  end

  # Looks up the meta_coord entries associated with a particular species and table_name.
  # Used inside Slice#method_missing to filter the coord_system_id using a particular species_id.
  #
  # @param [String] table_name Table name
  # @param [Integer] species_id ID of species in the database
  # @return [Array] Result of the MetaCoord finder restricted to the species'
  #   coordinate systems. NOTE(review): the original doc promised
  #   Array<Integer> of coord_system IDs, but the finder result is returned
  #   unchanged here - confirm against callers.
  def self.find_all_coord_by_table_name(table_name,species_id)
    all_ids = CoordSystem.find_all_by_species_id(species_id)
    return MetaCoord.find_all_by_coord_system_id_and_table_name(all_ids,table_name)
  end

end
61
+
62
+
63
+ end
64
+ end
@@ -0,0 +1,262 @@
1
+ #
2
+ # = ensembl/core/project.rb - project calculations for Ensembl Slice
3
+ #
4
+ # Copyright:: Copyright (C) 2009 Jan Aerts <http://jandot.myopenid.com>
5
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
6
+ #
7
+ # License:: The Ruby License
8
+ #
9
+ # @author Jan Aerts
10
+ # @author Francesco Strozzi
11
+ module Ensembl
12
+ module Core
13
class Slice
  # The Slice#project method is used to transfer coordinates from one
  # coordinate system to another. Suppose you have a slice on a
  # contig in human (let's say on contig AC000031.6.1.38703) and you
  # want to know the coordinates on the chromosome. This is a
  # projection of coordinates from a higher ranked coordinate system to
  # a lower ranked coordinate system. Projections can also be done
  # from a chromosome to the contig level. However, it might be possible
  # that more than one contig has to be included and that there exist
  # gaps between the contigs. The output of this method therefore is
  # an _array_ of Slice and Gap objects.
  #
  # At the moment, projections can only be done if the two coordinate
  # systems are linked directly in the 'assembly' table.
  #
  # @example
  #   # Get a contig slice in cow and project to scaffold level
  #   # (i.e. going from a high rank coord system to a lower rank coord
  #   # system)
  #   source_slice = Slice.fetch_by_region('contig', 'AAFC03020247', 42, 2007)
  #   target_slices = source_slice.project('scaffold')
  #   puts target_slices.length #--> 1
  #   puts target_slices[0].display_name #--> scaffold:ChrUn.003.3522:6570:8535:1
  #
  #   # Get a chromosome slice in cow and project to scaffold level
  #   # (i.e. going from a low rank coord system to a higher rank coord
  #   # system)
  #   # The region 96652152..98000000 on BTA4 is covered by 2 scaffolds
  #   # that are separated by a gap.
  #   source_slice = Slice.fetch_by_region('chromosome','4', 96652152, 98000000)
  #   target_slices = source_slice.project('scaffold')
  #   puts target_slices.length #--> 3
  #   first_bit, second_bit, third_bit = target_slices
  #   puts first_bit.display_name #--> scaffold:Btau_3.1:Chr4.003.105:42:599579:1
  #   puts second_bit.class #--> Gap
  #   puts third_bit.display_name #--> scaffold:Btau_3.1:Chr4.003.106:1:738311:1
  #
  # @param [String] coord_system_name Name of coordinate system to project
  #   coordinates to ('toplevel' and 'seqlevel' are resolved via the source
  #   coordinate system)
  # @return [Array<Slice, Gap>] Array of Slices and, if necessary, Gaps;
  #   empty if no assembly links cover the slice
  # @raise [NotImplementedError] if the slice overlaps a 'HAP' assembly exception
  def project(coord_system_name)
    answer = Array.new # an array of slices

    # Make sure the source coordinate system is in the session cache.
    unless Ensembl::SESSION.coord_systems.has_key?(self.seq_region.coord_system_id)
      Ensembl::SESSION.coord_systems[self.seq_region.coord_system_id] = self.seq_region.coord_system
      Ensembl::SESSION.coord_system_ids[Ensembl::SESSION.coord_systems[self.seq_region.coord_system_id].name] = self.seq_region.coord_system_id
    end
    source_coord_system = Ensembl::SESSION.coord_systems[self.seq_region.coord_system_id]

    # Resolve (and cache) the target coordinate system.
    target_coord_system = nil
    if coord_system_name == 'toplevel'
      target_coord_system = source_coord_system.find_toplevel
    elsif coord_system_name == 'seqlevel'
      target_coord_system = source_coord_system.find_seqlevel
    else
      unless Ensembl::SESSION.coord_system_ids.has_key?(coord_system_name)
        cs = source_coord_system.find_level(coord_system_name)
        Ensembl::SESSION.coord_systems[cs.id] = cs
        Ensembl::SESSION.coord_system_ids[cs.name] = cs.id
      end
      target_coord_system = Ensembl::SESSION.coord_systems[Ensembl::SESSION.coord_system_ids[coord_system_name]]
    end

    if target_coord_system.rank < source_coord_system.rank
      # We're going from component to assembly, which is easy.
      assembly_links = self.seq_region.assembly_links_as_component(source_coord_system)

      if assembly_links.length == 0
        return []
      else
        assembly_links.each do |assembly_link|
          target_seq_region = assembly_link.asm_seq_region
          target_start = self.start + assembly_link.asm_start - assembly_link.cmp_start
          target_stop = self.stop + assembly_link.asm_start - assembly_link.cmp_start
          target_strand = self.strand * assembly_link.ori # 1x1=>1, 1x-1=>-1, -1x-1=>1

          answer.push(Slice.new(target_seq_region, target_start, target_stop, target_strand))
        end
      end

    else
      # If we're going from assembly to component, the answer of the target method
      # is an array consisting of Slices intermitted with Gaps.

      # ASSEMBLY_EXCEPTIONS
      # CAUTION: there are exceptions to the assembly (stored in the assembly_exception)
      # table which make things a little bit more difficult... For example,
      # in human, the assembly data for the pseudo-autosomal region (PAR) of
      # Y is *not* stored in the assembly table. Instead, there is a record
      # in the assembly_exception table that says: "For chr Y positions 1
      # to 2709520, use chr X:1-2709520 for the assembly data."
      # As a solution, what we'll do here, is split the assembly up in blocks:
      # if a slice covers both the PAR and the allosomal region, we'll make
      # two subslices (let's call them blocks not to intercede with the
      # Slice#subslices method) and project these independently.
      assembly_exceptions = AssemblyException.find_all_by_seq_region_id(self.seq_region.id)
      if assembly_exceptions.length > 0
        # Check if this bit of the original slice is covered in the
        # assembly_exception table.
        overlapping_exceptions = Array.new
        assembly_exceptions.each do |ae|
          if Slice.new(self.seq_region, ae.seq_region_start, ae.seq_region_end).overlaps?(self)
            if ae.exc_type == 'HAP'
              raise NotImplementedError, "The haplotype exceptions are not implemented (yet). You can't project this slice."
            end
            overlapping_exceptions.push(ae)
          end
        end

        if overlapping_exceptions.length > 0
          # First get all assembly blocks from chromosome Y
          source_assembly_blocks = self.excise(overlapping_exceptions.collect{|e| e.seq_region_start .. e.seq_region_end})
          # The blocks are needed in start order several times below.
          sorted_blocks = source_assembly_blocks.sort_by{|b| b.start}
          # And insert the blocks of chromosome X
          all_assembly_blocks = Array.new #both for chr X and Y
          # First do all exceptions between the first and last block
          previous_block = nil
          sorted_blocks.each do |b|
            if previous_block.nil?
              all_assembly_blocks.push(b)
              previous_block = b
              next
            end
            # Find the exception record that ends right before this block
            exception = nil
            assembly_exceptions.each do |ae|
              if ae.seq_region_end == b.start - 1
                exception = ae
                break
              end
            end

            # NOTE(review): exception stays nil if no record ends at
            # b.start - 1, and the offsets below use previous_block.stop and
            # b.start where the start/end cases further down use +1 / -1 -
            # confirm whether this is an off-by-one.
            new_slice_start = exception.exc_seq_region_start + ( previous_block.stop - exception.seq_region_start )
            new_slice_stop = exception.exc_seq_region_start + ( b.start - exception.seq_region_start )
            new_slice_strand = self.strand * exception.ori
            new_slice = Slice.fetch_by_region(self.seq_region.coord_system.name, SeqRegion.find(exception.exc_seq_region_id).name, new_slice_start, new_slice_stop, new_slice_strand)

            all_assembly_blocks.push(new_slice)
            all_assembly_blocks.push(b)
            previous_block = b
          end

          # And then see if we have to add an additional one at the start or end
          first_block = sorted_blocks[0]
          if first_block.start > self.start
            exception = assembly_exceptions.sort_by{|ae| ae.seq_region_start}[0]
            new_slice_start = exception.exc_seq_region_start + ( self.start - exception.seq_region_start )
            new_slice_stop = exception.exc_seq_region_start + ( first_block.start - 1 - exception.seq_region_start )
            new_slice_strand = self.strand * exception.ori
            new_slice = Slice.fetch_by_region(self.seq_region.coord_system.name, SeqRegion.find(exception.exc_seq_region_id).name, new_slice_start, new_slice_stop, new_slice_strand)

            all_assembly_blocks.unshift(new_slice)
          end

          last_block = sorted_blocks[-1]
          if last_block.stop < self.stop
            exception = assembly_exceptions.sort_by{|ae| ae.seq_region_start}[-1]
            new_slice_start = exception.exc_seq_region_start + ( last_block.stop + 1 - exception.seq_region_start )
            new_slice_stop = exception.exc_seq_region_start + ( self.stop - exception.seq_region_start )
            new_slice_strand = self.strand * exception.ori
            new_slice = Slice.fetch_by_region(self.seq_region.coord_system.name, SeqRegion.find(exception.exc_seq_region_id).name, new_slice_start, new_slice_stop, new_slice_strand)

            # Append the trailing block. (This used to call Array#shift with
            # the slice as argument, which raises a TypeError and never adds it.)
            all_assembly_blocks.push(new_slice)
          end

          # Project every block independently and concatenate the results.
          answer = Array.new
          all_assembly_blocks.each do |b|
            answer.push(b.project(coord_system_name))
          end
          answer.flatten!

          return answer
        end

      end
      # END OF ASSEMBLY_EXCEPTIONS

      # Get all AssemblyLinks starting from this assembly and for which
      # the cmp_seq_region.coord_system is what we want.
      assembly_links = self.seq_region.assembly_links_as_assembly(target_coord_system)

      # Now reject all the components that lie _before_ the source, then
      # reject all the components that lie _after_ the source.
      # Then sort based on their positions.
      sorted_overlapping_assembly_links = assembly_links.reject{|al| al.asm_end < self.start}.reject{|al| al.asm_start > self.stop}.sort_by{|al| al.asm_start}
      if sorted_overlapping_assembly_links.length == 0
        return []
      end

      # What we'll do, is create slices for all the underlying components,
      # including the first and the last one. At first, the first and last
      # components are added in their entirety and will only be cropped afterwards.
      sorted_overlapping_assembly_links.each_index do |i|
        this_link = sorted_overlapping_assembly_links[i]
        if i == 0
          # The first component's seq_region goes through the session cache.
          cmp_seq_region = nil
          if Ensembl::SESSION.seq_regions.has_key?(this_link.cmp_seq_region_id)
            cmp_seq_region = Ensembl::SESSION.seq_regions[this_link.cmp_seq_region_id]
          else
            cmp_seq_region = this_link.cmp_seq_region
            Ensembl::SESSION.seq_regions[cmp_seq_region.id] = cmp_seq_region
          end
          answer.push(Slice.new(cmp_seq_region, this_link.cmp_start, this_link.cmp_end, this_link.ori))
          next
        end
        previous_link = sorted_overlapping_assembly_links[i-1]

        # If there is a gap with the previous link: add a gap
        if this_link.asm_start > ( previous_link.asm_end + 1 )
          gap_size = this_link.asm_start - previous_link.asm_end - 1
          answer.push(Gap.new(target_coord_system, gap_size))
        end

        # And add the component itself as a Slice
        answer.push(Slice.new(this_link.cmp_seq_region, this_link.cmp_start, this_link.cmp_end, this_link.ori))
      end

      # Now see if we have to crop the first and/or last slice
      first_link = sorted_overlapping_assembly_links[0]
      if self.start > first_link.asm_start
        if first_link.ori == -1
          answer[0].stop = first_link.cmp_start + ( first_link.asm_end - self.start )
        else
          answer[0].start = first_link.cmp_start + ( self.start - first_link.asm_start )
        end
      end

      last_link = sorted_overlapping_assembly_links[-1]
      if self.stop < last_link.asm_end
        if last_link.ori == -1
          answer[-1].start = last_link.cmp_start + ( last_link.asm_end - self.stop)
        else
          answer[-1].stop = last_link.cmp_start + ( self.stop - last_link.asm_start )
        end
      end

      # And check if we have to add Ns at the front and/or back
      if self.start < first_link.asm_start
        gap_size = first_link.asm_start - self.start
        answer.unshift(Gap.new(target_coord_system, gap_size))
      end
      if self.stop > last_link.asm_end
        gap_size = self.stop - last_link.asm_end
        answer.push(Gap.new(target_coord_system, gap_size))
      end
    end
    return answer

  end
end
261
+ end
262
+ end
@@ -0,0 +1,657 @@
1
+ #
2
+ # = ensembl/core/slice.rb - General methods for Ensembl Slice
3
+ #
4
+ # Copyright:: Copyright (C) 2009 Jan Aerts <http://jandot.myopenid.com>
5
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
6
+ #
7
+ # License:: The Ruby License
8
+ #
9
+ # @author Jan Aerts
10
+ # @author Francesco Strozzi
11
+ nil
12
+ module Ensembl
13
+ nil
14
+ module Core
15
+
16
+ # From the perl API tutorial
17
+ # (http://www.ensembl.org/info/software/core/core_tutorial.html): "A
18
+ # Slice object represents a continuous region of a genome. Slices can be
19
+ # used to obtain sequence, features or other information from a
20
+ # particular region of interest."
21
+ #
22
+ # In contrast to almost all other classes of Ensembl::Core,
23
+ # the Slice class is not based on ActiveRecord.
24
+ #
25
+ # @example
26
+ # chr4 = SeqRegion.find_by_name('4')
27
+ # my_slice = Slice.new(chr4, 95000, 98000, -1)
28
+ # puts my_slice.display_name #--> 'chromosome:Btau_3.1:4:95000:98000:-1'
29
+ class Slice
30
+ attr_accessor :seq_region, :start, :stop, :strand, :seq
31
+
32
+ #################
33
+ ## CREATE A SLICE
34
+ #################
35
+
36
+ # Create a new Slice object from scratch.
37
+ #
38
+ # @example
39
+ # chr4 = SeqRegion.find_by_name('4')
40
+ # my_slice = Slice.new(chr4, 95000, 98000, -1)
41
+ #
42
+ # @param [SeqRegion] seq_region SeqRegion object
43
+ # @param [Integer] start Start position of the slice on the seq_region
44
+ # @param [Integer] stop Stop position of the slice on the seq_region
45
+ # @param [Integer] strand Strand that the slice should be
46
+ # @return [Slice] Slice object
47
def initialize(seq_region, start = 1, stop = nil, strand = 1)
  # Validate the type first. The stop default used to be seq_region.length,
  # which was evaluated before this guard, so a nil or wrong argument failed
  # with a NoMethodError instead of the message below.
  unless seq_region.is_a?(Ensembl::Core::SeqRegion)
    raise 'First argument has to be a Ensembl::Core::SeqRegion object'
  end
  # nil (explicit or default) means "from the first base" / "to the end of
  # the seq_region".
  start = 1 if start.nil?
  stop = seq_region.length if stop.nil?
  @seq_region, @start, @stop, @strand = seq_region, start, stop, strand
  # Sequence is fetched lazily by #seq.
  @seq = nil
end
60
+
61
+ # Create a Slice without first creating the SeqRegion object.
62
+ #
63
+ # @example
64
+ # my_slice_1 = Slice.fetch_by_region('chromosome','4',95000,98000,1)
65
+ #
66
+ # @param [String] coord_system_name Name of coordinate system
67
+ # @param [String] seq_region_name name of the seq_region
68
+ # @param [Integer] start Start position of the slice on the seq_region
69
+ # @param [Integer] stop Stop position of the slice on the seq_region
70
+ # @param [Integer] strand Strand that the slice should be
71
+ # @param [String] species Name of species in case of multi-species database
72
+ # @param [Integer] version Version number of the coordinate system
73
+ # @return [Slice] Slice object
74
def self.fetch_by_region(coord_system_name, seq_region_name, start = nil, stop = nil, strand = 1, species = Ensembl::SESSION.collection_species ,version = nil)
  all_coord_systems = nil
  if Collection.check
    # Multi-species database: the coordinate systems are per species, so a
    # species name is mandatory. Check for nil before calling downcase on it;
    # otherwise this guard can never fire.
    if species.nil?
      raise ArgumentError, "When using multi-species db, you must pass a species name to get the correct Slice"
    end
    species = species.downcase
    species_id = Collection.get_species_id(species)
    raise ArgumentError, "No species found in the database with this name: #{species}" if species_id.nil?
    all_coord_systems = Ensembl::Core::CoordSystem.find_all_by_name_and_species_id(coord_system_name,species_id)
  else
    all_coord_systems = Ensembl::Core::CoordSystem.find_all_by_name(coord_system_name)
  end

  coord_system = nil
  if version.nil? # Take the version with the lower rank
    coord_system = all_coord_systems.sort_by{|cs| cs.rank}.first
  else
    coord_system = all_coord_systems.select{|cs| cs.version == version}[0]
  end
  unless coord_system.is_a?(Ensembl::Core::CoordSystem)
    # Interpolate rather than concatenate: version may not be a String.
    message = "Couldn't find a Ensembl::Core::CoordSystem object with name '#{coord_system_name}'"
    unless version.nil?
      message += " and version '#{version}'"
    end
    raise message
  end

  seq_region = Ensembl::Core::SeqRegion.find_by_name_and_coord_system_id(seq_region_name, coord_system.id)
  unless seq_region.is_a?(Ensembl::Core::SeqRegion)
    raise "Couldn't find a Ensembl::Core::SeqRegion object with the name '#{seq_region_name}'"
  end

  return Ensembl::Core::Slice.new(seq_region, start, stop, strand)
end
110
+
111
+ # Create a Slice based on a Gene
112
+ #
113
+ # @example
114
+ # my_slice = Slice.fetch_by_gene_stable_id('ENSG00000184895')
115
+ #
116
+ # @param [String] gene_stable_id Ensembl gene stable ID
117
+ # @param [Integer] flanking_seq_length Length of the flanking sequence
118
+ # @return [Slice] Slice object
119
def self.fetch_by_gene_stable_id(gene_stable_id, flanking_seq_length = 0)
  stable_id_record = Ensembl::Core::GeneStableId.find_by_stable_id(gene_stable_id)
  # An unknown stable ID used to surface as a NoMethodError on nil; fail with
  # a message in the same style as Slice.fetch_by_region instead.
  if stable_id_record.nil?
    raise "Couldn't find a Ensembl::Core::GeneStableId object with the stable ID '#{gene_stable_id}'"
  end
  gene = stable_id_record.gene
  seq_region = gene.seq_region

  # The slice covers the gene plus flanking_seq_length bases on either side,
  # on the gene's own strand.
  return Ensembl::Core::Slice.new(seq_region, gene.seq_region_start - flanking_seq_length, gene.seq_region_end + flanking_seq_length, gene.seq_region_strand)
end
126
+
127
+ # Create a Slice based on a Transcript
128
+ #
129
+ # @example
130
+ # my_slice = Slice.fetch_by_transcript_stable_id('ENST00000383673')
131
+ #
132
+ # @param [String] transcript_stable_id Ensembl transcript stable ID
133
+ # @param [Integer] flanking_seq_length Length of the flanking sequence
134
+ # @return [Slice] Slice object
135
def self.fetch_by_transcript_stable_id(transcript_stable_id, flanking_seq_length = 0)
  stable_id_record = Ensembl::Core::TranscriptStableId.find_by_stable_id(transcript_stable_id)
  # An unknown stable ID used to surface as a NoMethodError on nil; fail with
  # a message in the same style as Slice.fetch_by_region instead.
  if stable_id_record.nil?
    raise "Couldn't find a Ensembl::Core::TranscriptStableId object with the stable ID '#{transcript_stable_id}'"
  end
  transcript = stable_id_record.transcript
  seq_region = transcript.seq_region

  # The slice covers the transcript plus flanking_seq_length bases on either
  # side, on the transcript's own strand.
  return Ensembl::Core::Slice.new(seq_region, transcript.seq_region_start - flanking_seq_length, transcript.seq_region_end + flanking_seq_length, transcript.seq_region_strand)
end
142
+
143
+ # Create an array of all Slices for a given coordinate system.
144
+ #
145
+ # @example
146
+ # slices = Slice.fetch_all('chromosome')
147
+ #
148
+ # @param [String] coord_system_name Name of coordinate system
149
+ # @param [String] species Name of species
150
+ # @param [Integer] version Version of coordinate system
151
+ # @return [Array<Slice>] Array of Slice objects
152
def self.fetch_all(coord_system_name = 'chromosome',species = Ensembl::SESSION.collection_species ,version = nil)
  coord_system = nil
  if Collection.check
    # Multi-species database: a species name is required to pick the right
    # coordinate system. Guard against nil before calling downcase on it.
    if species.nil?
      raise ArgumentError, "When using multi-species db, you must pass a species name to get the correct Slice"
    end
    species = species.downcase
    species_id = Collection.get_species_id(species)
    raise ArgumentError, "No specie found in the database with this name: #{species}" if species_id.nil?
    if version.nil?
      coord_system = Ensembl::Core::CoordSystem.find_by_name_and_species_id(coord_system_name,species_id)
    else
      coord_system = Ensembl::Core::CoordSystem.find_by_name_and_species_id_and_version(coord_system_name, species_id, version)
    end
  else
    if version.nil?
      coord_system = Ensembl::Core::CoordSystem.find_by_name(coord_system_name)
    else
      coord_system = Ensembl::Core::CoordSystem.find_by_name_and_version(coord_system_name, version)
    end
  end

  # An unknown name/version used to fail with a NoMethodError on nil below.
  if coord_system.nil?
    message = "Couldn't find a Ensembl::Core::CoordSystem object with name '#{coord_system_name}'"
    message += " and version '#{version}'" unless version.nil?
    raise message
  end

  # One full-length Slice per seq_region of the coordinate system.
  answer = Array.new
  coord_system.seq_regions.each do |seq_region|
    answer.push(Ensembl::Core::Slice.new(seq_region))
  end
  return answer
end
176
+
177
+ ##################
178
+ ## GENERAL METHODS
179
+ ##################
180
+
181
+ # Get the length of a slice
182
+ #
183
+ # @example
184
+ # chr4 = SeqRegion.find_by_name('4')
185
+ # my_slice = Slice.new(chr4, 95000, 98000, -1)
186
+ # puts my_slice.length
187
+ #
188
+ # @return [Integer] Length of the slice
189
def length
  # Coordinates are inclusive on both ends, hence the + 1.
  span = self.stop - self.start
  span + 1
end
192
+
193
+ # The display_name method returns a full name of this slice, containing
194
+ # the name of the coordinate system, the sequence region, start and
195
+ # stop positions on that sequence region and the strand. E.g. for a slice
196
+ # of bovine chromosome 4 from position 95000 to 98000 on the reverse strand,
197
+ # the display_name would look like: chromosome:Btau_3.1:4:95000:98000:-1
198
+ #
199
+ # @example
200
+ # puts my_slice.display_name
201
+ #
202
+ # @return [String] Nicely formatted name of the Slice
203
def display_name
  # Field order: coord system name, coord system version, seq_region name,
  # start, stop, strand - joined with colons.
  region = self.seq_region
  fields = [region.coord_system.name, region.coord_system.version, region.name]
  fields += [self.start.to_s, self.stop.to_s, self.strand.to_s]
  fields.join(':')
end
206
+ alias to_s display_name
207
+
208
+ # The Slice#overlaps? method checks if this slice overlaps another one.
209
+ # The other slice has to be on the same coordinate system
210
+ #
211
+ # @example
212
+ # slice_a = Slice.fetch_by_region('chromosome','X',1,1000)
213
+ # slice_b = Slice.fetch_by_region('chromosome','X',900,1500)
214
+ # if slice_a.overlaps?(slice_b)
215
+ # puts "These slices overlap"
216
+ # end
217
+ #
218
+ # @param [Slice] other_slice Another slice
219
+ # @return [Boolean] True if slices overlap, otherwise false
220
def overlaps?(other_slice)
  # The guard used to read `! other_slice.class == Slice`, which parses as
  # `(!other_slice.class) == Slice` and is therefore always false.
  unless other_slice.is_a?(Slice)
    raise RuntimeError, "The Slice#overlaps? method takes a Slice object as its arguments."
  end
  if self.seq_region.coord_system != other_slice.seq_region.coord_system
    raise RuntimeError, "The argument slice of Slice#overlaps? has to be in the same coordinate system, but were " + self.seq_region.coord_system.name + " and " + other_slice.seq_region.coord_system.name
  end

  self_range = self.start .. self.stop
  other_range = other_slice.start .. other_slice.stop

  # Two ranges overlap exactly when one of them contains the other's start.
  return (self_range.include?(other_slice.start) or other_range.include?(self.start))
end
237
+
238
+ # The Slice#within? method checks if this slice is contained within another one.
239
+ # The other slice has to be on the same coordinate system
240
+ #
241
+ # @example
242
+ # slice_a = Slice.fetch_by_region('chromosome','X',1,1000)
243
+ # slice_b = Slice.fetch_by_region('chromosome','X',900,950)
244
+ # if slice_b.within?(slice_a)
245
+ # puts "Slice b is within slice a"
246
+ # end
247
+ #
248
+ # @param [Slice] other_slice Another slice
249
+ # @return [Boolean] True if this slice is within other_slice, otherwise false
250
def within?(other_slice)
  # The guard used to read `! other_slice.class == Slice`, which parses as
  # `(!other_slice.class) == Slice` and is therefore always false. The
  # messages also named Slice#overlaps? instead of this method.
  unless other_slice.is_a?(Slice)
    raise RuntimeError, "The Slice#within? method takes a Slice object as its arguments."
  end
  if self.seq_region.coord_system != other_slice.seq_region.coord_system
    raise RuntimeError, "The argument slice of Slice#within? has to be in the same coordinate system, but were " + self.seq_region.coord_system.name + " and " + other_slice.seq_region.coord_system.name
  end

  other_range = other_slice.start .. other_slice.stop

  # This slice is within the other one when both of its ends fall inside it.
  return (other_range.include?(self.start) and other_range.include?(self.stop))
end
267
+
268
+ # The Slice#excise method removes a bit of a slice and returns the
269
+ # remainder as separate slices.
270
+ #
271
+ # @example
272
+ # original_slice = Slice.fetch_by_region('chromosome','X',1,10000)
273
+ # new_slices = original_slice.excise([500..750, 1050..1075])
274
+ # new_slices.each do |s|
275
+ # puts s.display_name
276
+ # end
277
+ #
278
+ # # result:
279
+ # # chromosome:X:1:499:1
280
+ # # chromosome:X:751:1049:1
281
+ # # chromosome:X:1076:10000:1
282
+ #
283
+ # @param [Array<Range>] ranges Array of ranges to excise
284
+ # @return [Array<Slice>] Array of slices
285
def excise(ranges)
  unless ranges.is_a?(Array) && ranges.all? { |r| r.is_a?(Range) }
    raise RuntimeError, "Argument should be an array of ranges"
  end

  answer = Array.new
  # Last position already removed (or skipped); starts just before the slice.
  previous_excised_stop = self.start - 1
  ranges.sort_by { |r| r.first }.each do |r|
    subslice_start = previous_excised_stop + 1
    # Never let a remainder run past the end of this slice (a range lying
    # entirely beyond self.stop used to produce one).
    subslice_stop = [r.first - 1, self.stop].min
    if subslice_start <= subslice_stop
      answer.push(Slice.new(self.seq_region, subslice_start, subslice_stop))
    end
    # Only move forward: a range that ends before the slice starts (or inside
    # an earlier range) must not rewind the cursor.
    previous_excised_stop = [previous_excised_stop, r.last].max
    # Nothing is left once the excision reaches the end of the slice. This
    # used to test `>` only, so a range ending exactly at self.stop yielded a
    # trailing slice with start = stop + 1.
    return answer if previous_excised_stop >= self.stop
  end
  answer.push(Slice.new(self.seq_region, previous_excised_stop + 1, self.stop))
  return answer
end
311
+
312
+ # Get the sequence of the Slice as a Bio::Sequence::NA object.
313
+ #
314
+ # If the Slice is on a CoordSystem that is not seq_level, it will try
315
+ # to project its coordinates to the CoordSystem that does. At this
316
+ # moment, this is only done if there is a direct link between the
317
+ # two coordinate systems. (The perl API allows for following an
318
+ # indirect link as well.)
319
+ #
320
+ # Caution: Bio::Sequence::NA makes the sequence
321
+ # downcase!!
322
+ #
323
+ # @example
324
+ # my_slice.seq.seq.to_s
325
+ #
326
+ # @return [Bio::Sequence::NA] Slice sequence as a Bio::Sequence::NA object
327
def seq
  # The sequence is computed once and then served from @seq.
  return @seq unless @seq.nil?

  if ! Ensembl::SESSION.seqlevel_id.nil? and self.seq_region.coord_system_id == Ensembl::SESSION.seqlevel_id
    # Already on the sequence-level coordinate system: read it directly.
    @seq = Bio::Sequence::NA.new(self.seq_region.subseq(self.start, self.stop))
  else
    # Otherwise project to 'seqlevel' and stitch the pieces together:
    # Slices contribute their own (recursively fetched) sequence, anything
    # else is treated as a Gap and contributes a run of Ns.
    @target_slices = self.project('seqlevel')
    stitched = @target_slices.inject(String.new) do |so_far, component|
      piece = (component.class == Slice) ? component.seq : 'N' * (component.length)
      so_far + piece
    end
    @seq = Bio::Sequence::NA.new(stitched)
  end

  # Reverse-strand slices report the reverse complement.
  @seq.reverse_complement! if self.strand == -1

  return @seq
end
359
+
360
# Placeholder for a repeat-masked version of the slice sequence.
#
# @raise [NotImplementedError] always; the message names this method so the
#   failure is recognisable in logs and backtraces.
def repeatmasked_seq
  raise NotImplementedError, "repeatmasked_seq is not implemented"
end
363
+
364
+ # Take a sub_slice from an existing one.
365
+ #
366
+ # @example
367
+ # my_sub_slice = my_slice.sub_slice(400,500)
368
+ #
369
+ # @param [Integer] start Start of subslice, in the same coordinates as the slice's own start (defaults to it)
370
+ # @param [Integer] stop Stop of subslice, in the same coordinates as the slice's own stop (defaults to it)
371
+ # @return [Slice] Slice object
372
# Create a new object of the receiver's class on the same seq_region and
# strand, with the given start and stop.
#
# The coordinates are handed to the constructor unchanged, i.e. they are in
# the same coordinates as this slice's own start/stop (which are also the
# defaults), not offsets relative to the slice.
#
# @param [Integer] start Start of the subslice (default: start of this slice)
# @param [Integer] stop Stop of the subslice (default: stop of this slice)
# @return [Slice] New object of the same class as the receiver
def sub_slice(start = self.start, stop = self.stop)
  region, orientation = self.seq_region, self.strand
  self.class.new(region, start, stop, orientation)
end
375
+
376
+ # Creates overlapping subslices for a given Slice.
377
+ #
378
+ # @example
379
+ # my_slice.split(50000, 250).each do |sub_slice|
380
+ # puts sub_slice.display_name
381
+ # end
382
+ #
383
+ # @param [Integer] max_size Maximal size of subslices
384
+ # @param [Integer] overlap Overlap in bp between consecutive subslices
385
+ # @return [Array<Slice>] Array of Slice objects
386
# Cut the slice into consecutive subslices of at most +max_size+ bp, each
# one starting +overlap+ bp before the end of the previous one.
#
# The first subslice starts at the slice start; the last one is truncated at
# the slice stop, so no subslice ever extends beyond the parent slice.
# Subslices are created through sub_slice.
#
# @example
#  my_slice.split(50000, 250).each do |sub_slice|
#    puts sub_slice.display_name
#  end
#
# @param [Integer] max_size Maximal size of subslices
# @param [Integer] overlap Overlap in bp between consecutive subslices
# @return [Array<Slice>] Array of Slice objects
# @raise [ArgumentError] if overlap is negative or not smaller than max_size
def split(max_size = 100000, overlap = 0)
  if overlap < 0 || max_size <= overlap
    # A non-positive step would never reach the end of the slice.
    raise ArgumentError, "max_size must be larger than overlap, and overlap must not be negative"
  end

  step = max_size - overlap
  sub_slices = []
  sub_start = self.start
  loop do
    sub_stop = [sub_start + max_size - 1, self.stop].min
    sub_slices.push(self.sub_slice(sub_start, sub_stop))
    # Stop as soon as the slice end is covered; this also avoids emitting a
    # final piece that lies entirely inside the previous overlap.
    break if sub_stop >= self.stop
    sub_start += step
  end
  sub_slices
end
396
+
397
+ ############################
398
+ ## GET ELEMENTS WITHIN SLICE
399
+ ############################
400
+
401
+ #--
402
+ # As there should be 'getters' for a lot of classes, we'll implement
403
+ # this with method_missing. For some of the original methods, see the end
404
+ # of this file.
405
+ #
406
+ # The optional argument is either 'true' or 'false' (default = false).
407
+ # False if the features have to be completely contained within the slice;
408
+ # true if a partial overlap is sufficient.
409
+ #++
410
+ # Don't use this method yourself.
411
+ def method_missing(method_name, *args)
412
+ table_name = method_name.to_s.singularize
413
+ class_name = table_name.camelcase
414
+
415
+ # Convert to the class object
416
+ target_class = nil
417
+ ObjectSpace.each_object(Class) do |o|
418
+ if o.name =~ /^Ensembl::Core::#{class_name}$/
419
+ target_class = o
420
+ end
421
+ end
422
+
423
+ # If it exists, see if it implements Sliceable
424
+ if ! target_class.nil? and target_class.include?(Sliceable)
425
+ inclusive = false
426
+ if [TrueClass, FalseClass].include?(args[0].class)
427
+ inclusive = args[0]
428
+ end
429
+ return self.get_objects(target_class, table_name, inclusive)
430
+ end
431
+
432
+ raise NoMethodError
433
+
434
+ end
435
+
436
+ # Don't use this method yourself.
437
# Collect the records of +target_class+ (table +table_name+) located on this
# slice. Don't use this method yourself.
#
# The coordinate systems carrying this kind of feature are taken from
# Collection.find_all_coord_by_table_name when Collection.check is truthy,
# and from MetaCoord.find_all_by_table_name otherwise. Features on the
# slice's own coordinate system are queried directly; for every other listed
# coordinate system the slice is projected onto it and each resulting Slice
# component is queried (non-Slice components are skipped).
#
# @param [Class] target_class Class whose find_by_sql is used for the query
# @param [String] table_name Name of the table to select from
# @param [Boolean] inclusive false: features must lie completely within the
#   slice; true: a partial overlap is sufficient
# @return [Array] Flattened array of unique records
def get_objects(target_class, table_name, inclusive = false)
  # Builds the SELECT statement for one region (this slice or a projected
  # component); the same two statements are used for both.
  query_for = lambda do |region|
    if inclusive
      <<SQL
SELECT * FROM #{table_name}
WHERE seq_region_id = #{region.seq_region.id}
AND (( seq_region_start BETWEEN #{region.start} AND #{region.stop} )
OR ( seq_region_end BETWEEN #{region.start} AND #{region.stop} )
OR ( seq_region_start <= #{region.start} AND seq_region_end >= #{region.stop} )
)
SQL
    else
      <<SQL
SELECT * FROM #{table_name}
WHERE seq_region_id = #{region.seq_region.id}
AND seq_region_start >= #{region.start}
AND seq_region_end <= #{region.stop}
SQL
    end
  end

  found = []

  # Get all the coord_systems with this type of features on them
  meta_coords =
    if Collection.check
      Collection.find_all_coord_by_table_name(table_name, self.seq_region.coord_system.species_id)
    else
      MetaCoord.find_all_by_table_name(table_name)
    end
  remaining_ids = meta_coords.map { |mc| mc.coord_system_id }

  # Features annotated on the slice's own coordinate system
  if remaining_ids.include?(self.seq_region.coord_system_id)
    found.push(target_class.find_by_sql(query_for.call(self)))
    remaining_ids.delete(self.seq_region.coord_system_id)
  end

  # Features annotated on other coordinate systems: project the slice there
  # and query each projected piece. Only direct projections are attempted.
  remaining_ids.each do |target_coord_system_id|
    projected = self.project(CoordSystem.find(target_coord_system_id).name)
    projected.each do |piece|
      next unless piece.class == Slice
      found.push(target_class.find_by_sql(query_for.call(piece)))
    end
  end

  found.flatten!
  found.uniq!
  found
end
507
+
508
+
509
+ # Get all MiscFeatures that are located on a Slice for a given MiscSet.
510
+ #
511
+ # Pitfall: just looks at the CoordSystem that the Slice is located on.
512
+ # For example, if a Slice is located on a SeqRegion on the 'chromosome'
513
+ # CoordSystem, but all misc_features are annotated on SeqRegions of
514
+ # the 'scaffold' CoordSystem, this method will return an empty array.
515
+ #
516
+ # @example
517
+ # my_slice.misc_features('encode').each do |feature|
518
+ # puts feature.to_yaml
519
+ # end
520
+ #
521
+ # @param [String] code Code of MiscSet
522
+ # @return [Array<MiscFeature>] Array of MiscFeature objects
523
# Get the MiscFeatures of this slice's seq_region that lie within the slice,
# optionally restricted to those belonging to the MiscSet with the given code.
#
# A feature is returned when seq_region_start >= slice start and
# seq_region_end <= slice stop, so features touching the slice boundaries
# are included (the same containment rule as the other feature getters of
# this class). With a code, a feature matches when any of its misc_sets has
# that code; features without misc_sets never match a code.
#
# Only features attached to this slice's own seq_region are inspected.
#
# @example
#  my_slice.misc_features('encode').each do |feature|
#    puts feature.to_yaml
#  end
#
# @param [String, nil] code Code of MiscSet; nil (default) for all sets
# @return [Array<MiscFeature>] Array of MiscFeature objects
def misc_features(code = nil)
  self.seq_region.misc_features.select do |mf|
    next false unless mf.seq_region_start >= self.start && mf.seq_region_end <= self.stop
    code.nil? || mf.misc_sets.any? { |set| set.code == code }
  end
end
542
+
543
+ # Get all DnaAlignFeatures that are located on a Slice for a given Analysis.
544
+ #
545
+ # Pitfall: just looks at the CoordSystem that the Slice is located on.
546
+ # For example, if a Slice is located on a SeqRegion on the 'chromosome'
547
+ # CoordSystem, but all dna_align_features are annotated on SeqRegions of
548
+ # the 'scaffold' CoordSystem, this method will return an empty array.
549
+ #
550
+ # @example
551
+ # my_slice.dna_align_features('Vertrna').each do |feature|
552
+ # puts feature.to_yaml
553
+ # end
554
+ #
555
+ # @param [String] analysis_name Name of analysis
556
+ # @return [Array<DnaAlignFeature>] Array of DnaAlignFeature objects
557
# Get the DnaAlignFeatures that lie completely within this slice
# (seq_region_start >= slice start and seq_region_end <= slice stop),
# optionally restricted to one analysis.
#
# Only rows on this slice's own seq_region are queried. The values are
# passed to find_by_sql as bind parameters instead of being concatenated
# into the SQL string.
#
# @example
#  my_slice.dna_align_features('Vertrna').each do |feature|
#    puts feature.to_yaml
#  end
#
# @param [String, nil] analysis_name Logic name of the analysis; nil
#   (default) for all analyses
# @return [Array<DnaAlignFeature>] Array of DnaAlignFeature objects; empty
#   when no analysis with the given logic name exists
def dna_align_features(analysis_name = nil)
  sql = 'SELECT * FROM dna_align_feature WHERE seq_region_id = ? AND seq_region_start >= ? AND seq_region_end <= ?'
  binds = [self.seq_region.id, self.start, self.stop]

  unless analysis_name.nil?
    analysis = Analysis.find_by_logic_name(analysis_name)
    # An unknown analysis cannot have any features.
    return [] if analysis.nil?
    sql += ' AND analysis_id = ?'
    binds.push(analysis.id)
  end

  DnaAlignFeature.find_by_sql([sql] + binds)
end
565
+
566
+ # Get all ProteinAlignFeatures that are located on a Slice for a given Analysis.
567
+ #
568
+ # Pitfall: just looks at the CoordSystem that the Slice is located on.
569
+ # For example, if a Slice is located on a SeqRegion on the 'chromosome'
570
+ # CoordSystem, but all protein_align_features are annotated on SeqRegions of
571
+ # the 'scaffold' CoordSystem, this method will return an empty array.
572
+ #
573
+ # @example
574
+ # my_slice.protein_align_features('Uniprot').each do |feature|
575
+ # puts feature.to_yaml
576
+ # end
577
+ #
578
+ # @param [String] analysis_name Name of analysis
579
+ # @return [Array<ProteinAlignFeature>] Array of ProteinAlignFeature objects
580
# Get the ProteinAlignFeatures that lie completely within this slice
# (seq_region_start >= slice start and seq_region_end <= slice stop),
# optionally restricted to one analysis.
#
# Only rows on this slice's own seq_region are queried. The values are
# passed to find_by_sql as bind parameters instead of being concatenated
# into the SQL string.
#
# @example
#  my_slice.protein_align_features('Uniprot').each do |feature|
#    puts feature.to_yaml
#  end
#
# @param [String, nil] analysis_name Logic name of the analysis; nil
#   (default, as for dna_align_features) for all analyses
# @return [Array<ProteinAlignFeature>] Array of ProteinAlignFeature objects;
#   empty when no analysis with the given logic name exists
def protein_align_features(analysis_name = nil)
  sql = 'SELECT * FROM protein_align_feature WHERE seq_region_id = ? AND seq_region_start >= ? AND seq_region_end <= ?'
  binds = [self.seq_region.id, self.start, self.stop]

  unless analysis_name.nil?
    analysis = Analysis.find_by_logic_name(analysis_name)
    # An unknown analysis cannot have any features.
    return [] if analysis.nil?
    sql += ' AND analysis_id = ?'
    binds.push(analysis.id)
  end

  ProteinAlignFeature.find_by_sql([sql] + binds)
end
588
+
589
+ ############################
590
+ ## VARIATION METHODS
591
+ ############################
592
+
593
+
594
+ # Method to retrieve Variation features from Ensembl::Core::Slice objects
595
+ # @example
596
+ # slice = Slice.fetch_by_region('chromosome',1,50000,51000)
597
+ # variations = slice.get_variation_features
598
+ # variations.each do |vf|
599
+ # puts vf.variation_name, vf.allele_string
600
+ # puts vf.variation.ancestral_allele
601
+ # end
602
# Fetch the VariationFeatures lying completely within this slice
# (seq_region_start >= slice start and seq_region_end <= slice stop) on the
# slice's seq_region. variation_connection is called first, before the query.
#
# @return [Array<Ensembl::Variation::VariationFeature>] Result of the finder
def get_variation_features
  variation_connection()
  conditions = [
    "seq_region_id = ? AND seq_region_start >= ? AND seq_region_end <= ?",
    self.seq_region.seq_region_id,
    self.start,
    self.stop
  ]
  Ensembl::Variation::VariationFeature.find(:all, :conditions => conditions)
end
606
+
607
# Fetch the VariationFeatures whose flags column equals 'genotyped' and that
# lie completely within this slice on the slice's seq_region.
# variation_connection is called first, before the query.
#
# @return [Array<Ensembl::Variation::VariationFeature>] Result of the finder
def get_genotyped_variation_features
  variation_connection()
  conditions = [
    "flags = 'genotyped' AND seq_region_id = ? AND seq_region_start >= ? AND seq_region_end <= ?",
    self.seq_region.seq_region_id,
    self.start,
    self.stop
  ]
  Ensembl::Variation::VariationFeature.find(:all, :conditions => conditions)
end
611
+
612
# Fetch the StructuralVariations lying completely within this slice
# (seq_region_start >= slice start and seq_region_end <= slice stop) on the
# slice's seq_region. variation_connection is called first, before the query.
#
# @return [Array<Ensembl::Variation::StructuralVariation>] Result of the finder
def get_structural_variations
  variation_connection()
  conditions = [
    "seq_region_id = ? AND seq_region_start >= ? AND seq_region_end <= ?",
    self.seq_region.seq_region_id,
    self.start,
    self.stop
  ]
  Ensembl::Variation::StructuralVariation.find(:all, :conditions => conditions)
end
616
+
617
+ private
618
+
619
# Connect Ensembl::Variation::DBConnection unless it reports being connected
# already. The species, release, credentials, host and port are taken from
# Ensembl::Core::DBConnection.get_info; the database name it returns is not
# used.
def variation_connection()
  return if Ensembl::Variation::DBConnection.connected?

  host, user, password, _db_name, port, species, release = Ensembl::Core::DBConnection.get_info
  Ensembl::Variation::DBConnection.connect(
    species,
    release.to_i,
    :username => user,
    :password => password,
    :host => host,
    :port => port
  )
end
626
+
627
+
628
+ end #Slice
629
+
630
+ # The Gap class is similar to the Slice object, but describes a gap and
631
+ # therefore can easily be described by coordinate system and size.
632
+ #
633
class Gap
  # Coordinate system object the gap belongs to (must respond to #name for
  # display_name).
  attr_accessor :coord_system
  # Length of the gap.
  attr_accessor :size

  # Create a new Gap object from scratch.
  #
  # @example
  #  my_coord_system = CoordSystem.find_by_name('chromosome')
  #  # Create a gap of 10kb.
  #  gap = Gap.new(my_coord_system, 10000)
  #
  # @param [CoordSystem] coord_system Coordinate system object
  # @param [Integer] size Length of the gap
  # @return [Gap] Gap object
  def initialize(coord_system, size)
    @coord_system = coord_system
    @size = size
  end

  # length is another name for the size reader.
  alias_method :length, :size

  # Human-readable name: "<coord system name>:gap:<size>".
  #
  # @return [String] Display name of the gap
  def display_name
    @coord_system.name + ":gap:#{@size}"
  end
end #Gap
655
+
656
+ end #Core
657
+ end #Ensembl