bio-ensembl 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (81) hide show
  1. data/.document +5 -0
  2. data/Gemfile +20 -0
  3. data/Gemfile.lock +40 -0
  4. data/LICENSE.txt +20 -0
  5. data/README.rdoc +19 -0
  6. data/Rakefile +71 -0
  7. data/VERSION +1 -0
  8. data/bin/ensembl +40 -0
  9. data/bin/variation_effect_predictor +106 -0
  10. data/bio-ensembl.gemspec +190 -0
  11. data/lib/bio-ensembl.rb +65 -0
  12. data/lib/bio-ensembl/core/activerecord.rb +1812 -0
  13. data/lib/bio-ensembl/core/collection.rb +64 -0
  14. data/lib/bio-ensembl/core/project.rb +262 -0
  15. data/lib/bio-ensembl/core/slice.rb +657 -0
  16. data/lib/bio-ensembl/core/transcript.rb +409 -0
  17. data/lib/bio-ensembl/core/transform.rb +95 -0
  18. data/lib/bio-ensembl/db_connection.rb +205 -0
  19. data/lib/bio-ensembl/variation/activerecord.rb +536 -0
  20. data/lib/bio-ensembl/variation/variation_feature.rb +376 -0
  21. data/lib/bio-ensembl/variation/variation_feature62.rb +444 -0
  22. data/samples/ensembl_genomes_example.rb +60 -0
  23. data/samples/examples_perl_tutorial.rb +125 -0
  24. data/samples/small_example_ruby_api.rb +34 -0
  25. data/samples/variation_effect_predictor_data.txt +4 -0
  26. data/samples/variation_example.rb +67 -0
  27. data/test/data/seq_c6qbl.fa +10 -0
  28. data/test/data/seq_cso19_coding.fa +16 -0
  29. data/test/data/seq_cso19_transcript.fa +28 -0
  30. data/test/data/seq_drd3_gene.fa +838 -0
  31. data/test/data/seq_drd3_transcript.fa +22 -0
  32. data/test/data/seq_drd4_transcript.fa +24 -0
  33. data/test/data/seq_forward_composite.fa +1669 -0
  34. data/test/data/seq_par_boundary.fa +169 -0
  35. data/test/data/seq_rnd3_transcript.fa +47 -0
  36. data/test/data/seq_ub2r1_coding.fa +13 -0
  37. data/test/data/seq_ub2r1_gene.fa +174 -0
  38. data/test/data/seq_ub2r1_transcript.fa +26 -0
  39. data/test/data/seq_y.fa +2 -0
  40. data/test/default/test_connection.rb +60 -0
  41. data/test/default/test_releases.rb +130 -0
  42. data/test/ensembl_genomes/test_collection.rb +122 -0
  43. data/test/ensembl_genomes/test_gene.rb +46 -0
  44. data/test/ensembl_genomes/test_slice.rb +65 -0
  45. data/test/ensembl_genomes/test_variation.rb +38 -0
  46. data/test/helper.rb +18 -0
  47. data/test/release_50/core/test_project.rb +210 -0
  48. data/test/release_50/core/test_project_human.rb +52 -0
  49. data/test/release_50/core/test_relationships.rb +72 -0
  50. data/test/release_50/core/test_sequence.rb +170 -0
  51. data/test/release_50/core/test_slice.rb +116 -0
  52. data/test/release_50/core/test_transcript.rb +125 -0
  53. data/test/release_50/core/test_transform.rb +217 -0
  54. data/test/release_50/variation/test_activerecord.rb +138 -0
  55. data/test/release_50/variation/test_variation.rb +79 -0
  56. data/test/release_53/core/test_gene.rb +61 -0
  57. data/test/release_53/core/test_project.rb +91 -0
  58. data/test/release_53/core/test_project_human.rb +61 -0
  59. data/test/release_53/core/test_slice.rb +42 -0
  60. data/test/release_53/core/test_transform.rb +57 -0
  61. data/test/release_53/variation/test_activerecord.rb +137 -0
  62. data/test/release_53/variation/test_variation.rb +66 -0
  63. data/test/release_56/core/test_gene.rb +61 -0
  64. data/test/release_56/core/test_project.rb +91 -0
  65. data/test/release_56/core/test_slice.rb +49 -0
  66. data/test/release_56/core/test_transform.rb +57 -0
  67. data/test/release_56/variation/test_activerecord.rb +141 -0
  68. data/test/release_56/variation/test_consequence.rb +131 -0
  69. data/test/release_56/variation/test_variation.rb +63 -0
  70. data/test/release_60/core/test_gene.rb +61 -0
  71. data/test/release_60/core/test_project_human.rb +34 -0
  72. data/test/release_60/core/test_slice.rb +42 -0
  73. data/test/release_60/core/test_transcript.rb +120 -0
  74. data/test/release_60/core/test_transform.rb +57 -0
  75. data/test/release_60/variation/test_activerecord.rb +216 -0
  76. data/test/release_60/variation/test_consequence.rb +153 -0
  77. data/test/release_60/variation/test_variation.rb +64 -0
  78. data/test/release_62/core/test_gene.rb +42 -0
  79. data/test/release_62/variation/test_activerecord.rb +86 -0
  80. data/test/release_62/variation/test_consequence.rb +191 -0
  81. metadata +287 -0
@@ -0,0 +1,65 @@
1
+ # #
2
+ # = bio-ensembl.rb
3
+ #
4
+ # Copyright:: Copyright (C) 2009 Jan Aerts <http://jandot.myopenid.com>
5
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
6
+ #
7
+ # License:: The Ruby License
8
+ #
9
+ # @author Jan Aerts
10
+ # @author Francesco Strozzi
11
+
12
module Ensembl
  # Release number that a freshly created Session reports.
  ENSEMBL_RELEASE = 60

  # A Session holds the lookup tables and "current" coordinate-system
  # references that are shared through the Ensembl::SESSION constant.
  class Session
    # coord_systems:    key = id; value = CoordSystem object
    # coord_system_ids: lookup between coordinate system names and ids
    # seq_regions:      seq_region lookup table
    attr_accessor :coord_systems, :coord_system_ids, :seq_regions
    attr_accessor :seqlevel_id, :seqlevel_coord_system
    attr_accessor :toplevel_id, :toplevel_coord_system
    attr_accessor :collection_species, :release

    # Starts with empty lookup tables and the default release.
    def initialize
      empty_lookup_tables
      @release = ENSEMBL_RELEASE
    end

    # Empties the lookup tables and forgets the seqlevel/toplevel coordinate
    # systems and the collection species. The release is left as it is.
    def reset
      empty_lookup_tables
      @seqlevel_id = nil
      @toplevel_id = nil
      @seqlevel_coord_system = nil
      @toplevel_coord_system = nil
      @collection_species = nil
    end

    private

    # Replaces the three lookup tables with fresh, empty hashes.
    def empty_lookup_tables
      @coord_systems = {}
      @coord_system_ids = {}
      @seq_regions = {}
    end
  end

  # The single session object used throughout the library.
  SESSION = Ensembl::Session.new
end
46
+
47
+
48
+
49
+ # BioRuby
50
+ require 'bio'
51
+
52
+ # Database connection
53
+ require 'active_record'
54
+ require 'bio-ensembl/db_connection'
55
+
56
+ # Core modules
57
+ require 'bio-ensembl/core/activerecord'
58
+ require 'bio-ensembl/core/transcript'
59
+ require 'bio-ensembl/core/slice'
60
+ require 'bio-ensembl/core/project'
61
+ require 'bio-ensembl/core/transform'
62
+ require 'bio-ensembl/core/collection'
63
+
64
+ # Variation modules
65
+ require 'bio-ensembl/variation/activerecord'
@@ -0,0 +1,1812 @@
1
+ #
2
+ # = ensembl/core/activerecord.rb - ActiveRecord mappings to Ensembl core
3
+ #
4
+ # Copyright:: Copyright (C) 2007-2009 Jan Aerts <http://jandot.myopenid.com>
5
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
6
+ # License:: The Ruby License
7
+ #
8
+ # @author Jan Aerts
9
+ # @author Francesco Strozzi
10
+
11
+ # == What is it?
12
+ # The Ensembl module provides an API to the Ensembl databases
13
+ # stored at ensembldb.ensembl.org. This is the same information that is
14
+ # available from http://www.ensembl.org.
15
+ #
16
+ # The Ensembl::Core module mainly covers sequences and
17
+ # annotations.
18
+ # The Ensembl::Variation module covers variations (e.g. SNPs).
19
+ # The Ensembl::Compara module covers comparative mappings
20
+ # between species.
21
+ #
22
+ # == ActiveRecord
23
+ # The Ensembl API provides a ruby interface to the Ensembl mysql databases
24
+ # at ensembldb.ensembl.org. Most of the API is based on ActiveRecord to
25
+ # get data from that database. In general, each table is described by a
26
+ # class with the same name: the coord_system table is covered by the
27
+ # CoordSystem class, the seq_region table is covered by the SeqRegion class,
28
+ # etc. As a result, accessors are available for all columns in each table.
29
+ # For example, the seq_region table has the following columns: seq_region_id,
30
+ # name, coord_system_id and length. Through ActiveRecord, these column names
31
+ # become available as attributes of SeqRegion objects:
32
+ # puts my_seq_region.seq_region_id
33
+ # puts my_seq_region.name
34
+ # puts my_seq_region.coord_system_id
35
+ # puts my_seq_region.length.to_s
36
+ #
37
+ # ActiveRecord makes it easy to extract data from those tables using the
38
+ # collection of #find methods. There are three types of #find methods (e.g.
39
+ # for the CoordSystem class):
40
+ # a. find based on primary key in table:
41
+ # my_coord_system = CoordSystem.find(5)
42
+ # b. find_by_sql:
43
+ # my_coord_system = CoordSystem.find_by_sql("SELECT * FROM coord_system WHERE name = 'chromosome'")
44
+ # c. find_by_<insert_your_column_name_here>
45
+ # my_coord_system1 = CoordSystem.find_by_name('chromosome')
46
+ # my_coord_system2 = CoordSystem.find_by_rank(3)
47
+ # To find out which find_by_<column> methods are available, you can list the
48
+ # column names using the column_names class method:
49
+ #
50
+ # puts Ensembl::Core::CoordSystem.column_names.join("\t")
51
+ #
52
+ # For more information on the find methods, see
53
+ # http://ar.rubyonrails.org/classes/ActiveRecord/Base.html#M000344
54
+ #
55
+ # The relationships between different tables are accessible through the
56
+ # classes as well. For example, to loop over all seq_regions belonging to
57
+ # a coord_system (a coord_system "has many" seq_regions):
58
+ # chr_coord_system = CoordSystem.find_by_name('chromosome')
59
+ # chr_coord_system.seq_regions.each do |seq_region|
60
+ # puts seq_region.name
61
+ # end
62
+ # Of course, you can go the other way as well (a seq_region "belongs to"
63
+ # a coord_system):
64
+ # chr4 = SeqRegion.find_by_name('4')
65
+ # puts chr4.coord_system.name #--> 'chromosome'
66
+ #
67
+ # To find out what relationships exist for a given class, you can use the
68
+ # #reflect_on_all_associations class method:
69
+ # puts SeqRegion.reflect_on_all_associations(:has_many).collect{|a| a.name.to_s}.join("\n")
70
+ # puts SeqRegion.reflect_on_all_associations(:has_one).collect{|a| a.name.to_s}.join("\n")
71
+ # puts SeqRegion.reflect_on_all_associations(:belongs_to).collect{|a| a.name.to_s}.join("\n")
72
+ module Ensembl
73
+ # The Ensembl::Core module covers the core databases from
74
+ # ensembldb.ensembl.org and covers mainly sequences and their annotations.
75
+ # For a full description of the database (and therefore the classes that
76
+ # are available), see http://www.ensembl.org/info/software/core/schema/index.html
77
+ # and http://www.ensembl.org/info/software/core/schema/schema_description.html
78
+ module Core
79
+ # The Sliceable mixin holds the slice method and can be included
80
+ # in any class that lends itself to having a position on a SeqRegion.
81
module Sliceable
  # Builds an Ensembl::Core::Slice from this object's seq_region and its
  # start/end/strand coordinates.
  #
  # A coordinate is only read when the including class is
  # Ensembl::Core::Intron or lists the matching column in column_names;
  # otherwise start and stop are passed as nil and strand defaults to 1.
  #
  # @return [Ensembl::Core::Slice] Ensembl::Core::Slice object
  def slice
    readable = lambda do |column|
      self.class == Ensembl::Core::Intron || self.class.column_names.include?(column)
    end

    first = readable.call('seq_region_start') ? seq_region_start : nil
    last = readable.call('seq_region_end') ? seq_region_end : nil
    # FIXME: we shouldn't do this, but can't #project if no strand given
    orientation = readable.call('seq_region_strand') ? seq_region_strand : 1

    Ensembl::Core::Slice.new(seq_region, first, last, orientation)
  end

  # Returns the sequence of the slice built by #slice.
  #
  # @return [String] sequence
  def seq
    slice.seq
  end

  # Convenience reader for seq_region_start.
  #
  # @return [Integer] seq_region_start
  def start
    seq_region_start
  end

  # Convenience reader for seq_region_end.
  #
  # @return [Integer] seq_region_end
  def stop
    seq_region_end
  end

  # Convenience reader for seq_region_strand.
  #
  # @return [Numeric] seq_region_strand
  def strand
    seq_region_strand
  end

  # Length of the feature, counting both the start and the end position.
  #
  # @return [Integer] Length of the slice
  def length
    stop - start + 1
  end

  # Transfers the coordinates of this object to another coordinate system by
  # delegating to Slice#project on the slice built by #slice.
  #
  # Suppose you have a feature on a contig in human (say on contig
  # AC000031.6.1.38703) and you want to know its coordinates on the
  # chromosome: that is a projection from a higher ranked coordinate system
  # to a lower ranked one. Projections can also go from a chromosome to the
  # contig level, in which case more than one contig may be involved, with
  # gaps between them. The result is therefore an _array_ of Slice and Gap
  # objects.
  #
  # At the moment, projections can only be done if the two coordinate
  # systems are linked directly in the 'assembly' table.
  #
  # @example
  #  # Get a contig feature in cow and project to scaffold level
  #  # (i.e. going from a high rank coord system to a lower rank coord
  #  # system)
  #  original_feature = Gene.find(85743)
  #  target_slices = original_feature.project('scaffold')
  #
  # @param [String] coord_system_name Name of coordinate system to project coordinates to
  # @return [Array<Slice,Gap>] an array consisting of Slices and, if necessary, Gaps
  def project(coord_system_name)
    slice.project(coord_system_name)
  end
end
173
+
174
+
175
+ # The CoordSystem class describes the coordinate system to which
176
+ # a given SeqRegion belongs. It is an interface to the coord_system
177
+ # table of the Ensembl mysql database.
178
+ #
179
+ # Two virtual coordinate systems exist for
180
+ # every species:
181
+ # * toplevel: the coordinate system with rank 1
182
+ # * seqlevel: the coordinate system that contains the seq_regions
183
+ # with the sequence
184
+ #
185
+ # This class uses ActiveRecord to access data in the Ensembl database.
186
+ # See the general documentation of the Ensembl module for
187
+ # more information on what this means and what methods are available.
188
+ #
189
+ # @example
190
+ # coord_system = Ensembl::Core::CoordSystem.find_by_name('chromosome')
191
+ # if coord_system.toplevel?
192
+ # puts coord_system.name + " is the toplevel coordinate system."
193
+ # end
194
class CoordSystem < DBConnection
  set_primary_key 'coord_system_id'

  has_many :seq_regions

  # Checks whether this coordinate system is the toplevel (rank 1)
  # coordinate system.
  #
  # @return [Boolean] True if coord_system is toplevel, else false.
  def toplevel?
    if Collection.check # When using multi-species databases
      return true if self == CoordSystem.find_by_rank_and_species_id(1, self.species_id)
    else
      return true if self == CoordSystem.find_by_rank(1)
    end
    return false
  end

  # Checks whether this coordinate system is the seqlevel coordinate system
  # (the one whose attrib contains 'sequence_level').
  #
  # @return [Boolean] True if coord_system is seqlevel, else false.
  def seqlevel?
    if Collection.check # When using multi-species databases
      return true if self == CoordSystem.find_by_sql("SELECT * FROM coord_system WHERE attrib LIKE '%sequence_level%' AND species_id = #{self.species_id}")[0]
    else
      # find_seqlevel is an instance method, so it has to be called on this
      # object; CoordSystem.find_seqlevel is not defined.
      return true if self == self.find_seqlevel
    end
    return false
  end

  # Returns the toplevel coordinate system, caching it in Ensembl::SESSION.
  # This is an instance method: in multi-species databases the lookup is
  # restricted to the species_id of the receiver, and the cached value is
  # refreshed when it belongs to a different species.
  #
  # @return [Ensembl::Core::CoordSystem] Toplevel coord_system object.
  def find_toplevel
    session = Ensembl::SESSION
    cached = session.toplevel_coord_system
    stale = cached.nil? || (Collection.check && cached.species_id != self.species_id)

    if stale
      if Collection.check # When using multi-species databases
        session.toplevel_coord_system = CoordSystem.find_by_rank_and_species_id(1, self.species_id)
      else
        session.toplevel_coord_system = CoordSystem.find_by_rank(1)
      end
      # Register the result in the session lookup tables as well.
      session.toplevel_id = session.toplevel_coord_system.id
      session.coord_system_ids[session.toplevel_coord_system.name] = session.toplevel_id
      session.coord_systems[session.toplevel_id] = session.toplevel_coord_system
    end
    return session.toplevel_coord_system
  end

  # Returns the seqlevel coordinate system, caching it in Ensembl::SESSION.
  # This is an instance method: in multi-species databases the lookup is
  # restricted to the species_id of the receiver, and the cached value is
  # refreshed when it belongs to a different species.
  #
  # @return [Ensembl::Core::CoordSystem] Seqlevel coord_system object.
  def find_seqlevel
    session = Ensembl::SESSION
    cached = session.seqlevel_coord_system
    stale = cached.nil? || (Collection.check && cached.species_id != self.species_id)

    if stale
      if Collection.check # When using multi-species databases
        session.seqlevel_coord_system = CoordSystem.find_by_sql("SELECT * FROM coord_system WHERE attrib LIKE '%sequence_level%' AND species_id = #{self.species_id}")[0]
      else
        session.seqlevel_coord_system = CoordSystem.find_by_sql("SELECT * FROM coord_system WHERE attrib LIKE '%sequence_level%'")[0]
      end
      # Register the result in the session lookup tables as well.
      session.seqlevel_id = session.seqlevel_coord_system.id
      session.coord_system_ids[session.seqlevel_coord_system.name] = session.seqlevel_id
      session.coord_systems[session.seqlevel_id] = session.seqlevel_coord_system
    end
    return session.seqlevel_coord_system
  end

  # Returns the coordinate system with the given name. In multi-species
  # databases the lookup is restricted to the species_id of the receiver.
  #
  # The name is passed through a dynamic finder rather than interpolated
  # into an SQL string, so it is quoted by ActiveRecord.
  #
  # @param [String] coord_system_name Name of coordinate system
  # @return [Ensembl::Core::CoordSystem] Coordinate system object
  def find_level(coord_system_name)
    if Collection.check # When using multi-species databases
      return CoordSystem.find_by_name_and_species_id(coord_system_name, self.species_id)
    else
      return CoordSystem.find_by_name(coord_system_name)
    end
  end

  # Returns the default coordinate system for a name. There might be more
  # than one coordinate system with the same name but a different version
  # (e.g. in human, 'chromosome' exists for both NCBI36 and NCBI35). If
  # exactly one coordinate system has the name it is returned; otherwise the
  # first one whose attrib matches 'default_version' is returned (nil if
  # there is none).
  #
  # @param [String] name Name of coordinate system
  # @return [Ensembl::Core::CoordSystem] Coordinate system object
  def self.find_default_by_name(name)
    candidates = Ensembl::Core::CoordSystem.find_all_by_name(name)
    return candidates[0] if candidates.length == 1

    return candidates.detect { |cs| cs.attrib =~ /default_version/ }
  end

  # Returns the name of the coordinate system joined with its version by a
  # colon. If no version is available, just the name is returned.
  #
  # @return [String] Name of the coordinate system if possible including version
  def name_with_version
    return name if self.version.nil?

    return [name, version].join(':')
  end

  ## Calculate the shortest path between a source coordinate system and a
  ## target coordinate system. This can be done by looking for the
  ## 'assembly.mapping' records in the meta_coord table.
  ## At the moment, only direct mappings are possible. Later on, this method
  ## should be changed to make longer paths possible.
  ## Is used to get features for a slice object.
  #def calculate_path(target_coord_system)
  #  MetaCoord.find_all_by_meta_key('assembly.mapping').each do |mapping|
  #    coord_system_names = mapping.meta_value.split(/[#|\|]/)
  #    if coord_system_names.sort.join(';') == [self.name_with_version, target_coord_system.name_with_version].sort.join(';')
  #      answer = Array.new
  #      answer.push(CoordSystem.find_by_name(coord_system_names[0]))
  #      answer.push(CoordSystem.find_by_name(coord_system_names[1]))
  #      return answer
  #    end
  #  end
  #  return nil
  #
  #end
end
338
+
339
+ # The SeqRegion class describes a part of a coordinate system. It is an
340
+ # interface to the seq_region table of the Ensembl mysql database.
341
+ #
342
+ # This class uses ActiveRecord to access data in the Ensembl database.
343
+ # See the general documentation of the Ensembl module for
344
+ # more information on what this means and what methods are available.
345
+ #
346
+ # @example
347
+ # chr4 = SeqRegion.find_by_name('4')
348
+ # puts chr4.coord_system.name #--> 'chromosome'
349
+ # chr4.genes.each do |gene|
350
+ # puts gene.biotype
351
+ # end
352
class SeqRegion < DBConnection
  set_primary_key 'seq_region_id'

  belongs_to :coord_system
  has_many :simple_features
  has_many :marker_features
  has_many :genes
  has_many :exons
  has_many :repeat_features
  has_many :seq_region_attribs
  # :through has to name an association declared on this class; that is
  # the plural :seq_region_attribs above (there is no :seq_region_attrib).
  has_many :attrib_types, :through => :seq_region_attribs
  has_many :transcripts
  has_one :dna
  has_many :dna_align_features
  has_many :misc_features
  has_many :density_features
  has_many :karyotypes
  has_many :oligo_features
  has_many :prediction_exons
  has_many :prediction_transcripts
  has_many :protein_align_features
  has_many :regulatory_features
  has_many :assembly_exceptions

  # See http://blog.hasmanythrough.com/2006/4/21/self-referential-through
  has_many :asm_links_as_asm, :foreign_key => 'asm_seq_region_id', :class_name => 'AssemblyLink'
  has_many :asm_links_as_cmp, :foreign_key => 'cmp_seq_region_id', :class_name => 'AssemblyLink'
  has_many :asm_seq_regions, :through => :asm_links_as_cmp
  has_many :cmp_seq_regions, :through => :asm_links_as_asm

  alias attribs seq_region_attribs

  # Returns a slice object that covers the whole of the seq_region.
  #
  # @return [Ensembl::Core::Slice] Slice object
  def slice
    return Ensembl::Core::Slice.new(self)
  end

  # Returns the sequence regions on which the current region is assembled.
  # For example, called on a contig sequence region it might return the
  # chromosome that the contig is part of. Optionally takes a coordinate
  # system name so that only regions of that coordinate system are returned.
  #
  # @param [String] coord_system_name Name of coordinate system
  # @return [Array<SeqRegion>] Array of SeqRegion objects
  def assembled_seq_regions(coord_system_name = nil)
    return self.asm_seq_regions if coord_system_name.nil?

    coord_system = CoordSystem.find_by_name(coord_system_name)
    return self.asm_seq_regions.select { |asr| asr.coord_system_id == coord_system.id }
  end

  # Returns the sequence regions contained within the current region (the
  # bits used to assemble it). For example, called on a chromosome sequence
  # region it might return the contigs that were assembled into this
  # chromosome. Optionally takes a coordinate system name so that only
  # regions of that coordinate system are returned.
  #
  # @param [String] coord_system_name Name of coordinate system
  # @return [Array<SeqRegion>] Array of SeqRegion objects
  def component_seq_regions(coord_system_name = nil)
    return self.cmp_seq_regions if coord_system_name.nil?

    coord_system = CoordSystem.find_by_name(coord_system_name)
    return self.cmp_seq_regions.select { |csr| csr.coord_system_id == coord_system.id }
  end

  # Queries the assembly table for the rows (i.e. AssemblyLink objects) for
  # which this seq_region is the assembly. Without a coordinate system all
  # such links are returned; with one, only links whose component
  # seq_region belongs to that coordinate system.
  #
  # The coordinate system is also registered in the Ensembl::SESSION lookup
  # tables if its name is not known there yet.
  #
  # @example
  #  my_seq_region = SeqRegion.find_by_name('4')
  #  first_link = my_seq_region.assembly_links_as_assembly[0]
  #  puts first_link.asm_start.to_s + "\t" + first_link.asm_end.to_s
  #
  # @param [CoordSystem] coord_system Coordinate system object
  #  that the components should belong to
  # @return [Array<AssemblyLink>] Array of AssemblyLink objects
  def assembly_links_as_assembly(coord_system = nil)
    # Mirror assembly_links_as_component: no filter means all links.
    return self.asm_links_as_asm if coord_system.nil?

    unless Ensembl::SESSION.coord_system_ids.has_key?(coord_system.name)
      # coord_systems maps id => CoordSystem object, so store the object.
      Ensembl::SESSION.coord_systems[coord_system.id] = coord_system
      Ensembl::SESSION.coord_system_ids[coord_system.name] = coord_system.id
    end
    return AssemblyLink.find_by_sql("SELECT * FROM assembly a WHERE a.asm_seq_region_id = #{self.id} AND a.cmp_seq_region_id IN (SELECT sr.seq_region_id FROM seq_region sr WHERE coord_system_id = #{coord_system.id} )")
  end

  # Queries the assembly table for the rows (i.e. AssemblyLink objects) for
  # which this seq_region is the component. Without a coordinate system all
  # such links are returned; with one, only links whose assembly seq_region
  # belongs to that coordinate system.
  #
  # @example
  #  my_seq_region = SeqRegion.find_by_name('Chr4.003.1')
  #  first_link = my_seq_region.assembly_links_as_component[0]
  #  puts first_link.asm_start.to_s + "\t" + first_link.asm_end.to_s
  #
  # @param [CoordSystem] coord_system Coordinate system object that the assembly
  #  should belong to
  # @return [Array<AssemblyLink>] Array of AssemblyLink objects
  def assembly_links_as_component(coord_system = nil)
    return self.asm_links_as_cmp if coord_system.nil?

    return self.asm_links_as_cmp.select { |alac| alac.asm_seq_region.coord_system_id == coord_system.id }
  end

  # Returns the sequence stored in the associated dna record. At the
  # moment, this only works if the region belongs to the seqlevel
  # coordinate system.
  #
  # @return [String] DNA sequence
  def sequence
    return self.dna.sequence
  end
  alias seq sequence

  # Returns the part of the sequence between start and stop, both 1-based
  # and inclusive. At the moment, this only works if the region belongs to
  # the seqlevel coordinate system.
  #
  # @param [Integer] start Start position
  # @param [Integer] stop Stop position
  # @return [String] DNA sequence
  def subsequence(start, stop)
    # String#slice takes a 0-based offset and a length.
    return self.seq.slice(start - 1, (stop - start) + 1)
  end
  alias subseq subsequence
end
504
+
505
+ # The AssemblyLink class describes the relationships between different
506
+ # seq_regions. For example, a chromosome might consist of a number of
507
+ # scaffolds, each of which in turn consists of a number of contigs. The
508
+ # AssemblyLink class represents one assembled/component pair of seq_regions.
509
+ # This class is an interface to the assembly table of the Ensembl mysql
510
+ # database.
511
+ #
512
+ # This class uses ActiveRecord to access data in the Ensembl database.
513
+ # See the general documentation of the Ensembl module for
514
+ # more information on what this means and what methods are available.
515
+ #
516
+ # @example
517
+ # chr4 = SeqRegion.find_by_name('4')
518
+ # puts chr4.coord_system.name #--> 'chromosome'
519
+ # chr4.genes.each do |gene|
520
+ # puts gene.biotype
521
+ # end
522
class AssemblyLink < DBConnection
  # Mapped onto the 'assembly' table and declared without a primary key.
  set_table_name('assembly')
  set_primary_key(nil)

  # Both ends of a link are SeqRegion records, reached through the
  # asm_seq_region_id and cmp_seq_region_id columns.
  # See http://blog.hasmanythrough.com/2006/4/21/self-referential-through
  belongs_to(:asm_seq_region, :foreign_key => 'asm_seq_region_id', :class_name => 'SeqRegion')
  belongs_to(:cmp_seq_region, :foreign_key => 'cmp_seq_region_id', :class_name => 'SeqRegion')
end
530
+
531
+ # The AssemblyException class describes the exceptions to AssemblyLink. Most
532
+ # notably, this concerns the allosomes. In human, for example, only the
533
+ # part of the Y chromosome that is different from X is covered in the
534
+ # assembly table. Therefore, the sequence of the tip and end of the Y
535
+ # chromosome are not stored in the database, but fetched from the X
536
+ # chromosome. The assembly_exception table contains the information on
537
+ # which bits are the same.
538
+ #
539
+ # This class uses ActiveRecord to access data in the Ensembl database.
540
+ # See the general documentation of the Ensembl module for
541
+ # more information on what this means and what methods are available.
542
+ #
543
+ # This class should normally not be used directly by the user.
544
class AssemblyException < DBConnection
  # Positioned on a seq_region, so slices can be built for it.
  include Sliceable

  set_primary_key('assembly_exception_id')

  belongs_to(:seq_region)
end
551
+
552
+ # The MetaCoord class describes what coordinate systems are used to annotate
553
+ # features. It will for example tell you that marker_features are annotated
554
+ # on the chromosome, supercontig or clone level.
555
+ #
556
+ # This class should normally not be used by the end user, but is used internally.
557
+ #
558
+ # This class uses ActiveRecord to access data in the Ensembl database.
559
+ # See the general documentation of the Ensembl module for
560
+ # more information on what this means and what methods are available.
561
class MetaCoord < DBConnection
  # Declared without a primary key.
  set_primary_key(nil)
end
564
+
565
+ # The Meta class describes meta data of the database. These include information
566
+ # on what coordinate system is mapping on another one and which patches
567
+ # are applied.
568
+ #
569
+ # This class should normally not be used by the end user, but is used internally.
570
+ #
571
+ # This class uses ActiveRecord to access data in the Ensembl database.
572
+ # See the general documentation of the Ensembl module for
573
+ # more information on what this means and what methods are available.
574
class Meta < DBConnection
  # Declared without a primary key.
  set_primary_key(nil)
end
577
+
578
+ # The Analysis class describes an analysis.
579
+ #
580
+ # This class uses ActiveRecord to access data in the Ensembl database.
581
+ # See the general documentation of the Ensembl module for
582
+ # more information on what this means and what methods are available.
583
+ #
584
+ # @example
585
+ # repeat_masker_analysis = Analysis.find_by_logic_name('RepeatMask')
586
+ # puts repeat_masker_analysis.to_yaml
587
class Analysis < DBConnection
  set_primary_key('analysis_id')

  # Features and types that refer back to this analysis; declared in the
  # same order as before.
  has_many(:genes)
  has_many(:dna_align_features)
  has_many(:protein_align_features)
  has_one(:analysis_description)
  has_many(:density_types)
  has_many(:oligo_features)
  has_many(:protein_features)
  has_many(:regulatory_features)
  has_many(:simple_features)
  has_many(:prediction_transcripts)
end
601
+
602
# AnalysisDescription is the description record that belongs to an Analysis.
#
# Data is read from the Ensembl database through ActiveRecord; the general
# documentation of the Ensembl module explains what that means and which
# methods are available.
#
# @example
#   descr = Analysis.find(3).analysis_description
#   puts descr.to_yaml
class AnalysisDescription < DBConnection
  # No primary key is configured, so rows are reached through their analysis.
  set_primary_key(nil)

  belongs_to(:analysis)
end
616
+
617
# Dna holds the actual DNA sequence of those sequence regions that belong to
# the seq_level coordinate system.
#
# Data is read from the Ensembl database through ActiveRecord; the general
# documentation of the Ensembl module explains what that means and which
# methods are available.
#
# @example
#   seq_region = SeqRegion.find(1)
#   puts seq_region.dna.sequence
class Dna < DBConnection
  # No primary key is configured for this table.
  set_primary_key(nil)

  # The sequence region whose sequence this row stores.
  belongs_to(:seq_region)
end
632
+
633
# The Exon class describes an exon.
#
# This class uses ActiveRecord to access data in the Ensembl database.
# See the general documentation of the Ensembl module for
# more information on what this means and what methods are available.
#
# This class includes the mixin Sliceable, which means that it is mapped
# to a SeqRegion object and a Slice can be created for objects of this
# class. See Sliceable and Slice for more information.
#
# @example
#   seq_region = SeqRegion.find(1)
#   puts seq_region.exons.length
class Exon < DBConnection
  include Sliceable

  set_primary_key 'exon_id'

  belongs_to :seq_region
  has_many :exon_transcripts
  has_many :transcripts, :through => :exon_transcripts

  # Translations starting or ending in this exon. Declaring both under one
  # name makes the later declaration replace the earlier one, so each
  # foreign key gets an association of its own.
  has_many :start_translations, :class_name => 'Translation', :foreign_key => 'start_exon_id'
  has_many :end_translations, :class_name => 'Translation', :foreign_key => 'end_exon_id'

  # Kept for backward compatibility. Of the two former declarations of
  # :translations, the end_exon_id one was declared last and therefore was
  # the one in effect.
  has_many :translations, :foreign_key => 'end_exon_id'

  has_one :exon_stable_id

  has_many :exon_supporting_features
  has_many :dna_align_features, :through => :exon_supporting_features, :conditions => ["feature_type = 'dna_align_feature'"]
  has_many :protein_align_features, :through => :exon_supporting_features, :conditions => ["feature_type = 'protein_align_feature'"]

  # The Exon#stable_id method returns the stable ID stored in the associated
  # exon_stable_id record.
  def stable_id
    self.exon_stable_id.stable_id
  end

  # The Exon#seq method returns the sequence of the exon.
  #
  # The SeqRegion is looked up in Ensembl::SESSION.seq_regions first and is
  # stored there after being loaded, so later calls for exons on the same
  # region can reuse it.
  def seq
    cache = Ensembl::SESSION.seq_regions
    if cache.has_key?(self.seq_region_id)
      region = cache[self.seq_region_id]
    else
      region = self.seq_region
      cache[region.id] = region
    end
    Ensembl::Core::Slice.new(region, seq_region_start, seq_region_end, seq_region_strand).seq
  end

  # The Exon#find_by_stable_id class method fetches an Exon through its
  # ExonStableId record.
  #
  # @param stable_id stable ID to look up
  # @return the matching exon, or nil when the stable ID is not found
  def self.find_by_stable_id(stable_id)
    exon_stable_id = ExonStableId.find_by_stable_id(stable_id)
    return nil if exon_stable_id.nil?
    exon_stable_id.exon
  end
end
692
+
693
# ExonStableId is the interface to the exon_stable_id table, which contains
# the Ensembl stable IDs for exons.
#
# Data is read from the Ensembl database through ActiveRecord; the general
# documentation of the Ensembl module explains what that means and which
# methods are available.
#
# @example
#   my_exon = ExonStableId.find_by_stable_id('ENSE00001494622').exon
class ExonStableId < DBConnection
  # The stable ID itself serves as the primary key.
  set_primary_key("stable_id")

  belongs_to(:exon)
end
707
+
708
# ExonTranscript is the link between exons and transcripts.
#
# Data is read from the Ensembl database through ActiveRecord; the general
# documentation of the Ensembl module explains what that means and which
# methods are available.
#
# @example
#   link = Exon.find(1).exon_transcripts.first
#   puts link.exon.to_yaml
#   puts link.transcript.to_yaml
class ExonTranscript < DBConnection
  # No primary key is configured; rows are reached from either side of the link.
  set_primary_key(nil)

  belongs_to(:exon)
  belongs_to(:transcript)
end
724
+
725
# ExonSupportingFeature maps the supporting_feature table. Each row links an
# exon to an alignment feature; both alignment associations read the same
# feature_id column.
class ExonSupportingFeature < DBConnection
  set_table_name "supporting_feature"
  set_primary_key(nil)

  belongs_to(:exon)
  belongs_to(:dna_align_feature, :class_name => "DnaAlignFeature", :foreign_key => "feature_id")
  belongs_to(:protein_align_feature, :class_name => "ProteinAlignFeature", :foreign_key => "feature_id")
end
733
+
734
# TranscriptSupportingFeature links a transcript to an alignment feature;
# both alignment associations read the same feature_id column.
class TranscriptSupportingFeature < DBConnection
  set_primary_key(nil)

  belongs_to(:transcript)
  belongs_to(:dna_align_feature, :class_name => "DnaAlignFeature", :foreign_key => "feature_id")
  belongs_to(:protein_align_feature, :class_name => "ProteinAlignFeature", :foreign_key => "feature_id")
end
741
+
742
# SimpleFeature describes simple features that have a position on a
# SeqRegion.
#
# Data is read from the Ensembl database through ActiveRecord; the general
# documentation of the Ensembl module explains what that means and which
# methods are available.
#
# Sliceable is mixed in, so objects of this class are mapped to a SeqRegion
# and a Slice can be created for them. See Sliceable and Slice for details.
#
# @example
#   simple_feature = SimpleFeature.find(123)
#   puts simple_feature.analysis.logic_name
class SimpleFeature < DBConnection
  include(Sliceable)

  set_primary_key("simple_feature_id")

  belongs_to(:seq_region)
  belongs_to(:analysis)
end
764
+
765
# DensityFeature is the interface to the density_feature table.
#
# Data is read from the Ensembl database through ActiveRecord; the general
# documentation of the Ensembl module explains what that means and which
# methods are available.
#
# Each density feature refers to a SeqRegion and to a DensityType. Note that
# this class does not mix in Sliceable.
#
# @example
#   density_feature = DensityFeature.find(2716384)
#   puts density_feature.to_yaml
class DensityFeature < DBConnection
  set_primary_key("density_feature_id")

  belongs_to(:density_type)
  belongs_to(:seq_region)
end
785
+
786
# DensityType is the interface to the density_type table.
#
# Data is read from the Ensembl database through ActiveRecord; the general
# documentation of the Ensembl module explains what that means and which
# methods are available.
#
# A density type groups its density features and refers to the analysis it
# belongs to. Note that this class does not mix in Sliceable.
class DensityType < DBConnection
  set_primary_key("density_type_id")

  belongs_to(:analysis)
  has_many(:density_features)
end
803
+
804
# Marker is the interface to the marker table, which contains primer
# sequences and PCR product lengths.
#
# Data is read from the Ensembl database through ActiveRecord; the general
# documentation of the Ensembl module explains what that means and which
# methods are available.
#
# @example
#   marker = Marker.find(52194)
#   puts marker.left_primer
#   puts marker.right_primer
#   puts marker.min_primer_dist.to_s
class Marker < DBConnection
  set_primary_key "marker_id"

  has_many :marker_features
  has_many :marker_synonyms
  has_many :marker_map_locations

  # Tells ActiveRecord that no column of this table holds a class name.
  # NOTE(review): presumably needed because the table has a column called
  # "type" - confirm against the schema.
  def self.inheritance_column
    nil
  end

  # Marker#name joins the names of all synonyms of this marker with commas.
  #
  # @example
  #   marker = Marker.find(1)
  #   puts marker.name    --> 58017,D29149
  def name
    synonym_names = self.marker_synonyms.collect { |synonym| synonym.name }
    synonym_names.join(',')
  end

  # Marker.find_by_name returns a single marker carrying this name.
  #
  # @return [Marker, nil] first match of find_all_by_name, or nil
  def self.find_by_name(name)
    self.find_all_by_name(name).first
  end

  # Marker.find_all_by_name returns every marker carrying this name, looked
  # up through the MarkerSynonym records with that name.
  #
  # @return [Array] Empty array or array of Marker objects
  def self.find_all_by_name(name)
    synonyms = Ensembl::Core::MarkerSynonym.find_all_by_name(name)
    per_synonym = synonyms.collect do |synonym|
      Ensembl::Core::Marker.find_all_by_marker_id(synonym.marker_id)
    end
    per_synonym.flatten
  end

  #def to_mappings
  #  output = Array.new
  #  self.marker_features.each do |mf|
  #    output.push(mf.slice.display_name)
  #  end
  #  return output.join("\n")
  #
  #end
end
873
+
874
# MarkerSynonym is the interface to the marker_synonym table, which contains
# the names of markers. The markers themselves live in the marker table
# (Marker class).
#
# Data is read from the Ensembl database through ActiveRecord; the general
# documentation of the Ensembl module explains what that means and which
# methods are available.
#
# @example
#   marker = Marker.find(52194)
#   marker.marker_synonyms.each do |synonym|
#     puts synonym.source
#     puts synonym.name
#   end
class MarkerSynonym < DBConnection
  set_primary_key("marker_synonym_id")

  belongs_to(:marker)
end
891
+
892
# MarkerFeature is the interface to the marker_feature table, which contains
# the mappings of markers to a SeqRegion.
#
# Data is read from the Ensembl database through ActiveRecord; the general
# documentation of the Ensembl module explains what that means and which
# methods are available.
#
# Sliceable is mixed in, so objects of this class are mapped to a SeqRegion
# and a Slice can be created for them. See Sliceable and Slice for details.
#
# @example
#   marker = Marker.find(52194)
#   marker.marker_features.each do |feature|
#     puts feature.seq_region_start.to_s
#     puts feature.seq_region_end.to_s
#   end
class MarkerFeature < DBConnection
  include(Sliceable)

  set_primary_key("marker_feature_id")

  belongs_to(:marker)
  belongs_to(:seq_region)
end
915
+
916
# The MiscFeature class provides an interface to the misc_feature
# table. The actual type of feature is stored in the MiscSet class.
#
# This class uses ActiveRecord to access data in the Ensembl database.
# See the general documentation of the Ensembl module for
# more information on what this means and what methods are available.
#
# This class includes the mixin Sliceable, which means that it is mapped
# to a SeqRegion object and a Slice can be created for objects of this
# class. See Sliceable and Slice for more information.
#
# @example
#   clone = MiscFeature.find_by_attrib_type_value('name', 'RP11-1A1')
#   puts clone.attribs.collect { |a| a.to_s }.join("\n")
class MiscFeature < DBConnection
  include Sliceable

  set_primary_key 'misc_feature_id'

  belongs_to :seq_region
  # NOTE(review): the link table is declared has_one here although MiscSet
  # declares the other side with has_many - confirm which one is intended.
  has_one :misc_feature_misc_set
  has_many :misc_sets, :through => :misc_feature_misc_set

  has_many :misc_attribs

  alias attribs misc_attribs

  # Returns the first MiscFeature carrying an attribute of the given type
  # code and value.
  #
  # @param code code of the AttribType
  # @param value attribute value to match
  # @return [MiscFeature, nil] first match, or nil when there is none
  def self.find_by_attrib_type_value(code, value)
    self.find_all_by_attrib_type_value(code, value).first
  end

  # Returns all MiscFeature objects carrying an attribute of the given type
  # code and value.
  #
  # @param code code of the AttribType
  # @param value attribute value to match
  # @return [Array] Empty array or array of MiscFeature objects
  def self.find_all_by_attrib_type_value(code, value)
    attrib_type = AttribType.find_by_code(code)
    # An unknown code cannot match anything; without this guard the lookup
    # below would be run with a nil attrib_type_id.
    return [] if attrib_type.nil?

    # Pass the numeric id explicitly instead of the AttribType record.
    misc_attribs = MiscAttrib.find_all_by_attrib_type_id_and_value(attrib_type.id, value)
    misc_attribs.collect { |ma|
      MiscFeature.find_all_by_misc_feature_id(ma.misc_feature_id)
    }.flatten
  end
end
957
+
958
+
959
# MiscAttrib is the interface to the misc_attrib table. It is the link
# between MiscFeature and AttribType.
#
# Data is read from the Ensembl database through ActiveRecord; the general
# documentation of the Ensembl module explains what that means and which
# methods are available.
#
# @example
#   misc_feature = MiscFeature.find(1)
#   misc_feature.misc_attribs.each do |attrib|
#     puts attrib.to_s
#   end
class MiscAttrib < DBConnection
  set_primary_key(nil)

  belongs_to(:misc_feature)
  belongs_to(:attrib_type)

  # Renders the attribute as "<attrib type code>:<value>".
  def to_s
    type_code = self.attrib_type.code
    type_code + ":" + self.value.to_s
  end
end
980
+
981
# The MiscSet class provides an interface to the misc_set
# table. This table contains the sets to which MiscFeature objects
# belong.
#
# This class uses ActiveRecord to access data in the Ensembl database.
# See the general documentation of the Ensembl module for
# more information on what this means and what methods are available.
#
# @example
#   misc_set = MiscSet.find(1)
#   puts misc_set.misc_features.length.to_s
class MiscSet < DBConnection
  set_primary_key 'misc_set_id'

  has_many :misc_feature_misc_sets
  # :through has to name an association declared on this class, which is
  # the plural :misc_feature_misc_sets above.
  has_many :misc_features, :through => :misc_feature_misc_sets
end
998
+
999
# MiscFeatureMiscSet is the interface to the misc_feature_misc_set table,
# which links MiscFeature objects to their MiscSet.
#
# Data is read from the Ensembl database through ActiveRecord; the general
# documentation of the Ensembl module explains what that means and which
# methods are available.
#
# @example
#   # TODO
class MiscFeatureMiscSet < DBConnection
  # No primary key is configured; rows are reached from either side of the link.
  set_primary_key(nil)

  belongs_to(:misc_feature)
  belongs_to(:misc_set)
end
1015
+
1016
# The Gene class provides an interface to the gene
# table. This table contains mappings of genes to a SeqRegion.
#
# This class uses ActiveRecord to access data in the Ensembl database.
# See the general documentation of the Ensembl module for
# more information on what this means and what methods are available.
#
# This class includes the mixin Sliceable, which means that it is mapped
# to a SeqRegion object and a Slice can be created for objects of this
# class. See Sliceable and Slice for more information.
#
# @example
#   puts Gene.find_all_by_biotype('protein_coding').length
class Gene < DBConnection
  include Sliceable

  set_primary_key 'gene_id'

  belongs_to :seq_region
  has_one :gene_stable_id

  has_many :gene_attribs
  # :through has to name an association declared on this class, which is
  # the plural :gene_attribs above.
  has_many :attrib_types, :through => :gene_attribs

  has_many :transcripts

  belongs_to :analysis

  has_many :object_xrefs, :foreign_key => 'ensembl_id', :conditions => "ensembl_object_type = 'Gene'"
  has_many :xrefs, :through => :object_xrefs

  alias attribs gene_attribs

  # The Gene#stable_id method returns the stable_id of the gene (i.e. the
  # ENSG id).
  def stable_id
    self.gene_stable_id.stable_id
  end

  # The Gene#display_label method returns the default name of the gene,
  # i.e. the display label of the Xref referenced by display_xref_id.
  def display_label
    Xref.find(self.display_xref_id).display_label
  end
  alias :display_name :display_label
  alias :label :display_label
  alias :name :display_label

  # The Gene#find_all_by_name class method searches the Xrefs for that name
  # and returns an array of the corresponding Gene objects. If the name is
  # not found, it returns an empty array.
  def self.find_all_by_name(name)
    xrefs = Ensembl::Core::Xref.find_all_by_display_label(name)
    genes = xrefs.collect do |xref|
      Ensembl::Core::Gene.find_by_display_xref_id(xref.xref_id)
    end
    # Xrefs that are not the display xref of any gene yield nil.
    genes.compact
  end

  # The Gene#find_by_name class method searches the Xrefs for that name
  # and returns one Gene object (even if there should be more). If the name
  # is not found, it returns nil.
  def self.find_by_name(name)
    self.find_all_by_name(name).first
  end

  # The Gene#find_by_stable_id class method fetches a Gene object based on
  # its stable ID (i.e. the "ENSG" accession number).
  #
  # @param stable_id a single stable ID, or an Array of stable IDs
  # @return the Gene for a single ID, an Array of Gene objects for an Array
  #   of IDs, or nil when nothing is found
  def self.find_by_stable_id(stable_id)
    if stable_id.kind_of?(Array)
      gene_stable_ids = GeneStableId.where({:stable_id => stable_id})
      return nil if gene_stable_ids.size == 0
      gene_stable_ids.map { |id| id.gene }
    else
      gene_stable_id = GeneStableId.find_by_stable_id(stable_id)
      gene_stable_id.nil? ? nil : gene_stable_id.gene
    end
  end

  # The Gene#all_xrefs method is a convenience method in that it combines
  # three methods into one. It collects all xrefs for the gene itself, plus
  # all xrefs for all transcripts for the gene, and all xrefs for all
  # translations for those transcripts.
  def all_xrefs
    answer = [self.xrefs]
    self.transcripts.each do |transcript|
      answer.push(transcript.xrefs)
      # Not every transcript has a translation.
      answer.push(transcript.translation.xrefs) unless transcript.translation.nil?
    end
    answer.flatten
  end

  # The Gene#go_terms method returns all GO terms associated with a gene.
  #
  # @return [Array] unique primary accessions of the GO xrefs; empty when
  #   the database has no external db named 'GO'
  def go_terms
    go_db = ExternalDb.find_by_db_name('GO')
    return [] if go_db.nil?

    go_db_id = go_db.id
    self.all_xrefs.select { |x| x.external_db_id == go_db_id }.collect { |x| x.dbprimary_acc }.uniq
  end

  # The Gene#hgnc returns the HGNC symbol for the gene.
  #
  # @return display label of the first HGNC_curated_gene xref, or nil when
  #   the gene has none or the external db does not exist
  def hgnc
    hgnc_db = ExternalDb.find_by_db_name('HGNC_curated_gene')
    return nil if hgnc_db.nil?

    hgnc_db_id = hgnc_db.id
    xref = self.all_xrefs.select { |x| x.external_db_id == hgnc_db_id }[0]
    return nil if xref.nil?
    xref.display_label
  end

  # The Gene#canonical_transcript returns the longest transcript for that
  # gene, judged by the length of the transcript sequence. It has to live
  # inside the class body; defined after the closing "end" it would not be
  # an instance method of Gene.
  #
  # @return the transcript with the longest sequence, or nil when the gene
  #   has no transcripts
  def canonical_transcript
    self.transcripts.max_by { |transcript| transcript.seq.length }
  end
end
1144
+
1145
# GeneStableId is the interface to the gene_stable_id table, which contains
# the Ensembl stable IDs for genes.
#
# Data is read from the Ensembl database through ActiveRecord; the general
# documentation of the Ensembl module explains what that means and which
# methods are available.
#
# @example
#   my_gene = GeneStableId.find_by_stable_id('ENSBTAG00000011670').gene
class GeneStableId < DBConnection
  # The stable ID itself serves as the primary key.
  set_primary_key("stable_id")

  belongs_to(:gene)
end
1159
+
1160
# MarkerMapLocation is the interface to the marker_map_location table. This
# table places markers on a chromosome and basically just stores the
# genetic maps.
#
# Data is read from the Ensembl database through ActiveRecord; the general
# documentation of the Ensembl module explains what that means and which
# methods are available.
#
# @example
#   marker = Marker.find_by_name('CYP19A1_(5)')
#   marker.marker_map_locations.each do |mapping|
#     puts mapping.chromosome_name + "\t" + mapping.position.to_s
#   end
class MarkerMapLocation < DBConnection
  set_primary_key(nil)

  belongs_to(:map)
  belongs_to(:marker)
end
1181
+
1182
# Map is the interface to the map table, which contains genetic maps.
#
# Data is read from the Ensembl database through ActiveRecord; the general
# documentation of the Ensembl module explains what that means and which
# methods are available.
#
# @example
#   map = Map.find_by_map_name('MARC')
#   puts map.markers.length.to_s
class Map < DBConnection
  set_primary_key("map_id")

  has_many(:marker_map_locations)
  has_many(:markers, :through => :marker_map_locations)

  # Shorthand for the map_name attribute.
  def name
    self.map_name
  end
end
1202
+
1203
# RepeatConsensus is the interface to the repeat_consensus table, which
# contains the consensus sequences for repeats.
#
# Data is read from the Ensembl database through ActiveRecord; the general
# documentation of the Ensembl module explains what that means and which
# methods are available.
#
# @example
#   repeat = RepeatFeature.find(29)
#   puts repeat.repeat_consensus.repeat_name + "\t" + repeat.repeat_consensus.repeat_consensus
class RepeatConsensus < DBConnection
  set_primary_key("repeat_consensus_id")

  has_many(:repeat_features)
end
1218
+
1219
# RepeatFeature is the interface to the repeat_feature table, which contains
# the mappings of repeats to a SeqRegion.
#
# Data is read from the Ensembl database through ActiveRecord; the general
# documentation of the Ensembl module explains what that means and which
# methods are available.
#
# Sliceable is mixed in, so objects of this class are mapped to a SeqRegion
# and a Slice can be created for them. See Sliceable and Slice for details.
#
# @example
#   repeat_feature = RepeatFeature.find(29)
#   puts repeat_feature.seq_region_start.to_s
class RepeatFeature < DBConnection
  include(Sliceable)

  set_primary_key("repeat_feature_id")

  belongs_to(:repeat_consensus)
  belongs_to(:seq_region)
end
1241
+
1242
# SeqRegionAttrib is the interface to the seq_region_attrib table, which
# contains the attribute values for SeqRegion objects.
#
# Data is read from the Ensembl database through ActiveRecord; the general
# documentation of the Ensembl module explains what that means and which
# methods are available.
#
# @example
#   chr4 = SeqRegion.find_by_name('4')
#   chr4.seq_region_attribs.each do |attrib|
#     puts attrib.attrib_type.name + "\t" + attrib.value.to_s
#   end
class SeqRegionAttrib < DBConnection
  set_primary_key(nil)

  belongs_to(:seq_region)
  belongs_to(:attrib_type)
end
1260
+
1261
# GeneAttrib is the interface to the gene_attrib table, which contains the
# attribute values for Gene objects.
#
# Data is read from the Ensembl database through ActiveRecord; the general
# documentation of the Ensembl module explains what that means and which
# methods are available.
#
# @example
#   #TODO
class GeneAttrib < DBConnection
  set_primary_key(nil)

  belongs_to(:gene)
  belongs_to(:attrib_type)
end
1276
+
1277
# The AttribType class provides an interface to the attrib_type
# table. This table contains the types that attributes can belong to for
# SeqRegion, Gene and Transcript.
#
# This class uses ActiveRecord to access data in the Ensembl database.
# See the general documentation of the Ensembl module for
# more information on what this means and what methods are available.
#
# @example
#   #TODO
class AttribType < DBConnection
  set_primary_key 'attrib_type_id'

  # Each :through option has to name an association declared on this class,
  # so it must use the same plural name as the has_many directly above it.
  has_many :seq_region_attribs
  has_many :seq_regions, :through => :seq_region_attribs

  has_many :gene_attribs
  has_many :genes, :through => :gene_attribs

  has_many :transcript_attribs
  has_many :transcripts, :through => :transcript_attribs
end
1299
+
1300
# TranscriptStableId is the interface to the transcript_stable_id table,
# which contains the Ensembl stable IDs for Transcript objects.
#
# Data is read from the Ensembl database through ActiveRecord; the general
# documentation of the Ensembl module explains what that means and which
# methods are available.
#
# @example
#   transcript_stable_id = TranscriptStableId.find_by_stable_id('ENSBTAT00000015494')
#   puts transcript_stable_id.transcript.to_yaml
class TranscriptStableId < DBConnection
  # The stable ID itself serves as the primary key.
  set_primary_key("stable_id")

  belongs_to(:transcript)
end
1316
+
1317
# TranscriptAttrib is the interface to the transcript_attrib table, which
# contains the attributes for Transcript objects.
#
# Data is read from the Ensembl database through ActiveRecord; the general
# documentation of the Ensembl module explains what that means and which
# methods are available.
#
# @example
#   transcript = Transcript.find(32495)
#   transcript.transcript_attribs.each do |attr|
#     puts attr.attrib_type.name + "\t" + attr.value
#   end
class TranscriptAttrib < DBConnection
  set_primary_key(nil)

  belongs_to(:transcript)
  belongs_to(:attrib_type)
end
1335
+
1336
# DnaAlignFeature is the interface to the dna_align_feature table, which
# contains sequence similarity mappings against a SeqRegion.
#
# Data is read from the Ensembl database through ActiveRecord; the general
# documentation of the Ensembl module explains what that means and which
# methods are available.
#
# Sliceable is mixed in, so objects of this class are mapped to a SeqRegion
# and a Slice can be created for them. See Sliceable and Slice for details.
#
# @example
#   unigene_scan = Analysis.find_by_logic_name('Unigene')
#   unigene_scan.dna_align_features.each do |hit|
#     puts hit.seq_region.name + "\t" + hit.hit_name + "\t" + hit.cigar_line
#   end
class DnaAlignFeature < DBConnection
  include(Sliceable)

  set_primary_key("dna_align_feature_id")

  belongs_to(:seq_region)
  belongs_to(:analysis)

  has_many(:exon_supporting_features)
  # NOTE(review): no ProteinSupportingFeature model is visible in this part
  # of the file - confirm that this association name is the intended one.
  has_many(:protein_supporting_features)
end
1364
+
1365
# Translation is the interface to the translation table, which contains the
# translation start and stop positions and exons for a given Transcript.
#
# Data is read from the Ensembl database through ActiveRecord; the general
# documentation of the Ensembl module explains what that means and which
# methods are available.
#
# @example
#   #TODO
class Translation < DBConnection
  set_primary_key "translation_id"

  belongs_to :transcript
  has_many :translation_stable_ids

  has_many :translation_attribs
  has_many :protein_features

  has_one :translation_stable_id

  has_many :object_xrefs, :foreign_key => "ensembl_id", :conditions => "ensembl_object_type = 'Translation'"
  has_many :xrefs, :through => :object_xrefs

  # The exons in which the translation starts and ends.
  belongs_to :start_exon, :class_name => "Exon", :foreign_key => "start_exon_id"
  belongs_to :end_exon, :class_name => "Exon", :foreign_key => "end_exon_id"

  alias attribs translation_attribs

  # Translation#stable_id gives the stable ID of the translation, read from
  # the associated translation_stable_id record.
  #
  # @return [String] Ensembl stable ID
  def stable_id
    stable_id_record = self.translation_stable_id
    stable_id_record.stable_id
  end

  # Translation#display_label gives the default name of the translation,
  # i.e. the display label of the Xref referenced by display_xref_id.
  def display_label
    display_xref = Xref.find(self.display_xref_id)
    display_xref.display_label
  end
  alias :display_name :display_label
  alias :label :display_label
  alias :name :display_label

  # Translation.find_by_stable_id fetches a Translation object by its stable
  # ID (i.e. the "ENSP" accession number). When the stable ID is not found,
  # nil is returned.
  def self.find_by_stable_id(stable_id)
    stable_id_record = TranslationStableId.find_by_stable_id(stable_id)
    return nil if stable_id_record.nil?
    stable_id_record.translation
  end
end
1421
+
1422
# TranslationStableId is the interface to the translation_stable_id table,
# which contains the Ensembl stable IDs for a given Translation.
#
# Data is read from the Ensembl database through ActiveRecord; the general
# documentation of the Ensembl module explains what that means and which
# methods are available.
#
# @example
#   stable_id = TranslationStableId.find_by_stable_id('ENSBTAP00000015494')
#   puts stable_id.to_yaml
class TranslationStableId < DBConnection
  # The stable ID itself serves as the primary key.
  set_primary_key("stable_id")

  belongs_to(:translation)
end
1438
+
1439
# TranslationAttrib is the interface to the translation_attrib table, which
# contains the attribute values for the Translation class.
#
# Data is read from the Ensembl database through ActiveRecord; the general
# documentation of the Ensembl module explains what that means and which
# methods are available.
#
# @example
#   translation = Translation.find(9979)
#   translation.translation_attribs.each do |attr|
#     puts attr.attrib_type.name + "\t" + attr.value
#   end
class TranslationAttrib < DBConnection
  set_primary_key(nil)

  belongs_to(:translation)
  belongs_to(:attrib_type)
end
1458
+
1459
# Xref is the interface to the xref table, which contains the external
# references for objects in the database.
#
# Data is read from the Ensembl database through ActiveRecord; the general
# documentation of the Ensembl module explains what that means and which
# methods are available.
#
# @example
#   gene = Gene.find(1)
#   gene.xrefs.each do |xref|
#     puts xref.display_label + "\t" + xref.description
#   end
class Xref < DBConnection
  set_primary_key("xref_id")

  belongs_to(:external_db)
  has_many(:external_synonyms)

  # NOTE(review): Gene refers to its xref through display_xref_id elsewhere
  # in this file; confirm which foreign key this association should use.
  has_many(:genes)

  # Renders the xref as "<external db name>:<display label>".
  def to_s
    db_name = self.external_db.db_name.to_s
    db_name + ":" + self.display_label
  end
end
1484
+
1485
# The ObjectXref class provides the link between gene, transcript and
# translation objects on the one hand and an xref on the other.
#
# This class uses ActiveRecord to access data in the Ensembl database.
# See the general documentation of the Ensembl module for
# more information on what this means and what methods are available.
#
# @example
#  gene = Gene.find(1)
#  gene.object_xrefs.each do |ox|
#    puts ox.to_yaml
#  end
class ObjectXref < DBConnection
  set_primary_key 'object_xref_id'

  # The same ensembl_id column is used as the foreign key for all three
  # object kinds; each association carries a condition on
  # ensembl_object_type naming the kind it is meant for.
  # NOTE(review): confirm that these :conditions are evaluated against the
  # table that actually holds ensembl_object_type.
  belongs_to :gene,
             :class_name => "Gene",
             :foreign_key => 'ensembl_id',
             :conditions => ["ensembl_object_type = 'Gene'"]
  belongs_to :transcript,
             :class_name => "Transcript",
             :foreign_key => 'ensembl_id',
             :conditions => ["ensembl_object_type = 'Transcript'"]
  belongs_to :translation,
             :class_name => "Translation",
             :foreign_key => 'ensembl_id',
             :conditions => ["ensembl_object_type = 'Translation'"]

  belongs_to :xref
  has_one :go_xref
end
1506
+
1507
# The GoXref class provides an interface to the go_xref table, which
# contains the evidence codes for those object_xrefs that are GO terms.
#
# This class uses ActiveRecord to access data in the Ensembl database.
# See the general documentation of the Ensembl module for
# more information on what this means and what methods are available.
class GoXref < DBConnection
  # No primary key is configured for this model.
  set_primary_key nil

  # Inverse of ObjectXref's `has_one :go_xref`, so that the owning
  # ObjectXref can be reached from a GoXref record.
  belongs_to :object_xref

  # NOTE(review): this association expects an xref_id column on go_xref --
  # confirm against the schema; kept for backward compatibility.
  belongs_to :xref
end
1519
+
1520
# The ExternalDb class provides an interface to the external_db table,
# which lists the databases that xrefs can point to.
#
# This class uses ActiveRecord to access data in the Ensembl database.
# See the general documentation of the Ensembl module for
# more information on what this means and what methods are available.
#
# @example
#  embl_db = ExternalDb.find_by_db_name('EMBL')
#  puts embl_db.xrefs.length.to_s
class ExternalDb < DBConnection
  set_primary_key 'external_db_id'

  has_many :xrefs

  # Returning nil switches off ActiveRecord's single-table-inheritance
  # column for this model.
  # NOTE(review): presumably needed because the table has an ordinary
  # column with the default STI name ("type") -- confirm against the schema.
  def self.inheritance_column
    nil
  end

  # Returns all external databases that are referenced by an Xref with the
  # given display label. There should normally be no more than one.
  #
  # The lookup finds the matching Xref records and follows each one's
  # external_db association. Xrefs without an external database are
  # skipped, and a database reached through several xrefs is listed once.
  #
  # @param label the Xref display label to search for
  # @return [Array<ExternalDb>] the matching databases; an empty array if
  #   none are found
  def self.find_all_by_display_label(label)
    databases = Xref.find_all_by_display_label(label).map do |xref|
      xref.external_db
    end

    databases.compact.uniq
  end

  # Returns one external database that is referenced by an Xref with the
  # given display label.
  #
  # @param label the Xref display label to search for
  # @return [ExternalDb, nil] the first matching database, or nil if no
  #   database is found
  def self.find_by_display_label(label)
    self.find_all_by_display_label(label).first
  end
end
1569
+
1570
# The ExternalSynonym class provides an interface to the
# external_synonym table, which contains synonyms for Xref objects.
#
# This class uses ActiveRecord to access data in the Ensembl database.
# See the general documentation of the Ensembl module for
# more information on what this means and what methods are available.
#
# @example
#  xref = Xref.find(185185)
#  # NOTE(review): assumes the column holding the text is called
#  # "synonym" -- confirm against the schema.
#  puts xref.external_synonyms[0].synonym
class ExternalSynonym < DBConnection
  # No primary key is configured for this model.
  set_primary_key nil

  # Every synonym belongs to the Xref it is an alternative name for.
  belongs_to :xref
end
1589
+
1590
# The Karyotype class provides an interface to the karyotype table.
#
# This class uses ActiveRecord to access data in the Ensembl database.
# See the general documentation of the Ensembl module for
# more information on what this means and what methods are available.
#
# This class includes the mixin Sliceable, which means that it is mapped
# to a SeqRegion object and a Slice can be created for objects of this
# class. See Sliceable and Slice for more information.
#
# @example
#  band = Karyotype.find_by_band('p36.32')
#  puts band.to_yaml
class Karyotype < DBConnection
  include Sliceable

  set_primary_key 'karyotype_id'

  # The sequence region this record is located on.
  belongs_to :seq_region
end
1611
+
1612
# The OligoFeature class provides an interface to the oligo_feature
# table, which contains mappings of Oligo objects to a SeqRegion.
#
# This class uses ActiveRecord to access data in the Ensembl database.
# See the general documentation of the Ensembl module for
# more information on what this means and what methods are available.
#
# This class includes the mixin Sliceable, which means that it is mapped
# to a SeqRegion object and a Slice can be created for objects of this
# class. See Sliceable and Slice for more information.
#
# @example
#  seq_region = SeqRegion.find_by_name('4')
#  puts seq_region.oligo_features.length
class OligoFeature < DBConnection
  include Sliceable

  set_primary_key 'oligo_feature_id'

  # A feature ties one probe to one sequence region and records the
  # analysis it is associated with.
  belongs_to :seq_region
  belongs_to :oligo_probe
  belongs_to :analysis
end
1636
+
1637
# The OligoProbe class provides an interface to the oligo_probe table.
#
# This class uses ActiveRecord to access data in the Ensembl database.
# See the general documentation of the Ensembl module for
# more information on what this means and what methods are available.
#
# @example
#  probe = OligoProbe.find_by_name('373:434;')
#  puts probe.probeset + "\t" + probe.oligo_array.name
class OligoProbe < DBConnection
  set_primary_key 'oligo_probe_id'

  # A probe sits on one array and can have many features.
  has_many :oligo_features
  belongs_to :oligo_array
end
1653
+
1654
# The OligoArray class provides an interface to the oligo_array table,
# which contains data describing a microarray slide.
#
# This class uses ActiveRecord to access data in the Ensembl database.
# See the general documentation of the Ensembl module for
# more information on what this means and what methods are available.
#
# @example
#  array = OligoArray.find_by_name_and_type('Bovine','AFFY')
#  puts array.oligo_probes.length
class OligoArray < DBConnection
  set_primary_key 'oligo_array_id'

  # All probes that belong to this array.
  has_many :oligo_probes
end
1670
+
1671
# The PredictionExon class provides an interface to the prediction_exon
# table.
#
# This class uses ActiveRecord to access data in the Ensembl database.
# See the general documentation of the Ensembl module for
# more information on what this means and what methods are available.
#
# This class includes the mixin Sliceable, which means that it is mapped
# to a SeqRegion object and a Slice can be created for objects of this
# class. See Sliceable and Slice for more information.
#
# TODO: add a usage example.
class PredictionExon < DBConnection
  include Sliceable

  set_primary_key 'prediction_exon_id'

  # An exon is part of one predicted transcript and sits on one
  # sequence region.
  belongs_to :prediction_transcript
  belongs_to :seq_region
end
1692
+
1693
# The PredictionTranscript class provides an interface to the
# prediction_transcript table.
#
# This class uses ActiveRecord to access data in the Ensembl database.
# See the general documentation of the Ensembl module for
# more information on what this means and what methods are available.
#
# This class includes the mixin Sliceable, which means that it is mapped
# to a SeqRegion object and a Slice can be created for objects of this
# class. See Sliceable and Slice for more information.
#
# @example
#  predicted_transcript = PredictionTranscript.find_by_display_label('GENSCAN00000000006')
#  puts predicted_transcript.prediction_exons.length
class PredictionTranscript < DBConnection
  include Sliceable

  set_primary_key 'prediction_transcript_id'

  # A predicted transcript owns its exons, sits on one sequence region
  # and is linked to one analysis.
  has_many :prediction_exons
  belongs_to :seq_region
  belongs_to :analysis
end
1716
+
1717
# The ProteinFeature class provides an interface to the protein_feature
# table. This table contains mappings of a Translation onto a SeqRegion.
#
# This class uses ActiveRecord to access data in the Ensembl database.
# See the general documentation of the Ensembl module for
# more information on what this means and what methods are available.
#
# This class includes the mixin Sliceable. See Sliceable and Slice for
# more information.
# NOTE(review): no seq_region association is declared on this class even
# though Sliceable is included -- confirm how a Slice is resolved here.
#
# TODO: add a usage example.
class ProteinFeature < DBConnection
  include Sliceable

  set_primary_key 'protein_feature_id'

  # A protein feature is attached to one Translation and one analysis.
  belongs_to :translation
  belongs_to :analysis
end
1739
+
1740
# The ProteinAlignFeature class provides an interface to the
# protein_align_feature table, which contains sequence similarity
# mappings against a SeqRegion.
#
# This class uses ActiveRecord to access data in the Ensembl database.
# See the general documentation of the Ensembl module for
# more information on what this means and what methods are available.
#
# This class includes the mixin Sliceable, which means that it is mapped
# to a SeqRegion object and a Slice can be created for objects of this
# class. See Sliceable and Slice for more information.
#
# @example
#  uniprot_scan = Analysis.find_by_logic_name('Uniprot')
#  uniprot_scan.protein_align_features.each do |hit|
#    puts hit.seq_region.name + "\t" + hit.hit_name + "\t" + hit.cigar_line
#  end
class ProteinAlignFeature < DBConnection
  include Sliceable

  set_primary_key 'protein_align_feature_id'

  # Where the alignment is located and which analysis it is linked to.
  belongs_to :seq_region
  belongs_to :analysis

  # Supporting-evidence records that refer to this alignment.
  has_many :exon_supporting_features
  has_many :transcript_supporting_features
end
1768
+
1769
# The RegulatoryFactor class provides an interface to the
# regulatory_factor table.
#
# This class uses ActiveRecord to access data in the Ensembl database.
# See the general documentation of the Ensembl module for
# more information on what this means and what methods are available.
#
# @example
#  factor = RegulatoryFactor.find_by_name('crtHsap8070')
#  puts factor.to_yaml
class RegulatoryFactor < DBConnection
  set_primary_key 'regulatory_factor_id'

  # All features that refer to this factor.
  has_many :regulatory_features
end
1784
+
1785
# The RegulatoryFeature class provides an interface to the
# regulatory_feature table, which contains mappings of
# RegulatoryFactor objects against a SeqRegion.
#
# This class uses ActiveRecord to access data in the Ensembl database.
# See the general documentation of the Ensembl module for
# more information on what this means and what methods are available.
#
# This class includes the mixin Sliceable, which means that it is mapped
# to a SeqRegion object and a Slice can be created for objects of this
# class. See Sliceable and Slice for more information.
#
# @example
#  analysis = Analysis.find_by_logic_name('miRanda')
#  analysis.regulatory_features.each do |feature|
#    puts feature.name + "\t" + feature.regulatory_factor.name
#  end
class RegulatoryFeature < DBConnection
  include Sliceable

  set_primary_key 'regulatory_feature_id'

  # A feature places one regulatory factor on one sequence region and is
  # linked to one analysis.
  belongs_to :seq_region
  belongs_to :analysis
  belongs_to :regulatory_factor
end
1811
+ end
1812
+ end