bio-ensembl 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. data/.document +5 -0
  2. data/Gemfile +20 -0
  3. data/Gemfile.lock +40 -0
  4. data/LICENSE.txt +20 -0
  5. data/README.rdoc +19 -0
  6. data/Rakefile +71 -0
  7. data/VERSION +1 -0
  8. data/bin/ensembl +40 -0
  9. data/bin/variation_effect_predictor +106 -0
  10. data/bio-ensembl.gemspec +190 -0
  11. data/lib/bio-ensembl.rb +65 -0
  12. data/lib/bio-ensembl/core/activerecord.rb +1812 -0
  13. data/lib/bio-ensembl/core/collection.rb +64 -0
  14. data/lib/bio-ensembl/core/project.rb +262 -0
  15. data/lib/bio-ensembl/core/slice.rb +657 -0
  16. data/lib/bio-ensembl/core/transcript.rb +409 -0
  17. data/lib/bio-ensembl/core/transform.rb +95 -0
  18. data/lib/bio-ensembl/db_connection.rb +205 -0
  19. data/lib/bio-ensembl/variation/activerecord.rb +536 -0
  20. data/lib/bio-ensembl/variation/variation_feature.rb +376 -0
  21. data/lib/bio-ensembl/variation/variation_feature62.rb +444 -0
  22. data/samples/ensembl_genomes_example.rb +60 -0
  23. data/samples/examples_perl_tutorial.rb +125 -0
  24. data/samples/small_example_ruby_api.rb +34 -0
  25. data/samples/variation_effect_predictor_data.txt +4 -0
  26. data/samples/variation_example.rb +67 -0
  27. data/test/data/seq_c6qbl.fa +10 -0
  28. data/test/data/seq_cso19_coding.fa +16 -0
  29. data/test/data/seq_cso19_transcript.fa +28 -0
  30. data/test/data/seq_drd3_gene.fa +838 -0
  31. data/test/data/seq_drd3_transcript.fa +22 -0
  32. data/test/data/seq_drd4_transcript.fa +24 -0
  33. data/test/data/seq_forward_composite.fa +1669 -0
  34. data/test/data/seq_par_boundary.fa +169 -0
  35. data/test/data/seq_rnd3_transcript.fa +47 -0
  36. data/test/data/seq_ub2r1_coding.fa +13 -0
  37. data/test/data/seq_ub2r1_gene.fa +174 -0
  38. data/test/data/seq_ub2r1_transcript.fa +26 -0
  39. data/test/data/seq_y.fa +2 -0
  40. data/test/default/test_connection.rb +60 -0
  41. data/test/default/test_releases.rb +130 -0
  42. data/test/ensembl_genomes/test_collection.rb +122 -0
  43. data/test/ensembl_genomes/test_gene.rb +46 -0
  44. data/test/ensembl_genomes/test_slice.rb +65 -0
  45. data/test/ensembl_genomes/test_variation.rb +38 -0
  46. data/test/helper.rb +18 -0
  47. data/test/release_50/core/test_project.rb +210 -0
  48. data/test/release_50/core/test_project_human.rb +52 -0
  49. data/test/release_50/core/test_relationships.rb +72 -0
  50. data/test/release_50/core/test_sequence.rb +170 -0
  51. data/test/release_50/core/test_slice.rb +116 -0
  52. data/test/release_50/core/test_transcript.rb +125 -0
  53. data/test/release_50/core/test_transform.rb +217 -0
  54. data/test/release_50/variation/test_activerecord.rb +138 -0
  55. data/test/release_50/variation/test_variation.rb +79 -0
  56. data/test/release_53/core/test_gene.rb +61 -0
  57. data/test/release_53/core/test_project.rb +91 -0
  58. data/test/release_53/core/test_project_human.rb +61 -0
  59. data/test/release_53/core/test_slice.rb +42 -0
  60. data/test/release_53/core/test_transform.rb +57 -0
  61. data/test/release_53/variation/test_activerecord.rb +137 -0
  62. data/test/release_53/variation/test_variation.rb +66 -0
  63. data/test/release_56/core/test_gene.rb +61 -0
  64. data/test/release_56/core/test_project.rb +91 -0
  65. data/test/release_56/core/test_slice.rb +49 -0
  66. data/test/release_56/core/test_transform.rb +57 -0
  67. data/test/release_56/variation/test_activerecord.rb +141 -0
  68. data/test/release_56/variation/test_consequence.rb +131 -0
  69. data/test/release_56/variation/test_variation.rb +63 -0
  70. data/test/release_60/core/test_gene.rb +61 -0
  71. data/test/release_60/core/test_project_human.rb +34 -0
  72. data/test/release_60/core/test_slice.rb +42 -0
  73. data/test/release_60/core/test_transcript.rb +120 -0
  74. data/test/release_60/core/test_transform.rb +57 -0
  75. data/test/release_60/variation/test_activerecord.rb +216 -0
  76. data/test/release_60/variation/test_consequence.rb +153 -0
  77. data/test/release_60/variation/test_variation.rb +64 -0
  78. data/test/release_62/core/test_gene.rb +42 -0
  79. data/test/release_62/variation/test_activerecord.rb +86 -0
  80. data/test/release_62/variation/test_consequence.rb +191 -0
  81. metadata +287 -0
@@ -0,0 +1,65 @@
1
+ # #
2
+ # = bio-ensembl.rb
3
+ #
4
+ # Copyright:: Copyright (C) 2009 Jan Aerts <http://jandot.myopenid.com>
5
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
6
+ #
7
+ # License:: The Ruby License
8
+ #
9
+ # @author Jan Aerts
10
+ # @author Francesco Strozzi
11
+
12
module Ensembl
  # Ensembl release number that a new Session starts out with.
  ENSEMBL_RELEASE = 60

  # Process-wide store for lookups that are cached between calls
  # (coordinate systems, seq_regions, the selected release, ...).
  class Session
    # coord_systems: coord_system id => CoordSystem object
    attr_accessor :coord_systems
    # coord_system_ids: filled elsewhere in the library as
    # coord_system name => coord_system id
    attr_accessor :coord_system_ids
    attr_accessor :seq_regions
    attr_accessor :seqlevel_id, :seqlevel_coord_system
    attr_accessor :toplevel_id, :toplevel_coord_system
    attr_accessor :collection_species
    attr_accessor :release

    # Starts with empty caches and the default release.
    def initialize
      @coord_systems, @coord_system_ids, @seq_regions = {}, {}, {}
      @release = ENSEMBL_RELEASE
    end

    # Empties every cache and forgets the cached toplevel/seqlevel
    # coordinate systems and the collection species. The release is
    # deliberately left untouched.
    #
    # @return [nil]
    def reset
      @coord_systems = {}
      @coord_system_ids = {}
      @seq_regions = {}
      @seqlevel_id = @toplevel_id = nil
      @seqlevel_coord_system = @toplevel_coord_system = nil
      @collection_species = nil
    end
  end

  # The single shared session used throughout the library.
  SESSION = Session.new

end
46
+
47
+
48
+
49
+ # BioRuby
50
+ require 'bio'
51
+
52
+ # Database connection
53
+ require 'active_record'
54
+ require 'bio-ensembl/db_connection'
55
+
56
+ # Core modules
57
+ require 'bio-ensembl/core/activerecord'
58
+ require 'bio-ensembl/core/transcript'
59
+ require 'bio-ensembl/core/slice'
60
+ require 'bio-ensembl/core/project'
61
+ require 'bio-ensembl/core/transform'
62
+ require 'bio-ensembl/core/collection'
63
+
64
+ # Variation modules
65
+ require 'bio-ensembl/variation/activerecord'
@@ -0,0 +1,1812 @@
1
+ #
2
+ # = ensembl/core/activerecord.rb - ActiveRecord mappings to Ensembl core
3
+ #
4
+ # Copyright:: Copyright (C) 2007-2009 Jan Aerts <http://jandot.myopenid.com>
5
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
6
+ # License:: The Ruby License
7
+ #
8
+ # @author Jan Aerts
9
+ # @author Francesco Strozzi
10
+
11
+ # == What is it?
12
+ # The Ensembl module provides an API to the Ensembl databases
13
+ # stored at ensembldb.ensembl.org. This is the same information that is
14
+ # available from http://www.ensembl.org.
15
+ #
16
+ # The Ensembl::Core module mainly covers sequences and
17
+ # annotations.
18
+ # The Ensembl::Variation module covers variations (e.g. SNPs).
19
+ # The Ensembl::Compara module covers comparative mappings
20
+ # between species.
21
+ #
22
+ # == ActiveRecord
23
+ # The Ensembl API provides a ruby interface to the Ensembl mysql databases
24
+ # at ensembldb.ensembl.org. Most of the API is based on ActiveRecord to
25
+ # get data from that database. In general, each table is described by a
26
+ # class with the same name: the coord_system table is covered by the
27
+ # CoordSystem class, the seq_region table is covered by the SeqRegion class,
28
+ # etc. As a result, accessors are available for all columns in each table.
29
+ # For example, the seq_region table has the following columns: seq_region_id,
30
+ # name, coord_system_id and length. Through ActiveRecord, these column names
31
+ # become available as attributes of SeqRegion objects:
32
+ # puts my_seq_region.seq_region_id
33
+ # puts my_seq_region.name
34
+ # puts my_seq_region.coord_system_id
35
+ # puts my_seq_region.length.to_s
36
+ #
37
+ # ActiveRecord makes it easy to extract data from those tables using the
38
+ # collection of #find methods. There are three types of #find methods (e.g.
39
+ # for the CoordSystem class):
40
+ # a. find based on primary key in table:
41
+ # my_coord_system = CoordSystem.find(5)
42
+ # b. find_by_sql:
43
+ # my_coord_system = CoordSystem.find_by_sql("SELECT * FROM coord_system WHERE name = 'chromosome'")
44
+ # c. find_by_<insert_your_column_name_here>
45
+ # my_coord_system1 = CoordSystem.find_by_name('chromosome')
46
+ # my_coord_system2 = CoordSystem.find_by_rank(3)
47
+ # To find out which find_by_<column> methods are available, you can list the
48
+ # column names using the column_names class methods:
49
+ #
50
+ # puts Ensembl::Core::CoordSystem.column_names.join("\t")
51
+ #
52
+ # For more information on the find methods, see
53
+ # http://ar.rubyonrails.org/classes/ActiveRecord/Base.html#M000344
54
+ #
55
+ # The relationships between different tables are accessible through the
56
+ # classes as well. For example, to loop over all seq_regions belonging to
57
+ # a coord_system (a coord_system "has many" seq_regions):
58
+ # chr_coord_system = CoordSystem.find_by_name('chromosome')
59
+ # chr_coord_system.seq_regions.each do |seq_region|
60
+ # puts seq_region.name
61
+ # end
62
+ # Of course, you can go the other way as well (a seq_region "belongs to"
63
+ # a coord_system):
64
+ # chr4 = SeqRegion.find_by_name('4')
65
+ # puts chr4.coord_system.name #--> 'chromosome'
66
+ #
67
+ # To find out what relationships exist for a given class, you can use the
68
+ # #reflect_on_all_associations class methods:
69
+ # puts SeqRegion.reflect_on_all_associations(:has_many).collect{|a| a.name.to_s}.join("\n")
70
+ # puts SeqRegion.reflect_on_all_associations(:has_one).collect{|a| a.name.to_s}.join("\n")
71
+ # puts SeqRegion.reflect_on_all_associations(:belongs_to).collect{|a| a.name.to_s}.join("\n")
72
+ module Ensembl
73
+ # The Ensembl::Core module covers the core databases from
74
+ # ensembldb.ensembl.org and covers mainly sequences and their annotations.
75
+ # For a full description of the database (and therefore the classes that
76
+ # are available), see http://www.ensembl.org/info/software/core/schema/index.html
77
+ # and http://www.ensembl.org/info/software/core/schema/schema_description.html
78
+ module Core
79
# The Sliceable mixin provides the #slice method (plus a handful of
# positional convenience methods) and can be included in any class that
# lends itself to having a position on a SeqRegion.
module Sliceable
  # Takes the coordinates of this object on its reference and wraps them
  # in an Ensembl::Core::Slice.
  #
  # A coordinate is only read when the including class is
  # Ensembl::Core::Intron or lists the matching seq_region_* column in
  # its column_names; otherwise start/stop stay nil and the strand
  # falls back to 1.
  #
  # @return [Ensembl::Core::Slice] Ensembl::Core::Slice object
  def slice
    # Intron is special-cased: its column_names are never consulted
    # (presumably because it is not backed by a table - confirm).
    positioned = lambda do |column|
      self.class == Ensembl::Core::Intron || self.class.column_names.include?(column)
    end

    from = positioned.call('seq_region_start') ? self.seq_region_start : nil
    to = positioned.call('seq_region_end') ? self.seq_region_end : nil
    # FIXME: we shouldn't default the strand, but can't #project if no strand given
    orientation = positioned.call('seq_region_strand') ? self.seq_region_strand : 1

    Ensembl::Core::Slice.new(self.seq_region, from, to, orientation)
  end

  # Retrieves the sequence of this object by delegating to the slice
  # returned by #slice.
  #
  # @return [String] sequence
  def seq
    self.slice.seq
  end

  # Convenience reader for seq_region_start.
  #
  # @return [Integer] seq_region_start
  def start
    self.seq_region_start
  end

  # Convenience reader for seq_region_end.
  #
  # @return [Integer] seq_region_end
  def stop
    self.seq_region_end
  end

  # Convenience reader for seq_region_strand.
  #
  # @return [Numeric] seq_region_strand
  def strand
    self.seq_region_strand
  end

  # Length of the feature, counted inclusively from #start to #stop.
  #
  # @return [Integer] Length of the slice
  def length
    (self.stop - self.start) + 1
  end

  # Transfers the coordinates of this object from its own coordinate
  # system to another one by delegating to Slice#project.
  #
  # Suppose you have a feature on a contig in human (let's say on contig
  # AC000031.6.1.38703) and you want to know the coordinates on the
  # chromosome. This is a projection of coordinates from a higher ranked
  # coordinate system to a lower ranked coordinate system. Projections
  # can also be done from a chromosome to the contig level. However, it
  # might be possible that more than one contig has to be included and
  # that there exist gaps between the contigs. The output of this method
  # therefore is an _array_ of Slice and Gap objects.
  #
  # At the moment, projections can only be done if the two coordinate
  # systems are linked directly in the 'assembly' table.
  #
  # @example
  #  # Get a contig slice in cow and project to scaffold level
  #  # (i.e. going from a high rank coord system to a lower rank coord
  #  # system)
  #  original_feature = Gene.find(85743)
  #  target_slices = original_feature.project('scaffold')
  #
  # @param [String] coord_system_name Name of coordinate system to project coordinates to
  # @return [Array<Slice,Gap>] an array consisting of Slices and, if necessary, Gaps
  def project(coord_system_name)
    self.slice.project(coord_system_name)
  end

end
173
+
174
+
175
# The CoordSystem class describes the coordinate system to which
# a given SeqRegion belongs. It is an interface to the coord_system
# table of the Ensembl mysql database.
#
# Two virtual coordinate systems exist for every species:
# * toplevel: the coordinate system with rank 1
# * seqlevel: the coordinate system that contains the seq_regions
#   with the sequence
#
# This class uses ActiveRecord to access data in the Ensembl database.
# See the general documentation of the Ensembl module for
# more information on what this means and what methods are available.
#
# @example
#  coord_system = Ensembl::Core::CoordSystem.find_by_name('chromosome')
#  if coord_system.toplevel?
#    puts coord_system.name + " is the toplevel coordinate system."
#  end
class CoordSystem < DBConnection
  set_primary_key 'coord_system_id'

  has_many :seq_regions

  # The CoordSystem#toplevel? method checks if this coordinate system is
  # the toplevel (rank 1) coordinate system or not. In multi-species
  # databases the comparison is restricted to this record's species.
  #
  # @return [Boolean] True if coord_system is toplevel, else false.
  def toplevel?
    if Collection.check # When using multi-species databases
      top = CoordSystem.find_by_rank_and_species_id(1, self.species_id)
    else
      top = CoordSystem.find_by_rank(1)
    end
    self == top
  end

  # The CoordSystem#seqlevel? method checks if this coordinate system is
  # the seqlevel coordinate system or not.
  #
  # @return [Boolean] True if coord_system is seqlevel, else false.
  def seqlevel?
    if Collection.check # When using multi-species databases
      seqlevel = CoordSystem.find_by_sql("SELECT * FROM coord_system WHERE attrib LIKE '%sequence_level%' AND species_id = #{self.species_id}")[0]
    else
      # find_seqlevel is an instance method; the former call
      # CoordSystem.find_seqlevel targeted a class method that this
      # class does not define.
      seqlevel = self.find_seqlevel
    end
    self == seqlevel
  end

  # The CoordSystem#find_toplevel instance method returns the toplevel
  # coordinate system and caches it in Ensembl::SESSION. In multi-species
  # databases the cached value is refreshed whenever it belongs to a
  # different species than this record.
  #
  # @return [Ensembl::Core::CoordSystem] Toplevel coord_system object.
  def find_toplevel
    cached = Ensembl::SESSION.toplevel_coord_system
    if cached.nil? || (Collection.check && cached.species_id != self.species_id)
      if Collection.check # When using multi-species databases
        Ensembl::SESSION.toplevel_coord_system = CoordSystem.find_by_rank_and_species_id(1, self.species_id)
      else
        Ensembl::SESSION.toplevel_coord_system = CoordSystem.find_by_rank(1)
      end
      toplevel = Ensembl::SESSION.toplevel_coord_system
      # Keep the id/name lookup tables of the session in sync.
      Ensembl::SESSION.toplevel_id = toplevel.id
      Ensembl::SESSION.coord_system_ids[toplevel.name] = toplevel.id
      Ensembl::SESSION.coord_systems[toplevel.id] = toplevel
    end
    Ensembl::SESSION.toplevel_coord_system
  end

  # The CoordSystem#find_seqlevel instance method returns the seqlevel
  # coordinate system (the one whose attrib contains 'sequence_level')
  # and caches it in Ensembl::SESSION. In multi-species databases the
  # cached value is refreshed whenever it belongs to a different species
  # than this record.
  #
  # @return [Ensembl::Core::CoordSystem] Seqlevel coord_system object.
  def find_seqlevel
    cached = Ensembl::SESSION.seqlevel_coord_system
    if cached.nil? || (Collection.check && cached.species_id != self.species_id)
      sql = "SELECT * FROM coord_system WHERE attrib LIKE '%sequence_level%'"
      sql += " AND species_id = #{self.species_id}" if Collection.check # multi-species databases
      Ensembl::SESSION.seqlevel_coord_system = CoordSystem.find_by_sql(sql)[0]
      seqlevel = Ensembl::SESSION.seqlevel_coord_system
      # Keep the id/name lookup tables of the session in sync.
      Ensembl::SESSION.seqlevel_id = seqlevel.id
      Ensembl::SESSION.coord_system_ids[seqlevel.name] = seqlevel.id
      Ensembl::SESSION.coord_systems[seqlevel.id] = seqlevel
    end
    Ensembl::SESSION.seqlevel_coord_system
  end

  # The CoordSystem#find_level instance method returns the coordinate
  # system with the given name. In multi-species databases the lookup is
  # restricted to the species of this record.
  #
  # @param [String] coord_system_name Name of coordinate system
  # @return [Ensembl::Core::CoordSystem] Coordinate system object
  def find_level(coord_system_name)
    if Collection.check # When using multi-species databases
      # Bind variables instead of string interpolation: the name is
      # supplied by the caller and must be quoted by the adapter.
      CoordSystem.find_by_sql(["SELECT * FROM coord_system WHERE name = ? AND species_id = ?", coord_system_name, self.species_id])[0]
    else
      CoordSystem.find_by_name(coord_system_name)
    end
  end

  # The CoordSystem.find_default_by_name class method returns the default
  # coordinate system for a name. There might be more than one coordinate
  # system with the name chromosome but with different version (e.g. in
  # human, there is one for the NCBI36 and one for the NCBI35 version).
  # If exactly one coordinate system has the name it is returned as is;
  # otherwise the first one whose attrib contains 'default_version' is
  # returned (nil if there is none).
  #
  # @param [String] name Name of coordinate system
  # @return [Ensembl::Core::CoordSystem] Coordinate system object
  def self.find_default_by_name(name)
    candidates = Ensembl::Core::CoordSystem.find_all_by_name(name)
    return candidates[0] if candidates.length == 1
    candidates.detect { |cs| cs.attrib =~ /default_version/ }
  end

  # The CoordSystem#name_with_version returns a string containing the name
  # and version of the coordinate system, joined by a colon. If no version
  # is available, then just the name is returned.
  #
  # @return [String] Name of the coordinate system if possible including version
  def name_with_version
    self.version.nil? ? name : [name, version].join(':')
  end

  ## Calculate the shortest path between a source coordinate system and a
  ## target coordinate system. This can be done by looking for the
  ## 'assembly.mapping' records in the meta_coord table.
  ## At the moment, only direct mappings are possible. Later on, this method
  ## should be changed to make longer paths possible.
  ## Is used to get features for a slice object.
  #def calculate_path(target_coord_system)
  #  MetaCoord.find_all_by_meta_key('assembly.mapping').each do |mapping|
  #    coord_system_names = mapping.meta_value.split(/[#|\|]/)
  #    if coord_system_names.sort.join(';') == [self.name_with_version, target_coord_system.name_with_version].sort.join(';')
  #      answer = Array.new
  #      answer.push(CoordSystem.find_by_name(coord_system_names[0]))
  #      answer.push(CoordSystem.find_by_name(coord_system_names[1]))
  #      return answer
  #    end
  #  end
  #  return nil
  #
  #end
end
338
+
339
# The SeqRegion class describes a part of a coordinate system. It is an
# interface to the seq_region table of the Ensembl mysql database.
#
# This class uses ActiveRecord to access data in the Ensembl database.
# See the general documentation of the Ensembl module for
# more information on what this means and what methods are available.
#
# @example
#  chr4 = SeqRegion.find_by_name('4')
#  puts chr4.coord_system.name #--> 'chromosome'
#  chr4.genes.each do |gene|
#    puts gene.biotype
#  end
class SeqRegion < DBConnection
  set_primary_key 'seq_region_id'

  belongs_to :coord_system
  has_many :simple_features
  has_many :marker_features
  has_many :genes
  has_many :exons
  has_many :repeat_features
  has_many :seq_region_attribs
  # :through must name an association declared on this class; that is
  # :seq_region_attribs (plural), not :seq_region_attrib.
  has_many :attrib_types, :through => :seq_region_attribs
  has_many :transcripts
  has_one :dna
  has_many :dna_align_features
  has_many :misc_features
  has_many :density_features
  has_many :karyotypes
  has_many :oligo_features
  has_many :prediction_exons
  has_many :prediction_transcripts
  has_many :protein_align_features
  has_many :regulatory_features
  has_many :assembly_exceptions

  # See http://blog.hasmanythrough.com/2006/4/21/self-referential-through
  has_many :asm_links_as_asm, :foreign_key => 'asm_seq_region_id', :class_name => 'AssemblyLink'
  has_many :asm_links_as_cmp, :foreign_key => 'cmp_seq_region_id', :class_name => 'AssemblyLink'
  has_many :asm_seq_regions, :through => :asm_links_as_cmp
  has_many :cmp_seq_regions, :through => :asm_links_as_asm

  alias attribs seq_region_attribs

  # The SeqRegion#slice method returns a slice object that covers the whole
  # of the seq_region.
  #
  # @return [Ensembl::Core::Slice] Slice object
  def slice
    Ensembl::Core::Slice.new(self)
  end

  # The SeqRegion#assembled_seq_regions returns the sequence regions on which
  # the current region is assembled. For example, calling this method on a
  # contig sequence region, it might return the chromosome that that contig
  # is part of. Optionally, this method takes a coordinate system name so
  # that only regions of that coordinate system are returned.
  #
  # @param [String] coord_system_name Name of coordinate system
  # @return [Array<SeqRegion>] Array of SeqRegion objects
  def assembled_seq_regions(coord_system_name = nil)
    return self.asm_seq_regions if coord_system_name.nil?

    coord_system = CoordSystem.find_by_name(coord_system_name)
    self.asm_seq_regions.select { |asr| asr.coord_system_id == coord_system.id }
  end

  # The SeqRegion#component_seq_regions returns the sequence regions
  # contained within the current region (in other words: the bits used to
  # assemble the current region). For example, calling this method on a
  # chromosome sequence region, it might return the contigs that were assembled
  # into this chromosome. Optionally, this method takes a coordinate system
  # name so that only regions of that coordinate system are returned.
  #
  # @param [String] coord_system_name Name of coordinate system
  # @return [Array<SeqRegion>] Array of SeqRegion objects
  def component_seq_regions(coord_system_name = nil)
    return self.cmp_seq_regions if coord_system_name.nil?

    coord_system = CoordSystem.find_by_name(coord_system_name)
    self.cmp_seq_regions.select { |csr| csr.coord_system_id == coord_system.id }
  end

  # This method queries the assembly table to find those rows (i.e.
  # AssemblyLink objects) for which this seq_region is the assembly.
  # Without a coordinate system all such links are returned; with one,
  # only links whose component seq_region belongs to it.
  #
  # @example
  #  my_seq_region = SeqRegion.find_by_name('4')
  #  first_link = my_seq_region.assembly_links_as_assembly[0]
  #  puts first_link.asm_start.to_s + "\t" + first_link.asm_end.to_s
  #
  # @param [CoordSystem] coord_system Coordinate system object
  #   that the components should belong to
  # @return [Array<AssemblyLink>] Array of AssemblyLink objects
  def assembly_links_as_assembly(coord_system = nil)
    # Mirror assembly_links_as_component: no filter means all links.
    return self.asm_links_as_asm if coord_system.nil?

    # Register the coordinate system in the session caches
    # (id => object and name => id) if its name is not known yet.
    unless Ensembl::SESSION.coord_system_ids.has_key?(coord_system.name)
      Ensembl::SESSION.coord_systems[coord_system.id] = coord_system
      Ensembl::SESSION.coord_system_ids[coord_system.name] = coord_system.id
    end

    AssemblyLink.find_by_sql(["SELECT * FROM assembly a WHERE a.asm_seq_region_id = ? AND a.cmp_seq_region_id IN (SELECT sr.seq_region_id FROM seq_region sr WHERE coord_system_id = ? )", self.id, coord_system.id])
  end

  # This method queries the assembly table to find those rows (i.e.
  # AssemblyLink objects) for which this seq_region is the component.
  #
  # @example
  #  my_seq_region = SeqRegion.find_by_name('Chr4.003.1')
  #  first_link = my_seq_region.assembly_links_as_component[0]
  #  puts first_link.asm_start.to_s + "\t" + first_link.asm_end.to_s
  #
  # @param [CoordSystem] coord_system Coordinate system object that the assembly
  #   should belong to
  # @return [Array<AssemblyLink>] Array of AssemblyLink objects
  def assembly_links_as_component(coord_system = nil)
    return self.asm_links_as_cmp if coord_system.nil?

    self.asm_links_as_cmp.select { |alac| alac.asm_seq_region.coord_system_id == coord_system.id }
  end

  # The SeqRegion#sequence method returns the sequence of this seq_region. At
  # the moment, it will only return the sequence if the region belongs to the
  # seqlevel coordinate system.
  #
  # @return [String] DNA sequence
  def sequence
    self.dna.sequence
  end
  alias seq sequence

  # The SeqRegion#subsequence method returns a subsequence of this seq_region. At
  # the moment, it will only return the sequence if the region belongs to the
  # seqlevel coordinate system. Positions are 1-based and inclusive.
  #
  # @param [Integer] start Start position
  # @param [Integer] stop Stop position
  # @return [String] DNA sequence
  def subsequence(start, stop)
    self.seq.slice(start - 1, (stop - start) + 1)
  end
  alias subseq subsequence

end
504
+
505
# The AssemblyLink class describes the relationships between different
# seq_regions. For example, a chromosome might consist of a number of
# scaffolds, each of which in turn consists of a number of contigs.
# This class is an interface to the assembly table of the Ensembl mysql
# database; every row links an assembled seq_region (asm) to one of its
# component seq_regions (cmp).
#
# This class uses ActiveRecord to access data in the Ensembl database.
# See the general documentation of the Ensembl module for
# more information on what this means and what methods are available.
#
# @example
#  chr4 = SeqRegion.find_by_name('4')
#  link = chr4.asm_links_as_asm.first
#  puts link.cmp_seq_region.name
#  puts link.asm_start.to_s + "\t" + link.asm_end.to_s
class AssemblyLink < DBConnection
  set_table_name 'assembly'
  set_primary_key nil

  # See http://blog.hasmanythrough.com/2006/4/21/self-referential-through
  # Declares asm_seq_region and cmp_seq_region, both pointing at SeqRegion.
  %w(asm cmp).each do |role|
    belongs_to :"#{role}_seq_region", :foreign_key => "#{role}_seq_region_id", :class_name => 'SeqRegion'
  end
end
530
+
531
# The AssemblyException class describes the exceptions to the assembly
# recorded by AssemblyLink. Most notably, this concerns the allosomes. In
# human, for example, only the part of the Y chromosome that is different
# from X is covered in the assembly table. Therefore, the sequence of the
# tip and end of the Y chromosome are not stored in the database, but
# fetched from the X chromosome. The assembly_exception table contains the
# information on which bits are the same.
#
# This class uses ActiveRecord to access data in the Ensembl database.
# See the general documentation of the Ensembl module for
# more information on what this means and what methods are available.
#
# This class should normally not be used directly by the user.
class AssemblyException < DBConnection
  include(Sliceable)

  set_primary_key('assembly_exception_id')

  belongs_to(:seq_region)
end
551
+
552
# The MetaCoord class describes which coordinate systems are used to
# annotate features. It will for example tell you that marker_features are
# annotated on the chromosome, supercontig and clone level.
#
# This class should normally not be used by the end user, but is used internally.
#
# This class uses ActiveRecord to access data in the Ensembl database.
# See the general documentation of the Ensembl module for
# more information on what this means and what methods are available.
class MetaCoord < DBConnection
  # No primary key is configured for this table.
  set_primary_key(nil)
end
564
+
565
# The Meta class describes meta data of the database. This includes
# information on which coordinate system maps onto which other one and
# which patches are applied.
#
# This class should normally not be used by the end user, but is used internally.
#
# This class uses ActiveRecord to access data in the Ensembl database.
# See the general documentation of the Ensembl module for
# more information on what this means and what methods are available.
class Meta < DBConnection
  # No primary key is configured for this table.
  set_primary_key(nil)
end
577
+
578
+ # The Analysis class describes an analysis.
579
+ #
580
+ # This class uses ActiveRecord to access data in the Ensembl database.
581
+ # See the general documentation of the Ensembl module for
582
+ # more information on what this means and what methods are available.
583
+ #
584
+ # @example
585
+ # repeat_masker_analysis = Analysis.find_by_logic_name('RepeatMask')
586
+ # puts repeat_masker_analysis.to_yaml
587
class Analysis < DBConnection
  # Records are keyed on the analysis_id column.
  set_primary_key "analysis_id"

  # At most one description record hangs off an analysis.
  has_one :analysis_description

  # Feature and annotation collections that reference this analysis
  # (listed alphabetically; declaration order carries no meaning).
  has_many :density_types
  has_many :dna_align_features
  has_many :genes
  has_many :oligo_features
  has_many :prediction_transcripts
  has_many :protein_align_features
  has_many :protein_features
  has_many :regulatory_features
  has_many :simple_features
end
601
+
602
+ # The AnalysisDescription class belongs to an analysis.
603
+ #
604
+ # This class uses ActiveRecord to access data in the Ensembl database.
605
+ # See the general documentation of the Ensembl module for
606
+ # more information on what this means and what methods are available.
607
+ #
608
+ # @example
609
+ # descr = AnalysisDescription.find(3)
610
+ # puts descr.to_yaml
611
class AnalysisDescription < DBConnection
  # No primary key column is declared for this model.
  set_primary_key(nil)

  # The analysis this description is attached to.
  belongs_to :analysis
end
616
+
617
+ # The Dna class contains the actual DNA sequence for the sequence regions
618
+ # that belong to the seq_level coordinate system.
619
+ #
620
+ # This class uses ActiveRecord to access data in the Ensembl database.
621
+ # See the general documentation of the Ensembl module for
622
+ # more information on what this means and what methods are available.
623
+ #
624
+ # @example
625
+ # seq_region = SeqRegion.find(1)
626
+ # puts seq_region.dna.sequence
627
class Dna < DBConnection
  # No primary key column is declared for this model.
  set_primary_key(nil)

  # The sequence region this DNA record belongs to.
  belongs_to :seq_region
end
632
+
633
+ # The Exon class describes an exon.
634
+ #
635
+ # This class uses ActiveRecord to access data in the Ensembl database.
636
+ # See the general documentation of the Ensembl module for
637
+ # more information on what this means and what methods are available.
638
+ #
639
+ # This class includes the mixin Sliceable, which means that it is mapped
640
+ # to a SeqRegion object and a Slice can be created for objects of this
641
+ # class. See Sliceable and Slice for more information.
642
+ #
643
+ # @example
644
+ # seq_region = SeqRegion.find(1)
645
+ # puts seq_region.exons.length
646
class Exon < DBConnection
  include Sliceable

  set_primary_key 'exon_id'

  belongs_to :seq_region
  has_many :exon_transcripts
  has_many :transcripts, :through => :exon_transcripts

  # The original code declared `has_many :translations` twice, once per
  # foreign key. A second declaration with the same name replaces the first,
  # so translations *starting* at this exon could never be reached. Both
  # sides now have their own, distinctly named association.
  has_many :start_translations, :class_name => 'Translation', :foreign_key => 'start_exon_id'
  has_many :end_translations, :class_name => 'Translation', :foreign_key => 'end_exon_id'
  # Kept for backward compatibility: identical to the declaration that
  # previously took effect (translations ending at this exon).
  has_many :translations, :foreign_key => 'end_exon_id'

  has_one :exon_stable_id

  has_many :exon_supporting_features
  has_many :dna_align_features, :through => :exon_supporting_features, :conditions => ["feature_type = 'dna_align_feature'"]
  has_many :protein_align_features, :through => :exon_supporting_features, :conditions => ["feature_type = 'protein_align_feature'"]

  # Returns the stable ID string stored in the associated ExonStableId record.
  # Raises NoMethodError when the exon has no such record.
  def stable_id
    exon_stable_id.stable_id
  end

  # The Exon#seq method returns the sequence of the exon.
  #
  # The SeqRegion is looked up in the Ensembl::SESSION.seq_regions cache
  # first; on a miss it is loaded through the association and stored in the
  # cache. The sequence itself is taken from a Slice covering the exon.
  def seq
    cache = Ensembl::SESSION.seq_regions
    region =
      if cache.has_key?(seq_region_id)
        cache[seq_region_id]
      else
        loaded = self.seq_region
        cache[loaded.id] = loaded
      end
    Ensembl::Core::Slice.new(region, seq_region_start, seq_region_end, seq_region_strand).seq
  end

  # Fetches the Exon whose stable ID matches.
  #
  # @param stable_id [String] stable ID to look up
  # @return [Exon, nil] the exon, or nil when the stable ID is unknown
  def self.find_by_stable_id(stable_id)
    record = ExonStableId.find_by_stable_id(stable_id)
    record.nil? ? nil : record.exon
  end
end
692
+
693
+ # The ExonStableId class provides an interface to the exon_stable_id
694
+ # table. This table contains Ensembl stable IDs for exons.
695
+ #
696
+ # This class uses ActiveRecord to access data in the Ensembl database.
697
+ # See the general documentation of the Ensembl module for
698
+ # more information on what this means and what methods are available.
699
+ #
700
+ # @example
701
+ # my_exon = ExonStableId.find_by_stable_id('ENSE00001494622').exon
702
class ExonStableId < DBConnection
  # The stable_id column itself serves as the primary key.
  set_primary_key "stable_id"

  # The exon this stable ID names.
  belongs_to :exon
end
707
+
708
+ # The ExonTranscript class provides the link between exons and transcripts.
709
+ #
710
+ # This class uses ActiveRecord to access data in the Ensembl database.
711
+ # See the general documentation of the Ensembl module for
712
+ # more information on what this means and what methods are available.
713
+ #
714
+ # @example
715
+ # link = ExonTranscript.find(1)
716
+ # puts link.exon.to_yaml
717
+ # puts link.transcript.to_yaml
718
class ExonTranscript < DBConnection
  # No primary key column is declared for this model.
  set_primary_key(nil)

  # The two ends of the link.
  belongs_to :transcript
  belongs_to :exon
end
724
+
725
class ExonSupportingFeature < DBConnection
  # Backed by the supporting_feature table rather than a table derived
  # from the class name; no primary key column is declared.
  set_table_name "supporting_feature"
  set_primary_key(nil)

  belongs_to :exon

  # Both feature associations read the same feature_id column.
  # NOTE(review): presumably feature_type decides which one applies - confirm.
  belongs_to :dna_align_feature, :class_name => "DnaAlignFeature", :foreign_key => "feature_id"
  belongs_to :protein_align_feature, :class_name => "ProteinAlignFeature", :foreign_key => "feature_id"
end
733
+
734
class TranscriptSupportingFeature < DBConnection
  # No primary key column is declared for this model.
  set_primary_key(nil)

  belongs_to :transcript

  # Both feature associations read the same feature_id column.
  # NOTE(review): presumably feature_type decides which one applies - confirm.
  belongs_to :dna_align_feature, :class_name => "DnaAlignFeature", :foreign_key => "feature_id"
  belongs_to :protein_align_feature, :class_name => "ProteinAlignFeature", :foreign_key => "feature_id"
end
741
+
742
+ # The SimpleFeature class describes simple features that have positions
743
+ # on a SeqRegion.
744
+ #
745
+ # This class uses ActiveRecord to access data in the Ensembl database.
746
+ # See the general documentation of the Ensembl module for
747
+ # more information on what this means and what methods are available.
748
+ #
749
+ # This class includes the mixin Sliceable, which means that it is mapped
750
+ # to a SeqRegion object and a Slice can be created for objects of this
751
+ # class. See Sliceable and Slice for more information.
752
+ #
753
+ # @example
754
+ # simple_feature = SimpleFeature.find(123)
755
+ # puts simple_feature.analysis.logic_name
756
class SimpleFeature < DBConnection
  # Pulls in the Sliceable mixin defined elsewhere in this library.
  include Sliceable

  # Records are keyed on the simple_feature_id column.
  set_primary_key "simple_feature_id"

  # Where the feature lies and which analysis it is linked to.
  belongs_to :seq_region
  belongs_to :analysis
end
764
+
765
+ # The DensityFeature class provides an interface to the density_feature
766
+ # table.
767
+ #
768
+ # This class uses ActiveRecord to access data in the Ensembl database.
769
+ # See the general documentation of the Ensembl module for
770
+ # more information on what this means and what methods are available.
771
+ #
772
+ # This class includes the mixin Sliceable, which means that it is mapped
773
+ # to a SeqRegion object and a Slice can be created for objects of this
774
+ # class. See Sliceable and Slice for more information.
775
+ #
776
+ # @example
777
+ # density_feature = DensityFeature.find(2716384)
778
+ # puts density_feature.to_yaml
779
class DensityFeature < DBConnection
  # Records are keyed on the density_feature_id column.
  set_primary_key "density_feature_id"

  # NOTE(review): unlike the other seq_region-bound feature classes in this
  # file, this class does not include Sliceable - confirm that is intended.
  belongs_to :seq_region
  belongs_to :density_type
end
785
+
786
+ # The DensityType class provides an interface to the density_type
787
+ # table.
788
+ #
789
+ # This class uses ActiveRecord to access data in the Ensembl database.
790
+ # See the general documentation of the Ensembl module for
791
+ # more information on what this means and what methods are available.
792
+ #
793
+ # This class includes the mixin Sliceable, which means that it is mapped
794
+ # to a SeqRegion object and a Slice can be created for objects of this
795
+ # class. See Sliceable and Slice for more information.
796
+ #
797
class DensityType < DBConnection
  # Records are keyed on the density_type_id column.
  set_primary_key "density_type_id"

  # The analysis this density type is linked to.
  belongs_to :analysis

  # All density features recorded for this type.
  has_many :density_features
end
803
+
804
+ # The Marker class provides an interface to the marker
805
+ # table. This table contains primer sequences and PCR product lengths.
806
+ #
807
+ # This class uses ActiveRecord to access data in the Ensembl database.
808
+ # See the general documentation of the Ensembl module for
809
+ # more information on what this means and what methods are available.
810
+ #
811
+ # @example
812
+ # marker = Marker.find(52194)
813
+ # puts marker.left_primer
814
+ # puts marker.right_primer
815
+ # puts marker.min_primer_dist.to_s
816
class Marker < DBConnection
  # Records are keyed on the marker_id column.
  set_primary_key "marker_id"

  has_many :marker_features
  has_many :marker_synonyms
  has_many :marker_map_locations

  # Overridden to return nil instead of ActiveRecord's default
  # inheritance column name.
  def self.inheritance_column
    nil
  end

  # Joins the names of all synonyms of this marker with commas.
  #
  # @example
  #   marker = Marker.find(1)
  #   puts marker.name    --> 58017,D29149
  # @return [String] comma-separated synonym names
  def name
    marker_synonyms.map { |synonym| synonym.name }.join(',')
  end

  # Returns the first marker that carries the given synonym name.
  #
  # @return [Marker, nil] Marker object or nil
  def self.find_by_name(name)
    # Indexing an empty array yields nil, which covers the "not found" case.
    find_all_by_name(name)[0]
  end

  # Returns every marker that has a synonym with the given name.
  #
  # @return [Array] Empty array or array of Marker objects
  def self.find_all_by_name(name)
    synonyms = Ensembl::Core::MarkerSynonym.find_all_by_name(name)
    per_synonym = synonyms.map do |synonym|
      Ensembl::Core::Marker.find_all_by_marker_id(synonym.marker_id)
    end
    per_synonym.flatten
  end

  #def to_mappings
  #  output = Array.new
  #  self.marker_features.each do |mf|
  #    output.push(mf.slice.display_name)
  #  end
  #  return output.join("\n")
  #
  #end
end
873
+
874
+ # The MarkerSynonym class provides an interface to the marker_synonym
875
+ # table. This table contains names for markers (that are themselves
876
+ # stored in the marker table (so Marker class)).
877
+ #
878
+ # This class uses ActiveRecord to access data in the Ensembl database.
879
+ # See the general documentation of the Ensembl module for
880
+ # more information on what this means and what methods are available.
881
+ #
882
+ # @example
883
+ # marker = Marker.find(52194)
884
+ # puts marker.marker_synonyms.first.source
885
+ # puts marker.marker_synonyms.first.name
886
class MarkerSynonym < DBConnection
  # Records are keyed on the marker_synonym_id column.
  set_primary_key "marker_synonym_id"

  # The marker this synonym names.
  belongs_to :marker
end
891
+
892
+ # The MarkerFeature class provides an interface to the marker_feature
893
+ # table. This table contains mappings of markers to a SeqRegion.
894
+ #
895
+ # This class uses ActiveRecord to access data in the Ensembl database.
896
+ # See the general documentation of the Ensembl module for
897
+ # more information on what this means and what methods are available.
898
+ #
899
+ # This class includes the mixin Sliceable, which means that it is mapped
900
+ # to a SeqRegion object and a Slice can be created for objects of this
901
+ # class. See Sliceable and Slice for more information.
902
+ #
903
+ # @example
904
+ # marker = Marker.find(52194)
905
+ # puts marker.marker_feature.seq_region_start.to_s
906
+ # puts marker.marker_feature.seq_region_end.to_s
907
class MarkerFeature < DBConnection
  # Pulls in the Sliceable mixin defined elsewhere in this library.
  include Sliceable

  # Records are keyed on the marker_feature_id column.
  set_primary_key "marker_feature_id"

  # Where the mapping lies and which marker it maps.
  belongs_to :seq_region
  belongs_to :marker
end
915
+
916
+ # The MiscFeature class provides an interface to the misc_feature
917
+ # table. The actual type of feature is stored in the MiscSet class.
918
+ #
919
+ # This class uses ActiveRecord to access data in the Ensembl database.
920
+ # See the general documentation of the Ensembl module for
921
+ # more information on what this means and what methods are available.
922
+ #
923
+ # This class includes the mixin Sliceable, which means that it is mapped
924
+ # to a SeqRegion object and a Slice can be created for objects of this
925
+ # class. See Sliceable and Slice for more information.
926
+ #
927
+ # @example
928
+ # #TODO
929
class MiscFeature < DBConnection
  # Pulls in the Sliceable mixin defined elsewhere in this library.
  include Sliceable

  # Records are keyed on the misc_feature_id column.
  set_primary_key "misc_feature_id"

  belongs_to :seq_region
  has_one :misc_feature_misc_set
  has_many :misc_sets, :through => :misc_feature_misc_set

  has_many :misc_attribs

  # Shorter name for the misc_attribs association reader.
  alias attribs misc_attribs

  # Returns the first feature carrying an attribute of the given type code
  # and value, or nil when there is none.
  def self.find_by_attrib_type_value(code, value)
    find_all_by_attrib_type_value(code, value)[0]
  end

  # Returns all features carrying an attribute of the given type code and
  # value (an empty array when nothing matches).
  def self.find_all_by_attrib_type_value(code, value)
    # NOTE(review): the AttribType record itself (not its id) is handed to
    # the attrib_type_id finder, exactly as before - confirm this is intended.
    attrib_type = AttribType.find_by_code(code)
    matching_attribs = MiscAttrib.find_all_by_attrib_type_id_and_value(attrib_type, value)
    per_attrib = matching_attribs.map do |attrib|
      MiscFeature.find_all_by_misc_feature_id(attrib.misc_feature_id)
    end
    per_attrib.flatten
  end
end
957
+
958
+
959
+ # The MiscAttrib class provides an interface to the misc_attrib
960
+ # table. It is the link between MiscFeature and AttribType.
961
+ #
962
+ # This class uses ActiveRecord to access data in the Ensembl database.
963
+ # See the general documentation of the Ensembl module for
964
+ # more information on what this means and what methods are available.
965
+ #
966
+ # @example
967
+ # marker = Marker.find(52194)
968
+ # puts marker.marker_feature.seq_region_start.to_s
969
+ # puts marker.marker_feature.seq_region_end.to_s
970
class MiscAttrib < DBConnection
  # No primary key column is declared for this model.
  set_primary_key(nil)

  # The attribute's type and the feature it is attached to.
  belongs_to :attrib_type
  belongs_to :misc_feature

  # Renders the attribute as "<attrib type code>:<value>".
  def to_s
    attrib_type.code + ":" + value.to_s
  end
end
980
+
981
+ # The MiscSet class provides an interface to the misc_set
982
+ # table. This table contains the sets to which MiscFeature objects
983
+ # belong.
984
+ #
985
+ # This class uses ActiveRecord to access data in the Ensembl database.
986
+ # See the general documentation of the Ensembl module for
987
+ # more information on what this means and what methods are available.
988
+ #
989
+ # @example
990
+ # feature_set = MiscSet.find(1)
991
+ # puts feature_set.misc_features.length.to_s
992
class MiscSet < DBConnection
  # Records are keyed on the misc_set_id column.
  set_primary_key 'misc_set_id'

  # Link records connecting this set to its features.
  has_many :misc_feature_misc_sets

  # Features belonging to this set. The :through option must name the
  # association declared above (plural); the original singular
  # :misc_feature_misc_set referred to an association that does not exist
  # on this class.
  has_many :misc_features, :through => :misc_feature_misc_sets
end
998
+
999
+ # The MiscFeatureMiscSet class provides an interface to the
1000
+ # misc_feature_misc_set table. This table links MiscFeature objects to
1001
+ # their MiscSet.
1002
+ #
1003
+ # This class uses ActiveRecord to access data in the Ensembl database.
1004
+ # See the general documentation of the Ensembl module for
1005
+ # more information on what this means and what methods are available.
1006
+ #
1007
+ # @example
1008
+ # # TODO
1009
class MiscFeatureMiscSet < DBConnection
  # No primary key column is declared for this model.
  set_primary_key(nil)

  # The two ends of the link.
  belongs_to :misc_set
  belongs_to :misc_feature
end
1015
+
1016
+ # The Gene class provides an interface to the gene
1017
+ # table. This table contains mappings of genes to a SeqRegion.
1018
+ #
1019
+ # This class uses ActiveRecord to access data in the Ensembl database.
1020
+ # See the general documentation of the Ensembl module for
1021
+ # more information on what this means and what methods are available.
1022
+ #
1023
+ # This class includes the mixin Sliceable, which means that it is mapped
1024
+ # to a SeqRegion object and a Slice can be created for objects of this
1025
+ # class. See Sliceable and Slice for more information.
1026
+ #
1027
+ # @example
1028
+ # puts Gene.find_all_by_biotype('protein_coding').length
1029
class Gene < DBConnection
  include Sliceable

  set_primary_key 'gene_id'

  belongs_to :seq_region
  has_one :gene_stable_id

  has_many :gene_attribs
  # :through must name the association declared on the line above; the
  # original singular :gene_attrib referred to a non-existent association.
  has_many :attrib_types, :through => :gene_attribs

  has_many :transcripts

  belongs_to :analysis

  has_many :object_xrefs, :foreign_key => 'ensembl_id', :conditions => "ensembl_object_type = 'Gene'"
  has_many :xrefs, :through => :object_xrefs

  alias attribs gene_attribs

  # The Gene#stable_id method returns the stable_id of the gene (i.e. the
  # ENSG id). Raises NoMethodError when the gene has no GeneStableId record.
  def stable_id
    gene_stable_id.stable_id
  end

  # The Gene#display_label method returns the default name of the gene,
  # i.e. the display_label of the Xref referenced by display_xref_id.
  def display_label
    Xref.find(display_xref_id).display_label
  end
  alias :display_name :display_label
  alias :label :display_label
  alias :name :display_label

  # The Gene#find_all_by_name class method searches the Xrefs for that name
  # and returns an array of the corresponding Gene objects. If the name is
  # not found, it returns an empty array.
  def self.find_all_by_name(name)
    xrefs = Ensembl::Core::Xref.find_all_by_display_label(name)
    genes = xrefs.map { |xref| Ensembl::Core::Gene.find_by_display_xref_id(xref.xref_id) }
    # Xrefs that are not the display xref of any gene yield nil; drop those.
    genes.compact
  end

  # The Gene#find_by_name class method searches the Xrefs for that name
  # and returns one Gene object (even if more than one matches). If the
  # name is not found, it returns nil.
  def self.find_by_name(name)
    find_all_by_name(name)[0]
  end

  # The Gene#find_by_stable_id class method fetches a Gene object based on
  # its stable ID (i.e. the "ENSG" accession number).
  #
  # @param stable_id [String, Array<String>] one stable ID or a list of them
  # @return [Gene, Array<Gene>, nil] a single Gene for a single ID, an array
  #   of genes for an array of IDs, or nil when nothing is found
  def self.find_by_stable_id(stable_id)
    if stable_id.kind_of?(Array)
      records = GeneStableId.where({:stable_id => stable_id})
      return nil if records.size == 0
      records.map { |record| record.gene }
    else
      record = GeneStableId.find_by_stable_id(stable_id)
      record.nil? ? nil : record.gene
    end
  end

  # The Gene#all_xrefs method is a convenience method in that it combines
  # three methods into one. It collects all xrefs for the gene itself, plus
  # all xrefs for all transcripts for the gene, and all xrefs for all
  # translations for those transcripts.
  def all_xrefs
    collected = [xrefs]
    transcripts.each do |transcript|
      collected << transcript.xrefs
      translation = transcript.translation
      collected << translation.xrefs unless translation.nil?
    end
    collected.flatten
  end

  # The Gene#go_terms method returns all GO terms associated with a gene,
  # as a list of unique primary accessions.
  def go_terms
    go_db_id = ExternalDb.find_by_db_name('GO').id
    all_xrefs.select { |x| x.external_db_id == go_db_id }.collect { |x| x.dbprimary_acc }.uniq
  end

  # The Gene#hgnc returns the HGNC symbol for the gene, or nil when the gene
  # has no xref in the 'HGNC_curated_gene' external database.
  def hgnc
    hgnc_db_id = ExternalDb.find_by_db_name('HGNC_curated_gene').id
    xref = all_xrefs.select { |x| x.external_db_id == hgnc_db_id }[0]
    return nil if xref.nil?
    xref.display_label
  end

  # The Gene#canonical_transcript returns the longest transcript for that
  # gene (nil when the gene has no transcripts).
  #
  # This method used to be defined after the closing `end` of the class, so
  # Gene objects did not actually respond to it; it now lives inside Gene.
  # max_by evaluates each transcript's sequence once instead of once per
  # sort comparison.
  def canonical_transcript
    transcripts.max_by { |transcript| transcript.seq.length }
  end
end
1144
+
1145
+ # The GeneStableId class provides an interface to the gene_stable_id
1146
+ # table. This table contains Ensembl stable IDs for genes.
1147
+ #
1148
+ # This class uses ActiveRecord to access data in the Ensembl database.
1149
+ # See the general documentation of the Ensembl module for
1150
+ # more information on what this means and what methods are available.
1151
+ #
1152
+ # @example
1153
+ # my_gene = GeneStableId.find_by_stable_id('ENSBTAG00000011670').gene
1154
class GeneStableId < DBConnection
  # The stable_id column itself serves as the primary key.
  set_primary_key "stable_id"

  # The gene this stable ID names.
  belongs_to :gene
end
1159
+
1160
+ # The MarkerMapLocation class provides an interface to the
1161
+ # marker_map_location table. This table contains mappings of
1162
+ # MarkerSynonym objects to a chromosome, and basically just stores
1163
+ # the genetic maps.
1164
+ #
1165
+ # This class uses ActiveRecord to access data in the Ensembl database.
1166
+ # See the general documentation of the Ensembl module for
1167
+ # more information on what this means and what methods are available.
1168
+ #
1169
+ # @example
1170
+ # marker_synonym = MarkerSynonym.find_by_name('CYP19A1_(5)')
1171
+ # marker_synonym.marker_map_locations.each do |mapping|
1172
+ # puts mapping.chromosome_name + "\t" + mapping.position.to_s
1173
+ # end
1174
class MarkerMapLocation < DBConnection
  # No primary key column is declared for this model.
  set_primary_key(nil)

  # The marker being placed and the map it is placed on.
  belongs_to :marker
  belongs_to :map
end
1181
+
1182
+ # The Map class provides an interface to the map
1183
+ # table. This table contains genetic maps.
1184
+ #
1185
+ # This class uses ActiveRecord to access data in the Ensembl database.
1186
+ # See the general documentation of the Ensembl module for
1187
+ # more information on what this means and what methods are available.
1188
+ #
1189
+ # @example
1190
+ # map = Map.find_by_name('MARC')
1191
+ # puts map.markers.length.to_s
1192
class Map < DBConnection
  # Records are keyed on the map_id column.
  set_primary_key "map_id"

  has_many :marker_map_locations
  has_many :markers, :through => :marker_map_locations

  # Convenience reader: exposes the map_name attribute as #name.
  def name
    map_name
  end
end
1202
+
1203
+ # The RepeatConsensus class provides an interface to the repeat_consensus
1204
+ # table. This table contains consensus sequences for repeats.
1205
+ #
1206
+ # This class uses ActiveRecord to access data in the Ensembl database.
1207
+ # See the general documentation of the Ensembl module for
1208
+ # more information on what this means and what methods are available.
1209
+ #
1210
+ # @example
1211
+ # repeat = RepeatFeature.find(29)
1212
+ # puts repeat.repeat_consensus.repeat_name + "\t" + repeat.repeat_consensus.repeat_consensus
1213
class RepeatConsensus < DBConnection
  # Records are keyed on the repeat_consensus_id column.
  set_primary_key "repeat_consensus_id"

  # Repeat features that reference this consensus.
  has_many :repeat_features
end
1218
+
1219
+ # The RepeatFeature class provides an interface to the repeat_feature
1220
+ # table. This table contains mappings of repeats to a SeqRegion.
1221
+ #
1222
+ # This class uses ActiveRecord to access data in the Ensembl database.
1223
+ # See the general documentation of the Ensembl module for
1224
+ # more information on what this means and what methods are available.
1225
+ #
1226
+ # This class includes the mixin Sliceable, which means that it is mapped
1227
+ # to a SeqRegion object and a Slice can be created for objects of this
1228
+ # class. See Sliceable and Slice for more information.
1229
+ #
1230
+ # @example
1231
+ # repeat_feature = RepeatFeature.find(29)
1232
+ # puts repeat_feature.seq_region_start.to_s
1233
class RepeatFeature < DBConnection
  # Pulls in the Sliceable mixin defined elsewhere in this library.
  include Sliceable

  # Records are keyed on the repeat_feature_id column.
  set_primary_key "repeat_feature_id"

  # Where the repeat lies and which consensus it refers to.
  belongs_to :seq_region
  belongs_to :repeat_consensus
end
1241
+
1242
+ # The SeqRegionAttrib class provides an interface to the seq_region_attrib
1243
+ # table. This table contains attribute values for SeqRegion objects
1244
+ #
1245
+ # This class uses ActiveRecord to access data in the Ensembl database.
1246
+ # See the general documentation of the Ensembl module for
1247
+ # more information on what this means and what methods are available.
1248
+ #
1249
+ # @example
1250
+ # chr4 = SeqRegion.find_by_name('4')
1251
+ # chr4.seq_region_attribs.each do |attrib|
1252
+ # puts attrib.attrib_type.name + "\t" + attrib.value.to_s
1253
+ # end
1254
class SeqRegionAttrib < DBConnection
  # No primary key column is declared for this model.
  set_primary_key(nil)

  # The attribute's type and the sequence region it describes.
  belongs_to :attrib_type
  belongs_to :seq_region
end
1260
+
1261
+ # The GeneAttrib class provides an interface to the gene_attrib
1262
+ # table. This table contains attribute values for Gene objects
1263
+ #
1264
+ # This class uses ActiveRecord to access data in the Ensembl database.
1265
+ # See the general documentation of the Ensembl module for
1266
+ # more information on what this means and what methods are available.
1267
+ #
1268
+ # @example
1269
+ # #TODO
1270
class GeneAttrib < DBConnection
  # No primary key column is declared for this model.
  set_primary_key(nil)

  # The attribute's type and the gene it describes.
  belongs_to :attrib_type
  belongs_to :gene
end
1276
+
1277
+ # The AttribType class provides an interface to the attrib_type
1278
+ # table. This table contains the types that attributes can belong to for
1279
+ # SeqRegion, Gene and Transcript.
1280
+ #
1281
+ # This class uses ActiveRecord to access data in the Ensembl database.
1282
+ # See the general documentation of the Ensembl module for
1283
+ # more information on what this means and what methods are available.
1284
+ #
1285
+ # @example
1286
+ # #TODO
1287
class AttribType < DBConnection
  # Records are keyed on the attrib_type_id column.
  set_primary_key 'attrib_type_id'

  # For each has_many :through below, the :through option must name the
  # association declared directly above it. The original used singular names
  # (:seq_region_attrib, :gene_attrib, :transcript_attrib) that do not match
  # any declared association.
  has_many :seq_region_attribs
  has_many :seq_regions, :through => :seq_region_attribs

  has_many :gene_attribs
  has_many :genes, :through => :gene_attribs

  has_many :transcript_attribs
  has_many :transcripts, :through => :transcript_attribs
end
1299
+
1300
+ # The TranscriptStableId class provides an interface to the transcript_stable_id
1301
+ # table. This table contains the Ensembl stable IDs for Transcript
1302
+ # objects.
1303
+ #
1304
+ # This class uses ActiveRecord to access data in the Ensembl database.
1305
+ # See the general documentation of the Ensembl module for
1306
+ # more information on what this means and what methods are available.
1307
+ #
1308
+ # @example
1309
+ # transcript_stable_id = TranscriptStableId.find_by_stable_id('ENSBTAT00000015494')
1310
+ # puts transcript_stable_id.transcript.to_yaml
1311
class TranscriptStableId < DBConnection
  # The stable_id column itself serves as the primary key.
  set_primary_key "stable_id"

  # The transcript this stable ID names.
  belongs_to :transcript
end
1316
+
1317
+ # The TranscriptAttrib class provides an interface to the transcript_attrib
1318
+ # table. This table contains the attributes for Transcript objects.
1319
+ #
1320
+ # This class uses ActiveRecord to access data in the Ensembl database.
1321
+ # See the general documentation of the Ensembl module for
1322
+ # more information on what this means and what methods are available.
1323
+ #
1324
+ # @example
1325
+ # transcript = Transcript.find(32495)
1326
+ # transcript.transcript_attribs.each do |attr|
1327
+ # puts attr.attrib_type.name + "\t" + attr.value
1328
+ # end
1329
class TranscriptAttrib < DBConnection
  # No primary key column is declared for this model.
  set_primary_key(nil)

  # The attribute's type and the transcript it describes.
  belongs_to :attrib_type
  belongs_to :transcript
end
1335
+
1336
+ # The DnaAlignFeature class provides an interface to the
1337
+ # dna_align_feature table. This table contains sequence similarity
1338
+ # mappings against a SeqRegion.
1339
+ #
1340
+ # This class uses ActiveRecord to access data in the Ensembl database.
1341
+ # See the general documentation of the Ensembl module for
1342
+ # more information on what this means and what methods are available.
1343
+ #
1344
+ # This class includes the mixin Sliceable, which means that it is mapped
1345
+ # to a SeqRegion object and a Slice can be created for objects of this
1346
+ # class. See Sliceable and Slice for more information.
1347
+ #
1348
+ # @example
1349
+ # unigene_scan = Analysis.find_by_logic_name('Unigene')
1350
+ # unigene_scan.dna_align_features.each do |hit|
1351
+ # puts hit.seq_region.name + "\t" + hit.hit_name + "\t" + hit.cigar_line
1352
+ # end
1353
class DnaAlignFeature < DBConnection
  include Sliceable

  # Records are keyed on the dna_align_feature_id column.
  set_primary_key 'dna_align_feature_id'

  belongs_to :seq_region
  belongs_to :analysis

  # ExonSupportingFeature points back at this class through its feature_id
  # column (see its belongs_to :dna_align_feature), and the same column is
  # shared with protein align features, discriminated by feature_type.
  # Without these options the association would look for a
  # dna_align_feature_id column on the supporting feature table.
  has_many :exon_supporting_features, :foreign_key => 'feature_id', :conditions => ["feature_type = 'dna_align_feature'"]

  # NOTE(review): no ProteinSupportingFeature class is visible in this part
  # of the file; this may have been meant to be
  # :transcript_supporting_features - confirm before relying on it.
  has_many :protein_supporting_features
end
1364
+
1365
+ # The Translation class provides an interface to the
1366
+ # translation table. This table contains the translation start and
1367
+ # stop positions and exons for a given Transcript
1368
+ #
1369
+ # This class uses ActiveRecord to access data in the Ensembl database.
1370
+ # See the general documentation of the Ensembl module for
1371
+ # more information on what this means and what methods are available.
1372
+ #
1373
+ # @example
1374
+ # #TODO
1375
class Translation < DBConnection
  # Records are keyed on the translation_id column.
  set_primary_key "translation_id"

  belongs_to :transcript
  has_many :translation_stable_ids

  has_many :translation_attribs
  has_many :protein_features

  has_one :translation_stable_id

  has_many :object_xrefs, :foreign_key => "ensembl_id", :conditions => "ensembl_object_type = 'Translation'"
  has_many :xrefs, :through => :object_xrefs

  # First and last exon of the translation, both stored as Exon records.
  belongs_to :start_exon, :class_name => "Exon", :foreign_key => "start_exon_id"
  belongs_to :end_exon, :class_name => "Exon", :foreign_key => "end_exon_id"

  # Shorter name for the translation_attribs association reader.
  alias attribs translation_attribs

  # Returns the stable ID held by the associated TranslationStableId record.
  #
  # @return [String] Ensembl stable ID
  def stable_id
    translation_stable_id.stable_id
  end

  # Returns the default name of the translation: the display_label of the
  # Xref referenced by display_xref_id.
  def display_label
    Xref.find(display_xref_id).display_label
  end
  alias :display_name :display_label
  alias :label :display_label
  alias :name :display_label

  # Fetches a Translation by its stable ID (i.e. the "ENSP" accession
  # number). Returns nil when the stable ID is not found.
  def self.find_by_stable_id(stable_id)
    record = TranslationStableId.find_by_stable_id(stable_id)
    record.nil? ? nil : record.translation
  end
end
1421
+
1422
+ # The TranslationStableId class provides an interface to the
1423
+ # translation_stable_id table. This table contains the Ensembl stable IDs
1424
+ # for a given Translation.
1425
+ #
1426
+ # This class uses ActiveRecord to access data in the Ensembl database.
1427
+ # See the general documentation of the Ensembl module for
1428
+ # more information on what this means and what methods are available.
1429
+ #
1430
+ # @example
1431
+ # stable_id = TranslationStableId.find_by_stable_id('ENSBTAP00000015494')
1432
+ # puts stable_id.to_yaml
1433
class TranslationStableId < DBConnection
  # The stable_id column itself serves as the primary key.
  set_primary_key "stable_id"

  # The translation this stable ID names.
  belongs_to :translation
end
1438
+
1439
+ # The TranslationAttrib class provides an interface to the
1440
+ # translation_attrib table. This table contains attribute values for the
1441
+ # Translation class.
1442
+ #
1443
+ # This class uses ActiveRecord to access data in the Ensembl database.
1444
+ # See the general documentation of the Ensembl module for
1445
+ # more information on what this means and what methods are available.
1446
+ #
1447
+ # @example
1448
+ # translation = Translation.find(9979)
1449
+ # translation.translation_attribs.each do |attr|
1450
+ # puts attr.attrib_type.name + "\t" + attr.value
1451
+ # end
1452
class TranslationAttrib < DBConnection
  # No primary key column is declared for this model.
  set_primary_key(nil)

  # The attribute's type and the translation it describes.
  belongs_to :attrib_type
  belongs_to :translation
end
1458
+
1459
+ # The Xref class provides an interface to the
1460
+ # xref table. This table contains external references for objects in the
1461
+ # database.
1462
+ #
1463
+ # This class uses ActiveRecord to access data in the Ensembl database.
1464
+ # See the general documentation of the Ensembl module for
1465
+ # more information on what this means and what methods are available.
1466
+ #
1467
+ # @example
1468
+ # gene = Gene.find(1)
1469
+ # gene.xrefs.each do |xref|
1470
+ # puts xref.display_label + "\t" + xref.description
1471
+ # end
1472
class Xref < DBConnection
  set_primary_key 'xref_id'

  # The database this reference points into, and any alternative names.
  belongs_to :external_db
  has_many :external_synonyms

  # NOTE(review): declared without an explicit :foreign_key, so ActiveRecord
  # falls back to its default column name - confirm the gene table has it.
  has_many :genes

  # Renders the xref as "<external db name>:<display label>".
  #
  # @return [String]
  def to_s
    db_prefix = external_db.db_name.to_s
    db_prefix + ":" + display_label
  end
end
1484
+
1485
+ # The ObjectXref class provides the link between gene, transcript and
1486
+ # translation objects on the one hand and an xref on the other.
1487
+ #
1488
+ # This class uses ActiveRecord to access data in the Ensembl database.
1489
+ # See the general documentation of the Ensembl module for
1490
+ # more information on what this means and what methods are available.
1491
+ #
1492
+ # @example
1493
+ # gene = Gene.find(1)
1494
+ # gene.object_xrefs.each do |ox|
1495
+ # puts ox.to_yaml
1496
+ # end
1497
class ObjectXref < DBConnection
  set_primary_key 'object_xref_id'

  # The ensembl_id column is the foreign key for all three associations
  # below; each one names a different ensembl_object_type in :conditions.
  # NOTE(review): ActiveRecord applies belongs_to :conditions to the query on
  # the associated table - confirm these fragments resolve as intended.
  belongs_to :gene,
             :class_name => "Gene",
             :foreign_key => 'ensembl_id',
             :conditions => ["ensembl_object_type = 'Gene'"]
  belongs_to :transcript,
             :class_name => "Transcript",
             :foreign_key => 'ensembl_id',
             :conditions => ["ensembl_object_type = 'Transcript'"]
  belongs_to :translation,
             :class_name => "Translation",
             :foreign_key => 'ensembl_id',
             :conditions => ["ensembl_object_type = 'Translation'"]

  # The external reference itself, plus its optional GO evidence row.
  belongs_to :xref
  has_one :go_xref
end
1506
+
1507
+ # The GoXref class provides an interface to the
1508
+ # go_xref table. This table contains the evidence codes for those object_xrefs
1509
+ # that are GO terms.
1510
+ #
1511
+ # This class uses ActiveRecord to access data in the Ensembl database.
1512
+ # See the general documentation of the Ensembl module for
1513
+ # more information on what this means and what methods are available.
1514
class GoXref < DBConnection
  # No primary key column is declared for this table.
  set_primary_key nil

  # Inverse of ObjectXref's `has_one :go_xref`, so that a GoXref can be
  # navigated back to the object_xref row it annotates.
  belongs_to :object_xref

  # Kept for backward compatibility with existing callers.
  # NOTE(review): this needs an xref_id column on go_xref - confirm against
  # the schema in use.
  belongs_to :xref
end
1519
+
1520
+ # The ExternalDb class provides an interface to the
1521
+ # external_db table. This table contains references to databases to which
1522
+ # xrefs can point.
1523
+ #
1524
+ # This class uses ActiveRecord to access data in the Ensembl database.
1525
+ # See the general documentation of the Ensembl module for
1526
+ # more information on what this means and what methods are available.
1527
+ #
1528
+ # @example
1529
+ # embl_db = ExternalDb.find_by_db_name('EMBL')
1530
+ # puts embl_db.xrefs.length.to_s
1531
class ExternalDb < DBConnection
  set_primary_key 'external_db_id'

  has_many :xrefs

  # Overrides ActiveRecord's inheritance column with nil.
  # NOTE(review): presumably this keeps a `type` column in external_db from
  # being read as a single-table-inheritance class name - confirm against
  # the schema.
  def self.inheritance_column
    nil
  end

  # The ExternalDb.find_all_by_display_label method returns the external
  # database of every Xref that carries this display label. There should
  # normally be no more than one. If no xrefs are found with this label,
  # this method returns an empty array.
  #
  # @param label [String] display label to search the xrefs for
  # @return [Array] one entry per matching xref, in the order the xrefs
  #   were returned (an entry is nil when an xref has no external_db)
  def self.find_all_by_display_label(label)
    # Go through the Xref#external_db association. The earlier version called
    # a finder on self.class, which inside a class method is Class itself and
    # so raised NoMethodError instead of returning databases.
    Xref.find_all_by_display_label(label).map { |xref| xref.external_db }
  end

  # The ExternalDb.find_by_display_label method returns a database that has
  # this label. If no databases are found with this name, this method
  # returns nil.
  #
  # @param label [String] display label to search the xrefs for
  # @return the first result of find_all_by_display_label, or nil
  def self.find_by_display_label(label)
    find_all_by_display_label(label).first
  end
end
1569
+
1570
+ # The ExternalSynonym class provides an interface to the
1571
+ # external_synonym table. This table contains synonyms for Xref objects.
1572
+ #
1573
+ # This class uses ActiveRecord to access data in the Ensembl database.
1574
+ # See the general documentation of the Ensembl module for
1575
+ # more information on what this means and what methods are available.
1576
+ #
1577
+ # Unlike the feature classes in this file, this class does not include
1578
+ # the Sliceable mixin: a synonym is attached to an Xref and is not
1579
+ # mapped to a SeqRegion.
1580
+ #
1581
+ # @example
1582
+ # xref = Xref.find(185185)
1583
+ # puts xref.external_synonyms[0].synonym
1584
class ExternalSynonym < DBConnection
  # No primary key column is declared for this table.
  set_primary_key nil

  # Every synonym row hangs off a single Xref.
  belongs_to :xref
end
1589
+
1590
+ # The Karyotype class provides an interface to the
1591
+ # karyotype table. This table contains the karyotype bands of a SeqRegion.
1592
+ #
1593
+ # This class uses ActiveRecord to access data in the Ensembl database.
1594
+ # See the general documentation of the Ensembl module for
1595
+ # more information on what this means and what methods are available.
1596
+ #
1597
+ # This class includes the mixin Sliceable, which means that it is mapped
1598
+ # to a SeqRegion object and a Slice can be created for objects of this
1599
+ # class. See Sliceable and Slice for more information.
1600
+ #
1601
+ # @example
1602
+ # band = Karyotype.find_by_band('p36.32')
1603
+ # puts band.to_yaml
1604
class Karyotype < DBConnection
  include Sliceable

  set_primary_key 'karyotype_id'

  # The sequence region this record is located on.
  belongs_to :seq_region
end
1611
+
1612
+ # The OligoFeature class provides an interface to the
1613
+ # oligo_feature table. This table contains mappings of Oligo objects to
1614
+ # a SeqRegion.
1615
+ #
1616
+ # This class uses ActiveRecord to access data in the Ensembl database.
1617
+ # See the general documentation of the Ensembl module for
1618
+ # more information on what this means and what methods are available.
1619
+ #
1620
+ # This class includes the mixin Sliceable, which means that it is mapped
1621
+ # to a SeqRegion object and a Slice can be created for objects of this
1622
+ # class. See Sliceable and Slice for more information.
1623
+ #
1624
+ # @example
1625
+ # seq_region = SeqRegion.find_by_name('4')
1626
+ # puts seq_region.oligo_features.length
1627
class OligoFeature < DBConnection
  include Sliceable

  set_primary_key 'oligo_feature_id'

  # Where the feature sits, which probe it represents, and the analysis
  # it is associated with.
  belongs_to :seq_region
  belongs_to :oligo_probe
  belongs_to :analysis
end
1636
+
1637
+ # The OligoProbe class provides an interface to the
1638
+ # oligo_probe table.
1639
+ #
1640
+ # This class uses ActiveRecord to access data in the Ensembl database.
1641
+ # See the general documentation of the Ensembl module for
1642
+ # more information on what this means and what methods are available.
1643
+ #
1644
+ # @example
1645
+ # probe = OligoProbe.find_by_name('373:434;')
1646
+ # puts probe.probeset + "\t" + probe.oligo_array.name
1647
class OligoProbe < DBConnection
  set_primary_key 'oligo_probe_id'

  # A probe can have many features and belongs to one array.
  has_many :oligo_features
  belongs_to :oligo_array
end
1653
+
1654
+ # The OligoArray class provides an interface to the
1655
+ # oligo_array table. This table contains data describing a microarray
1656
+ # slide.
1657
+ #
1658
+ # This class uses ActiveRecord to access data in the Ensembl database.
1659
+ # See the general documentation of the Ensembl module for
1660
+ # more information on what this means and what methods are available.
1661
+ #
1662
+ # @example
1663
+ # array = OligoArray.find_by_name_and_type('Bovine','AFFY')
1664
+ # puts array.oligo_probes.length
1665
class OligoArray < DBConnection
  set_primary_key 'oligo_array_id'

  # All probes that belong to this array.
  has_many :oligo_probes
end
1670
+
1671
+ # The PredictionExon class provides an interface to the
1672
+ # prediction_exon table. This table contains the exons of a PredictionTranscript.
1673
+ #
1674
+ # This class uses ActiveRecord to access data in the Ensembl database.
1675
+ # See the general documentation of the Ensembl module for
1676
+ # more information on what this means and what methods are available.
1677
+ #
1678
+ # This class includes the mixin Sliceable, which means that it is mapped
1679
+ # to a SeqRegion object and a Slice can be created for objects of this
1680
+ # class. See Sliceable and Slice for more information.
1681
+ #
1682
+ # @example
1683
+ # #TODO
1684
class PredictionExon < DBConnection
  include Sliceable

  set_primary_key 'prediction_exon_id'

  # The predicted transcript this exon is part of, and its sequence region.
  belongs_to :prediction_transcript
  belongs_to :seq_region
end
1692
+
1693
+ # The PredictionTranscript class provides an interface to the
1694
+ # prediction_transcript table.
1695
+ #
1696
+ # This class uses ActiveRecord to access data in the Ensembl database.
1697
+ # See the general documentation of the Ensembl module for
1698
+ # more information on what this means and what methods are available.
1699
+ #
1700
+ # This class includes the mixin Sliceable, which means that it is mapped
1701
+ # to a SeqRegion object and a Slice can be created for objects of this
1702
+ # class. See Sliceable and Slice for more information.
1703
+ #
1704
+ # @example
1705
+ # predicted_transcript = PredictionTranscript.find_by_display_label('GENSCAN00000000006')
1706
+ # puts predicted_transcript.prediction_exons.length
1707
class PredictionTranscript < DBConnection
  include Sliceable

  set_primary_key 'prediction_transcript_id'

  # The exons that make up this predicted transcript.
  has_many :prediction_exons

  # Sequence region the transcript lies on and its associated analysis.
  belongs_to :seq_region
  belongs_to :analysis
end
1716
+
1717
+ # The ProteinFeature class provides an interface to the
1718
+ # protein_feature table. This table contains mappings of a Translation
1719
+ # onto a SeqRegion.
1720
+ #
1721
+ # This class uses ActiveRecord to access data in the Ensembl database.
1722
+ # See the general documentation of the Ensembl module for
1723
+ # more information on what this means and what methods are available.
1724
+ #
1725
+ # This class includes the mixin Sliceable, which means that it is mapped
1726
+ # to a SeqRegion object and a Slice can be created for objects of this
1727
+ # class. See Sliceable and Slice for more information.
1728
+ #
1729
+ # @example
1730
+ # #TODO
1731
class ProteinFeature < DBConnection
  # NOTE(review): Sliceable is mixed in, but no seq_region association is
  # declared in this class - confirm the mixin works without one.
  include Sliceable

  set_primary_key 'protein_feature_id'

  # The translation this feature belongs to and its associated analysis.
  belongs_to :translation
  belongs_to :analysis
end
1739
+
1740
+ # The ProteinAlignFeature class provides an interface to the
1741
+ # protein_align_feature table. This table contains sequence similarity
1742
+ # mappings against a SeqRegion.
1743
+ #
1744
+ # This class uses ActiveRecord to access data in the Ensembl database.
1745
+ # See the general documentation of the Ensembl module for
1746
+ # more information on what this means and what methods are available.
1747
+ #
1748
+ # This class includes the mixin Sliceable, which means that it is mapped
1749
+ # to a SeqRegion object and a Slice can be created for objects of this
1750
+ # class. See Sliceable and Slice for more information.
1751
+ #
1752
+ # @example
1753
+ # uniprot_scan = Analysis.find_by_logic_name('Uniprot')
1754
+ # uniprot_scan.protein_align_features.each do |hit|
1755
+ # puts hit.seq_region.name + "\t" + hit.hit_name + "\t" + hit.cigar_line
1756
+ # end
1757
class ProteinAlignFeature < DBConnection
  include Sliceable

  set_primary_key 'protein_align_feature_id'

  # Sequence region the alignment lies on and its associated analysis.
  belongs_to :seq_region
  belongs_to :analysis

  # Supporting-evidence rows that refer to this alignment.
  has_many :exon_supporting_features
  has_many :transcript_supporting_features
end
1768
+
1769
+ # The RegulatoryFactor class provides an interface to the
1770
+ # regulatory_factor table.
1771
+ #
1772
+ # This class uses ActiveRecord to access data in the Ensembl database.
1773
+ # See the general documentation of the Ensembl module for
1774
+ # more information on what this means and what methods are available.
1775
+ #
1776
+ # @example
1777
+ # factor = RegulatoryFactor.find_by_name('crtHsap8070')
1778
+ # puts factor.to_yaml
1779
class RegulatoryFactor < DBConnection
  set_primary_key 'regulatory_factor_id'

  # All features recorded for this factor.
  has_many :regulatory_features
end
1784
+
1785
+ # The RegulatoryFeature class provides an interface to the
1786
+ # regulatory_feature table. This table contains mappings of
1787
+ # RegulatoryFactor objects against a SeqRegion.
1788
+ #
1789
+ # This class uses ActiveRecord to access data in the Ensembl database.
1790
+ # See the general documentation of the Ensembl module for
1791
+ # more information on what this means and what methods are available.
1792
+ #
1793
+ # This class includes the mixin Sliceable, which means that it is mapped
1794
+ # to a SeqRegion object and a Slice can be created for objects of this
1795
+ # class. See Sliceable and Slice for more information.
1796
+ #
1797
+ # @example
1798
+ # analysis = Analysis.find_by_logic_name('miRanda')
1799
+ # analysis.regulatory_features.each do |feature|
1800
+ # puts feature.name + "\t" + feature.regulatory_factor.name
1801
+ # end
1802
class RegulatoryFeature < DBConnection
  include Sliceable

  set_primary_key 'regulatory_feature_id'

  # Sequence region the feature lies on, its associated analysis, and the
  # factor it is an occurrence of.
  belongs_to :seq_region
  belongs_to :analysis
  belongs_to :regulatory_factor
end
1811
+ end
1812
+ end