ruby-ensembl-api 0.9.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (54) hide show
  1. data/TUTORIAL.rdoc +623 -0
  2. data/bin/ensembl +40 -0
  3. data/lib/ensembl.rb +64 -0
  4. data/lib/ensembl/core/activerecord.rb +1914 -0
  5. data/lib/ensembl/core/collection.rb +60 -0
  6. data/lib/ensembl/core/project.rb +264 -0
  7. data/lib/ensembl/core/slice.rb +693 -0
  8. data/lib/ensembl/core/transcript.rb +425 -0
  9. data/lib/ensembl/core/transform.rb +97 -0
  10. data/lib/ensembl/db_connection.rb +216 -0
  11. data/lib/ensembl/variation/activerecord.rb +253 -0
  12. data/lib/ensembl/variation/variation.rb +163 -0
  13. data/test/unit/data/seq_c6qbl.fa +10 -0
  14. data/test/unit/data/seq_cso19_coding.fa +16 -0
  15. data/test/unit/data/seq_cso19_transcript.fa +28 -0
  16. data/test/unit/data/seq_drd3_gene.fa +838 -0
  17. data/test/unit/data/seq_drd3_transcript.fa +22 -0
  18. data/test/unit/data/seq_drd4_transcript.fa +24 -0
  19. data/test/unit/data/seq_forward_composite.fa +1669 -0
  20. data/test/unit/data/seq_par_boundary.fa +169 -0
  21. data/test/unit/data/seq_rnd3_transcript.fa +47 -0
  22. data/test/unit/data/seq_ub2r1_coding.fa +13 -0
  23. data/test/unit/data/seq_ub2r1_gene.fa +174 -0
  24. data/test/unit/data/seq_ub2r1_transcript.fa +26 -0
  25. data/test/unit/data/seq_y.fa +2 -0
  26. data/test/unit/ensembl_genomes/test_collection.rb +51 -0
  27. data/test/unit/ensembl_genomes/test_gene.rb +52 -0
  28. data/test/unit/ensembl_genomes/test_slice.rb +71 -0
  29. data/test/unit/ensembl_genomes/test_variation.rb +17 -0
  30. data/test/unit/release_50/core/test_project.rb +215 -0
  31. data/test/unit/release_50/core/test_project_human.rb +58 -0
  32. data/test/unit/release_50/core/test_relationships.rb +66 -0
  33. data/test/unit/release_50/core/test_sequence.rb +175 -0
  34. data/test/unit/release_50/core/test_slice.rb +121 -0
  35. data/test/unit/release_50/core/test_transcript.rb +108 -0
  36. data/test/unit/release_50/core/test_transform.rb +223 -0
  37. data/test/unit/release_50/variation/test_activerecord.rb +143 -0
  38. data/test/unit/release_50/variation/test_variation.rb +84 -0
  39. data/test/unit/release_53/core/test_gene.rb +66 -0
  40. data/test/unit/release_53/core/test_project.rb +96 -0
  41. data/test/unit/release_53/core/test_project_human.rb +65 -0
  42. data/test/unit/release_53/core/test_slice.rb +47 -0
  43. data/test/unit/release_53/core/test_transform.rb +63 -0
  44. data/test/unit/release_53/variation/test_activerecord.rb +145 -0
  45. data/test/unit/release_53/variation/test_variation.rb +71 -0
  46. data/test/unit/release_56/core/test_gene.rb +66 -0
  47. data/test/unit/release_56/core/test_project.rb +96 -0
  48. data/test/unit/release_56/core/test_slice.rb +54 -0
  49. data/test/unit/release_56/core/test_transform.rb +63 -0
  50. data/test/unit/release_56/variation/test_activerecord.rb +142 -0
  51. data/test/unit/release_56/variation/test_variation.rb +68 -0
  52. data/test/unit/test_connection.rb +66 -0
  53. data/test/unit/test_releases.rb +136 -0
  54. metadata +128 -0
@@ -0,0 +1,40 @@
1
+ #!/usr/bin/ruby
2
+ require 'irb'
3
+ require 'ensembl'
4
+
5
# Re-opens the IRB module to add a helper that runs an interactive session
# inside a caller-supplied binding.
module IRB
  # Starts an IRB session whose workspace is +binding+.
  #
  # Sets IRB up, builds an Irb instance (passing the configured :SCRIPT when
  # one is set), runs the optional :IRB_RC hook, registers the instance's
  # context as the main context, forwards SIGINT to the session and then
  # evaluates input until :IRB_EXIT is thrown.
  # ---
  # *Arguments*:: binding in which the entered code is evaluated
  # *Returns*:: value of the catch(:IRB_EXIT) block
  def self.start_session(binding)
    IRB.setup(nil)

    irb_args = [WorkSpace.new(binding)]
    irb_args << @CONF[:SCRIPT] if @CONF[:SCRIPT]
    session = Irb.new(*irb_args)

    rc_hook = @CONF[:IRB_RC]
    rc_hook.call(session.context) if rc_hook
    @CONF[:MAIN_CONTEXT] = session.context

    trap("SIGINT") { session.signal_handle }

    catch(:IRB_EXIT) { session.eval_input }
  end
end
29
+
30
# Entry point of the interactive Ensembl console.
#
# Usage: ensembl <snake_case_species> <release_number>
#
# Connects the Core and Variation APIs to the requested species/release and
# then opens an IRB session with both modules mixed into the top level.
include Ensembl::Core
include Ensembl::Variation

unless ARGV.length == 2
  raise "ERROR: Please provide snake_case species and Ensembl release number"
end

# Both arguments are removed from ARGV before IRB is started
# (presumably so IRB's own option parsing does not see them - TODO confirm).
species = ARGV.shift
release_arg = ARGV.shift

# String#to_i would silently turn a non-numeric release (e.g. a typo or
# swapped arguments) into 0, so reject anything that is not all digits.
unless release_arg =~ /\A\d+\z/
  raise "ERROR: Ensembl release number must be an integer (got #{release_arg.inspect})"
end
release = release_arg.to_i

Ensembl::Core::DBConnection.connect(species, release)
Ensembl::Variation::DBConnection.connect(species, release)
IRB.start_session(Kernel.binding)
@@ -0,0 +1,64 @@
1
+ #
2
+ # = ensembl.rb
3
+ #
4
+ # Copyright:: Copyright (C) 2009 Jan Aerts <http://jandot.myopenid.com>
5
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
6
+ #
7
+ # License:: The Ruby License
8
+ #
9
+
10
# Top-level namespace of the Ensembl API.
module Ensembl
  # Ensembl release number shipped with this version of the API
  # (presumably the default release used when connecting - TODO confirm
  # against db_connection.rb).
  ENSEMBL_RELEASE = 56

  # = DESCRIPTION
  # A Session caches coordinate-system and seq_region lookups so that they
  # do not have to be fetched from the database again.
  class Session
    attr_accessor :coord_systems                        # key = id; value = CoordSystem object
    attr_accessor :seqlevel_id, :seqlevel_coord_system
    attr_accessor :toplevel_id, :toplevel_coord_system
    attr_accessor :coord_system_ids                     # key = CoordSystem name; value = id
    attr_accessor :seq_regions
    attr_accessor :collection_species

    # Creates an empty session. All state is set up by #reset so that a new
    # session and a reset session are guaranteed to look the same.
    def initialize
      reset
    end

    # = DESCRIPTION
    # The Session#reset method empties all caches and forgets the cached
    # seqlevel/toplevel coordinate systems and the collection species.
    # ---
    # *Arguments*:: none
    # *Returns*:: nil
    def reset
      @coord_systems = Hash.new
      @coord_system_ids = Hash.new
      @seq_regions = Hash.new
      @seqlevel_id = nil
      @toplevel_id = nil
      @seqlevel_coord_system = nil
      @toplevel_coord_system = nil
      @collection_species = nil
    end
  end

  # The single shared session.
  SESSION = Ensembl::Session.new

end
42
+
43
+ begin
44
+ require 'rubygems'
45
+ require 'bio'
46
+ rescue LoadError
47
+ raise LoadError, "You must have bioruby installed"
48
+ end
49
+
50
+ # Database connection
51
+ require File.dirname(__FILE__) + '/ensembl/db_connection.rb'
52
+
53
+ # Core modules
54
+ require File.dirname(__FILE__) + '/ensembl/core/activerecord.rb'
55
+ require File.dirname(__FILE__) + '/ensembl/core/transcript.rb'
56
+ require File.dirname(__FILE__) + '/ensembl/core/slice.rb'
57
+ require File.dirname(__FILE__) + '/ensembl/core/project.rb'
58
+ require File.dirname(__FILE__) + '/ensembl/core/transform.rb'
59
+ require File.dirname(__FILE__) + '/ensembl/core/collection.rb'
60
+
61
+ # Variation modules
62
+ require File.dirname(__FILE__) + '/ensembl/variation/activerecord.rb'
63
+ require File.dirname(__FILE__) + '/ensembl/variation/variation.rb'
64
+
@@ -0,0 +1,1914 @@
1
+ #
2
+ # = ensembl/core/activerecord.rb - ActiveRecord mappings to Ensembl core
3
+ #
4
+ # Copyright:: Copyright (C) 2007-2009 Jan Aerts <http://jandot.myopenid.com>
5
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
6
+ # License:: The Ruby License
7
+ #
8
+
9
+ # = DESCRIPTION
10
+ # == What is it?
11
+ # The Ensembl module provides an API to the Ensembl databases
12
+ # stored at ensembldb.ensembl.org. This is the same information that is
13
+ # available from http://www.ensembl.org.
14
+ #
15
+ # The Ensembl::Core module mainly covers sequences and
16
+ # annotations.
17
+ # The Ensembl::Variation module covers variations (e.g. SNPs).
18
+ # The Ensembl::Compara module covers comparative mappings
19
+ # between species.
20
+ #
21
+ # == ActiveRecord
22
+ # The Ensembl API provides a ruby interface to the Ensembl mysql databases
23
+ # at ensembldb.ensembl.org. Most of the API is based on ActiveRecord to
24
+ # get data from that database. In general, each table is described by a
25
+ # class with the same name: the coord_system table is covered by the
26
+ # CoordSystem class, the seq_region table is covered by the SeqRegion class,
27
+ # etc. As a result, accessors are available for all columns in each table.
28
+ # For example, the seq_region table has the following columns: seq_region_id,
29
+ # name, coord_system_id and length. Through ActiveRecord, these column names
30
+ # become available as attributes of SeqRegion objects:
31
+ # puts my_seq_region.seq_region_id
32
+ # puts my_seq_region.name
33
+ # puts my_seq_region.coord_system_id
34
+ # puts my_seq_region.length.to_s
35
+ #
36
+ # ActiveRecord makes it easy to extract data from those tables using the
37
+ # collection of #find methods. There are three types of #find methods (e.g.
38
+ # for the CoordSystem class):
39
+ # a. find based on primary key in table:
40
+ # my_coord_system = CoordSystem.find(5)
41
+ # b. find_by_sql:
42
+ # my_coord_system = CoordSystem.find_by_sql("SELECT * FROM coord_system WHERE name = 'chromosome'")
43
+ # c. find_by_<insert_your_column_name_here>
44
+ # my_coord_system1 = CoordSystem.find_by_name('chromosome')
45
+ # my_coord_system2 = CoordSystem.find_by_rank(3)
46
+ # To find out which find_by_<column> methods are available, you can list the
47
+ # column names using the column_names class methods:
48
+ #
49
+ # puts Ensembl::Core::CoordSystem.column_names.join("\t")
50
+ #
51
+ # For more information on the find methods, see
52
+ # http://ar.rubyonrails.org/classes/ActiveRecord/Base.html#M000344
53
+ #
54
+ # The relationships between different tables are accessible through the
55
+ # classes as well. For example, to loop over all seq_regions belonging to
56
+ # a coord_system (a coord_system "has many" seq_regions):
57
+ # chr_coord_system = CoordSystem.find_by_name('chromosome')
58
+ # chr_coord_system.seq_regions.each do |seq_region|
59
+ # puts seq_region.name
60
+ # end
61
+ # Of course, you can go the other way as well (a seq_region "belongs to"
62
+ # a coord_system):
63
+ # chr4 = SeqRegion.find_by_name('4')
64
+ # puts chr4.coord_system.name #--> 'chromosome'
65
+ #
66
+ # To find out what relationships exist for a given class, you can use the
67
+ # #reflect_on_all_associations class methods:
68
+ # puts SeqRegion.reflect_on_all_associations(:has_many).collect{|a| a.name.to_s}.join("\n")
69
+ # puts SeqRegion.reflect_on_all_associations(:has_one).collect{|a| a.name.to_s}.join("\n")
70
+ # puts SeqRegion.reflect_on_all_associations(:belongs_to).collect{|a| a.name.to_s}.join("\n")
71
+ module Ensembl
72
+ # = DESCRIPTION
73
+ # The Ensembl::Core module covers the core databases from
74
+ # ensembldb.ensembl.org and covers mainly sequences and their annotations.
75
+ # For a full description of the database (and therefore the classes that
76
+ # are available), see http://www.ensembl.org/info/software/core/schema/index.html
77
+ # and http://www.ensembl.org/info/software/core/schema/schema_description.html
78
+ module Core
79
# = DESCRIPTION
# The Sliceable mixin holds the slice method (plus a few positional
# convenience methods) and can be included in any class that lends itself
# to having a position on a SeqRegion.
#
# Including classes are expected to respond to #seq_region and, where
# available, to #seq_region_start, #seq_region_end and #seq_region_strand.
module Sliceable
  # = DESCRIPTION
  # The Sliceable#slice method takes the coordinates on a reference
  # and creates a Ensembl::Core::Slice object.
  #
  # A coordinate is only read when the class is Ensembl::Core::Intron or
  # when the class lists the matching column in its column_names. A missing
  # start or stop is passed on as nil; a missing strand defaults to 1.
  # ---
  # *Arguments*:: none
  # *Returns*:: Ensembl::Core::Slice object
  def slice
    is_intron = (self.class == Ensembl::Core::Intron)
    # Introns are checked first so that column_names is never consulted
    # for them.
    has_position = lambda do |column|
      is_intron || self.class.column_names.include?(column)
    end

    start = stop = nil
    start = seq_region_start if has_position.call('seq_region_start')
    stop = seq_region_end if has_position.call('seq_region_end')
    if has_position.call('seq_region_strand')
      strand = seq_region_strand
    else # FIXME: we shouldn't do this, but can't #project if no strand given
      strand = 1
    end

    Ensembl::Core::Slice.new(seq_region, start, stop, strand)
  end

  # = DESCRIPTION
  # The Sliceable#seq method takes the coordinates on a reference, transforms
  # onto the seqlevel coordinate system if necessary, and retrieves the
  # sequence.
  # ---
  # *Arguments*:: none
  # *Returns*:: sequence
  def seq
    slice.seq
  end

  # = DESCRIPTION
  # The Sliceable#start method is a convenience method and returns
  # self.seq_region_start.
  # ---
  # *Arguments*:: none
  # *Returns*:: start position on the seq_region
  def start
    seq_region_start
  end

  # = DESCRIPTION
  # The Sliceable#stop method is a convenience method and returns
  # self.seq_region_end.
  # ---
  # *Arguments*:: none
  # *Returns*:: end position on the seq_region
  def stop
    seq_region_end
  end

  # = DESCRIPTION
  # The Sliceable#strand method is a convenience method and returns
  # self.seq_region_strand.
  # ---
  # *Arguments*:: none
  # *Returns*:: strand of the feature on the seq_region
  def strand
    seq_region_strand
  end

  # = DESCRIPTION
  # The Sliceable#length method returns the length of the feature (based on
  # seq_region_start and seq_region_end; both ends are inclusive).
  # ---
  # *Arguments*:: none
  # *Returns*:: length as a number
  def length
    stop - start + 1
  end

  # = DESCRIPTION
  # The Sliceable#project method is used to transfer coordinates from one
  # coordinate system to another. Suppose you have a feature on a
  # contig in human (let's say on contig AC000031.6.1.38703) and you
  # want to know the coordinates on the chromosome. This is a
  # projection of coordinates from a higher ranked coordinate system to
  # a lower ranked coordinate system. Projections can also be done
  # from a chromosome to the contig level. However, it might be possible
  # that more than one contig has to be included and that there exist
  # gaps between the contigs. The output of this method therefore is
  # an _array_ of Slice and Gap objects.
  #
  # At the moment, projections can only be done if the two coordinate
  # systems are linked directly in the 'assembly' table.
  #
  # = USAGE
  #
  #  # Get a contig slice in cow and project to scaffold level
  #  # (i.e. going from a high rank coord system to a lower rank coord
  #  # system)
  #  original_feature = Gene.find(85743)
  #  target_slices = original_feature.project('scaffold')
  #
  # ---
  # *Arguments*:
  # * coord_system_name:: name of coordinate system to project
  #   coordinates to
  # *Returns*:: an array consisting of Slices and, if necessary, Gaps
  def project(coord_system_name)
    slice.project(coord_system_name)
  end

end
191
+
192
+
193
# = DESCRIPTION
# The CoordSystem class describes the coordinate system to which
# a given SeqRegion belongs. It is an interface to the coord_system
# table of the Ensembl mysql database.
#
# Two virtual coordinate systems exist for
# every species:
# * toplevel: the coordinate system with rank 1
# * seqlevel: the coordinate system that contains the seq_regions
#   with the sequence
#
# This class uses ActiveRecord to access data in the Ensembl database.
# See the general documentation of the Ensembl module for
# more information on what this means and what methods are available.
#
# = USAGE
#  coord_system = Ensembl::Core::CoordSystem.find_by_name('chromosome')
#  if coord_system.toplevel?
#    puts coord_system.name + " is the toplevel coordinate system."
#  end
class CoordSystem < DBConnection
  set_primary_key 'coord_system_id'

  has_many :seq_regions

  # = DESCRIPTION
  # The CoordSystem#toplevel? method checks if this coordinate system is the
  # toplevel coordinate system (rank 1) or not. When using a multi-species
  # database, only coordinate systems of the same species are considered.
  # ---
  # *Arguments*:: none
  # *Returns*:: TRUE or FALSE
  def toplevel?
    if Collection.check # When using multi-species databases
      toplevel = CoordSystem.find_by_rank_and_species_id(1, species_id)
    else
      toplevel = CoordSystem.find_by_rank(1)
    end
    self == toplevel
  end

  # = DESCRIPTION
  # The CoordSystem#seqlevel? method checks if this coordinate system is the
  # seqlevel coordinate system or not.
  # ---
  # *Arguments*:: none
  # *Returns*:: TRUE or FALSE
  def seqlevel?
    if Collection.check # When using multi-species databases
      seqlevel = CoordSystem.find_by_sql("SELECT * FROM coord_system WHERE attrib LIKE '%sequence_level%' AND species_id = #{species_id}")[0]
    else
      # find_seqlevel is an instance method, so it has to be called on a
      # record rather than on the CoordSystem class.
      seqlevel = find_seqlevel
    end
    self == seqlevel
  end

  # = DESCRIPTION
  # The CoordSystem#find_toplevel instance method returns the toplevel
  # coordinate system (rank 1). The result is cached in Ensembl::SESSION;
  # when using a multi-species database the cache is refreshed whenever the
  # cached system belongs to another species than this one.
  # ---
  # *Arguments*:: none
  # *Returns*:: CoordSystem object
  def find_toplevel
    session = Ensembl::SESSION
    cached = session.toplevel_coord_system
    stale = cached.nil? || (Collection.check && cached.species_id != species_id)

    if stale
      if Collection.check # When using multi-species databases
        session.toplevel_coord_system = CoordSystem.find_by_rank_and_species_id(1, species_id)
      else
        session.toplevel_coord_system = CoordSystem.find_by_rank(1)
      end
      session.toplevel_id = session.toplevel_coord_system.id
      session.coord_system_ids[session.toplevel_coord_system.name] = session.toplevel_id
      session.coord_systems[session.toplevel_id] = session.toplevel_coord_system
    end

    session.toplevel_coord_system
  end

  # = DESCRIPTION
  # The CoordSystem#find_seqlevel instance method returns the seqlevel
  # coordinate system, i.e. the one whose attrib contains 'sequence_level'.
  # The result is cached in Ensembl::SESSION; when using a multi-species
  # database the cache is refreshed whenever the cached system belongs to
  # another species than this one.
  # ---
  # *Arguments*:: none
  # *Returns*:: CoordSystem object
  def find_seqlevel
    session = Ensembl::SESSION
    cached = session.seqlevel_coord_system
    stale = cached.nil? || (Collection.check && cached.species_id != species_id)

    if stale
      if Collection.check # When using multi-species databases
        session.seqlevel_coord_system = CoordSystem.find_by_sql("SELECT * FROM coord_system WHERE attrib LIKE '%sequence_level%' AND species_id = #{species_id}")[0]
      else
        session.seqlevel_coord_system = CoordSystem.find_by_sql("SELECT * FROM coord_system WHERE attrib LIKE '%sequence_level%'")[0]
      end
      session.seqlevel_id = session.seqlevel_coord_system.id
      session.coord_system_ids[session.seqlevel_coord_system.name] = session.seqlevel_id
      session.coord_systems[session.seqlevel_id] = session.seqlevel_coord_system
    end

    session.seqlevel_coord_system
  end

  # = DESCRIPTION
  # The CoordSystem#find_level instance method returns the coordinate
  # system corresponding to the name passed. When using a multi-species
  # database, the lookup is restricted to the species of this coordinate
  # system.
  # ---
  # *Arguments*:: Coordinate system name
  # *Returns*:: CoordSystem object (nil if there is none by that name)
  def find_level(coord_system_name)
    if Collection.check # When using multi-species databases
      # The name comes from the caller, so it is passed as a bind variable
      # instead of being interpolated into the SQL string.
      CoordSystem.find_by_sql(["SELECT * FROM coord_system WHERE name = ? AND species_id = ?", coord_system_name, species_id])[0]
    else
      CoordSystem.find_by_name(coord_system_name)
    end
  end

  # = DESCRIPTION
  # The CoordSystem#find_default_by_name class method returns the
  # default coordinate system by that name. There might be more than one
  # coordinate system with the name chromosome but with different version
  # (e.g. in human, there is one for the NCBI36 and one for the NCBI35
  # version). If exactly one coordinate system carries the name, that one is
  # returned; otherwise the first one whose attrib matches 'default_version'
  # is returned.
  # ---
  # *Arguments*:: name of the coordinate system
  # *Returns*:: CoordSystem object (nil if none qualifies)
  def self.find_default_by_name(name)
    candidates = Ensembl::Core::CoordSystem.find_all_by_name(name)
    return candidates[0] if candidates.length == 1

    candidates.detect { |cs| cs.attrib =~ /default_version/ }
  end

  # = DESCRIPTION
  # The CoordSystem#name_with_version returns a string containing the name
  # and version of the coordinate system, joined by a colon. If no version
  # is available, then just the name is returned.
  # ---
  # *Arguments*:: none
  # *Returns*:: String object
  def name_with_version
    version.nil? ? name : [name, version].join(':')
  end

  ## Calculate the shortest path between a source coordinate system and a
  ## target coordinate system. This can be done by looking for the
  ## 'assembly.mapping' records in the meta_coord table.
  ## At the moment, only direct mappings are possible. Later on, this method
  ## should be changed to make longer paths possible.
  ## Is used to get features for a slice object.
  #def calculate_path(target_coord_system)
  #  MetaCoord.find_all_by_meta_key('assembly.mapping').each do |mapping|
  #    coord_system_names = mapping.meta_value.split(/[#|\|]/)
  #    if coord_system_names.sort.join(';') == [self.name_with_version, target_coord_system.name_with_version].sort.join(';')
  #      answer = Array.new
  #      answer.push(CoordSystem.find_by_name(coord_system_names[0]))
  #      answer.push(CoordSystem.find_by_name(coord_system_names[1]))
  #      return answer
  #    end
  #  end
  #  return nil
  #
  #end
end
370
+
371
# = DESCRIPTION
# The SeqRegion class describes a part of a coordinate system. It is an
# interface to the seq_region table of the Ensembl mysql database.
#
# This class uses ActiveRecord to access data in the Ensembl database.
# See the general documentation of the Ensembl module for
# more information on what this means and what methods are available.
#
# = USAGE
#  chr4 = SeqRegion.find_by_name('4')
#  puts chr4.coord_system.name #--> 'chromosome'
#  chr4.genes.each do |gene|
#    puts gene.biotype
#  end
class SeqRegion < DBConnection
  set_primary_key 'seq_region_id'

  belongs_to :coord_system
  has_many :simple_features
  has_many :marker_features
  has_many :genes
  has_many :exons
  has_many :repeat_features
  has_many :seq_region_attribs
  # :through has to name an association declared on this class, i.e. the
  # plural :seq_region_attribs from the line above.
  has_many :attrib_types, :through => :seq_region_attribs
  has_many :transcripts
  has_one :dna
  has_many :dna_align_features
  has_many :misc_features
  has_many :density_features
  has_many :karyotypes
  has_many :oligo_features
  has_many :prediction_exons
  has_many :prediction_transcripts
  has_many :protein_align_features
  has_many :regulatory_features
  has_many :assembly_exceptions

  # See http://blog.hasmanythrough.com/2006/4/21/self-referential-through
  has_many :asm_links_as_asm, :foreign_key => 'asm_seq_region_id', :class_name => 'AssemblyLink'
  has_many :asm_links_as_cmp, :foreign_key => 'cmp_seq_region_id', :class_name => 'AssemblyLink'
  has_many :asm_seq_regions, :through => :asm_links_as_cmp
  has_many :cmp_seq_regions, :through => :asm_links_as_asm

  alias attribs seq_region_attribs

  # = DESCRIPTION
  # The SeqRegion#slice method returns a slice object that covers the whole
  # of the seq_region.
  # ---
  # *Arguments*:: none
  # *Returns*:: Ensembl::Core::Slice object
  def slice
    Ensembl::Core::Slice.new(self)
  end

  # = DESCRIPTION
  # The SeqRegion#assembled_seq_regions returns the sequence regions on which
  # the current region is assembled. For example, calling this method on a
  # contig sequence region, it might return the chromosome that that contig
  # is part of. Optionally, this method takes a coordinate system name so
  # that only regions of that coordinate system are returned.
  # ---
  # *Arguments*:: coord_system_name (optional)
  # *Returns*:: array of SeqRegion objects (empty if no coordinate system
  #             by that name exists)
  def assembled_seq_regions(coord_system_name = nil)
    return asm_seq_regions if coord_system_name.nil?

    coord_system = CoordSystem.find_by_name(coord_system_name)
    return [] if coord_system.nil? # unknown coordinate system: nothing can match

    asm_seq_regions.select { |asr| asr.coord_system_id == coord_system.id }
  end

  # = DESCRIPTION
  # The SeqRegion#component_seq_regions returns the sequence regions
  # contained within the current region (in other words: the bits used to
  # assemble the current region). For example, calling this method on a
  # chromosome sequence region, it might return the contigs that were assembled
  # into this chromosome. Optionally, this method takes a coordinate system
  # name so that only regions of that coordinate system are returned.
  # ---
  # *Arguments*:: coord_system_name (optional)
  # *Returns*:: array of SeqRegion objects (empty if no coordinate system
  #             by that name exists)
  def component_seq_regions(coord_system_name = nil)
    return cmp_seq_regions if coord_system_name.nil?

    coord_system = CoordSystem.find_by_name(coord_system_name)
    return [] if coord_system.nil? # unknown coordinate system: nothing can match

    cmp_seq_regions.select { |csr| csr.coord_system_id == coord_system.id }
  end

  # = DESCRIPTION
  # This method queries the assembly table to find those rows (i.e.
  # AssemblyLink objects) for which this seq_region is the assembly.
  #
  # As a side effect, the coordinate system is registered in the
  # Ensembl::SESSION caches if its name is not known there yet.
  #
  # = USAGE
  #
  #  my_seq_region = SeqRegion.find_by_name('4')
  #  first_link = my_seq_region.assembly_links_as_assembly[0]
  #  puts first_link.asm_start.to_s + "\t" + first_link.asm_end.to_s
  #
  # ---
  # *Arguments*:
  # * coord_system: CoordSystem object that the components
  #   should belong to (default = nil, meaning all components)
  # *Returns*:: array of AssemblyLink objects
  def assembly_links_as_assembly(coord_system = nil)
    # Same convention as assembly_links_as_component: no coordinate system
    # means "do not filter".
    return asm_links_as_asm if coord_system.nil?

    session = Ensembl::SESSION
    unless session.coord_system_ids.has_key?(coord_system.name)
      # coord_systems maps id => CoordSystem object, coord_system_ids maps
      # name => id.
      session.coord_systems[coord_system.id] = coord_system
      session.coord_system_ids[coord_system.name] = coord_system.id
    end

    AssemblyLink.find_by_sql("SELECT * FROM assembly a WHERE a.asm_seq_region_id = #{self.id} AND a.cmp_seq_region_id IN (SELECT sr.seq_region_id FROM seq_region sr WHERE coord_system_id = #{coord_system.id} )")
  end

  # = DESCRIPTION
  # This method queries the assembly table to find those rows (i.e.
  # AssemblyLink objects) for which this seq_region is the component.
  #
  # = USAGE
  #
  #  my_seq_region = SeqRegion.find_by_name('Chr4.003.1')
  #  first_link = my_seq_region.assembly_links_as_component[0]
  #  puts first_link.asm_start.to_s + "\t" + first_link.asm_end.to_s
  #
  # ---
  # *Arguments*:
  # * coord_system: CoordSystem object that the assembly
  #   should belong to (default = nil, meaning all assemblies)
  # *Returns*:: array of AssemblyLink objects
  def assembly_links_as_component(coord_system = nil)
    return asm_links_as_cmp if coord_system.nil?

    asm_links_as_cmp.select { |alac| alac.asm_seq_region.coord_system_id == coord_system.id }
  end

  # = DESCRIPTION
  # The SeqRegion#sequence method returns the sequence of this seq_region. At
  # the moment, it will only return the sequence if the region belongs to the
  # seqlevel coordinate system.
  # ---
  # *Arguments*:: none
  # *Returns*:: DNA sequence as String
  def sequence
    dna.sequence
  end
  alias seq sequence

  # = DESCRIPTION
  # The SeqRegion#subsequence method returns a subsequence of this seq_region. At
  # the moment, it will only return the sequence if the region belongs to the
  # seqlevel coordinate system.
  # ---
  # *Arguments*:: start and stop position (1-based, both inclusive)
  # *Returns*:: DNA sequence as String
  def subsequence(start, stop)
    # String#slice takes a 0-based offset and a length.
    seq.slice(start - 1, (stop - start) + 1)
  end
  alias subseq subsequence

end
550
+
551
# = DESCRIPTION
# The AssemblyLink class describes the relationships between different
# seq_regions. For example, a chromosome might consist of a number of
# scaffolds, each of which in turn consists of a number of contigs. Each
# AssemblyLink ties one assembled seq_region (asm_seq_region) to one of its
# component seq_regions (cmp_seq_region).
# This class is an interface to the assembly table of the Ensembl mysql
# database.
#
# This class uses ActiveRecord to access data in the Ensembl database.
# See the general documentation of the Ensembl module for
# more information on what this means and what methods are available.
#
# = USAGE
#  chr4 = SeqRegion.find_by_name('4')
#  chr4.asm_links_as_asm.each do |link|
#    puts link.cmp_seq_region.name
#  end
class AssemblyLink < DBConnection
  set_table_name 'assembly'
  set_primary_key nil

  # See http://blog.hasmanythrough.com/2006/4/21/self-referential-through
  belongs_to :asm_seq_region, :class_name => 'SeqRegion', :foreign_key => 'asm_seq_region_id'
  belongs_to :cmp_seq_region, :class_name => 'SeqRegion', :foreign_key => 'cmp_seq_region_id'
end
577
+
578
# = DESCRIPTION
# The AssemblyException class describes the exceptions to AssemblyLink. Most
# notably, this concerns the allosomes. In human, for example, only the
# part of the Y chromosome that is different from X is covered in the
# assembly table. Therefore, the sequence of the tip and end of the Y
# chromosome are not stored in the database, but fetched from the X
# chromosome. The assembly_exception table contains the information on
# which bits are the same.
#
# This class uses ActiveRecord to access data in the Ensembl database.
# See the general documentation of the Ensembl module for
# more information on what this means and what methods are available.
#
# This class should normally not be used directly by the user.
class AssemblyException < DBConnection
  include Sliceable

  set_primary_key 'assembly_exception_id'

  belongs_to :seq_region
end
599
+
600
# = DESCRIPTION
# The MetaCoord class describes which coordinate systems are used to annotate
# features. It will for example tell you that marker_features are annotated
# on the chromosome, supercontig or clone level.
#
# This class should normally not be used by the end user, but is used internally.
#
# This class uses ActiveRecord to access data in the Ensembl database.
# See the general documentation of the Ensembl module for
# more information on what this means and what methods are available.
class MetaCoord < DBConnection
  # No primary key is declared for this table.
  set_primary_key nil
end
613
+
614
# = DESCRIPTION
# The Meta class describes the meta data of the database. This includes
# information on which coordinate system maps onto which other one and
# which patches have been applied.
#
# This class should normally not be used by the end user, but is used internally.
#
# This class uses ActiveRecord to access data in the Ensembl database.
# See the general documentation of the Ensembl module for
# more information on what this means and what methods are available.
class Meta < DBConnection
  # No primary key is declared for this table.
  set_primary_key nil
end
627
+
628
+ # = DESCRIPTION
629
+ # The Analysis class describes an analysis.
630
+ #
631
+ # This class uses ActiveRecord to access data in the Ensembl database.
632
+ # See the general documentation of the Ensembl module for
633
+ # more information on what this means and what methods are available.
634
+ #
635
+ # = USAGE
636
+ # repeat_masker_analysis = Analysis.find_by_logic_name('RepeatMask')
637
+ # puts repeat_masker_analysis.to_yaml
638
class Analysis < DBConnection
  set_primary_key 'analysis_id'

  # Collections of records that belong to this analysis. The declarations are
  # kept in their original order.
  [:genes, :dna_align_features, :protein_align_features].each do |collection|
    has_many collection
  end

  # The description is a single record, not a collection.
  has_one :analysis_description

  [:density_types, :oligo_features, :protein_features,
   :regulatory_features, :simple_features, :prediction_transcripts].each do |collection|
    has_many collection
  end
end
652
+
653
+ # = DESCRIPTION
654
+ # The AnalysisDescription class belongs to an analysis.
655
+ #
656
+ # This class uses ActiveRecord to access data in the Ensembl database.
657
+ # See the general documentation of the Ensembl module for
658
+ # more information on what this means and what methods are available.
659
+ #
660
+ # = USAGE
661
+ # descr = AnalysisDescription.find(3)
662
+ # puts descr.to_yaml
663
class AnalysisDescription < DBConnection
  # No primary key is configured for this model.
  set_primary_key nil

  # The description row refers back to its analysis.
  belongs_to :analysis
end
668
+
669
+ # = DESCRIPTION
670
+ # The Dna class contains the actual DNA sequence for the sequence regions
671
+ # that belong to the seq_level coordinate system.
672
+ #
673
+ # This class uses ActiveRecord to access data in the Ensembl database.
674
+ # See the general documentation of the Ensembl module for
675
+ # more information on what this means and what methods are available.
676
+ #
677
+ # = USAGE
678
+ # seq_region = SeqRegion.find(1)
679
+ # puts seq_region.dna.sequence
680
class Dna < DBConnection
  # No primary key is configured for this model.
  set_primary_key nil

  # The sequence row refers to the seq_region it belongs to.
  belongs_to :seq_region
end
685
+
686
+ # = DESCRIPTION
687
+ # The Exon class describes an exon.
688
+ #
689
+ # This class uses ActiveRecord to access data in the Ensembl database.
690
+ # See the general documentation of the Ensembl module for
691
+ # more information on what this means and what methods are available.
692
+ #
693
+ # This class includes the mixin Sliceable, which means that it is mapped
694
+ # to a SeqRegion object and a Slice can be created for objects of this
695
+ # class. See Sliceable and Slice for more information.
696
+ #
697
+ # = USAGE
698
+ # seq_region = SeqRegion.find(1)
699
+ # puts seq_region.exons.length
700
class Exon < DBConnection
  include Sliceable

  set_primary_key 'exon_id'

  belongs_to :seq_region

  # Exons reach their transcripts through the exon_transcripts link records.
  has_many :exon_transcripts
  has_many :transcripts, :through => :exon_transcripts

  # NOTE(review): both declarations use the name :translations, so the second
  # one (end_exon_id) presumably replaces the first -- confirm the intent.
  has_many :translations, :foreign_key => 'start_exon_id'
  has_many :translations, :foreign_key => 'end_exon_id'

  has_one :exon_stable_id

  # Supporting evidence, filtered on the feature_type value of the link row.
  has_many :exon_supporting_features
  has_many :dna_align_features,
           :through => :exon_supporting_features,
           :conditions => ["feature_type = 'dna_align_feature'"]
  has_many :protein_align_features,
           :through => :exon_supporting_features,
           :conditions => ["feature_type = 'protein_align_feature'"]

  # = DESCRIPTION
  # The Exon#stable_id method returns the stable_id value of the associated
  # ExonStableId record.
  def stable_id
    exon_stable_id.stable_id
  end

  # = DESCRIPTION
  # The Exon#seq method returns the sequence of the exon.
  #
  # The SeqRegion is looked up in Ensembl::SESSION.seq_regions first; when it
  # is not there yet it is loaded through the association and stored for
  # later calls. The sequence itself comes from a Slice that spans the exon.
  def seq
    cache = Ensembl::SESSION.seq_regions
    region =
      if cache.has_key?(self.seq_region_id)
        cache[self.seq_region_id]
      else
        fetched = self.seq_region
        cache[fetched.id] = fetched
        fetched
      end

    Ensembl::Core::Slice.new(region, seq_region_start, seq_region_end, seq_region_strand).seq
  end
end
736
+
737
+ # = DESCRIPTION
738
+ # The ExonStableId class provides an interface to the exon_stable_id
739
+ # table. This table contains Ensembl stable IDs for exons.
740
+ #
741
+ # This class uses ActiveRecord to access data in the Ensembl database.
742
+ # See the general documentation of the Ensembl module for
743
+ # more information on what this means and what methods are available.
744
+ #
745
+ # = USAGE
746
+ # my_exon = ExonStableId.find_by_stable_id('ENSE00001494622').exon
747
class ExonStableId < DBConnection
  # The stable_id column itself is used as the key.
  set_primary_key 'stable_id'

  belongs_to :exon
end
752
+
753
+ # = DESCRIPTION
754
+ # The ExonTranscript class provides the link between exons and transcripts.
755
+ #
756
+ # This class uses ActiveRecord to access data in the Ensembl database.
757
+ # See the general documentation of the Ensembl module for
758
+ # more information on what this means and what methods are available.
759
+ #
760
+ # = USAGE
761
+ # link = ExonTranscript.find(1)
762
+ # puts link.exon.to_yaml
763
+ # puts link.transcript.to_yaml
764
class ExonTranscript < DBConnection
  # Link model: no primary key is configured.
  set_primary_key nil

  # The two sides of the link.
  belongs_to :exon
  belongs_to :transcript
end
770
+
771
# Link model between an Exon and the alignment feature that supports it.
class ExonSupportingFeature < DBConnection
  # The table name is set explicitly instead of being derived from the class name.
  set_table_name 'supporting_feature'
  set_primary_key nil

  belongs_to :exon

  # Both associations read the same feature_id column; they differ only in
  # the class that is loaded.
  belongs_to :dna_align_feature,
             :class_name => "DnaAlignFeature",
             :foreign_key => 'feature_id'
  belongs_to :protein_align_feature,
             :class_name => "ProteinAlignFeature",
             :foreign_key => 'feature_id'
end
779
+
780
# Link model between a Transcript and the alignment feature that supports it.
class TranscriptSupportingFeature < DBConnection
  set_primary_key nil

  belongs_to :transcript

  # Both associations read the same feature_id column; they differ only in
  # the class that is loaded.
  belongs_to :dna_align_feature,
             :class_name => "DnaAlignFeature",
             :foreign_key => 'feature_id'
  belongs_to :protein_align_feature,
             :class_name => "ProteinAlignFeature",
             :foreign_key => 'feature_id'
end
787
+
788
+ # = DESCRIPTION
789
+ # The SimpleFeature class describes simple features that have positions
790
+ # on a SeqRegion.
791
+ #
792
+ # This class uses ActiveRecord to access data in the Ensembl database.
793
+ # See the general documentation of the Ensembl module for
794
+ # more information on what this means and what methods are available.
795
+ #
796
+ # This class includes the mixin Sliceable, which means that it is mapped
797
+ # to a SeqRegion object and a Slice can be created for objects of this
798
+ # class. See Sliceable and Slice for more information.
799
+ #
800
+ # = USAGE
801
+ # simple_feature = SimpleFeature.find(123)
802
+ # puts simple_feature.analysis.logic_name
803
class SimpleFeature < DBConnection
  include Sliceable

  # The key column is 'simple_feature_id' instead of the default 'id'.
  set_primary_key 'simple_feature_id'

  # A simple feature sits on one seq_region and was produced by one analysis.
  belongs_to :seq_region
  belongs_to :analysis
end
811
+
812
+ # = DESCRIPTION
813
+ # The DensityFeature class provides an interface to the density_feature
814
+ # table.
815
+ #
816
+ # This class uses ActiveRecord to access data in the Ensembl database.
817
+ # See the general documentation of the Ensembl module for
818
+ # more information on what this means and what methods are available.
819
+ #
820
+ # This class includes the mixin Sliceable, which means that it is mapped
821
+ # to a SeqRegion object and a Slice can be created for objects of this
822
+ # class. See Sliceable and Slice for more information.
823
+ #
824
+ # = USAGE
825
+ # density_feature = DensityFeature.find(2716384)
826
+ # puts density_feature.to_yaml
827
class DensityFeature < DBConnection
  # The key column is 'density_feature_id' instead of the default 'id'.
  set_primary_key 'density_feature_id'

  # NOTE(review): unlike the other feature classes, Sliceable is not mixed in
  # here -- confirm whether that is intended.
  belongs_to :density_type
  belongs_to :seq_region
end
833
+
834
+ # = DESCRIPTION
835
+ # The DensityType class provides an interface to the density_type
836
+ # table.
837
+ #
838
+ # This class uses ActiveRecord to access data in the Ensembl database.
839
+ # See the general documentation of the Ensembl module for
840
+ # more information on what this means and what methods are available.
841
+ #
842
+ # This class includes the mixin Sliceable, which means that it is mapped
843
+ # to a SeqRegion object and a Slice can be created for objects of this
844
+ # class. See Sliceable and Slice for more information.
845
+ #
846
class DensityType < DBConnection
  # The key column is 'density_type_id' instead of the default 'id'.
  set_primary_key 'density_type_id'

  # A density type groups density features and refers to one analysis.
  has_many :density_features
  belongs_to :analysis
end
852
+
853
+ # = DESCRIPTION
854
+ # The Marker class provides an interface to the marker
855
+ # table. This table contains primer sequences and PCR product lengths.
856
+ #
857
+ # This class uses ActiveRecord to access data in the Ensembl database.
858
+ # See the general documentation of the Ensembl module for
859
+ # more information on what this means and what methods are available.
860
+ #
861
+ # = USAGE
862
+ # marker = Marker.find(52194)
863
+ # puts marker.left_primer
864
+ # puts marker.right_primer
865
+ # puts marker.min_primer_dist.to_s
866
class Marker < DBConnection
  set_primary_key 'marker_id'

  has_many :marker_features
  has_many :marker_synonyms
  has_many :marker_map_locations

  # Returns nil so that ActiveRecord has no inheritance column for this model.
  # NOTE(review): presumably needed because the table has a 'type' column --
  # confirm against the schema.
  def self.inheritance_column
    nil
  end

  # = DESCRIPTION
  # The Marker#name method returns a comma-separated list of synonyms of
  # this marker
  #
  # = USAGE
  #  marker = Marker.find(1)
  #  puts marker.name    --> 58017,D29149
  def name
    marker_synonyms.map { |synonym| synonym.name }.join(',')
  end

  # = DESCRIPTION
  # The Marker#find_by_name class method returns one marker with this name.
  # It is the first element of Marker#find_all_by_name.
  #
  # ---
  # *Arguments*:: name
  # *Returns*:: Marker object or nil
  def self.find_by_name(name)
    find_all_by_name(name).first
  end

  # = DESCRIPTION
  # The Marker#find_all_by_name class method returns all markers with this
  # name. The name is looked up among the marker synonyms, and the markers
  # those synonyms point at are collected. If no marker is found, it returns
  # an empty array.
  # ---
  # *Arguments*:: name
  # *Returns*:: empty array or array of Marker objects
  def self.find_all_by_name(name)
    synonyms = Ensembl::Core::MarkerSynonym.find_all_by_name(name)
    per_synonym = synonyms.map do |synonym|
      Ensembl::Core::Marker.find_all_by_marker_id(synonym.marker_id)
    end
    per_synonym.flatten
  end

  #def to_mappings
  #  output = Array.new
  #  self.marker_features.each do |mf|
  #    output.push(mf.slice.display_name)
  #  end
  #  return output.join("\n")
  #
  #end

end
929
+
930
+ # = DESCRIPTION
931
+ # The MarkerSynonym class provides an interface to the marker_synonym
932
+ # table. This table contains names for markers (that are themselves
933
+ # stored in the marker table (so Marker class)).
934
+ #
935
+ # This class uses ActiveRecord to access data in the Ensembl database.
936
+ # See the general documentation of the Ensembl module for
937
+ # more information on what this means and what methods are available.
938
+ #
939
+ # = USAGE
940
+ # marker = Marker.find(52194)
941
+ # puts marker.marker_synonyms[0].source
941
+ # puts marker.marker_synonyms[0].name
943
class MarkerSynonym < DBConnection
  # The key column is 'marker_synonym_id' instead of the default 'id'.
  set_primary_key 'marker_synonym_id'

  # Each synonym names exactly one marker.
  belongs_to :marker
end
948
+
949
+ # = DESCRIPTION
950
+ # The MarkerFeature class provides an interface to the marker_feature
951
+ # table. This table contains mappings of markers to a SeqRegion.
952
+ #
953
+ # This class uses ActiveRecord to access data in the Ensembl database.
954
+ # See the general documentation of the Ensembl module for
955
+ # more information on what this means and what methods are available.
956
+ #
957
+ # This class includes the mixin Sliceable, which means that it is mapped
958
+ # to a SeqRegion object and a Slice can be created for objects of this
959
+ # class. See Sliceable and Slice for more information.
960
+ #
961
+ # = USAGE
962
+ # marker = Marker.find(52194)
963
+ # puts marker.marker_features[0].seq_region_start.to_s
964
+ # puts marker.marker_features[0].seq_region_end.to_s
965
class MarkerFeature < DBConnection
  include Sliceable

  # The key column is 'marker_feature_id' instead of the default 'id'.
  set_primary_key 'marker_feature_id'

  # A marker feature places one marker on one seq_region.
  belongs_to :marker
  belongs_to :seq_region
end
973
+
974
+ # = DESCRIPTION
975
+ # The MiscFeature class provides an interface to the misc_feature
976
+ # table. The actual type of feature is stored in the MiscSet class.
977
+ #
978
+ # This class uses ActiveRecord to access data in the Ensembl database.
979
+ # See the general documentation of the Ensembl module for
980
+ # more information on what this means and what methods are available.
981
+ #
982
+ # This class includes the mixin Sliceable, which means that it is mapped
983
+ # to a SeqRegion object and a Slice can be created for objects of this
984
+ # class. See Sliceable and Slice for more information.
985
+ #
986
+ # = USAGE
987
+ # #TODO
988
class MiscFeature < DBConnection
  include Sliceable

  set_primary_key 'misc_feature_id'

  belongs_to :seq_region

  # Sets are reached through the misc_feature_misc_set link record.
  has_one :misc_feature_misc_set
  has_many :misc_sets, :through => :misc_feature_misc_set

  has_many :misc_attribs

  # Shorter name for the misc_attribs association.
  alias attribs misc_attribs

  # = DESCRIPTION
  # Returns the first element of MiscFeature#find_all_by_attrib_type_value,
  # or nil when that array is empty.
  # ---
  # *Arguments*:: code (looked up with AttribType.find_by_code), value
  # *Returns*:: MiscFeature object or nil
  def self.find_by_attrib_type_value(code, value)
    find_all_by_attrib_type_value(code, value).first
  end

  # = DESCRIPTION
  # Collects the MiscFeature objects that have a MiscAttrib with the given
  # attribute type code and value.
  # ---
  # *Arguments*:: code (looked up with AttribType.find_by_code), value
  # *Returns*:: empty array or array of MiscFeature objects
  def self.find_all_by_attrib_type_value(code, value)
    # NOTE(review): the AttribType record itself (not its id) is handed to the
    # finder -- presumably ActiveRecord quotes it to its id; confirm.
    attrib_type = AttribType.find_by_code(code)
    matches = MiscAttrib.find_all_by_attrib_type_id_and_value(attrib_type, value)
    per_attrib = matches.map do |attrib|
      MiscFeature.find_all_by_misc_feature_id(attrib.misc_feature_id)
    end
    per_attrib.flatten
  end
end
1016
+
1017
+
1018
+ # = DESCRIPTION
1019
+ # The MiscAttrib class provides an interface to the misc_attrib
1020
+ # table. It is the link between MiscFeature and AttribType.
1021
+ #
1022
+ # This class uses ActiveRecord to access data in the Ensembl database.
1023
+ # See the general documentation of the Ensembl module for
1024
+ # more information on what this means and what methods are available.
1025
+ #
1026
+ # = USAGE
1027
+ # misc_feature = MiscFeature.find(1)
1027
+ # misc_attrib = misc_feature.misc_attribs[0]
1028
+ # puts misc_attrib.to_s
1030
class MiscAttrib < DBConnection
  # Link model: no primary key is configured.
  set_primary_key nil

  belongs_to :misc_feature
  belongs_to :attrib_type

  # Renders the attribute as "<attrib type code>:<value>".
  def to_s
    code = attrib_type.code
    code + ":" + value.to_s
  end
end
1040
+
1041
+ # = DESCRIPTION
1042
+ # The MiscSet class provides an interface to the misc_set
1043
+ # table. This table contains the sets to which MiscFeature objects
1044
+ # belong.
1045
+ #
1046
+ # This class uses ActiveRecord to access data in the Ensembl database.
1047
+ # See the general documentation of the Ensembl module for
1048
+ # more information on what this means and what methods are available.
1049
+ #
1050
+ # = USAGE
1051
+ # misc_set = MiscSet.find(1)
1051
+ # puts misc_set.misc_features.length.to_s
1053
class MiscSet < DBConnection
  set_primary_key 'misc_set_id'

  # Link records between this set and its features.
  has_many :misc_feature_misc_sets

  # :through has to name an association declared on this class. That is the
  # plural :misc_feature_misc_sets above; the singular form used previously
  # does not exist on MiscSet.
  has_many :misc_features, :through => :misc_feature_misc_sets
end
1059
+
1060
+ # = DESCRIPTION
1061
+ # The MiscFeatureMiscSet class provides an interface to the
1062
+ # misc_feature_misc_set table. This table links MiscFeature objects to
1063
+ # their MiscSet.
1064
+ #
1065
+ # This class uses ActiveRecord to access data in the Ensembl database.
1066
+ # See the general documentation of the Ensembl module for
1067
+ # more information on what this means and what methods are available.
1068
+ #
1069
+ # = USAGE
1070
+ # # TODO
1071
class MiscFeatureMiscSet < DBConnection
  # Link model: no primary key is configured.
  set_primary_key nil

  # The two sides of the link.
  belongs_to :misc_feature
  belongs_to :misc_set
end
1077
+
1078
+ # = DESCRIPTION
1079
+ # The Gene class provides an interface to the gene
1080
+ # table. This table contains mappings of genes to a SeqRegion.
1081
+ #
1082
+ # This class uses ActiveRecord to access data in the Ensembl database.
1083
+ # See the general documentation of the Ensembl module for
1084
+ # more information on what this means and what methods are available.
1085
+ #
1086
+ # This class includes the mixin Sliceable, which means that it is mapped
1087
+ # to a SeqRegion object and a Slice can be created for objects of this
1088
+ # class. See Sliceable and Slice for more information.
1089
+ #
1090
+ # = USAGE
1091
+ # puts Gene.find_all_by_biotype('protein_coding').length
1092
class Gene < DBConnection
  include Sliceable

  set_primary_key 'gene_id'

  belongs_to :seq_region
  has_one :gene_stable_id

  has_many :gene_attribs
  # :through has to name an association declared on this class, i.e. the
  # plural :gene_attribs from the line above.
  has_many :attrib_types, :through => :gene_attribs

  has_many :transcripts

  belongs_to :analysis

  # Only object_xref rows whose ensembl_object_type is 'Gene' are included.
  has_many :object_xrefs, :foreign_key => 'ensembl_id', :conditions => "ensembl_object_type = 'Gene'"
  has_many :xrefs, :through => :object_xrefs

  # Shorter name for the gene_attribs association.
  alias attribs gene_attribs

  # = DESCRIPTION
  # The Gene#stable_id method returns the stable_id of the gene (i.e. the
  # ENSG id).
  def stable_id
    gene_stable_id.stable_id
  end

  # = DESCRIPTION
  # The Gene#display_label method returns the default name of the gene,
  # i.e. the display_label of the Xref that display_xref_id points at.
  def display_label
    Xref.find(display_xref_id).display_label
  end
  alias :display_name :display_label
  alias :label :display_label
  alias :name :display_label

  # = DESCRIPTION
  # The Gene#find_all_by_name class method searches the Xrefs for that name
  # and returns an array of the corresponding Gene objects. If the name is
  # not found, it returns an empty array.
  #
  # An Xref that is not the display xref of any gene yields no Gene; such
  # misses are dropped so that the result never contains nil.
  # ---
  # *Arguments*:: name
  # *Returns*:: empty array or array of Gene objects
  def self.find_all_by_name(name)
    xrefs = Ensembl::Core::Xref.find_all_by_display_label(name)
    genes = xrefs.collect do |xref|
      Ensembl::Core::Gene.find_by_display_xref_id(xref.xref_id)
    end
    genes.compact
  end

  # = DESCRIPTION
  # The Gene#find_by_name class method searches the Xrefs for that name
  # and returns one Gene object (even if there should be more). If the name is
  # not found, it returns nil.
  # ---
  # *Arguments*:: name
  # *Returns*:: Gene object or nil
  def self.find_by_name(name)
    find_all_by_name(name).first
  end

  # = DESCRIPTION
  # The Gene#find_by_stable_id class method fetches a Gene object based on
  # its stable ID (i.e. the "ENSG" accession number). If the stable ID is
  # not found, it returns nil.
  # ---
  # *Arguments*:: stable_id
  # *Returns*:: Gene object or nil
  def self.find_by_stable_id(stable_id)
    gene_stable_id = GeneStableId.find_by_stable_id(stable_id)
    gene_stable_id.nil? ? nil : gene_stable_id.gene
  end

  # = DESCRIPTION
  # The Gene#all_xrefs method is a convenience method in that it combines
  # three methods into one. It collects all xrefs for the gene itself, plus
  # all xrefs for all transcripts for the gene, and all xrefs for all
  # translations for those transcripts.
  # ---
  # *Arguments*:: none
  # *Returns*:: flat array of Xref objects
  def all_xrefs
    collected = [self.xrefs]
    self.transcripts.each do |transcript|
      collected.push(transcript.xrefs)
      # Not every transcript has a translation.
      unless transcript.translation.nil?
        collected.push(transcript.translation.xrefs)
      end
    end
    collected.flatten
  end

  # = DESCRIPTION
  # The Gene#go_terms method returns all GO terms associated with a gene
  # (the unique dbprimary_acc values of the xrefs that belong to the 'GO'
  # external database). If the database has no 'GO' external db, it returns
  # an empty array.
  def go_terms
    go_db = ExternalDb.find_by_db_name('GO')
    return [] if go_db.nil?

    go_db_id = go_db.id
    all_xrefs.select { |x| x.external_db_id == go_db_id }.collect { |x| x.dbprimary_acc }.uniq
  end

  # = DESCRIPTION
  # The Gene#hgnc returns the HGNC symbol for the gene, or nil if the gene
  # has no xref in the 'HGNC_curated_gene' external database (or that
  # external database is not present at all).
  def hgnc
    hgnc_db = ExternalDb.find_by_db_name('HGNC_curated_gene')
    return nil if hgnc_db.nil?

    hgnc_db_id = hgnc_db.id
    xref = all_xrefs.select { |x| x.external_db_id == hgnc_db_id }[0]
    return nil if xref.nil?
    xref.display_label
  end

  # = DESCRIPTION
  # The Gene#canonical_transcript returns the longest transcript for that
  # gene (longest by the length of Transcript#seq), or nil when the gene has
  # no transcripts.
  #
  # This method lives inside the class body: defined after the closing 'end'
  # it would not be an instance method of Gene.
  def canonical_transcript
    # sort_by evaluates seq once per transcript instead of once per comparison.
    self.transcripts.sort_by { |transcript| -transcript.seq.length }.first
  end

end
1212
+
1213
+ # = DESCRIPTION
1214
+ # The GeneStableId class provides an interface to the gene_stable_id
1215
+ # table. This table contains Ensembl stable IDs for genes.
1216
+ #
1217
+ # This class uses ActiveRecord to access data in the Ensembl database.
1218
+ # See the general documentation of the Ensembl module for
1219
+ # more information on what this means and what methods are available.
1220
+ #
1221
+ # = USAGE
1222
+ # my_gene = GeneStableId.find_by_stable_id('ENSBTAG00000011670').gene
1223
class GeneStableId < DBConnection
  # The stable_id column itself is used as the key.
  set_primary_key 'stable_id'

  belongs_to :gene
end
1228
+
1229
+ # = DESCRIPTION
1230
+ # The MarkerMapLocation class provides an interface to the
1231
+ # marker_map_location table. This table contains mappings of
1232
+ # MarkerSynonym objects to a chromosome, and basically just stores
1233
+ # the genetic maps.
1234
+ #
1235
+ # This class uses ActiveRecord to access data in the Ensembl database.
1236
+ # See the general documentation of the Ensembl module for
1237
+ # more information on what this means and what methods are available.
1238
+ #
1239
+ # = USAGE
1240
+ # marker_synonym = MarkerSynonym.find_by_name('CYP19A1_(5)')
1241
+ # marker_synonym.marker_map_locations.each do |mapping|
1242
+ # puts mapping.chromosome_name + "\t" + mapping.position.to_s
1243
+ # end
1244
class MarkerMapLocation < DBConnection
  # Link model: no primary key is configured.
  set_primary_key nil

  # A location ties one marker to one map.
  belongs_to :map
  belongs_to :marker
end
1251
+
1252
+ # = DESCRIPTION
1253
+ # The Map class provides an interface to the map
1254
+ # table. This table contains genetic maps.
1255
+ #
1256
+ # This class uses ActiveRecord to access data in the Ensembl database.
1257
+ # See the general documentation of the Ensembl module for
1258
+ # more information on what this means and what methods are available.
1259
+ #
1260
+ # = USAGE
1261
+ # map = Map.find_by_name('MARC')
1262
+ # puts map.markers.length.to_s
1263
class Map < DBConnection
  set_primary_key 'map_id'

  # Markers are reached through the marker_map_locations link records.
  has_many :marker_map_locations
  has_many :markers, :through => :marker_map_locations

  # Returns the value of the map_name attribute under the shorter name 'name'.
  def name
    map_name
  end
end
1273
+
1274
+ # = DESCRIPTION
1275
+ # The RepeatConsensus class provides an interface to the repeat_consensus
1276
+ # table. This table contains consensus sequences for repeats.
1277
+ #
1278
+ # This class uses ActiveRecord to access data in the Ensembl database.
1279
+ # See the general documentation of the Ensembl module for
1280
+ # more information on what this means and what methods are available.
1281
+ #
1282
+ # = USAGE
1283
+ # repeat = RepeatFeature.find(29)
1284
+ # puts repeat.repeat_consensus.repeat_name + "\t" + repeat.repeat_consensus.repeat_consensus
1285
class RepeatConsensus < DBConnection
  # The key column is 'repeat_consensus_id' instead of the default 'id'.
  set_primary_key 'repeat_consensus_id'

  # One consensus can be referenced by many repeat features.
  has_many :repeat_features
end
1290
+
1291
+ # = DESCRIPTION
1292
+ # The RepeatFeature class provides an interface to the repeat_feature
1293
+ # table. This table contains mappings of repeats to a SeqRegion.
1294
+ #
1295
+ # This class uses ActiveRecord to access data in the Ensembl database.
1296
+ # See the general documentation of the Ensembl module for
1297
+ # more information on what this means and what methods are available.
1298
+ #
1299
+ # This class includes the mixin Sliceable, which means that it is mapped
1300
+ # to a SeqRegion object and a Slice can be created for objects of this
1301
+ # class. See Sliceable and Slice for more information.
1302
+ #
1303
+ # = USAGE
1304
+ # repeat_feature = RepeatFeature.find(29)
1305
+ # puts repeat_feature.seq_region_start.to_s
1306
class RepeatFeature < DBConnection
  include Sliceable

  # The key column is 'repeat_feature_id' instead of the default 'id'.
  set_primary_key 'repeat_feature_id'

  # A repeat feature places one repeat consensus on one seq_region.
  belongs_to :repeat_consensus
  belongs_to :seq_region
end
1314
+
1315
+ # = DESCRIPTION
1316
+ # The SeqRegionAttrib class provides an interface to the seq_region_attrib
1317
+ # table. This table contains attribute values for SeqRegion objects
1318
+ #
1319
+ # This class uses ActiveRecord to access data in the Ensembl database.
1320
+ # See the general documentation of the Ensembl module for
1321
+ # more information on what this means and what methods are available.
1322
+ #
1323
+ # = USAGE
1324
+ # chr4 = SeqRegion.find_by_name('4')
1325
+ # chr4.seq_region_attribs.each do |attrib|
1326
+ # puts attrib.attrib_type.name + "\t" + attrib.value.to_s
1327
+ # end
1328
class SeqRegionAttrib < DBConnection
  # Link model: no primary key is configured.
  set_primary_key nil

  # An attribute value belongs to one seq_region and has one attribute type.
  belongs_to :seq_region
  belongs_to :attrib_type
end
1334
+
1335
+ # = DESCRIPTION
1336
+ # The GeneAttrib class provides an interface to the gene_attrib
1337
+ # table. This table contains attribute values for Gene objects
1338
+ #
1339
+ # This class uses ActiveRecord to access data in the Ensembl database.
1340
+ # See the general documentation of the Ensembl module for
1341
+ # more information on what this means and what methods are available.
1342
+ #
1343
+ # = USAGE
1344
+ # #TODO
1345
class GeneAttrib < DBConnection
  # Link model: no primary key is configured.
  set_primary_key nil

  # An attribute value belongs to one gene and has one attribute type.
  belongs_to :gene
  belongs_to :attrib_type
end
1351
+
1352
+ # = DESCRIPTION
1353
+ # The AttribType class provides an interface to the attrib_type
1354
+ # table. This table contains the types that attributes can belong to for
1355
+ # SeqRegion, Gene and Transcript.
1356
+ #
1357
+ # This class uses ActiveRecord to access data in the Ensembl database.
1358
+ # See the general documentation of the Ensembl module for
1359
+ # more information on what this means and what methods are available.
1360
+ #
1361
+ # = USAGE
1362
+ # #TODO
1363
class AttribType < DBConnection
  set_primary_key 'attrib_type_id'

  # Each :through option has to name an association declared on this class.
  # Those are the plural *_attribs associations; the singular names used
  # previously are not declared anywhere on AttribType.
  has_many :seq_region_attribs
  has_many :seq_regions, :through => :seq_region_attribs

  has_many :gene_attribs
  has_many :genes, :through => :gene_attribs

  has_many :transcript_attribs
  has_many :transcripts, :through => :transcript_attribs
end
1375
+
1376
+ # = DESCRIPTION
1377
+ # The TranscriptStableId class provides an interface to the transcript_stable_id
1378
+ # table. This table contains the Ensembl stable IDs for Transcript
1379
+ # objects.
1380
+ #
1381
+ # This class uses ActiveRecord to access data in the Ensembl database.
1382
+ # See the general documentation of the Ensembl module for
1383
+ # more information on what this means and what methods are available.
1384
+ #
1385
+ # = USAGE
1386
+ # transcript_stable_id = TranscriptStableId.find_by_stable_id('ENSBTAT00000015494')
1387
+ # puts transcript_stable_id.transcript.to_yaml
1388
class TranscriptStableId < DBConnection
  # The stable_id column itself is used as the key.
  set_primary_key 'stable_id'

  belongs_to :transcript
end
1393
+
1394
+ # = DESCRIPTION
1395
+ # The TranscriptAttrib class provides an interface to the transcript_attrib
1396
+ # table. This table contains the attributes for Transcript objects.
1397
+ #
1398
+ # This class uses ActiveRecord to access data in the Ensembl database.
1399
+ # See the general documentation of the Ensembl module for
1400
+ # more information on what this means and what methods are available.
1401
+ #
1402
+ # = USAGE
1403
+ # transcript = Transcript.find(32495)
1404
+ # transcript.transcript_attribs.each do |attr|
1405
+ # puts attr.attrib_type.name + "\t" + attr.value
1406
+ # end
1407
class TranscriptAttrib < DBConnection
  # Link model: no primary key is configured.
  set_primary_key nil

  # An attribute value belongs to one transcript and has one attribute type.
  belongs_to :transcript
  belongs_to :attrib_type
end
1413
+
1414
+ # = DESCRIPTION
1415
+ # The DnaAlignFeature class provides an interface to the
1416
+ # dna_align_feature table. This table contains sequence similarity
1417
+ # mappings against a SeqRegion.
1418
+ #
1419
+ # This class uses ActiveRecord to access data in the Ensembl database.
1420
+ # See the general documentation of the Ensembl module for
1421
+ # more information on what this means and what methods are available.
1422
+ #
1423
+ # This class includes the mixin Sliceable, which means that it is mapped
1424
+ # to a SeqRegion object and a Slice can be created for objects of this
1425
+ # class. See Sliceable and Slice for more information.
1426
+ #
1427
+ # = USAGE
1428
+ # unigene_scan = Analysis.find_by_logic_name('Unigene')
1429
+ # unigene_scan.dna_align_features.each do |hit|
1430
+ # puts hit.seq_region.name + "\t" + hit.hit_name + "\t" + hit.cigar_line
1431
+ # end
1432
class DnaAlignFeature < DBConnection
  include Sliceable

  # The key column is 'dna_align_feature_id' instead of the default 'id'.
  set_primary_key 'dna_align_feature_id'

  # An alignment sits on one seq_region and was produced by one analysis.
  belongs_to :seq_region
  belongs_to :analysis

  has_many :exon_supporting_features
  # NOTE(review): no ProteinSupportingFeature class is visible near this
  # definition (only Exon- and TranscriptSupportingFeature) -- confirm that
  # this association resolves.
  has_many :protein_supporting_features
end
1443
+
1444
+ # = DESCRIPTION
1445
+ # The Translation class provides an interface to the
1446
+ # translation table. This table contains the translation start and
1447
+ # stop positions and exons for a given Transcript
1448
+ #
1449
+ # This class uses ActiveRecord to access data in the Ensembl database.
1450
+ # See the general documentation of the Ensembl module for
1451
+ # more information on what this means and what methods are available.
1452
+ #
1453
+ # = USAGE
1454
+ # #TODO
1455
class Translation < DBConnection
  set_primary_key 'translation_id'

  belongs_to :transcript

  # The stable ID is declared both as a collection and as a single record;
  # Translation#stable_id reads the single-record form.
  has_many :translation_stable_ids
  has_one :translation_stable_id

  has_many :translation_attribs
  has_many :protein_features

  # Only object_xref rows whose ensembl_object_type is 'Translation' are included.
  has_many :object_xrefs, :foreign_key => 'ensembl_id', :conditions => "ensembl_object_type = 'Translation'"
  has_many :xrefs, :through => :object_xrefs

  # First and last exon of the translation; both point at the Exon class.
  belongs_to :start_exon, :class_name => 'Exon', :foreign_key => 'start_exon_id'
  belongs_to :end_exon, :class_name => 'Exon', :foreign_key => 'end_exon_id'

  # Shorter name for the translation_attribs association.
  alias attribs translation_attribs

  # = DESCRIPTION
  # The Translation#stable_id method returns the stable ID of the translation.
  # ---
  # *Arguments*:: none
  # *Returns*:: String
  def stable_id
    translation_stable_id.stable_id
  end

  # = DESCRIPTION
  # The Translation#display_label method returns the default name of the
  # translation, i.e. the display_label of the Xref that display_xref_id
  # points at.
  def display_label
    Xref.find(display_xref_id).display_label
  end
  alias :display_name :display_label
  alias :label :display_label
  alias :name :display_label

  # = DESCRIPTION
  # The Translation#find_by_stable_id class method fetches a Translation
  # object based on its stable ID (i.e. the "ENSP" accession number). If no
  # TranslationStableId record matches, it returns nil.
  # ---
  # *Arguments*:: stable_id
  # *Returns*:: Translation object or nil
  def self.find_by_stable_id(stable_id)
    record = TranslationStableId.find_by_stable_id(stable_id)
    record.nil? ? nil : record.translation
  end
end
1504
+
1505
+ # = DESCRIPTION
1506
+ # The TranslationStableId class provides an interface to the
1507
+ # translation_stable_id table. This table contains the Ensembl stable IDs
1508
+ # for a given Translation.
1509
+ #
1510
+ # This class uses ActiveRecord to access data in the Ensembl database.
1511
+ # See the general documentation of the Ensembl module for
1512
+ # more information on what this means and what methods are available.
1513
+ #
1514
+ # = USAGE
1515
+ # stable_id = TranslationStableId.find_by_stable_id('ENSBTAP00000015494')
1516
+ # puts stable_id.to_yaml
1517
class TranslationStableId < DBConnection
  # Each stable ID row refers back to the Translation it names.
  belongs_to :translation

  # Rows are keyed by the stable ID string itself.
  set_primary_key 'stable_id'
end
1522
+
1523
+ # = DESCRIPTION
1524
+ # The TranslationAttrib class provides an interface to the
1525
+ # translation_attrib table. This table contains attribute values for the
1526
+ # Translation class.
1527
+ #
1528
+ # This class uses ActiveRecord to access data in the Ensembl database.
1529
+ # See the general documentation of the Ensembl module for
1530
+ # more information on what this means and what methods are available.
1531
+ #
1532
+ # = USAGE
1533
+ # translation = Translation.find(9979)
1534
+ # translation.translation_attribs.each do |attr|
1535
+ # puts attr.attrib_type.name + "\t" + attr.value
1536
+ # end
1537
class TranslationAttrib < DBConnection
  # This model is declared without a primary key.
  set_primary_key nil

  # An attribute row ties one attrib_type to one translation.
  belongs_to :attrib_type
  belongs_to :translation
end
1543
+
1544
+ # = DESCRIPTION
1545
+ # The Xref class provides an interface to the
1546
+ # xref table. This table contains external references for objects in the
1547
+ # database.
1548
+ #
1549
+ # This class uses ActiveRecord to access data in the Ensembl database.
1550
+ # See the general documentation of the Ensembl module for
1551
+ # more information on what this means and what methods are available.
1552
+ #
1553
+ # = USAGE
1554
+ # gene = Gene.find(1)
1555
+ # gene.xrefs.each do |xref|
1556
+ # puts xref.display_label + "\t" + xref.description
1557
+ # end
1558
class Xref < DBConnection
  set_primary_key 'xref_id'

  has_many :external_synonyms
  # NOTE(review): this association uses the default foreign key on the gene
  # side; confirm that matches the column the gene table uses to reference an xref.
  has_many :genes

  # The external database this reference points into.
  belongs_to :external_db

  # Renders the xref as "<db_name>:<display_label>". Raises if the xref has
  # no external_db record or no display_label.
  def to_s
    db_name = external_db.db_name.to_s
    return db_name + ":" + display_label
  end
end
1570
+
1571
+ # = DESCRIPTION
1572
+ # The ObjectXref class provides the link between gene, transcript and
1573
+ # translation objects on the one hand and an xref on the other.
1574
+ #
1575
+ # This class uses ActiveRecord to access data in the Ensembl database.
1576
+ # See the general documentation of the Ensembl module for
1577
+ # more information on what this means and what methods are available.
1578
+ #
1579
+ # = USAGE
1580
+ # gene = Gene.find(1)
1581
+ # gene.object_xrefs.each do |ox|
1582
+ # puts ox.to_yaml
1583
+ # end
1584
class ObjectXref < DBConnection
  set_primary_key 'object_xref_id'

  # The external reference itself, and its optional go_xref record.
  belongs_to :xref
  has_one :go_xref

  # The ensembl_id column is shared by all three target types; each
  # association carries a condition on ensembl_object_type.
  belongs_to :translation, :class_name => "Translation", :foreign_key => 'ensembl_id', :conditions => ["ensembl_object_type = 'Translation'"]
  belongs_to :transcript, :class_name => "Transcript", :foreign_key => 'ensembl_id', :conditions => ["ensembl_object_type = 'Transcript'"]
  belongs_to :gene, :class_name => "Gene", :foreign_key => 'ensembl_id', :conditions => ["ensembl_object_type = 'Gene'"]
end
1593
+
1594
+ # = DESCRIPTION
1595
+ # The GoXref class provides an interface to the
1596
+ # go_xref table. This table contains the evidence codes for those object_xrefs
1597
+ # that are GO terms.
1598
+ #
1599
+ # This class uses ActiveRecord to access data in the Ensembl database.
1600
+ # See the general documentation of the Ensembl module for
1601
+ # more information on what this means and what methods are available.
1602
class GoXref < DBConnection
  # This model is declared without a primary key.
  set_primary_key nil

  # Inverse of ObjectXref's `has_one :go_xref`: lets a GoXref navigate back
  # to the ObjectXref it annotates.
  belongs_to :object_xref

  # NOTE(review): kept for backward compatibility. It relies on the default
  # xref_id foreign key; confirm the go_xref table actually has that column.
  belongs_to :xref
end
1607
+
1608
+ # = DESCRIPTION
1609
+ # The ExternalDb class provides an interface to the
1610
+ # external_db table. This table contains references to databases to which
1611
+ # xrefs can point.
1612
+ #
1613
+ # This class uses ActiveRecord to access data in the Ensembl database.
1614
+ # See the general documentation of the Ensembl module for
1615
+ # more information on what this means and what methods are available.
1616
+ #
1617
+ # = USAGE
1618
+ # embl_db = ExternalDb.find_by_db_name('EMBL')
1619
+ # puts embl_db.xrefs.length.to_s
1620
class ExternalDb < DBConnection
  set_primary_key 'external_db_id'

  has_many :xrefs

  # Reports that this model has no inheritance column, so ActiveRecord does
  # not attempt single-table inheritance on it.
  def self.inheritance_column
    nil
  end

  # = DESCRIPTION
  # The ExternalDb.find_all_by_display_label class method returns the
  # external databases of all xrefs that carry this display label, each
  # database listed once. There should normally be no more than one. If no
  # xref has this label, this method returns an empty array.
  # ---
  # *Arguments*:: label (String) - display label of the xref(s) to look up
  # *Returns*:: Array of ExternalDb objects
  def self.find_all_by_display_label(label)
    # The previous implementation called self.class.find_by_xref_id, but
    # inside a class method self.class is Class, which has no finders, so it
    # raised NoMethodError as soon as one xref matched. The link between the
    # two models is Xref's belongs_to :external_db, so follow that instead.
    databases = Xref.find_all_by_display_label(label).map do |xref|
      xref.external_db
    end

    # Skip xrefs without a database and collapse duplicates.
    return databases.compact.uniq
  end

  # = DESCRIPTION
  # The ExternalDb.find_by_display_label class method returns the first
  # external database found for this label. If no database is found, this
  # method returns nil.
  # ---
  # *Arguments*:: label (String) - display label of the xref to look up
  # *Returns*:: ExternalDb object or nil
  def self.find_by_display_label(label)
    return self.find_all_by_display_label(label).first
  end
end
1660
+
1661
+ # = DESCRIPTION
1662
+ # The ExternalSynonym class provides an interface to the
1663
+ # external_synonym table. This table contains synonyms for Xref objects.
1664
+ #
1665
+ # This class uses ActiveRecord to access data in the Ensembl database.
1666
+ # See the general documentation of the Ensembl module for
1667
+ # more information on what this means and what methods are available.
1668
+ #
1669
+ # Unlike the feature classes, this class does not include the Sliceable
1670
+ # mixin: an ExternalSynonym belongs to an Xref and is not mapped to a
1671
+ # SeqRegion.
1672
+ #
1673
+ # = USAGE
1674
+ # xref = Xref.find(185185)
1675
+ # puts xref.external_synonyms[0].synonyms
1676
class ExternalSynonym < DBConnection
  # A synonym row belongs to the Xref it is an alternative name for.
  belongs_to :xref

  # This model is declared without a primary key.
  set_primary_key nil
end
1681
+
1682
+ # = DESCRIPTION
1683
+ # The Karyotype class provides an interface to the
1684
+ # karyotype table. This table contains karyotype bands mapped to a SeqRegion.
1685
+ #
1686
+ # This class uses ActiveRecord to access data in the Ensembl database.
1687
+ # See the general documentation of the Ensembl module for
1688
+ # more information on what this means and what methods are available.
1689
+ #
1690
+ # This class includes the mixin Sliceable, which means that it is mapped
1691
+ # to a SeqRegion object and a Slice can be created for objects of this
1692
+ # class. See Sliceable and Slice for more information.
1693
+ #
1694
+ # = USAGE
1695
+ # band = Karyotype.find_by_band('p36.32')
1696
+ # puts band.to_yaml
1697
class Karyotype < DBConnection
  # Sliceable provides the mapping onto a SeqRegion / Slice.
  include Sliceable

  belongs_to :seq_region

  set_primary_key 'karyotype_id'
end
1704
+
1705
+ # = DESCRIPTION
1706
+ # The OligoFeature class provides an interface to the
1707
+ # oligo_feature table. This table contains mappings of Oligo objects to
1708
+ # a SeqRegion.
1709
+ #
1710
+ # This class uses ActiveRecord to access data in the Ensembl database.
1711
+ # See the general documentation of the Ensembl module for
1712
+ # more information on what this means and what methods are available.
1713
+ #
1714
+ # This class includes the mixin Sliceable, which means that it is mapped
1715
+ # to a SeqRegion object and a Slice can be created for objects of this
1716
+ # class. See Sliceable and Slice for more information.
1717
+ #
1718
+ # = USAGE
1719
+ # seq_region = SeqRegion.find_by_name('4')
1720
+ # puts seq_region.oligo_features.length
1721
class OligoFeature < DBConnection
  # Sliceable provides the mapping onto a SeqRegion / Slice.
  include Sliceable

  set_primary_key 'oligo_feature_id'

  # The probe that was mapped, the analysis that produced the mapping, and
  # the sequence region it was mapped to.
  belongs_to :oligo_probe
  belongs_to :analysis
  belongs_to :seq_region
end
1730
+
1731
+ # = DESCRIPTION
1732
+ # The OligoProbe class provides an interface to the
1733
+ # oligo_probe table.
1734
+ #
1735
+ # This class uses ActiveRecord to access data in the Ensembl database.
1736
+ # See the general documentation of the Ensembl module for
1737
+ # more information on what this means and what methods are available.
1738
+ #
1739
+ # = USAGE
1740
+ # probe = OligoProbe.find_by_name('373:434;')
1741
+ # puts probe.probeset + "\t" + probe.oligo_array.name
1742
class OligoProbe < DBConnection
  set_primary_key 'oligo_probe_id'

  # A probe sits on one array and can have many mapped features.
  belongs_to :oligo_array
  has_many :oligo_features
end
1748
+
1749
+ # = DESCRIPTION
1750
+ # The OligoArray class provides an interface to the
1751
+ # oligo_array table. This table contains data describing a microarray
1752
+ # slide.
1753
+ #
1754
+ # This class uses ActiveRecord to access data in the Ensembl database.
1755
+ # See the general documentation of the Ensembl module for
1756
+ # more information on what this means and what methods are available.
1757
+ #
1758
+ # = USAGE
1759
+ # array = OligoArray.find_by_name_and_type('Bovine','AFFY')
1760
+ # puts array.oligo_probes.length
1761
class OligoArray < DBConnection
  # All probes that sit on this array.
  has_many :oligo_probes

  set_primary_key 'oligo_array_id'
end
1766
+
1767
+ # = DESCRIPTION
1768
+ # The PredictionExon class provides an interface to the
1769
+ # prediction_exon table. This table contains the exons of PredictionTranscript objects.
1770
+ #
1771
+ # This class uses ActiveRecord to access data in the Ensembl database.
1772
+ # See the general documentation of the Ensembl module for
1773
+ # more information on what this means and what methods are available.
1774
+ #
1775
+ # This class includes the mixin Sliceable, which means that it is mapped
1776
+ # to a SeqRegion object and a Slice can be created for objects of this
1777
+ # class. See Sliceable and Slice for more information.
1778
+ #
1779
+ # = USAGE
1780
+ # #TODO
1781
class PredictionExon < DBConnection
  # Sliceable provides the mapping onto a SeqRegion / Slice.
  include Sliceable

  set_primary_key 'prediction_exon_id'

  # The sequence region the exon lies on and the predicted transcript it is
  # part of.
  belongs_to :seq_region
  belongs_to :prediction_transcript
end
1789
+
1790
+ # = DESCRIPTION
1791
+ # The PredictionTranscript class provides an interface to the
1792
+ # prediction_transcript table.
1793
+ #
1794
+ # This class uses ActiveRecord to access data in the Ensembl database.
1795
+ # See the general documentation of the Ensembl module for
1796
+ # more information on what this means and what methods are available.
1797
+ #
1798
+ # This class includes the mixin Sliceable, which means that it is mapped
1799
+ # to a SeqRegion object and a Slice can be created for objects of this
1800
+ # class. See Sliceable and Slice for more information.
1801
+ #
1802
+ # = USAGE
1803
+ # predicted_transcript = PredictionTranscript.find_by_display_label('GENSCAN00000000006')
1804
+ # puts predicted_transcript.prediction_exons.length
1805
class PredictionTranscript < DBConnection
  # Sliceable provides the mapping onto a SeqRegion / Slice.
  include Sliceable

  set_primary_key 'prediction_transcript_id'

  # Where the prediction lies and which analysis produced it.
  belongs_to :analysis
  belongs_to :seq_region

  # The predicted exons that make up this transcript.
  has_many :prediction_exons
end
1814
+
1815
+ # = DESCRIPTION
1816
+ # The ProteinFeature class provides an interface to the
1817
+ # protein_feature table. This table contains mappings of a Translation
1818
+ # onto a SeqRegion.
1819
+ #
1820
+ # This class uses ActiveRecord to access data in the Ensembl database.
1821
+ # See the general documentation of the Ensembl module for
1822
+ # more information on what this means and what methods are available.
1823
+ #
1824
+ # This class includes the mixin Sliceable, which means that it is mapped
1825
+ # to a SeqRegion object and a Slice can be created for objects of this
1826
+ # class. See Sliceable and Slice for more information.
1827
+ #
1828
+ # = USAGE
1829
+ # #TODO
1830
class ProteinFeature < DBConnection
  # NOTE(review): Sliceable is mixed in, but no seq_region association is
  # declared on this class; confirm slicing works for protein features.
  include Sliceable

  set_primary_key 'protein_feature_id'

  # The analysis that produced the feature and the translation it lies on.
  belongs_to :analysis
  belongs_to :translation
end
1838
+
1839
+ # = DESCRIPTION
1840
+ # The ProteinAlignFeature class provides an interface to the
1841
+ # protein_align_feature table. This table contains sequence similarity
1842
+ # mappings against a SeqRegion.
1843
+ #
1844
+ # This class uses ActiveRecord to access data in the Ensembl database.
1845
+ # See the general documentation of the Ensembl module for
1846
+ # more information on what this means and what methods are available.
1847
+ #
1848
+ # This class includes the mixin Sliceable, which means that it is mapped
1849
+ # to a SeqRegion object and a Slice can be created for objects of this
1850
+ # class. See Sliceable and Slice for more information.
1851
+ #
1852
+ # = USAGE
1853
+ # uniprot_scan = Analysis.find_by_logic_name('Uniprot')
1854
+ # uniprot_scan.protein_align_features.each do |hit|
1855
+ # puts hit.seq_region.name + "\t" + hit.hit_name + "\t" + hit.cigar_line
1856
+ # end
1857
class ProteinAlignFeature < DBConnection
  # Sliceable provides the mapping onto a SeqRegion / Slice.
  include Sliceable

  set_primary_key 'protein_align_feature_id'

  # Which analysis produced the alignment and where it lies.
  belongs_to :analysis
  belongs_to :seq_region

  # Supporting-evidence rows that refer to this alignment.
  has_many :transcript_supporting_features
  has_many :exon_supporting_features
end
1868
+
1869
+ # = DESCRIPTION
1870
+ # The RegulatoryFactor class provides an interface to the
1871
+ # regulatory_factor table.
1872
+ #
1873
+ # This class uses ActiveRecord to access data in the Ensembl database.
1874
+ # See the general documentation of the Ensembl module for
1875
+ # more information on what this means and what methods are available.
1876
+ #
1877
+ # = USAGE
1878
+ # factor = RegulatoryFactor.find_by_name('crtHsap8070')
1879
+ # puts factor.to_yaml
1880
class RegulatoryFactor < DBConnection
  # All mapped features of this factor.
  has_many :regulatory_features

  set_primary_key 'regulatory_factor_id'
end
1885
+
1886
+ # = DESCRIPTION
1887
+ # The RegulatoryFeature class provides an interface to the
1888
+ # regulatory_feature table. This table contains mappings of
1889
+ # RegulatoryFactor objects against a SeqRegion.
1890
+ #
1891
+ # This class uses ActiveRecord to access data in the Ensembl database.
1892
+ # See the general documentation of the Ensembl module for
1893
+ # more information on what this means and what methods are available.
1894
+ #
1895
+ # This class includes the mixin Sliceable, which means that it is mapped
1896
+ # to a SeqRegion object and a Slice can be created for objects of this
1897
+ # class. See Sliceable and Slice for more information.
1898
+ #
1899
+ # = USAGE
1900
+ # analysis = Analysis.find_by_logic_name('miRanda')
1901
+ # analysis.regulatory_features.each do |feature|
1902
+ # puts feature.name + "\t" + feature.regulatory_factor.name
1903
+ # end
1904
class RegulatoryFeature < DBConnection
  # Sliceable provides the mapping onto a SeqRegion / Slice.
  include Sliceable

  set_primary_key 'regulatory_feature_id'

  # The factor being mapped, the analysis that produced the mapping, and the
  # sequence region it lies on.
  belongs_to :regulatory_factor
  belongs_to :analysis
  belongs_to :seq_region
end
1913
+ end
1914
+ end