ruby-ensembl-api 0.9.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. data/TUTORIAL.rdoc +623 -0
  2. data/bin/ensembl +40 -0
  3. data/lib/ensembl.rb +64 -0
  4. data/lib/ensembl/core/activerecord.rb +1914 -0
  5. data/lib/ensembl/core/collection.rb +60 -0
  6. data/lib/ensembl/core/project.rb +264 -0
  7. data/lib/ensembl/core/slice.rb +693 -0
  8. data/lib/ensembl/core/transcript.rb +425 -0
  9. data/lib/ensembl/core/transform.rb +97 -0
  10. data/lib/ensembl/db_connection.rb +216 -0
  11. data/lib/ensembl/variation/activerecord.rb +253 -0
  12. data/lib/ensembl/variation/variation.rb +163 -0
  13. data/test/unit/data/seq_c6qbl.fa +10 -0
  14. data/test/unit/data/seq_cso19_coding.fa +16 -0
  15. data/test/unit/data/seq_cso19_transcript.fa +28 -0
  16. data/test/unit/data/seq_drd3_gene.fa +838 -0
  17. data/test/unit/data/seq_drd3_transcript.fa +22 -0
  18. data/test/unit/data/seq_drd4_transcript.fa +24 -0
  19. data/test/unit/data/seq_forward_composite.fa +1669 -0
  20. data/test/unit/data/seq_par_boundary.fa +169 -0
  21. data/test/unit/data/seq_rnd3_transcript.fa +47 -0
  22. data/test/unit/data/seq_ub2r1_coding.fa +13 -0
  23. data/test/unit/data/seq_ub2r1_gene.fa +174 -0
  24. data/test/unit/data/seq_ub2r1_transcript.fa +26 -0
  25. data/test/unit/data/seq_y.fa +2 -0
  26. data/test/unit/ensembl_genomes/test_collection.rb +51 -0
  27. data/test/unit/ensembl_genomes/test_gene.rb +52 -0
  28. data/test/unit/ensembl_genomes/test_slice.rb +71 -0
  29. data/test/unit/ensembl_genomes/test_variation.rb +17 -0
  30. data/test/unit/release_50/core/test_project.rb +215 -0
  31. data/test/unit/release_50/core/test_project_human.rb +58 -0
  32. data/test/unit/release_50/core/test_relationships.rb +66 -0
  33. data/test/unit/release_50/core/test_sequence.rb +175 -0
  34. data/test/unit/release_50/core/test_slice.rb +121 -0
  35. data/test/unit/release_50/core/test_transcript.rb +108 -0
  36. data/test/unit/release_50/core/test_transform.rb +223 -0
  37. data/test/unit/release_50/variation/test_activerecord.rb +143 -0
  38. data/test/unit/release_50/variation/test_variation.rb +84 -0
  39. data/test/unit/release_53/core/test_gene.rb +66 -0
  40. data/test/unit/release_53/core/test_project.rb +96 -0
  41. data/test/unit/release_53/core/test_project_human.rb +65 -0
  42. data/test/unit/release_53/core/test_slice.rb +47 -0
  43. data/test/unit/release_53/core/test_transform.rb +63 -0
  44. data/test/unit/release_53/variation/test_activerecord.rb +145 -0
  45. data/test/unit/release_53/variation/test_variation.rb +71 -0
  46. data/test/unit/release_56/core/test_gene.rb +66 -0
  47. data/test/unit/release_56/core/test_project.rb +96 -0
  48. data/test/unit/release_56/core/test_slice.rb +54 -0
  49. data/test/unit/release_56/core/test_transform.rb +63 -0
  50. data/test/unit/release_56/variation/test_activerecord.rb +142 -0
  51. data/test/unit/release_56/variation/test_variation.rb +68 -0
  52. data/test/unit/test_connection.rb +66 -0
  53. data/test/unit/test_releases.rb +136 -0
  54. metadata +128 -0
@@ -0,0 +1,40 @@
1
+ #!/usr/bin/ruby
2
+ require 'irb'
3
+ require 'ensembl'
4
+
5
module IRB
  # Runs an interactive IRB session whose workspace is the given binding,
  # so the code typed at the prompt is evaluated in that binding.
  #
  # The session honours two entries of the IRB configuration hash: a
  # :SCRIPT entry is passed to the Irb instance as its input, and an
  # :IRB_RC entry is called with the session context before input is read.
  # ---
  # *Arguments*:: binding to evaluate the session in
  # *Returns*:: the value of the catch(:IRB_EXIT) block
  def self.start_session(binding)
    IRB.setup(nil)

    workspace = WorkSpace.new(binding)
    script = @CONF[:SCRIPT]
    irb = script ? Irb.new(workspace, script) : Irb.new(workspace)

    rc_hook = @CONF[:IRB_RC]
    rc_hook.call(irb.context) if rc_hook
    @CONF[:MAIN_CONTEXT] = irb.context

    # Hand Ctrl-C to the Irb instance's own signal handler.
    trap("SIGINT") { irb.signal_handle }

    # eval_input runs until :IRB_EXIT is thrown.
    catch(:IRB_EXIT) { irb.eval_input }
  end
end
29
+
30
# Mix the core and variation namespaces into the top level so their
# classes can be used unqualified inside the interactive session.
include Ensembl::Core
include Ensembl::Variation

# Exactly two command line arguments are required: the species and the
# release number.
unless ARGV.length == 2
  raise "ERROR: Please provide snake_case species and Ensembl release number"
end

species = ARGV.shift
release = ARGV.shift.to_i # String#to_i: non-numeric input becomes 0

# Connect both databases, then open the console in the top-level binding.
Ensembl::Core::DBConnection.connect(species, release)
Ensembl::Variation::DBConnection.connect(species, release)
IRB.start_session(Kernel.binding)
@@ -0,0 +1,64 @@
1
+ #
2
+ # = ensembl.rb
3
+ #
4
+ # Copyright:: Copyright (C) 2009 Jan Aerts <http://jandot.myopenid.com>
5
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
6
+ #
7
+ # License:: The Ruby License
8
+ #
9
+
10
module Ensembl
  ENSEMBL_RELEASE = 56

  # = DESCRIPTION
  # The Session class is a plain container for values that are cached while
  # working with a database. A single shared instance is stored in
  # Ensembl::SESSION.
  class Session
    # Hash of cached coordinate systems: key = id, value = CoordSystem object.
    attr_accessor :coord_systems
    # Hash mapping a coordinate system name to its id (this is how
    # CoordSystem#find_toplevel and CoordSystem#find_seqlevel fill it).
    attr_accessor :coord_system_ids
    # Hash reserved for cached seq_regions.
    attr_accessor :seq_regions
    # Id and CoordSystem object of the cached seqlevel coordinate system.
    attr_accessor :seqlevel_id, :seqlevel_coord_system
    # Id and CoordSystem object of the cached toplevel coordinate system.
    attr_accessor :toplevel_id, :toplevel_coord_system
    attr_accessor :collection_species

    # Creates a session with three empty caches. All other attributes are
    # left unset and therefore read as nil.
    def initialize
      @coord_systems = {}
      @coord_system_ids = {}
      @seq_regions = {}
    end

    # Empties the three caches and sets every other attribute to nil.
    # ---
    # *Arguments*:: none
    # *Returns*:: nil
    def reset
      @coord_systems = {}
      @coord_system_ids = {}
      @seq_regions = {}
      @seqlevel_id = @toplevel_id = nil
      @seqlevel_coord_system = @toplevel_coord_system = nil
      @collection_species = nil
    end
  end

  # The one session object shared by the whole library.
  SESSION = Session.new
end
42
+
43
+ begin
44
+ require 'rubygems'
45
+ require 'bio'
46
+ rescue LoadError
47
+ raise LoadError, "You must have bioruby installed"
48
+ end
49
+
50
+ # Database connection
51
+ require File.dirname(__FILE__) + '/ensembl/db_connection.rb'
52
+
53
+ # Core modules
54
+ require File.dirname(__FILE__) + '/ensembl/core/activerecord.rb'
55
+ require File.dirname(__FILE__) + '/ensembl/core/transcript.rb'
56
+ require File.dirname(__FILE__) + '/ensembl/core/slice.rb'
57
+ require File.dirname(__FILE__) + '/ensembl/core/project.rb'
58
+ require File.dirname(__FILE__) + '/ensembl/core/transform.rb'
59
+ require File.dirname(__FILE__) + '/ensembl/core/collection.rb'
60
+
61
+ # Variation modules
62
+ require File.dirname(__FILE__) + '/ensembl/variation/activerecord.rb'
63
+ require File.dirname(__FILE__) + '/ensembl/variation/variation.rb'
64
+
@@ -0,0 +1,1914 @@
1
+ #
2
+ # = ensembl/core/activerecord.rb - ActiveRecord mappings to Ensembl core
3
+ #
4
+ # Copyright:: Copyright (C) 2007-2009 Jan Aerts <http://jandot.myopenid.com>
5
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
6
+ # License:: The Ruby License
7
+ #
8
+
9
+ # = DESCRIPTION
10
+ # == What is it?
11
+ # The Ensembl module provides an API to the Ensembl databases
12
+ # stored at ensembldb.ensembl.org. This is the same information that is
13
+ # available from http://www.ensembl.org.
14
+ #
15
+ # The Ensembl::Core module mainly covers sequences and
16
+ # annotations.
17
+ # The Ensembl::Variation module covers variations (e.g. SNPs).
18
+ # The Ensembl::Compara module covers comparative mappings
19
+ # between species.
20
+ #
21
+ # == ActiveRecord
22
+ # The Ensembl API provides a ruby interface to the Ensembl mysql databases
23
+ # at ensembldb.ensembl.org. Most of the API is based on ActiveRecord to
24
+ # get data from that database. In general, each table is described by a
25
+ # class with the same name: the coord_system table is covered by the
26
+ # CoordSystem class, the seq_region table is covered by the SeqRegion class,
27
+ # etc. As a result, accessors are available for all columns in each table.
28
+ # For example, the seq_region table has the following columns: seq_region_id,
29
+ # name, coord_system_id and length. Through ActiveRecord, these column names
30
+ # become available as attributes of SeqRegion objects:
31
+ # puts my_seq_region.seq_region_id
32
+ # puts my_seq_region.name
33
+ # puts my_seq_region.coord_system_id
34
+ # puts my_seq_region.length.to_s
35
+ #
36
+ # ActiveRecord makes it easy to extract data from those tables using the
37
+ # collection of #find methods. There are three types of #find methods (e.g.
38
+ # for the CoordSystem class):
39
+ # a. find based on primary key in table:
40
+ # my_coord_system = CoordSystem.find(5)
41
+ # b. find_by_sql:
42
+ # my_coord_system = CoordSystem.find_by_sql("SELECT * FROM coord_system WHERE name = 'chromosome'")
43
+ # c. find_by_<insert_your_column_name_here>
44
+ # my_coord_system1 = CoordSystem.find_by_name('chromosome')
45
+ # my_coord_system2 = CoordSystem.find_by_rank(3)
46
+ # To find out which find_by_<column> methods are available, you can list the
47
+ # column names using the column_names class methods:
48
+ #
49
+ # puts Ensembl::Core::CoordSystem.column_names.join("\t")
50
+ #
51
+ # For more information on the find methods, see
52
+ # http://ar.rubyonrails.org/classes/ActiveRecord/Base.html#M000344
53
+ #
54
+ # The relationships between different tables are accessible through the
55
+ # classes as well. For example, to loop over all seq_regions belonging to
56
+ # a coord_system (a coord_system "has many" seq_regions):
57
+ # chr_coord_system = CoordSystem.find_by_name('chromosome')
58
+ # chr_coord_system.seq_regions.each do |seq_region|
59
+ # puts seq_region.name
60
+ # end
61
+ # Of course, you can go the other way as well (a seq_region "belongs to"
62
+ # a coord_system):
63
+ # chr4 = SeqRegion.find_by_name('4')
64
+ # puts chr4.coord_system.name #--> 'chromosome'
65
+ #
66
+ # To find out what relationships exist for a given class, you can use the
67
+ # #reflect_on_all_associations class methods:
68
+ # puts SeqRegion.reflect_on_all_associations(:has_many).collect{|a| a.name.to_s}.join("\n")
69
+ # puts SeqRegion.reflect_on_all_associations(:has_one).collect{|a| a.name.to_s}.join("\n")
70
+ # puts SeqRegion.reflect_on_all_associations(:belongs_to).collect{|a| a.name.to_s}.join("\n")
71
+ module Ensembl
72
+ # = DESCRIPTION
73
+ # The Ensembl::Core module covers the core databases from
74
+ # ensembldb.ensembl.org and covers mainly sequences and their annotations.
75
+ # For a full description of the database (and therefore the classes that
76
+ # are available), see http://www.ensembl.org/info/software/core/schema/index.html
77
+ # and http://www.ensembl.org/info/software/core/schema/schema_description.html
78
+ module Core
79
+ # = DESCRIPTION
80
+ # The Sliceable mixin holds the get_slice method and can be included
81
+ # in any class that lends itself to having a position on a SeqRegion.
82
module Sliceable
  # = DESCRIPTION
  # The Sliceable#slice method builds an Ensembl::Core::Slice from the
  # position of this object on its seq_region.
  #
  # Start, stop and strand are only read when the object is an
  # Ensembl::Core::Intron or when its class lists the matching column
  # (seq_region_start, seq_region_end, seq_region_strand). A missing start
  # or stop is passed on as nil; a missing strand is replaced by 1.
  # ---
  # *Arguments*:: none
  # *Returns*:: Ensembl::Core::Slice object
  def slice
    klass = self.class
    positioned_by = lambda do |column|
      klass == Ensembl::Core::Intron || klass.column_names.include?(column)
    end

    start = positioned_by.call('seq_region_start') ? self.seq_region_start : nil
    stop = positioned_by.call('seq_region_end') ? self.seq_region_end : nil
    # FIXME: we shouldn't default the strand, but can't #project if no
    # strand is given.
    strand = positioned_by.call('seq_region_strand') ? self.seq_region_strand : 1

    Ensembl::Core::Slice.new(self.seq_region, start, stop, strand)
  end

  # = DESCRIPTION
  # The Sliceable#seq method returns the sequence of the slice created by
  # Sliceable#slice.
  # ---
  # *Arguments*:: none
  # *Returns*:: sequence
  def seq
    self.slice.seq
  end

  # = DESCRIPTION
  # The Sliceable#start method is a convenience method and returns
  # self.seq_region_start.
  # ---
  # *Arguments*:: none
  # *Returns*:: value of seq_region_start
  def start
    self.seq_region_start
  end

  # = DESCRIPTION
  # The Sliceable#stop method is a convenience method and returns
  # self.seq_region_end.
  # ---
  # *Arguments*:: none
  # *Returns*:: value of seq_region_end
  def stop
    self.seq_region_end
  end

  # = DESCRIPTION
  # The Sliceable#strand method is a convenience method and returns
  # self.seq_region_strand.
  # ---
  # *Arguments*:: none
  # *Returns*:: value of seq_region_strand
  def strand
    self.seq_region_strand
  end

  # = DESCRIPTION
  # The Sliceable#length method returns the length of the feature, counting
  # both the start and the stop position.
  # ---
  # *Arguments*:: none
  # *Returns*:: stop - start + 1
  def length
    self.stop - self.start + 1
  end

  # = DESCRIPTION
  # The Sliceable#project method transfers the coordinates of this object
  # to another coordinate system by delegating to the #project method of
  # the slice returned by Sliceable#slice.
  #
  # = USAGE
  #
  #  # Project a cow gene to scaffold level
  #  original_feature = Gene.find(85743)
  #  target_slices = original_feature.project('scaffold')
  #
  # ---
  # *Arguments*:
  # * coord_system_name:: name of coordinate system to project
  #   coordinates to
  # *Returns*:: whatever Slice#project returns (described in the original
  #             documentation as an array of Slices and, if necessary, Gaps)
  def project(coord_system_name)
    self.slice.project(coord_system_name)
  end
end
191
+
192
+
193
+ # = DESCRIPTION
194
+ # The CoordSystem class describes the coordinate system to which
195
+ # a given SeqRegion belongs. It is an interface to the coord_system
196
+ # table of the Ensembl mysql database.
197
+ #
198
+ # Two virtual coordinate systems exist for
199
+ # every species:
200
+ # * toplevel: the coordinate system with rank 1
201
+ # * seqlevel: the coordinate system that contains the seq_regions
202
+ # with the sequence
203
+ #
204
+ # This class uses ActiveRecord to access data in the Ensembl database.
205
+ # See the general documentation of the Ensembl module for
206
+ # more information on what this means and what methods are available.
207
+ #
208
+ # = USAGE
209
+ # coord_system = Ensembl::Core::CoordSystem.find_by_name('chromosome')
210
+ # if coord_system == CoordSystem.toplevel
211
+ # puts coord_system.name + " is the toplevel coordinate system."
212
+ # end
213
class CoordSystem < DBConnection
  set_primary_key 'coord_system_id'

  has_many :seq_regions

  # = DESCRIPTION
  # The CoordSystem#toplevel? method checks if this coordinate system is the
  # toplevel coordinate system (rank 1) or not. With a multi-species
  # database the comparison is restricted to the species of this
  # coordinate system.
  # ---
  # *Arguments*:: none
  # *Returns*:: TRUE or FALSE
  def toplevel?
    top = if Collection.check # When using multi-species databases
            CoordSystem.find_by_rank_and_species_id(1, self.species_id)
          else
            CoordSystem.find_by_rank(1)
          end
    self == top
  end

  # = DESCRIPTION
  # The CoordSystem#seqlevel? method checks if this coordinate system is the
  # seqlevel coordinate system or not.
  # ---
  # *Arguments*:: none
  # *Returns*:: TRUE or FALSE
  def seqlevel?
    seqlevel = if Collection.check # When using multi-species databases
                 CoordSystem.find_by_sql("SELECT * FROM coord_system WHERE attrib LIKE '%sequence_level%' AND species_id = #{self.species_id}")[0]
               else
                 # find_seqlevel is an instance method; calling it on the
                 # class (as was done before) raises NoMethodError.
                 find_seqlevel
               end
    self == seqlevel
  end

  # = DESCRIPTION
  # The CoordSystem#find_toplevel instance method returns the toplevel
  # coordinate system (rank 1). The result is cached in Ensembl::SESSION;
  # with a multi-species database the cache is refreshed when it holds a
  # coordinate system of another species than this one.
  # ---
  # *Arguments*:: none
  # *Returns*:: CoordSystem object
  def find_toplevel
    session = Ensembl::SESSION
    cached = session.toplevel_coord_system
    stale = cached.nil? || (Collection.check && cached.species_id != self.species_id)

    if stale
      session.toplevel_coord_system =
        if Collection.check # When using multi-species databases
          CoordSystem.find_by_rank_and_species_id(1, self.species_id)
        else
          CoordSystem.find_by_rank(1)
        end
      session.toplevel_id = session.toplevel_coord_system.id
      session.coord_system_ids[session.toplevel_coord_system.name] = session.toplevel_id
      session.coord_systems[session.toplevel_id] = session.toplevel_coord_system
    end

    session.toplevel_coord_system
  end

  # = DESCRIPTION
  # The CoordSystem#find_seqlevel instance method returns the seqlevel
  # coordinate system, i.e. the one whose attrib column contains
  # 'sequence_level'. The result is cached in Ensembl::SESSION; with a
  # multi-species database the cache is refreshed when it holds a
  # coordinate system of another species than this one.
  # ---
  # *Arguments*:: none
  # *Returns*:: CoordSystem object
  def find_seqlevel
    session = Ensembl::SESSION
    cached = session.seqlevel_coord_system
    stale = cached.nil? || (Collection.check && cached.species_id != self.species_id)

    if stale
      session.seqlevel_coord_system =
        if Collection.check # When using multi-species databases
          CoordSystem.find_by_sql("SELECT * FROM coord_system WHERE attrib LIKE '%sequence_level%' AND species_id = #{self.species_id}")[0]
        else
          CoordSystem.find_by_sql("SELECT * FROM coord_system WHERE attrib LIKE '%sequence_level%'")[0]
        end
      session.seqlevel_id = session.seqlevel_coord_system.id
      session.coord_system_ids[session.seqlevel_coord_system.name] = session.seqlevel_id
      session.coord_systems[session.seqlevel_id] = session.seqlevel_coord_system
    end

    session.seqlevel_coord_system
  end

  # = DESCRIPTION
  # The CoordSystem#find_level instance method returns the coordinate
  # system with the name passed. With a multi-species database the search
  # is restricted to the species of this coordinate system.
  # ---
  # *Arguments*:: Coordinate system name
  # *Returns*:: CoordSystem object, or nil if there is no match
  def find_level(coord_system_name)
    if Collection.check # When using multi-species databases
      # Dynamic finder instead of string-built SQL, so the name is quoted
      # by ActiveRecord.
      CoordSystem.find_by_name_and_species_id(coord_system_name, self.species_id)
    else
      CoordSystem.find_by_name(coord_system_name)
    end
  end

  # = DESCRIPTION
  # The CoordSystem.find_default_by_name class method returns the default
  # coordinate system with the given name. There might be more than one
  # coordinate system with the same name but a different version (e.g. in
  # human, there is one for the NCBI36 and one for the NCBI35 version). If
  # exactly one coordinate system has the name, that one is returned;
  # otherwise the first one whose attrib column matches 'default_version'.
  # ---
  # *Arguments*:: name of the coordinate system
  # *Returns*:: CoordSystem object (nil if nothing qualifies)
  def self.find_default_by_name(name)
    candidates = Ensembl::Core::CoordSystem.find_all_by_name(name)
    return candidates[0] if candidates.length == 1

    candidates.select { |cs| cs.attrib =~ /default_version/ }[0]
  end

  # = DESCRIPTION
  # The CoordSystem#name_with_version returns a string containing the name
  # and version of the coordinate system, joined by a colon. If no version
  # is available, then just the name is returned.
  # ---
  # *Arguments*:: none
  # *Returns*:: String object
  def name_with_version
    self.version.nil? ? name : [name, version].join(':')
  end

  ## Calculate the shortest path between a source coordinate system and a
  ## target coordinate system. This can be done by looking for the
  ## 'assembly.mapping' records in the meta_coord table.
  ## At the moment, only direct mappings are possible. Later on, this method
  ## should be changed to make longer paths possible.
  ## Is used to get features for a slice object.
  #def calculate_path(target_coord_system)
  #  MetaCoord.find_all_by_meta_key('assembly.mapping').each do |mapping|
  #    coord_system_names = mapping.meta_value.split(/[#|\|]/)
  #    if coord_system_names.sort.join(';') == [self.name_with_version, target_coord_system.name_with_version].sort.join(';')
  #      answer = Array.new
  #      answer.push(CoordSystem.find_by_name(coord_system_names[0]))
  #      answer.push(CoordSystem.find_by_name(coord_system_names[1]))
  #      return answer
  #    end
  #  end
  #  return nil
  #
  #end
end
370
+
371
+ # = DESCRIPTION
372
+ # The SeqRegion class describes a part of a coordinate systems. It is an
373
+ # interface to the seq_region table of the Ensembl mysql database.
374
+ #
375
+ # This class uses ActiveRecord to access data in the Ensembl database.
376
+ # See the general documentation of the Ensembl module for
377
+ # more information on what this means and what methods are available.
378
+ #
379
+ # = USAGE
380
+ # chr4 = SeqRegion.find_by_name('4')
381
+ # puts chr4.coord_system.name #--> 'chromosome'
382
+ # chr4.genes.each do |gene|
383
+ # puts gene.biotype
384
+ # end
385
class SeqRegion < DBConnection
  set_primary_key 'seq_region_id'

  belongs_to :coord_system
  has_many :simple_features
  has_many :marker_features
  has_many :genes
  has_many :exons
  has_many :repeat_features
  has_many :seq_region_attribs
  # The :through option must name the has_many association declared just
  # above (plural); the singular :seq_region_attrib does not exist.
  has_many :attrib_types, :through => :seq_region_attribs
  has_many :transcripts
  has_one :dna
  has_many :dna_align_features
  has_many :misc_features
  has_many :density_features
  has_many :karyotypes
  has_many :oligo_features
  has_many :prediction_exons
  has_many :prediction_transcripts
  has_many :protein_align_features
  has_many :regulatory_features
  has_many :assembly_exceptions

  # See http://blog.hasmanythrough.com/2006/4/21/self-referential-through
  has_many :asm_links_as_asm, :foreign_key => 'asm_seq_region_id', :class_name => 'AssemblyLink'
  has_many :asm_links_as_cmp, :foreign_key => 'cmp_seq_region_id', :class_name => 'AssemblyLink'
  has_many :asm_seq_regions, :through => :asm_links_as_cmp
  has_many :cmp_seq_regions, :through => :asm_links_as_asm

  alias attribs seq_region_attribs

  # = DESCRIPTION
  # The SeqRegion#slice method returns a slice object that covers the whole
  # of the seq_region.
  # ---
  # *Arguments*:: none
  # *Returns*:: Ensembl::Core::Slice object
  def slice
    Ensembl::Core::Slice.new(self)
  end

  # = DESCRIPTION
  # The SeqRegion#assembled_seq_regions returns the sequence regions on which
  # the current region is assembled. For example, calling this method on a
  # contig sequence region, it might return the chromosome that that contig
  # is part of. Optionally, this method takes a coordinate system name so
  # that only regions of that coordinate system are returned.
  # ---
  # *Arguments*:: coord_system_name (optional)
  # *Returns*:: array of SeqRegion objects (empty if no coordinate system
  #             with that name exists)
  def assembled_seq_regions(coord_system_name = nil)
    return self.asm_seq_regions if coord_system_name.nil?

    coord_system = CoordSystem.find_by_name(coord_system_name)
    return [] if coord_system.nil?

    self.asm_seq_regions.select { |asr| asr.coord_system_id == coord_system.id }
  end

  # = DESCRIPTION
  # The SeqRegion#component_seq_regions returns the sequence regions
  # contained within the current region (in other words: the bits used to
  # assemble the current region). For example, calling this method on a
  # chromosome sequence region, it might return the contigs that were assembled
  # into this chromosome. Optionally, this method takes a coordinate system
  # name so that only regions of that coordinate system are returned.
  # ---
  # *Arguments*:: coord_system_name (optional)
  # *Returns*:: array of SeqRegion objects (empty if no coordinate system
  #             with that name exists)
  def component_seq_regions(coord_system_name = nil)
    return self.cmp_seq_regions if coord_system_name.nil?

    coord_system = CoordSystem.find_by_name(coord_system_name)
    return [] if coord_system.nil?

    self.cmp_seq_regions.select { |csr| csr.coord_system_id == coord_system.id }
  end

  # = DESCRIPTION
  # This method queries the assembly table to find those rows (i.e.
  # AssemblyLink objects) for which this seq_region is the assembly.
  # The coordinate system passed is also recorded in the Ensembl::SESSION
  # caches (id => object and name => id) if it is not there yet.
  #
  # = USAGE
  #
  #  my_seq_region = SeqRegion.find_by_name('4')
  #  first_link = my_seq_region.assembly_links_as_assembly[0]
  #  puts first_link.asm_start.to_s + "\t" + first_link.asm_end.to_s
  #
  # ---
  # *Arguments*:
  # * coord_system: CoordSystem object that the components should belong
  #   to (default = nil, meaning all links are returned)
  # *Returns*:: array of AssemblyLink objects
  def assembly_links_as_assembly(coord_system = nil)
    # Mirror assembly_links_as_component: no filter means every link.
    return self.asm_links_as_asm if coord_system.nil?

    session = Ensembl::SESSION
    session.coord_systems[coord_system.id] ||= coord_system
    session.coord_system_ids[coord_system.name] ||= coord_system.id

    AssemblyLink.find_by_sql("SELECT * FROM assembly a WHERE a.asm_seq_region_id = #{self.id} AND a.cmp_seq_region_id IN (SELECT sr.seq_region_id FROM seq_region sr WHERE coord_system_id = #{coord_system.id} )")
  end

  # = DESCRIPTION
  # This method queries the assembly table to find those rows (i.e.
  # AssemblyLink objects) for which this seq_region is the component.
  #
  # = USAGE
  #
  #  my_seq_region = SeqRegion.find_by_name('Chr4.003.1')
  #  first_link = my_seq_region.assembly_links_as_component[0]
  #  puts first_link.asm_start.to_s + "\t" + first_link.asm_end.to_s
  #
  # ---
  # *Arguments*:
  # * coord_system: CoordSystem object that the assembly should belong to
  #   (default = nil, meaning all links are returned)
  # *Returns*:: array of AssemblyLink objects
  def assembly_links_as_component(coord_system = nil)
    return self.asm_links_as_cmp if coord_system.nil?

    self.asm_links_as_cmp.select { |alac| alac.asm_seq_region.coord_system_id == coord_system.id }
  end

  # = DESCRIPTION
  # The SeqRegion#sequence method returns the sequence of this seq_region. At
  # the moment, it will only return the sequence if the region belongs to the
  # seqlevel coordinate system.
  # ---
  # *Arguments*:: none
  # *Returns*:: DNA sequence as String
  def sequence
    self.dna.sequence
  end
  alias seq sequence

  # = DESCRIPTION
  # The SeqRegion#subsequence method returns a subsequence of this seq_region. At
  # the moment, it will only return the sequence if the region belongs to the
  # seqlevel coordinate system. Positions are 1-based and inclusive.
  # ---
  # *Arguments*:: start and stop position
  # *Returns*:: DNA sequence as String
  def subsequence(start, stop)
    self.seq.slice(start - 1, (stop - start) + 1)
  end
  alias subseq subsequence
end
550
+
551
+ # = DESCRIPTION
552
+ # The AssemblyLink class describes the relationships between different
553
+ # seq_regions. For example, a chromosome might consist of a number of
554
+ # scaffolds, each of which in turn consists of a number of contigs. The
555
+ # AssemblyLink class
556
+ # This class is an interface to the assembly table of the Ensembl mysql
557
+ # database.
558
+ #
559
+ # This class uses ActiveRecord to access data in the Ensembl database.
560
+ # See the general documentation of the Ensembl module for
561
+ # more information on what this means and what methods are available.
562
+ #
563
+ # = USAGE
564
+ # chr4 = SeqRegion.find_by_name('4')
565
+ # puts chr4.coord_system.name #--> 'chromosome'
566
+ # chr4.genes.each do |gene|
567
+ # puts gene.biotype
568
+ # end
569
class AssemblyLink < DBConnection
  # Backed by the 'assembly' table, which is declared without a primary key.
  set_table_name('assembly')
  set_primary_key(nil)

  # Each row links two SeqRegion records: the assembled region and the
  # component region it is built from.
  # See http://blog.hasmanythrough.com/2006/4/21/self-referential-through
  belongs_to :asm_seq_region, :class_name => 'SeqRegion', :foreign_key => 'asm_seq_region_id'
  belongs_to :cmp_seq_region, :class_name => 'SeqRegion', :foreign_key => 'cmp_seq_region_id'
end
577
+
578
+ # = DESCRIPTION
579
+ # The AssemblyException class describes the exceptions to the AssemblyLink. Most
580
+ # notably, this concerns the allosomes. In human, for example, only the
581
+ # part of the Y chromosome that is different from X is covered in the
582
+ # assembly table. Therefore, the sequence of the tip and end of the Y
583
+ # chromosome are not stored in the database, but fetched from the X
584
+ # chromosome. The assembly_exception table contain the information on
585
+ # which bits are the same.
586
+ #
587
+ # This class uses ActiveRecord to access data in the Ensembl database.
588
+ # See the general documentation of the Ensembl module for
589
+ # more information on what this means and what methods are available.
590
+ #
591
+ # This class should normally not be used directly by the user.
592
class AssemblyException < DBConnection
  # Sliceable supplies slice, seq, start, stop, strand, length and project.
  include Sliceable

  set_primary_key('assembly_exception_id')

  belongs_to(:seq_region)
end
599
+
600
+ # = DESCRIPTION
601
+ # The MetaCoord class describes what coordinate systems are used to annotate
602
+ # features. It will for example tell you that marker_features are annotated
603
+ # either on the chromosome, supercontig and clone level.
604
+ #
605
+ # This class should normally not be used by the end user, but is used internally.
606
+ #
607
+ # This class uses ActiveRecord to access data in the Ensembl database.
608
+ # See the general documentation of the Ensembl module for
609
+ # more information on what this means and what methods are available.
610
class MetaCoord < DBConnection
  # Declared without a primary key.
  set_primary_key(nil)
end
613
+
614
+ # = DESCRIPTION
615
+ # The Meta class describes meta data of the database. These include information
616
+ # on what coordinate system is mapping on another one and which patches
617
+ # are applied.
618
+ #
619
+ # This class should normally not be used by the end user, but is used internally.
620
+ #
621
+ # This class uses ActiveRecord to access data in the Ensembl database.
622
+ # See the general documentation of the Ensembl module for
623
+ # more information on what this means and what methods are available.
624
class Meta < DBConnection
  # No primary key is configured for this model.
  set_primary_key(nil)
end
627
+
628
+ # = DESCRIPTION
629
+ # The Analysis class describes an analysis.
630
+ #
631
+ # This class uses ActiveRecord to access data in the Ensembl database.
632
+ # See the general documentation of the Ensembl module for
633
+ # more information on what this means and what methods are available.
634
+ #
635
+ # = USAGE
636
+ # repeat_masker_analysis = Analysis.find_by_logic_name('RepeatMask')
637
+ # puts repeat_masker_analysis.to_yaml
638
class Analysis < DBConnection
  set_primary_key('analysis_id')

  # Feature and annotation models that carry an analysis reference.
  has_many(:genes)
  has_many(:dna_align_features)
  has_many(:protein_align_features)
  # At most one free-text description per analysis.
  has_one(:analysis_description)
  has_many(:density_types)
  has_many(:oligo_features)
  has_many(:protein_features)
  has_many(:regulatory_features)
  has_many(:simple_features)
  has_many(:prediction_transcripts)
end
652
+
653
+ # = DESCRIPTION
654
+ # The AnalysisDescription class belongs to an analysis.
655
+ #
656
+ # This class uses ActiveRecord to access data in the Ensembl database.
657
+ # See the general documentation of the Ensembl module for
658
+ # more information on what this means and what methods are available.
659
+ #
660
+ # = USAGE
661
+ # descr = AnalysisDescription.find(3)
662
+ # puts descr.to_yaml
663
class AnalysisDescription < DBConnection
  # No primary key is configured for this model.
  set_primary_key(nil)

  # The analysis this description belongs to.
  belongs_to(:analysis)
end
668
+
669
+ # = DESCRIPTION
670
+ # The Dna class contains the actual DNA sequence for the sequence regions
671
+ # that belong to the seq_level coordinate system.
672
+ #
673
+ # This class uses ActiveRecord to access data in the Ensembl database.
674
+ # See the general documentation of the Ensembl module for
675
+ # more information on what this means and what methods are available.
676
+ #
677
+ # = USAGE
678
+ # seq_region = SeqRegion.find(1)
679
+ # puts seq_region.dna.sequence
680
class Dna < DBConnection
  # No primary key is configured for this model.
  set_primary_key(nil)

  # The sequence region whose sequence this row holds.
  belongs_to(:seq_region)
end
685
+
686
+ # = DESCRIPTION
687
+ # The Exon class describes an exon.
688
+ #
689
+ # This class uses ActiveRecord to access data in the Ensembl database.
690
+ # See the general documentation of the Ensembl module for
691
+ # more information on what this means and what methods are available.
692
+ #
693
+ # This class includes the mixin Sliceable, which means that it is mapped
694
+ # to a SeqRegion object and a Slice can be created for objects of this
695
+ # class. See Sliceable and Slice for more information.
696
+ #
697
+ # = USAGE
698
+ # seq_region = SeqRegion.find(1)
699
+ # puts seq_region.exons.length
700
class Exon < DBConnection
  include Sliceable

  set_primary_key 'exon_id'

  belongs_to :seq_region
  has_many :exon_transcripts
  has_many :transcripts, :through => :exon_transcripts

  # Translations that start, respectively end, in this exon. These mirror
  # Translation's start_exon / end_exon associations.
  has_many :start_translations, :class_name => 'Translation', :foreign_key => 'start_exon_id'
  has_many :end_translations, :class_name => 'Translation', :foreign_key => 'end_exon_id'

  # Kept for backward compatibility. This association used to be declared
  # twice under the same name (once per foreign key); the later declaration
  # replaced the earlier one, so only the end_exon_id variant was effective.
  has_many :translations, :foreign_key => 'end_exon_id'

  has_one :exon_stable_id

  has_many :exon_supporting_features
  has_many :dna_align_features, :through => :exon_supporting_features, :conditions => ["feature_type = 'dna_align_feature'"]
  has_many :protein_align_features, :through => :exon_supporting_features, :conditions => ["feature_type = 'protein_align_feature'"]

  # = DESCRIPTION
  # The Exon#stable_id method returns the stable_id of the associated
  # ExonStableId record.
  def stable_id
    return self.exon_stable_id.stable_id
  end

  # = DESCRIPTION
  # The Exon#seq method returns the sequence of the exon.
  #
  # The SeqRegion is looked up in Ensembl::SESSION.seq_regions first and is
  # stored there after loading, so repeated calls for exons on the same
  # region reuse the same object.
  def seq
    cache = Ensembl::SESSION.seq_regions
    if cache.has_key?(self.seq_region_id)
      region = cache[self.seq_region_id]
    else
      region = self.seq_region
      cache[region.id] = region
    end
    slice = Ensembl::Core::Slice.new(region, seq_region_start, seq_region_end, seq_region_strand)
    return slice.seq
  end
end
736
+
737
+ # = DESCRIPTION
738
+ # The ExonStableId class provides an interface to the exon_stable_id
739
+ # table. This table contains Ensembl stable IDs for exons.
740
+ #
741
+ # This class uses ActiveRecord to access data in the Ensembl database.
742
+ # See the general documentation of the Ensembl module for
743
+ # more information on what this means and what methods are available.
744
+ #
745
+ # = USAGE
746
+ # my_exon = ExonStableId.find_by_stable_id('ENSE00001494622').exon
747
class ExonStableId < DBConnection
  # The stable_id column acts as the primary key.
  set_primary_key('stable_id')

  # The exon this stable ID names.
  belongs_to(:exon)
end
752
+
753
+ # = DESCRIPTION
754
+ # The ExonTranscript class provides the link between exons and transcripts.
755
+ #
756
+ # This class uses ActiveRecord to access data in the Ensembl database.
757
+ # See the general documentation of the Ensembl module for
758
+ # more information on what this means and what methods are available.
759
+ #
760
+ # = USAGE
761
+ # link = ExonTranscript.find(1)
762
+ # puts link.exon.to_yaml
763
+ # puts link.transcript.to_yaml
764
class ExonTranscript < DBConnection
  # Join model: no primary key is configured.
  set_primary_key(nil)

  # The two sides of the exon <-> transcript link.
  belongs_to(:exon)
  belongs_to(:transcript)
end
770
+
771
# Join model between an exon and the alignment feature supporting it.
class ExonSupportingFeature < DBConnection
  # The table name does not follow the class name.
  set_table_name('supporting_feature')
  set_primary_key(nil)

  belongs_to(:exon)
  # Both feature associations are reached through the same feature_id column.
  belongs_to(:dna_align_feature, :class_name => "DnaAlignFeature", :foreign_key => 'feature_id')
  belongs_to(:protein_align_feature, :class_name => "ProteinAlignFeature", :foreign_key => 'feature_id')
end
779
+
780
# Join model between a transcript and the alignment feature supporting it.
class TranscriptSupportingFeature < DBConnection
  set_primary_key(nil)

  belongs_to(:transcript)
  # Both feature associations are reached through the same feature_id column.
  belongs_to(:dna_align_feature, :class_name => "DnaAlignFeature", :foreign_key => 'feature_id')
  belongs_to(:protein_align_feature, :class_name => "ProteinAlignFeature", :foreign_key => 'feature_id')
end
787
+
788
+ # = DESCRIPTION
789
+ # The SimpleFeature class describes simple features that have positions
790
+ # on a SeqRegion.
791
+ #
792
+ # This class uses ActiveRecord to access data in the Ensembl database.
793
+ # See the general documentation of the Ensembl module for
794
+ # more information on what this means and what methods are available.
795
+ #
796
+ # This class includes the mixin Sliceable, which means that it is mapped
797
+ # to a SeqRegion object and a Slice can be created for objects of this
798
+ # class. See Sliceable and Slice for more information.
799
+ #
800
+ # = USAGE
801
+ # simple_feature = SimpleFeature.find(123)
802
+ # puts simple_feature.analysis.logic_name
803
class SimpleFeature < DBConnection
  include Sliceable

  set_primary_key('simple_feature_id')

  # Where the feature lies and which analysis produced it.
  belongs_to(:seq_region)
  belongs_to(:analysis)
end
811
+
812
+ # = DESCRIPTION
813
+ # The DensityFeature class provides an interface to the density_feature
814
+ # table.
815
+ #
816
+ # This class uses ActiveRecord to access data in the Ensembl database.
817
+ # See the general documentation of the Ensembl module for
818
+ # more information on what this means and what methods are available.
819
+ #
820
+ # This class includes the mixin Sliceable, which means that it is mapped
821
+ # to a SeqRegion object and a Slice can be created for objects of this
822
+ # class. See Sliceable and Slice for more information.
823
+ #
824
+ # = USAGE
825
+ # density_feature = DensityFeature.find(2716384)
826
+ # puts density_feature.to_yaml
827
class DensityFeature < DBConnection
  set_primary_key('density_feature_id')

  # The kind of density measured and the region it was measured on.
  belongs_to(:density_type)
  belongs_to(:seq_region)
end
833
+
834
+ # = DESCRIPTION
835
+ # The DensityType class provides an interface to the density_type
836
+ # table.
837
+ #
838
+ # This class uses ActiveRecord to access data in the Ensembl database.
839
+ # See the general documentation of the Ensembl module for
840
+ # more information on what this means and what methods are available.
841
+ #
842
+ # This class includes the mixin Sliceable, which means that it is mapped
843
+ # to a SeqRegion object and a Slice can be created for objects of this
844
+ # class. See Sliceable and Slice for more information.
845
+ #
846
class DensityType < DBConnection
  set_primary_key('density_type_id')

  # All density features of this type, and the analysis behind the type.
  has_many(:density_features)
  belongs_to(:analysis)
end
852
+
853
+ # = DESCRIPTION
854
+ # The Marker class provides an interface to the marker
855
+ # table. This table contains primer sequences and PCR product lengths.
856
+ #
857
+ # This class uses ActiveRecord to access data in the Ensembl database.
858
+ # See the general documentation of the Ensembl module for
859
+ # more information on what this means and what methods are available.
860
+ #
861
+ # = USAGE
862
+ # marker = Marker.find(52194)
863
+ # puts marker.left_primer
864
+ # puts marker.right_primer
865
+ # puts marker.min_primer_dist.to_s
866
class Marker < DBConnection
  set_primary_key('marker_id')

  has_many(:marker_features)
  has_many(:marker_synonyms)
  has_many(:marker_map_locations)

  # Returning nil here overrides ActiveRecord's inheritance column setting.
  def self.inheritance_column
    nil
  end

  # = DESCRIPTION
  # The Marker#name method joins the names of all synonyms of this marker
  # into one comma-separated string.
  #
  # = USAGE
  #  marker = Marker.find(1)
  #  puts marker.name    --> 58017,D29149
  def name
    synonym_names = self.marker_synonyms.map { |synonym| synonym.name }
    synonym_names.join(',')
  end

  # = DESCRIPTION
  # The Marker#find_by_name class method returns the first marker found
  # for this name.
  #
  # ---
  # *Arguments*:: name
  # *Returns*:: Marker object or nil
  def self.find_by_name(name)
    self.find_all_by_name(name).first
  end

  # = DESCRIPTION
  # The Marker#find_all_by_name class method looks up the synonyms with
  # this name and collects the markers they point to.
  # ---
  # *Arguments*:: name
  # *Returns*:: empty array or array of Marker objects
  def self.find_all_by_name(name)
    synonyms = Ensembl::Core::MarkerSynonym.find_all_by_name(name)
    markers_per_synonym = synonyms.map do |synonym|
      Ensembl::Core::Marker.find_all_by_marker_id(synonym.marker_id)
    end
    markers_per_synonym.flatten
  end

  #def to_mappings
  #  output = Array.new
  #  self.marker_features.each do |mf|
  #    output.push(mf.slice.display_name)
  #  end
  #  return output.join("\n")
  #
  #end

end
929
+
930
+ # = DESCRIPTION
931
+ # The MarkerSynonym class provides an interface to the marker_synonym
932
+ # table. This table contains names for markers (that are themselves
933
+ # stored in the marker table (so Marker class)).
934
+ #
935
+ # This class uses ActiveRecord to access data in the Ensembl database.
936
+ # See the general documentation of the Ensembl module for
937
+ # more information on what this means and what methods are available.
938
+ #
939
+ # = USAGE
940
+ # marker = Marker.find(52194)
941
+ # puts marker.marker_synonym.source
942
+ # puts marker.marker_synonym.name
943
class MarkerSynonym < DBConnection
  set_primary_key('marker_synonym_id')

  # The marker this synonym names.
  belongs_to(:marker)
end
948
+
949
+ # = DESCRIPTION
950
+ # The MarkerFeature class provides an interface to the marker_feature
951
+ # table. This table contains mappings of markers to a SeqRegion.
952
+ #
953
+ # This class uses ActiveRecord to access data in the Ensembl database.
954
+ # See the general documentation of the Ensembl module for
955
+ # more information on what this means and what methods are available.
956
+ #
957
+ # This class includes the mixin Sliceable, which means that it is mapped
958
+ # to a SeqRegion object and a Slice can be created for objects of this
959
+ # class. See Sliceable and Slice for more information.
960
+ #
961
+ # = USAGE
962
+ # marker = Marker.find(52194)
963
+ # puts marker.marker_feature.seq_region_start.to_s
964
+ # puts marker.marker_feature.seq_region_end.to_s
965
class MarkerFeature < DBConnection
  include Sliceable

  set_primary_key('marker_feature_id')

  # The marker that was mapped and the region it was mapped to.
  belongs_to(:marker)
  belongs_to(:seq_region)
end
973
+
974
+ # = DESCRIPTION
975
+ # The MiscFeature class provides an interface to the misc_feature
976
+ # table. The actual type of feature is stored in the MiscSet class.
977
+ #
978
+ # This class uses ActiveRecord to access data in the Ensembl database.
979
+ # See the general documentation of the Ensembl module for
980
+ # more information on what this means and what methods are available.
981
+ #
982
+ # This class includes the mixin Sliceable, which means that it is mapped
983
+ # to a SeqRegion object and a Slice can be created for objects of this
984
+ # class. See Sliceable and Slice for more information.
985
+ #
986
+ # = USAGE
987
+ # #TODO
988
class MiscFeature < DBConnection
  include Sliceable

  set_primary_key 'misc_feature_id'

  belongs_to :seq_region
  has_one :misc_feature_misc_set
  has_many :misc_sets, :through => :misc_feature_misc_set

  has_many :misc_attribs

  alias attribs misc_attribs

  # = DESCRIPTION
  # The MiscFeature#find_by_attrib_type_value class method returns the first
  # feature found by find_all_by_attrib_type_value.
  #
  # ---
  # *Arguments*:: attribute type code, attribute value
  # *Returns*:: MiscFeature object or nil
  def self.find_by_attrib_type_value(code, value)
    return self.find_all_by_attrib_type_value(code, value).first
  end

  # = DESCRIPTION
  # The MiscFeature#find_all_by_attrib_type_value class method returns all
  # features that have a MiscAttrib of the given type code with the given
  # value.
  #
  # ---
  # *Arguments*:: attribute type code, attribute value
  # *Returns*:: empty array or array of MiscFeature objects
  def self.find_all_by_attrib_type_value(code, value)
    attrib_type = AttribType.find_by_code(code)
    # An unknown code cannot match any attribute; do not query with a nil id.
    return [] if attrib_type.nil?

    # Pass the id itself rather than the AttribType record.
    misc_attribs = MiscAttrib.find_all_by_attrib_type_id_and_value(attrib_type.id, value)
    answers = misc_attribs.collect do |ma|
      MiscFeature.find_all_by_misc_feature_id(ma.misc_feature_id)
    end
    return answers.flatten
  end
end
1016
+
1017
+
1018
+ # = DESCRIPTION
1019
+ # The MiscAttrib class provides an interface to the misc_attrib
1020
+ # table. It is the link between MiscFeature and AttribType.
1021
+ #
1022
+ # This class uses ActiveRecord to access data in the Ensembl database.
1023
+ # See the general documentation of the Ensembl module for
1024
+ # more information on what this means and what methods are available.
1025
+ #
1026
+ # = USAGE
1027
+ # marker = Marker.find(52194)
1028
+ # puts marker.marker_feature.seq_region_start.to_s
1029
+ # puts marker.marker_feature.seq_region_end.to_s
1030
class MiscAttrib < DBConnection
  set_primary_key(nil)

  belongs_to(:misc_feature)
  belongs_to(:attrib_type)

  # Renders the attribute as "<attrib type code>:<value>".
  def to_s
    code = self.attrib_type.code
    code + ":" + self.value.to_s
  end
end
1040
+
1041
+ # = DESCRIPTION
1042
+ # The MiscSet class provides an interface to the misc_set
1043
+ # table. This table contains the sets to which MiscFeature objects
1044
+ # belong.
1045
+ #
1046
+ # This class uses ActiveRecord to access data in the Ensembl database.
1047
+ # See the general documentation of the Ensembl module for
1048
+ # more information on what this means and what methods are available.
1049
+ #
1050
+ # = USAGE
1051
+ # feature_set = MiscSet.find(1)
1052
+ # puts feature_set.misc_features.length.to_s
1053
class MiscSet < DBConnection
  set_primary_key 'misc_set_id'

  has_many :misc_feature_misc_sets
  # :through must name the association declared on the line above (plural);
  # the singular :misc_feature_misc_set is not declared on this class.
  has_many :misc_features, :through => :misc_feature_misc_sets
end
1059
+
1060
+ # = DESCRIPTION
1061
+ # The MiscFeatureMiscSet class provides an interface to the
1062
+ # misc_feature_misc_set table. This table links MiscFeature objects to
1063
+ # their MiscSet.
1064
+ #
1065
+ # This class uses ActiveRecord to access data in the Ensembl database.
1066
+ # See the general documentation of the Ensembl module for
1067
+ # more information on what this means and what methods are available.
1068
+ #
1069
+ # = USAGE
1070
+ # # TODO
1071
class MiscFeatureMiscSet < DBConnection
  # Join model: no primary key is configured.
  set_primary_key(nil)

  # The two sides of the misc_feature <-> misc_set link.
  belongs_to(:misc_feature)
  belongs_to(:misc_set)
end
1077
+
1078
+ # = DESCRIPTION
1079
+ # The Gene class provides an interface to the gene
1080
+ # table. This table contains mappings of genes to a SeqRegion.
1081
+ #
1082
+ # This class uses ActiveRecord to access data in the Ensembl database.
1083
+ # See the general documentation of the Ensembl module for
1084
+ # more information on what this means and what methods are available.
1085
+ #
1086
+ # This class includes the mixin Sliceable, which means that it is mapped
1087
+ # to a SeqRegion object and a Slice can be created for objects of this
1088
+ # class. See Sliceable and Slice for more information.
1089
+ #
1090
+ # = USAGE
1091
+ # puts Gene.find_all_by_biotype('protein_coding').length
1092
class Gene < DBConnection
  include Sliceable

  set_primary_key 'gene_id'

  belongs_to :seq_region
  has_one :gene_stable_id

  has_many :gene_attribs
  # :through must name the association declared on the line above (plural);
  # the singular :gene_attrib is not declared on this class.
  has_many :attrib_types, :through => :gene_attribs

  has_many :transcripts

  belongs_to :analysis

  has_many :object_xrefs, :foreign_key => 'ensembl_id', :conditions => "ensembl_object_type = 'Gene'"
  has_many :xrefs, :through => :object_xrefs

  alias attribs gene_attribs

  # = DESCRIPTION
  # The Gene#stable_id method returns the stable_id of the gene (i.e. the
  # ENSG id).
  def stable_id
    return self.gene_stable_id.stable_id
  end

  # = DESCRIPTION
  # The Gene#display_label method returns the default name of the gene,
  # i.e. the display_label of the Xref referenced by display_xref_id.
  def display_label
    return Xref.find(self.display_xref_id).display_label
  end
  alias :display_name :display_label
  alias :label :display_label
  alias :name :display_label

  # = DESCRIPTION
  # The Gene#find_all_by_name class method searches the Xrefs for that name
  # and returns an array of the corresponding Gene objects. If the name is
  # not found, it returns an empty array.
  def self.find_all_by_name(name)
    xrefs = Ensembl::Core::Xref.find_all_by_display_label(name)
    genes = xrefs.collect do |xref|
      Ensembl::Core::Gene.find_by_display_xref_id(xref.xref_id)
    end
    # An xref that is not the display xref of any gene yields nil; drop
    # those so that callers only ever see Gene objects.
    return genes.compact
  end

  # = DESCRIPTION
  # The Gene#find_by_name class method searches the Xrefs for that name
  # and returns one Gene object (even if there are more). If the name is
  # not found, it returns nil.
  def self.find_by_name(name)
    return self.find_all_by_name(name).first
  end

  # = DESCRIPTION
  # The Gene#find_by_stable_id class method fetches a Gene object based on
  # its stable ID (i.e. the "ENSG" accession number). If the name is
  # not found, it returns nil.
  def self.find_by_stable_id(stable_id)
    gene_stable_id = GeneStableId.find_by_stable_id(stable_id)
    return nil if gene_stable_id.nil?
    return gene_stable_id.gene
  end

  # = DESCRIPTION
  # The Gene#all_xrefs method is a convenience method in that it combines
  # three methods into one. It collects all xrefs for the gene itself, plus
  # all xrefs for all transcripts for the gene, and all xrefs for all
  # translations for those transcripts.
  def all_xrefs
    answer = Array.new
    answer.push(self.xrefs)
    self.transcripts.each do |transcript|
      answer.push(transcript.xrefs)
      translation = transcript.translation
      answer.push(translation.xrefs) unless translation.nil?
    end
    return answer.flatten
  end

  # = DESCRIPTION
  # The Gene#go_terms method returns all GO terms associated with a gene,
  # as an array of unique primary accessions. It returns an empty array if
  # the database has no external db named 'GO'.
  def go_terms
    go_db = ExternalDb.find_by_db_name('GO')
    return [] if go_db.nil?
    go_db_id = go_db.id
    return self.all_xrefs.select{|x| x.external_db_id == go_db_id}.collect{|x| x.dbprimary_acc}.uniq
  end

  # = DESCRIPTION
  # The Gene#hgnc returns the HGNC symbol for the gene, or nil if the gene
  # has no such xref or the database has no external db named
  # 'HGNC_curated_gene'.
  def hgnc
    hgnc_db = ExternalDb.find_by_db_name('HGNC_curated_gene')
    return nil if hgnc_db.nil?
    hgnc_db_id = hgnc_db.id
    xref = self.all_xrefs.detect{|x| x.external_db_id == hgnc_db_id}
    return nil if xref.nil?
    return xref.display_label
  end

  # = DESCRIPTION
  # The Gene#canonical_transcript returns the longest transcript for that
  # gene (by sequence length), or nil if the gene has no transcripts.
  #
  # This method was previously defined after the closing 'end' of the class
  # and was therefore not available on Gene objects.
  def canonical_transcript
    # sort_by computes each transcript sequence only once
    ct = self.transcripts.sort_by {|t| -t.seq.length}
    return ct[0]
  end

end
1212
+
1213
+ # = DESCRIPTION
1214
+ # The GeneStableId class provides an interface to the gene_stable_id
1215
+ # table. This table contains Ensembl stable IDs for genes.
1216
+ #
1217
+ # This class uses ActiveRecord to access data in the Ensembl database.
1218
+ # See the general documentation of the Ensembl module for
1219
+ # more information on what this means and what methods are available.
1220
+ #
1221
+ # = USAGE
1222
+ # my_gene = GeneStableId.find_by_stable_id('ENSBTAG00000011670').gene
1223
class GeneStableId < DBConnection
  # The stable_id column acts as the primary key.
  set_primary_key('stable_id')

  # The gene this stable ID names.
  belongs_to(:gene)
end
1228
+
1229
+ # = DESCRIPTION
1230
+ # The MarkerMapLocation class provides an interface to the
1231
+ # marker_map_location table. This table contains mappings of
1232
+ # MarkerSynonym objects to a chromosome, and basically just stores
1233
+ # the genetic maps.
1234
+ #
1235
+ # This class uses ActiveRecord to access data in the Ensembl database.
1236
+ # See the general documentation of the Ensembl module for
1237
+ # more information on what this means and what methods are available.
1238
+ #
1239
+ # = USAGE
1240
+ # marker_synonym = MarkerSynonym.find_by_name('CYP19A1_(5)')
1241
+ # marker_synonym.marker_map_locations.each do |mapping|
1242
+ # puts mapping.chromosome_name + "\t" + mapping.position.to_s
1243
+ # end
1244
class MarkerMapLocation < DBConnection
  # No primary key is configured for this model.
  set_primary_key(nil)

  # The map the location is on and the marker that is located.
  belongs_to(:map)
  belongs_to(:marker)
end
1251
+
1252
+ # = DESCRIPTION
1253
+ # The Map class provides an interface to the map
1254
+ # table. This table contains genetic maps.
1255
+ #
1256
+ # This class uses ActiveRecord to access data in the Ensembl database.
1257
+ # See the general documentation of the Ensembl module for
1258
+ # more information on what this means and what methods are available.
1259
+ #
1260
+ # = USAGE
1261
+ # map = Map.find_by_name('MARC')
1262
+ # puts map.markers.length.to_s
1263
class Map < DBConnection
  set_primary_key('map_id')

  has_many(:marker_map_locations)
  has_many(:markers, :through => :marker_map_locations)

  # Convenience reader: exposes the map_name attribute as #name.
  def name
    self.map_name
  end
end
1273
+
1274
+ # = DESCRIPTION
1275
+ # The RepeatConsensus class provides an interface to the repeat_consensus
1276
+ # table. This table contains consensus sequences for repeats.
1277
+ #
1278
+ # This class uses ActiveRecord to access data in the Ensembl database.
1279
+ # See the general documentation of the Ensembl module for
1280
+ # more information on what this means and what methods are available.
1281
+ #
1282
+ # = USAGE
1283
+ # repeat = RepeatFeature.find(29)
1284
+ # puts repeat.repeat_consensus.repeat_name + "\t" + repeat.repeat_consensus.repeat_consensus
1285
class RepeatConsensus < DBConnection
  set_primary_key('repeat_consensus_id')

  # All repeat features that refer to this consensus.
  has_many(:repeat_features)
end
1290
+
1291
+ # = DESCRIPTION
1292
+ # The RepeatFeature class provides an interface to the repeat_feature
1293
+ # table. This table contains mappings of repeats to a SeqRegion.
1294
+ #
1295
+ # This class uses ActiveRecord to access data in the Ensembl database.
1296
+ # See the general documentation of the Ensembl module for
1297
+ # more information on what this means and what methods are available.
1298
+ #
1299
+ # This class includes the mixin Sliceable, which means that it is mapped
1300
+ # to a SeqRegion object and a Slice can be created for objects of this
1301
+ # class. See Sliceable and Slice for more information.
1302
+ #
1303
+ # = USAGE
1304
+ # repeat_feature = RepeatFeature.find(29)
1305
+ # puts repeat_feature.seq_region_start.to_s
1306
class RepeatFeature < DBConnection
  include Sliceable

  set_primary_key('repeat_feature_id')

  # The consensus this repeat matches and the region it lies on.
  belongs_to(:repeat_consensus)
  belongs_to(:seq_region)
end
1314
+
1315
+ # = DESCRIPTION
1316
+ # The SeqRegionAttrib class provides an interface to the seq_region_attrib
1317
+ # table. This table contains attribute values for SeqRegion objects
1318
+ #
1319
+ # This class uses ActiveRecord to access data in the Ensembl database.
1320
+ # See the general documentation of the Ensembl module for
1321
+ # more information on what this means and what methods are available.
1322
+ #
1323
+ # = USAGE
1324
+ # chr4 = SeqRegion.find_by_name('4')
1325
+ # chr4.seq_region_attribs.each do |attrib|
1326
+ # puts attrib.attrib_type.name + "\t" + attrib.value.to_s
1327
+ # end
1328
class SeqRegionAttrib < DBConnection
  # No primary key is configured for this model.
  set_primary_key(nil)

  # The region the attribute describes and the type of the attribute.
  belongs_to(:seq_region)
  belongs_to(:attrib_type)
end
1334
+
1335
+ # = DESCRIPTION
1336
+ # The GeneAttrib class provides an interface to the gene_attrib
1337
+ # table. This table contains attribute values for Gene objects
1338
+ #
1339
+ # This class uses ActiveRecord to access data in the Ensembl database.
1340
+ # See the general documentation of the Ensembl module for
1341
+ # more information on what this means and what methods are available.
1342
+ #
1343
+ # = USAGE
1344
+ # #TODO
1345
class GeneAttrib < DBConnection
  # No primary key is configured for this model.
  set_primary_key(nil)

  # The gene the attribute describes and the type of the attribute.
  belongs_to(:gene)
  belongs_to(:attrib_type)
end
1351
+
1352
+ # = DESCRIPTION
1353
+ # The AttribType class provides an interface to the attrib_type
1354
+ # table. This table contains the types that attributes can belong to for
1355
+ # SeqRegion, Gene and Transcript.
1356
+ #
1357
+ # This class uses ActiveRecord to access data in the Ensembl database.
1358
+ # See the general documentation of the Ensembl module for
1359
+ # more information on what this means and what methods are available.
1360
+ #
1361
+ # = USAGE
1362
+ # #TODO
1363
class AttribType < DBConnection
  set_primary_key 'attrib_type_id'

  # Each :through option must name the has_many association declared just
  # above it (plural). The singular names used before (:seq_region_attrib,
  # :gene_attrib, :transcript_attrib) are not declared on this class.
  has_many :seq_region_attribs
  has_many :seq_regions, :through => :seq_region_attribs

  has_many :gene_attribs
  has_many :genes, :through => :gene_attribs

  has_many :transcript_attribs
  has_many :transcripts, :through => :transcript_attribs
end
1375
+
1376
+ # = DESCRIPTION
1377
+ # The TranscriptStableId class provides an interface to the transcript_stable_id
1378
+ # table. This table contains the Ensembl stable IDs for Transcript
1379
+ # objects.
1380
+ #
1381
+ # This class uses ActiveRecord to access data in the Ensembl database.
1382
+ # See the general documentation of the Ensembl module for
1383
+ # more information on what this means and what methods are available.
1384
+ #
1385
+ # = USAGE
1386
+ # transcript_stable_id = TranscriptStableId.find_by_stable_id('ENSBTAT00000015494')
1387
+ # puts transcript_stable_id.transcript.to_yaml
1388
class TranscriptStableId < DBConnection
  # The stable_id column acts as the primary key.
  set_primary_key('stable_id')

  # The transcript this stable ID names.
  belongs_to(:transcript)
end
1393
+
1394
+ # = DESCRIPTION
1395
+ # The TranscriptAttrib class provides an interface to the transcript_attrib
1396
+ # table. This table contains the attributes for Transcript objects.
1397
+ #
1398
+ # This class uses ActiveRecord to access data in the Ensembl database.
1399
+ # See the general documentation of the Ensembl module for
1400
+ # more information on what this means and what methods are available.
1401
+ #
1402
+ # = USAGE
1403
+ # transcript = Transcript.find(32495)
1404
+ # transcript.transcript_attribs.each do |attr|
1405
+ # puts attr.attrib_type.name + "\t" + attr.value
1406
+ # end
1407
class TranscriptAttrib < DBConnection
  # No primary key is configured for this model.
  set_primary_key(nil)

  # The transcript the attribute describes and the type of the attribute.
  belongs_to(:transcript)
  belongs_to(:attrib_type)
end
1413
+
1414
+ # = DESCRIPTION
1415
+ # The DnaAlignFeature class provides an interface to the
1416
+ # dna_align_feature table. This table contains sequence similarity
1417
+ # mappings against a SeqRegion.
1418
+ #
1419
+ # This class uses ActiveRecord to access data in the Ensembl database.
1420
+ # See the general documentation of the Ensembl module for
1421
+ # more information on what this means and what methods are available.
1422
+ #
1423
+ # This class includes the mixin Sliceable, which means that it is mapped
1424
+ # to a SeqRegion object and a Slice can be created for objects of this
1425
+ # class. See Sliceable and Slice for more information.
1426
+ #
1427
+ # = USAGE
1428
+ # unigene_scan = Analysis.find_by_logic_name('Unigene')
1429
+ # unigene_scan.dna_align_features.each do |hit|
1430
+ # puts hit.seq_region.name + "\t" + hit.hit_name + "\t" + hit.cigar_line
1431
+ # end
1432
class DnaAlignFeature < DBConnection
  include Sliceable

  set_primary_key 'dna_align_feature_id'

  belongs_to :seq_region
  belongs_to :analysis

  # The supporting-feature join models reference this class through their
  # feature_id column (see their belongs_to :dna_align_feature), and that
  # column is shared with protein align features. So the foreign key must be
  # given explicitly and rows must be restricted by feature_type.
  has_many :exon_supporting_features, :foreign_key => 'feature_id', :conditions => ["feature_type = 'dna_align_feature'"]
  has_many :transcript_supporting_features, :foreign_key => 'feature_id', :conditions => ["feature_type = 'dna_align_feature'"]

  # NOTE(review): kept unchanged for backward compatibility. No
  # ProteinSupportingFeature model is visible next to the supporting-feature
  # classes; this was presumably meant to be transcript_supporting_features.
  # Confirm and remove.
  has_many :protein_supporting_features
end
1443
+
1444
+ # = DESCRIPTION
1445
+ # The Translation class provides an interface to the
1446
+ # translation table. This table contains the translation start and
1447
+ # stop positions and exons for a given Transcript
1448
+ #
1449
+ # This class uses ActiveRecord to access data in the Ensembl database.
1450
+ # See the general documentation of the Ensembl module for
1451
+ # more information on what this means and what methods are available.
1452
+ #
1453
+ # = USAGE
1454
+ # #TODO
1455
class Translation < DBConnection
  set_primary_key('translation_id')

  belongs_to(:transcript)
  has_many(:translation_stable_ids)

  has_many(:translation_attribs)
  has_many(:protein_features)

  has_one(:translation_stable_id)

  # Cross-references are linked through object_xref rows of type 'Translation'.
  has_many(:object_xrefs, :foreign_key => 'ensembl_id', :conditions => "ensembl_object_type = 'Translation'")
  has_many(:xrefs, :through => :object_xrefs)

  # The exons in which the translation starts and ends.
  belongs_to(:start_exon, :class_name => 'Exon', :foreign_key => 'start_exon_id')
  belongs_to(:end_exon, :class_name => 'Exon', :foreign_key => 'end_exon_id')

  alias attribs translation_attribs

  # The Translation#stable_id method returns the stable ID of the translation.
  # ---
  # *Arguments*:: none
  # *Returns*:: String
  def stable_id
    self.translation_stable_id.stable_id
  end

  # = DESCRIPTION
  # The Translation#display_label method returns the default name of the
  # translation, i.e. the display_label of the Xref referenced by
  # display_xref_id.
  def display_label
    display_xref = Xref.find(self.display_xref_id)
    display_xref.display_label
  end
  alias_method :display_name, :display_label
  alias_method :label, :display_label
  alias_method :name, :display_label

  # = DESCRIPTION
  # The Translation#find_by_stable_id class method fetches a Translation
  # object based on its stable ID (i.e. the "ENSP" accession number). If the
  # stable ID is not found, it returns nil.
  def self.find_by_stable_id(stable_id)
    record = TranslationStableId.find_by_stable_id(stable_id)
    return nil if record.nil?
    record.translation
  end
end
1504
+
1505
+ # = DESCRIPTION
1506
+ # The TranslationStableId class provides an interface to the
1507
+ # translation_stable_id table. This table contains the Ensembl stable IDs
1508
+ # for a given Translation.
1509
+ #
1510
+ # This class uses ActiveRecord to access data in the Ensembl database.
1511
+ # See the general documentation of the Ensembl module for
1512
+ # more information on what this means and what methods are available.
1513
+ #
1514
+ # = USAGE
1515
+ # stable_id = TranslationStableId.find_by_stable_id('ENSBTAP00000015494')
1516
+ # puts stable_id.to_yaml
1517
class TranslationStableId < DBConnection
  # The stable_id column is declared as the primary key of this table.
  set_primary_key "stable_id"

  # The Translation this stable ID was assigned to.
  belongs_to :translation
end
1522
+
1523
+ # = DESCRIPTION
1524
+ # The TranslationAttrib class provides an interface to the
1525
+ # translation_attrib table. This table contains attribute values for the
1526
+ # Translation class.
1527
+ #
1528
+ # This class uses ActiveRecord to access data in the Ensembl database.
1529
+ # See the general documentation of the Ensembl module for
1530
+ # more information on what this means and what methods are available.
1531
+ #
1532
+ # = USAGE
1533
+ # translation = Translation.find(9979)
1534
+ # translation.translation_attribs.each do |attr|
1535
+ # puts attr.attrib_type.name + "\t" + attr.value
1536
+ # end
1537
class TranslationAttrib < DBConnection
  # No primary key column is declared for this table.
  set_primary_key nil

  # Each row ties one AttribType to one Translation.
  belongs_to :attrib_type
  belongs_to :translation
end
1543
+
1544
+ # = DESCRIPTION
1545
+ # The Xref class provides an interface to the
1546
+ # xref table. This table contains external references for objects in the
1547
+ # database.
1548
+ #
1549
+ # This class uses ActiveRecord to access data in the Ensembl database.
1550
+ # See the general documentation of the Ensembl module for
1551
+ # more information on what this means and what methods are available.
1552
+ #
1553
+ # = USAGE
1554
+ # gene = Gene.find(1)
1555
+ # gene.xrefs.each do |xref|
1556
+ # puts xref.display_label + "\t" + xref.description
1557
+ # end
1558
class Xref < DBConnection
  # Rows of the xref table are keyed by xref_id.
  set_primary_key "xref_id"

  # The external database this reference comes from, and its synonyms.
  belongs_to :external_db
  has_many :external_synonyms

  # NOTE(review): no :foreign_key is given, so this relies on the
  # ActiveRecord default (xref_id) on the gene table -- confirm that the
  # schema links genes to xrefs through that column.
  has_many :genes

  # = DESCRIPTION
  # The Xref#to_s method renders the reference as the name of its external
  # database, a colon, and the display label of the xref.
  # ---
  # *Arguments*:: none
  # *Returns*:: String
  def to_s
    database_name = external_db.db_name.to_s
    database_name + ":" + display_label
  end
end
1570
+
1571
+ # = DESCRIPTION
1572
+ # The ObjectXref class provides the link between gene, transcript and
1573
+ # translation objects on the one hand and an xref on the other.
1574
+ #
1575
+ # This class uses ActiveRecord to access data in the Ensembl database.
1576
+ # See the general documentation of the Ensembl module for
1577
+ # more information on what this means and what methods are available.
1578
+ #
1579
+ # = USAGE
1580
+ # gene = Gene.find(1)
1581
+ # gene.object_xrefs.each do |ox|
1582
+ # puts ox.to_yaml
1583
+ # end
1584
class ObjectXref < DBConnection
  # Rows of the object_xref table are keyed by object_xref_id.
  set_primary_key "object_xref_id"

  # The external reference itself, and its GoXref row if there is one.
  belongs_to :xref
  has_one :go_xref

  # The annotated object. All three associations read the same ensembl_id
  # column; each carries a condition on ensembl_object_type naming the
  # kind of object it is meant for.
  belongs_to :gene,
             :class_name => "Gene",
             :foreign_key => 'ensembl_id',
             :conditions => ["ensembl_object_type = 'Gene'"]
  belongs_to :transcript,
             :class_name => "Transcript",
             :foreign_key => 'ensembl_id',
             :conditions => ["ensembl_object_type = 'Transcript'"]
  belongs_to :translation,
             :class_name => "Translation",
             :foreign_key => 'ensembl_id',
             :conditions => ["ensembl_object_type = 'Translation'"]
end
1593
+
1594
+ # = DESCRIPTION
1595
+ # The GoXref class provides an interface to the
1596
+ # go_xref table. This table contains the evidence codes for those object_xrefs
1597
+ # that are GO terms.
1598
+ #
1599
+ # This class uses ActiveRecord to access data in the Ensembl database.
1600
+ # See the general documentation of the Ensembl module for
1601
+ # more information on what this means and what methods are available.
1602
class GoXref < DBConnection
  # No primary key column is declared for this table.
  set_primary_key nil

  # Kept for backward compatibility.
  # NOTE(review): this expects an xref_id column on go_xref -- confirm
  # that the schema provides one.
  belongs_to :xref

  # Inverse of ObjectXref's "has_one :go_xref", so that the annotated
  # object can be reached from its evidence code. That has_one already
  # requires go_xref to carry an object_xref_id column.
  belongs_to :object_xref
end
1607
+
1608
+ # = DESCRIPTION
1609
+ # The ExternalDb class provides an interface to the
1610
+ # external_db table. This table contains references to databases to which
1611
+ # xrefs can point.
1612
+ #
1613
+ # This class uses ActiveRecord to access data in the Ensembl database.
1614
+ # See the general documentation of the Ensembl module for
1615
+ # more information on what this means and what methods are available.
1616
+ #
1617
+ # = USAGE
1618
+ # embl_db = ExternalDb.find_by_db_name('EMBL')
1619
+ # puts embl_db.xrefs.length.to_s
1620
class ExternalDb < DBConnection
  # Rows of the external_db table are keyed by external_db_id.
  set_primary_key 'external_db_id'

  has_many :xrefs

  # Returning nil switches off ActiveRecord's single-table-inheritance
  # column lookup for this class.
  # NOTE(review): presumably needed because external_db has a column named
  # 'type' -- confirm against the schema.
  def self.inheritance_column
    nil
  end

  # = DESCRIPTION
  # The ExternalDb#find_all_by_display_label method returns all external
  # databases that are referenced by an Xref with this display label. There
  # should normally be no more than one. If no databases are found with
  # this label, this method returns an empty array.
  # ---
  # *Arguments*::
  # * label:: display label of the xref(s) to look up (required)
  # *Returns*:: Array of ExternalDb objects, without duplicates or nils
  def self.find_all_by_display_label(label)
    # Inside a class method self.class is Class, which has no finders, so
    # the databases are reached through the Xref#external_db association.
    databases = Xref.find_all_by_display_label(label).collect do |xref|
      xref.external_db
    end

    databases.compact.uniq
  end

  # = DESCRIPTION
  # The ExternalDb#find_by_display_label method returns a
  # database that has this label. If no databases are found with this name,
  # this method returns nil.
  # ---
  # *Arguments*::
  # * label:: display label of the xref(s) to look up (required)
  # *Returns*:: ExternalDb object or nil
  def self.find_by_display_label(label)
    find_all_by_display_label(label).first
  end
end
1660
+
1661
+ # = DESCRIPTION
1662
+ # The ExternalSynonym class provides an interface to the
1663
+ # external_synonym table. This table contains synonyms for Xref objects.
1664
+ #
1665
+ # This class uses ActiveRecord to access data in the Ensembl database.
1666
+ # See the general documentation of the Ensembl module for
1667
+ # more information on what this means and what methods are available.
1668
+ #
1669
+ # Unlike the feature classes, this class does not include the Sliceable
1670
+ # mixin: a synonym is attached to an Xref rather than to a SeqRegion,
1671
+ # so no Slice can be created for objects of this class.
1672
+ #
1673
+ # = USAGE
1674
+ # xref = Xref.find(185185)
1675
+ # puts xref.external_synonyms[0].synonym
1676
class ExternalSynonym < DBConnection
  # No primary key column is declared for this table.
  set_primary_key nil

  # The Xref this synonym belongs to.
  belongs_to :xref
end
1681
+
1682
+ # = DESCRIPTION
1683
+ # The Karyotype class provides an interface to the
1684
+ # karyotype table. This table contains the karyotype bands (see USAGE).
1685
+ #
1686
+ # This class uses ActiveRecord to access data in the Ensembl database.
1687
+ # See the general documentation of the Ensembl module for
1688
+ # more information on what this means and what methods are available.
1689
+ #
1690
+ # This class includes the mixin Sliceable, which means that it is mapped
1691
+ # to a SeqRegion object and a Slice can be created for objects of this
1692
+ # class. See Sliceable and Slice for more information.
1693
+ #
1694
+ # = USAGE
1695
+ # band = Karyotype.find_by_band('p36.32')
1696
+ # puts band.to_yaml
1697
class Karyotype < DBConnection
  include Sliceable

  # Rows of the karyotype table are keyed by karyotype_id.
  set_primary_key "karyotype_id"

  # The SeqRegion this row is located on.
  belongs_to :seq_region
end
1704
+
1705
+ # = DESCRIPTION
1706
+ # The OligoFeature class provides an interface to the
1707
+ # oligo_feature table. This table contains mappings of OligoProbe objects to
1708
+ # a SeqRegion.
1709
+ #
1710
+ # This class uses ActiveRecord to access data in the Ensembl database.
1711
+ # See the general documentation of the Ensembl module for
1712
+ # more information on what this means and what methods are available.
1713
+ #
1714
+ # This class includes the mixin Sliceable, which means that it is mapped
1715
+ # to a SeqRegion object and a Slice can be created for objects of this
1716
+ # class. See Sliceable and Slice for more information.
1717
+ #
1718
+ # = USAGE
1719
+ # seq_region = SeqRegion.find_by_name('4')
1720
+ # puts seq_region.oligo_features.length
1721
class OligoFeature < DBConnection
  include Sliceable

  # Rows of the oligo_feature table are keyed by oligo_feature_id.
  set_primary_key "oligo_feature_id"

  # The probe that was mapped and the analysis linked to the mapping.
  belongs_to :oligo_probe
  belongs_to :analysis

  # The SeqRegion the probe is mapped to.
  belongs_to :seq_region
end
1730
+
1731
+ # = DESCRIPTION
1732
+ # The OligoProbe class provides an interface to the
1733
+ # oligo_probe table.
1734
+ #
1735
+ # This class uses ActiveRecord to access data in the Ensembl database.
1736
+ # See the general documentation of the Ensembl module for
1737
+ # more information on what this means and what methods are available.
1738
+ #
1739
+ # = USAGE
1740
+ # probe = OligoProbe.find_by_name('373:434;')
1741
+ # puts probe.probeset + "\t" + probe.oligo_array.name
1742
class OligoProbe < DBConnection
  # Rows of the oligo_probe table are keyed by oligo_probe_id.
  set_primary_key "oligo_probe_id"

  # The array this probe is part of.
  belongs_to :oligo_array

  # Mappings of this probe (OligoFeature records).
  has_many :oligo_features
end
1748
+
1749
+ # = DESCRIPTION
1750
+ # The OligoArray class provides an interface to the
1751
+ # oligo_array table. This table contains data describing a microarray
1752
+ # slide.
1753
+ #
1754
+ # This class uses ActiveRecord to access data in the Ensembl database.
1755
+ # See the general documentation of the Ensembl module for
1756
+ # more information on what this means and what methods are available.
1757
+ #
1758
+ # = USAGE
1759
+ # array = OligoArray.find_by_name_and_type('Bovine','AFFY')
1760
+ # puts array.oligo_probes.length
1761
class OligoArray < DBConnection
  # Rows of the oligo_array table are keyed by oligo_array_id.
  set_primary_key "oligo_array_id"

  # All probes that belong to this array.
  has_many :oligo_probes
end
1766
+
1767
+ # = DESCRIPTION
1768
+ # The PredictionExon class provides an interface to the
1769
+ # prediction_exon table. This table contains the exons of PredictionTranscript objects.
1770
+ #
1771
+ # This class uses ActiveRecord to access data in the Ensembl database.
1772
+ # See the general documentation of the Ensembl module for
1773
+ # more information on what this means and what methods are available.
1774
+ #
1775
+ # This class includes the mixin Sliceable, which means that it is mapped
1776
+ # to a SeqRegion object and a Slice can be created for objects of this
1777
+ # class. See Sliceable and Slice for more information.
1778
+ #
1779
+ # = USAGE
1780
+ # #TODO
1781
class PredictionExon < DBConnection
  include Sliceable

  # Rows of the prediction_exon table are keyed by prediction_exon_id.
  set_primary_key "prediction_exon_id"

  # The SeqRegion this exon is located on.
  belongs_to :seq_region

  # The PredictionTranscript this exon is part of.
  belongs_to :prediction_transcript
end
1789
+
1790
+ # = DESCRIPTION
1791
+ # The PredictionTranscript class provides an interface to the
1792
+ # prediction_transcript table.
1793
+ #
1794
+ # This class uses ActiveRecord to access data in the Ensembl database.
1795
+ # See the general documentation of the Ensembl module for
1796
+ # more information on what this means and what methods are available.
1797
+ #
1798
+ # This class includes the mixin Sliceable, which means that it is mapped
1799
+ # to a SeqRegion object and a Slice can be created for objects of this
1800
+ # class. See Sliceable and Slice for more information.
1801
+ #
1802
+ # = USAGE
1803
+ # predicted_transcript = PredictionTranscript.find_by_display_label('GENSCAN00000000006')
1804
+ # puts predicted_transcript.prediction_exons.length
1805
class PredictionTranscript < DBConnection
  include Sliceable

  # Rows of the prediction_transcript table are keyed by
  # prediction_transcript_id.
  set_primary_key "prediction_transcript_id"

  # The SeqRegion this transcript is located on and the analysis linked to it.
  belongs_to :seq_region
  belongs_to :analysis

  # The exons that make up this predicted transcript.
  has_many :prediction_exons
end
1814
+
1815
+ # = DESCRIPTION
1816
+ # The ProteinFeature class provides an interface to the
1817
+ # protein_feature table. This table contains mappings of a Translation
1818
+ # onto a SeqRegion.
1819
+ #
1820
+ # This class uses ActiveRecord to access data in the Ensembl database.
1821
+ # See the general documentation of the Ensembl module for
1822
+ # more information on what this means and what methods are available.
1823
+ #
1824
+ # This class includes the mixin Sliceable, which means that it is mapped
1825
+ # to a SeqRegion object and a Slice can be created for objects of this
1826
+ # class. See Sliceable and Slice for more information.
1827
+ #
1828
+ # = USAGE
1829
+ # #TODO
1830
class ProteinFeature < DBConnection
  # NOTE(review): unlike the other Sliceable classes here, no seq_region
  # association is declared -- confirm that Sliceable works for this class.
  include Sliceable

  # Rows of the protein_feature table are keyed by protein_feature_id.
  set_primary_key "protein_feature_id"

  # The analysis linked to this feature and the Translation it belongs to.
  belongs_to :analysis
  belongs_to :translation
end
1838
+
1839
+ # = DESCRIPTION
1840
+ # The ProteinAlignFeature class provides an interface to the
1841
+ # protein_align_feature table. This table contains sequence similarity
1842
+ # mappings against a SeqRegion.
1843
+ #
1844
+ # This class uses ActiveRecord to access data in the Ensembl database.
1845
+ # See the general documentation of the Ensembl module for
1846
+ # more information on what this means and what methods are available.
1847
+ #
1848
+ # This class includes the mixin Sliceable, which means that it is mapped
1849
+ # to a SeqRegion object and a Slice can be created for objects of this
1850
+ # class. See Sliceable and Slice for more information.
1851
+ #
1852
+ # = USAGE
1853
+ # uniprot_scan = Analysis.find_by_logic_name('Uniprot')
1854
+ # uniprot_scan.protein_align_features.each do |hit|
1855
+ # puts hit.seq_region.name + "\t" + hit.hit_name + "\t" + hit.cigar_line
1856
+ # end
1857
class ProteinAlignFeature < DBConnection
  include Sliceable

  # Rows of the protein_align_feature table are keyed by
  # protein_align_feature_id.
  set_primary_key "protein_align_feature_id"

  # The analysis linked to this alignment and the SeqRegion it is located on.
  belongs_to :analysis
  belongs_to :seq_region

  # Supporting-feature rows associated with this alignment.
  has_many :transcript_supporting_features
  has_many :exon_supporting_features
end
1868
+
1869
+ # = DESCRIPTION
1870
+ # The RegulatoryFactor class provides an interface to the
1871
+ # regulatory_factor table.
1872
+ #
1873
+ # This class uses ActiveRecord to access data in the Ensembl database.
1874
+ # See the general documentation of the Ensembl module for
1875
+ # more information on what this means and what methods are available.
1876
+ #
1877
+ # = USAGE
1878
+ # factor = RegulatoryFactor.find_by_name('crtHsap8070')
1879
+ # puts factor.to_yaml
1880
class RegulatoryFactor < DBConnection
  # Rows of the regulatory_factor table are keyed by regulatory_factor_id.
  set_primary_key "regulatory_factor_id"

  # All RegulatoryFeature records that refer to this factor.
  has_many :regulatory_features
end
1885
+
1886
+ # = DESCRIPTION
1887
+ # The RegulatoryFeature class provides an interface to the
1888
+ # regulatory_feature table. This table contains mappings of
1889
+ # RegulatoryFactor objects against a SeqRegion.
1890
+ #
1891
+ # This class uses ActiveRecord to access data in the Ensembl database.
1892
+ # See the general documentation of the Ensembl module for
1893
+ # more information on what this means and what methods are available.
1894
+ #
1895
+ # This class includes the mixin Sliceable, which means that it is mapped
1896
+ # to a SeqRegion object and a Slice can be created for objects of this
1897
+ # class. See Sliceable and Slice for more information.
1898
+ #
1899
+ # = USAGE
1900
+ # analysis = Analysis.find_by_logic_name('miRanda')
1901
+ # analysis.regulatory_features.each do |feature|
1902
+ # puts feature.name + "\t" + feature.regulatory_factor.name
1903
+ # end
1904
class RegulatoryFeature < DBConnection
  include Sliceable

  # Rows of the regulatory_feature table are keyed by regulatory_feature_id.
  set_primary_key "regulatory_feature_id"

  # The factor this feature is a mapping of.
  belongs_to :regulatory_factor

  # The analysis linked to this feature and the SeqRegion it is located on.
  belongs_to :analysis
  belongs_to :seq_region
end
1913
+ end
1914
+ end