ruby-ensembl-api 0.9.6 → 1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. data/TUTORIAL.rdoc +1 -1
  2. data/bin/variation_effect_predictor +106 -0
  3. data/lib/ensembl.rb +2 -2
  4. data/lib/ensembl/core/activerecord.rb +119 -225
  5. data/lib/ensembl/core/collection.rb +14 -10
  6. data/lib/ensembl/core/project.rb +6 -8
  7. data/lib/ensembl/core/slice.rb +87 -123
  8. data/lib/ensembl/core/transcript.rb +49 -65
  9. data/lib/ensembl/core/transform.rb +6 -8
  10. data/lib/ensembl/db_connection.rb +56 -72
  11. data/lib/ensembl/variation/activerecord.rb +138 -8
  12. data/lib/ensembl/variation/variation.rb +284 -46
  13. data/samples/ensembl_genomes_example.rb +60 -0
  14. data/samples/examples_perl_tutorial.rb +125 -0
  15. data/samples/small_example_ruby_api.rb +34 -0
  16. data/samples/variation_example.rb +67 -0
  17. data/test/unit/{release_56 → release_60}/core/test_gene.rb +6 -6
  18. data/test/unit/release_60/core/test_project_human.rb +38 -0
  19. data/test/unit/{release_56 → release_60}/core/test_slice.rb +1 -8
  20. data/test/unit/release_60/core/test_transcript.rb +126 -0
  21. data/test/unit/{release_53 → release_60}/core/test_transform.rb +21 -21
  22. data/test/unit/release_60/variation/test_activerecord.rb +213 -0
  23. data/test/unit/release_60/variation/test_consequence.rb +158 -0
  24. data/test/unit/{release_56 → release_60}/variation/test_variation.rb +18 -17
  25. data/test/unit/test_connection.rb +2 -2
  26. data/test/unit/test_releases.rb +8 -8
  27. metadata +27 -43
  28. data/test/unit/data/seq_c6qbl.fa +0 -10
  29. data/test/unit/data/seq_cso19_coding.fa +0 -16
  30. data/test/unit/data/seq_cso19_transcript.fa +0 -28
  31. data/test/unit/data/seq_drd3_gene.fa +0 -838
  32. data/test/unit/data/seq_drd3_transcript.fa +0 -22
  33. data/test/unit/data/seq_drd4_transcript.fa +0 -24
  34. data/test/unit/data/seq_forward_composite.fa +0 -1669
  35. data/test/unit/data/seq_par_boundary.fa +0 -169
  36. data/test/unit/data/seq_rnd3_transcript.fa +0 -47
  37. data/test/unit/data/seq_ub2r1_coding.fa +0 -13
  38. data/test/unit/data/seq_ub2r1_gene.fa +0 -174
  39. data/test/unit/data/seq_ub2r1_transcript.fa +0 -26
  40. data/test/unit/data/seq_y.fa +0 -2
  41. data/test/unit/ensembl_genomes/test_collection.rb +0 -51
  42. data/test/unit/ensembl_genomes/test_gene.rb +0 -52
  43. data/test/unit/ensembl_genomes/test_slice.rb +0 -71
  44. data/test/unit/ensembl_genomes/test_variation.rb +0 -17
  45. data/test/unit/release_50/core/test_project.rb +0 -215
  46. data/test/unit/release_50/core/test_project_human.rb +0 -58
  47. data/test/unit/release_50/core/test_relationships.rb +0 -66
  48. data/test/unit/release_50/core/test_sequence.rb +0 -175
  49. data/test/unit/release_50/core/test_slice.rb +0 -121
  50. data/test/unit/release_50/core/test_transcript.rb +0 -108
  51. data/test/unit/release_50/core/test_transform.rb +0 -223
  52. data/test/unit/release_50/variation/test_activerecord.rb +0 -143
  53. data/test/unit/release_50/variation/test_variation.rb +0 -84
  54. data/test/unit/release_53/core/test_gene.rb +0 -66
  55. data/test/unit/release_53/core/test_project.rb +0 -96
  56. data/test/unit/release_53/core/test_project_human.rb +0 -65
  57. data/test/unit/release_53/core/test_slice.rb +0 -47
  58. data/test/unit/release_53/variation/test_activerecord.rb +0 -145
  59. data/test/unit/release_53/variation/test_variation.rb +0 -71
  60. data/test/unit/release_56/core/test_project.rb +0 -96
  61. data/test/unit/release_56/core/test_transform.rb +0 -63
  62. data/test/unit/release_56/variation/test_activerecord.rb +0 -142
@@ -0,0 +1,60 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'lib/ensembl'
4
+ include Ensembl::Core
5
+
6
+ DBConnection.ensemblgenomes_connect('bacillus_cereus_ZK',7) # Connect to the Ensembl Genomes MySQL server and fetch the data for 'bacillus_cereus_ZK'
7
+ slice = Slice.fetch_by_region('chromosome',"Chromosome",4791870,4799024) # retrieve a slice for this species
8
+
9
+ puts "\nConnecting to 'bacillus_cereus_ZK' database..."
10
+ # show all the species inside the collection, as 'bacillus_cereus_ZK' genome is stored inside the bacillus_collection database
11
+ if Collection.check
12
+ puts "Is this a collection? #{Collection.check}"
13
+ puts "\nOther species present inside the collection:"
14
+ Collection.species.each do |s|
15
+ puts s
16
+ end
17
+ end
18
+
19
+ puts "\nSequence:"
20
+ # get the sequence
21
+ puts slice.seq
22
+
23
+ puts "\nGenes:"
24
+ # get all the genes overlapping the slice
25
+ genes = slice.genes
26
+ genes.each do |g|
27
+ print "#{g.stable_id} #{g.name}\n"
28
+ end
29
+
30
+ # CHANGE DATABASE
31
+
32
+ puts "\n########################\nConnecting to 'mycobacterium_collection' database..."
33
+ DBConnection.ensemblgenomes_connect('mycobacterium_collection',7) # connect directly to a collection database
34
+ slice = Slice.fetch_by_region('chromosome',"Chromosome",752908,759374,1,"Mycobacterium tuberculosis H37Rv") # manually set the species to fetch the slice from
35
+
36
+ # show all the species inside the collection
37
+ if Collection.check
38
+ puts "\nIs this a collection? #{Collection.check}"
39
+ puts "\nOther species present inside the collection:"
40
+ Collection.species.each do |s|
41
+ puts s
42
+ end
43
+ end
44
+
45
+ puts "\nSequence:"
46
+ # get the sequence
47
+ puts slice.seq
48
+
49
+ puts "\nGenes:"
50
+ # get all the genes overlapping the slice
51
+ genes = slice.genes
52
+ genes.each do |g|
53
+ print "#{g.stable_id} #{g.name}\n"
54
+ end
55
+
56
+
57
+
58
+
59
+
60
+
@@ -0,0 +1,125 @@
1
+ #!/usr/local/bin/ruby
2
+
3
+ require File.dirname(__FILE__) + '/../lib/ensembl.rb'
4
+ require 'yaml'
5
+ require 'progressbar'
6
+
7
+ include Ensembl::Core
8
+
9
+ ## Connecting to the Database
10
+ DBConnection.connect('homo_sapiens',45)
11
+
12
+ ## Object adaptors
13
+ # not necessary, ruby uses class methods instead
14
+
15
+ ## Slices
16
+ puts "== Some slices: =="
17
+ puts Slice.fetch_by_region('chromosome','X').to_yaml
18
+ puts Slice.fetch_by_region('clone','AL359765.6').to_yaml
19
+ puts Slice.fetch_by_region('supercontig','NT_011333').to_yaml
20
+ puts Slice.fetch_by_region('chromosome', '20', 1000000, 2000000).to_yaml
21
+ puts Slice.fetch_by_gene_stable_id('ENSG00000099889', 5000).to_yaml
22
+
23
+ puts "== All chromosomes: =="
24
+ Slice.fetch_all('chromosome', 'NCBI36').each do |chr|
25
+ puts chr.display_name
26
+ end
27
+
28
+ puts "== Number of clone slices: " + Slice.fetch_all('clone').length.to_s
29
+
30
+ puts "== Subslices of chromosome 19 (length = 10000000; overlap = 250): =="
31
+ Slice.fetch_by_region('chromosome','19').split(10000000, 250).each do |sub_slice|
32
+ puts sub_slice.display_name
33
+ end
34
+
35
+ puts "== Sequence of a very small slice: Chr19:112200..112250 =="
36
+ slice = Slice.fetch_by_region('chromosome','19',112200,112250)
37
+ puts slice.seq
38
+
39
+ puts "== Query a slice about itself =="
40
+ puts slice.to_yaml
41
+
42
+ puts "== Get genes for a slice and print exons and introns=="
43
+ slice = Slice.fetch_by_region('chromosome','19',112200,500000)
44
+ slice.genes.each do |gene|
45
+ puts gene.stable_id
46
+ puts 'Exons for first transcript:'
47
+ puts gene.transcripts[0].exons.sort_by{|e| e.start}.collect{|e| e.start.to_s + '->' + e.stop.to_s}.join("\t")
48
+ puts 'Introns for first transcript:'
49
+ puts gene.transcripts[0].introns.sort_by{|i| i.start}.collect{|i| i.start.to_s + '->' + i.stop.to_s}.join("\t")
50
+ puts
51
+ end
52
+
53
+ puts "== Get DNA alignment features for 20:80000..88000 =="
54
+ slice = Slice.fetch_by_region('chromosome','20',80000,88000)
55
+ slice.dna_align_features[0..2].each do |daf|
56
+ puts daf.to_yaml
57
+ end
58
+
59
+ puts "== Get sequence for transcript ENST00000383673 =="
60
+ transcript = Transcript.find_by_stable_id('ENST00000383673')
61
+ puts transcript.seq
62
+
63
+ puts "== Get synonyms for marker D9S1038E =="
64
+ marker = Ensembl::Core::Marker.find_by_name('D9S1038E')
65
+ marker.marker_synonyms[0..5].each do |ms|
66
+ puts ms.to_yaml
67
+ end
68
+
69
+ puts "== Get 5 features for this marker =="
70
+ marker = Ensembl::Core::Marker.find_by_name('D9S1038E')
71
+ marker.marker_features[0..5].each do |mf|
72
+ puts 'name: ' + marker.name
73
+ puts 'seq_region name: ' + mf.seq_region.name
74
+ puts 'start: ' + mf.seq_region_start.to_s
75
+ puts 'stop: ' + mf.seq_region_end.to_s
76
+ end
77
+
78
+ puts "== Get 5 features for chromosome 22 =="
79
+ slice = Ensembl::Core::Slice.fetch_by_region('chromosome', '22')
80
+ slice.marker_features.slice(0,5).each do |mf|
81
+ puts mf.marker.name + "\t" + mf.slice.display_name
82
+ end
83
+
84
+ puts "== Transcript: from cDNA to genomic positions =="
85
+ transcript = Ensembl::Core::Transcript.find(276333)
86
+ puts "Transcript is ENST00000215574"
87
+ puts "Genomic position 488053 is cDNA position: " + transcript.genomic2cdna(488053).to_s
88
+ puts "cDNA position 601 is genomic position: " + transcript.cdna2genomic(601).to_s
89
+ puts "Genomic position 488053 is CDS position: " + transcript.genomic2cds(488053).to_s
90
+ puts "CDS position 401 is genomic position: " + transcript.cds2genomic(401).to_s
91
+
92
+ puts "== Transcript: get pieces of DNA for a transcript =="
93
+ transcript = Ensembl::Core::Transcript.find_by_stable_id('ENST00000380593')
94
+ puts transcript.stable_id
95
+ puts "5'UTR: " + transcript.five_prime_utr_seq
96
+ puts "3'UTR: " + transcript.three_prime_utr_seq
97
+ puts "CDS: " + transcript.cds_seq
98
+ puts "protein: " + transcript.protein_seq
99
+
100
+ #### And now we'll do some stuff with cows.
101
+ DBConnection.connection.disconnect! # use the same DBConnection class the script connected with at the top
102
+ DBConnection.connect('bos_taurus',45) # NOTE(review): release 45 assumed, matching the human connection above, so the scaffold and Gene id used below resolve - confirm
103
+
104
+ puts "== Projecting a slice from component to assembly: =="
105
+ puts "== scaffold Chr4.003.105:42..2007 to chromosome level =="
106
+ source_slice = Slice.fetch_by_region('scaffold','Chr4.003.105', 42, 2007)
107
+ target_slices = source_slice.project('chromosome')
108
+ puts target_slices.collect{|s| s.display_name}.join("\n")
109
+
110
+ puts "== Projecting a slice from assembly to components: =="
111
+ puts "== chromosome slice chr4:329500..380000 to contig level =="
112
+ source_slice = Slice.fetch_by_region('chromosome', '4', 329500, 380000)
113
+ target_slices = source_slice.project('contig')
114
+ puts target_slices.collect{|s| s.display_name}.join("\n")
115
+
116
+ puts "== Transforming a gene from chromosome level to scaffold level =="
117
+ gene = Gene.find(2408)
118
+ cloned_gene = gene.transform('scaffold')
119
+ puts gene.slice.display_name
120
+ puts cloned_gene.slice.display_name
121
+
122
+ puts "== Relationships for Gene class =="
123
+ puts 'belongs to: ' + Gene.reflect_on_all_associations(:belongs_to).collect{|a| a.name.to_s}.join(',')
124
+ puts 'has many: ' + Gene.reflect_on_all_associations(:has_many).collect{|a| a.name.to_s}.join(',')
125
+
@@ -0,0 +1,34 @@
1
+ #!/usr/bin/ruby
2
+ require '../lib/ensembl'
3
+
4
+ include Ensembl::Core
5
+
6
+ DBConnection.connect('homo_sapiens')
7
+
8
+ puts "== Get a slice =="
9
+ slice = Slice.fetch_by_region('chromosome','4',10000,99999,-1)
10
+ puts slice.display_name
11
+
12
+ puts "== Print all gene for that slice (regardless of what coord_system genes are annotated on) =="
13
+ slice.genes.each do |gene|
14
+ puts gene.stable_id + "\t" + gene.status + "\t" + gene.slice.display_name
15
+ end
16
+
17
+ puts "== Get a transcript and print its 5'UTR, CDS and protein sequence =="
18
+ transcript = Transcript.find_by_stable_id('ENST00000380593')
19
+ puts "5'UTR: " + transcript.five_prime_utr_seq
20
+ puts "CDS: " + transcript.cds_seq
21
+ puts "peptide: " + transcript.protein_seq
22
+
23
+ DBConnection.connection.disconnect!
24
+ DBConnection.connect('bos_taurus',45)
25
+
26
+ puts "== Transforming a cow gene from chromosome level to scaffold level =="
27
+ gene = Gene.find(2408)
28
+ cloned_gene = gene.transform('scaffold')
29
+ puts "Original: " + gene.slice.display_name
30
+ puts "Now: " + cloned_gene.slice.display_name
31
+
32
+ puts "== What things are related to a 'gene' object? =="
33
+ puts 'Genes belong to: ' + Gene.reflect_on_all_associations(:belongs_to).collect{|a| a.name.to_s}.join(',')
34
+ puts 'Genes have many: ' + Gene.reflect_on_all_associations(:has_many).collect{|a| a.name.to_s}.join(',')
@@ -0,0 +1,67 @@
1
+ #!/usr/bin/ruby
2
+
3
+ # Based on Perl API tutorial
4
+ # http://www.ensembl.org/info/using/api/variation/variation_tutorial.html
5
+
6
+
7
+ require File.dirname(__FILE__) + '/../lib/ensembl.rb'
8
+ include Ensembl::Core
9
+ include Ensembl::Variation
10
+
11
+ Ensembl::Variation::DBConnection.connect('homo_sapiens',60)
12
+ # The connection with the Core database can be omitted. It is created automatically
13
+ # when needed, using Variation DB connection parameters. The database name is derived
14
+ # from Variation DB name. If you are using non conventional DB names (i.e. for a local copy of Ensembl database)
15
+ # an exception will be raised. Otherwise, if a Core DB connection is already
16
+ # present, that connection will be used by default, instead of creating a new one.
17
+
18
+ id = ['rs73514758','rs77811736']
19
+
20
+ id.each do |i|
21
+ v = Variation.find_by_name(i)
22
+ v.variation_features.each do |vf|
23
+
24
+ up_seq,down_seq = vf.flanking_seq # retrieve upstream and downstream flanking sequences
25
+
26
+ seq_region_name = vf.fetch_region.seq_region.name # fetch the genomic region of the Variation and get the region name.
27
+ # Automatically sets the connection with Core DB, if needed.
28
+
29
+ puts "\n== VARIATION FEATURE =="
30
+ print "NAME: #{vf.variation_name}\n ALLELE: #{vf.allele_string}\n UPSTREAM SEQ: #{up_seq.seq} \n DOWNSTREAM SEQ: #{down_seq.seq}\n SEQ REGION NAME :#{seq_region_name}\n"
31
+ vf.transcript_variations.each do |tv|
32
+ t = tv.transcript # retrieve Ensembl::Core::Transcript from Core DB. Automatically sets the connection, if necessary.
33
+ puts "== TRANSCRIPT VARIATION =="
34
+ print "ALLELE PEPTIDE STRING: #{tv.peptide_allele_string} TRANSCRIPT STABLE ID: #{t.stable_id} GENE STABLE ID: #{t.gene.stable_id}\n"
35
+ end
36
+ end
37
+ end
38
+
39
+ # Returns all Variations present on a genomic region
40
+
41
+ puts "\n== SEARCHING FOR VARIATIONS ON CHR:1:50000:51000 =="
42
+
43
+ # Even in this case, Variation DB connection can be set automatically by specific Slice methods
44
+
45
+ s = Slice.fetch_by_region('chromosome',1,50000,51000)
46
+ variation_features = s.get_variation_features # automatically sets the connection with Variation DB, if needed.
47
+ variation_features.each do |vf|
48
+ print "NAME: #{vf.variation_name} ALLELE: #{vf.allele_string} SEQ REGION NAME: #{vf.seq_region.name}\n"
49
+ end
50
+
51
+ puts "\n== GENOTYPED VARIATIONS =="
52
+
53
+ genotyped_variation_features = s.get_genotyped_variation_features # automatically sets the connection with Variation DB, if needed.
54
+ genotyped_variation_features.each do |gvf|
55
+ print "NAME: #{gvf.variation_name} ALLELE: #{gvf.allele_string} SEQ REGION NAME: #{gvf.seq_region.name}\n"
56
+ end
57
+
58
+ puts "\n== STRUCTURAL VARIATIONS ON CHR:11:60125:320837 =="
59
+ slice = Ensembl::Core::Slice.fetch_by_region('chromosome',11,60125,320837)
60
+ structural_variations = s.get_structural_variations
61
+ structural_variations.each do |sv|
62
+ print "NAME: #{sv.variation_name} SEQ REGION NAME: #{sv.seq_region.name}\n"
63
+ end
64
+
65
+ puts "\n== What things are related to a 'variation' object? =="
66
+ puts 'Variation belong to: ' + Variation.reflect_on_all_associations(:belongs_to).collect{|a| a.name.to_s}.join(',')
67
+ puts 'Variation have many: ' + Variation.reflect_on_all_associations(:has_many).collect{|a| a.name.to_s}.join(',')
@@ -1,5 +1,5 @@
1
1
  #
2
- # = test/unit/release_56/core/test_gene.rb - Unit test for Ensembl::Core
2
+ # = test/unit/release_60/core/test_gene.rb - Unit test for Ensembl::Core
3
3
  #
4
4
  # Copyright:: Copyright (C) 2009 Francesco Strozzi <francesco.strozzi@gmail.com>
5
5
  #
@@ -19,7 +19,7 @@ include Ensembl::Core
19
19
  class TestGene < Test::Unit::TestCase
20
20
 
21
21
  def setup
22
- DBConnection.connect('homo_sapiens', 56)
22
+ DBConnection.connect('homo_sapiens', 60)
23
23
  end
24
24
 
25
25
  def teardown
@@ -31,10 +31,10 @@ class TestGene < Test::Unit::TestCase
31
31
  assert_equal("ENSG00000006451",g.stable_id)
32
32
  assert_equal("7",g.seq_region.name)
33
33
  assert_equal(39663082,g.start)
34
- assert_equal(39747723 ,g.stop)
34
+ assert_equal(39747723,g.stop)
35
35
  assert_equal(1,g.strand)
36
36
  assert_equal(84642,g.seq.length)
37
- assert_equal("Ras-related protein Ral-A Precursor [Source:UniProtKB/Swiss-Prot;Acc:P11233]",g.description)
37
+ assert_equal("v-ral simian leukemia viral oncogene homolog A (ras related) [Source:HGNC Symbol;Acc:9839]",g.description)
38
38
  assert_equal("RALA",g.name)
39
39
  end
40
40
 
@@ -42,9 +42,9 @@ class TestGene < Test::Unit::TestCase
42
42
  g = Gene.find_by_stable_id("ENSG00000006451")
43
43
  t = g.transcripts
44
44
  assert_equal(5,t.size)
45
- assert_equal("ENST00000434466",t[0].stable_id)
45
+ assert_equal("ENST00000005257",t[0].stable_id)
46
46
  t = t[0]
47
- assert_equal(768,t.seq.length)
47
+ assert_equal(2880,t.seq.length)
48
48
  end
49
49
 
50
50
  def test_exons
@@ -0,0 +1,38 @@
1
+ #
2
+ # = test/unit/release_60/core/test_project_human.rb - Unit test for Ensembl::Core
3
+ #
4
+ # Copyright:: Copyright (C) 2009
5
+ # Jan Aerts <http://jandot.myopenid.com>
6
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
7
+ # License:: Ruby's
8
+ #
9
+ # $Id:
10
+ require 'pathname'
11
+ libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'lib')).cleanpath.to_s
12
+ $:.unshift(libpath) unless $:.include?(libpath)
13
+ require 'test/unit'
14
+ require 'lib/ensembl'
15
+
16
+ include Ensembl::Core
17
+
18
+ class AssemblyExceptions < Test::Unit::TestCase
19
+
20
+ def setup
21
+ DBConnection.connect('homo_sapiens', 60)
22
+ end
23
+
24
+ def teardown
25
+ DBConnection.remove_connection
26
+ end
27
+
28
+ def test_chr_x
29
+ source_slice = Slice.fetch_by_region('chromosome','X', 2709497, 2709520)
30
+ assert_equal('ctgaagaattgtgtttcttcccta', source_slice.seq)
31
+ end
32
+
33
+ def test_slice_overlapping_PAR_and_allosome
34
+ source_slice = Slice.fetch_by_region('chromosome','Y',2709500,2709540)
35
+ assert_equal('AGAAACTGAAAATGCTAAGAAATTCAGTTCCAGGATATGAA', source_slice.seq.upcase)
36
+ end
37
+
38
+ end
@@ -19,7 +19,7 @@ include Ensembl::Core
19
19
  class TestSlice < Test::Unit::TestCase
20
20
 
21
21
  def setup
22
- DBConnection.connect('homo_sapiens', 56)
22
+ DBConnection.connect('homo_sapiens', 60)
23
23
  end
24
24
 
25
25
  def teardown
@@ -44,11 +44,4 @@ class TestSlice < Test::Unit::TestCase
44
44
  assert_equal(31786816,s_rev.stop)
45
45
  end
46
46
 
47
- def test_slice_genes
48
- slice = Slice.fetch_by_region("chromosome","13",31773073,31909413,1)
49
- genes = slice.genes
50
- assert_equal("ENSG00000229427",genes[1].stable_id)
51
- assert_equal("ENSG00000187676",genes[0].stable_id)
52
- end
53
-
54
47
  end
@@ -0,0 +1,126 @@
1
+ #
2
+ # = test/unit/release_60/core/test_transcript.rb - Unit test for Ensembl::Core
3
+ #
4
+ # Copyright:: Copyright (C) 2007
5
+ # Jan Aerts <http://jandot.myopenid.com>
6
+ # License:: Ruby's
7
+ #
8
+ # $Id:
9
+ require 'pathname'
10
+ libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'lib')).cleanpath.to_s
11
+ $:.unshift(libpath) unless $:.include?(libpath)
12
+
13
+ require 'test/unit'
14
+
15
+ require 'lib/ensembl'
16
+
17
+ include Ensembl::Core
18
+
19
+ DBConnection.connect('homo_sapiens', 60)
20
+
21
+ class CodingPositions < Test::Unit::TestCase
22
+ def setup
23
+ # Transcript tr_fw is ENST00000215574
24
+ @tr_fw = Transcript.find_by_stable_id("ENST00000215574")
25
+ # Transcript tr_rev is ENST00000358041
26
+ @tr_rev = Transcript.find_by_stable_id("ENST00000358041")
27
+ end
28
+
29
+ def test_transcript_coords
30
+ assert_equal(531733, @tr_fw.seq_region_start)
31
+ assert_equal(542084, @tr_fw.seq_region_end)
32
+ assert_equal(644371, @tr_rev.seq_region_start)
33
+ assert_equal(647309, @tr_rev.seq_region_end)
34
+ end
35
+
36
+ def test_coding_regions_genomic_coords_of_fw
37
+ assert_equal(531932, @tr_fw.coding_region_genomic_start)
38
+ assert_equal(541552, @tr_fw.coding_region_genomic_end)
39
+ end
40
+
41
+ def test_coding_regions_genomic_coords_of_rev
42
+ assert_equal(646652, @tr_rev.coding_region_genomic_start)
43
+ assert_equal(647047, @tr_rev.coding_region_genomic_end)
44
+ end
45
+
46
+ def test_coding_regions_cdna_coords_of_fw
47
+ assert_equal(200, @tr_fw.coding_region_cdna_start)
48
+ assert_equal(910, @tr_fw.coding_region_cdna_end)
49
+ end
50
+
51
+ def test_coding_regions_cdna_coords_of_rev
52
+ assert_equal(263, @tr_rev.coding_region_cdna_start)
53
+ assert_equal(658, @tr_rev.coding_region_cdna_end)
54
+ end
55
+
56
+ end
57
+
58
+ class GenomicVsCDna < Test::Unit::TestCase
59
+ #From BioMart. Columns:
60
+ # Ensembl_Transcript_ID
61
+ # Chromosome
62
+ # Strand
63
+ # Ensembl_Exon_ID
64
+ # Exon_Chr_Start
65
+ # Exon_Chr_End
66
+ # Exon_Rank_in_Transcript
67
+ #ENST00000215574 19 1 ENSE00000655676 531733 532108 1
68
+ #ENST00000215574 19 1 ENSE00000655677 535837 535923 2
69
+ #ENST00000215574 19 1 ENSE00000655678 536243 536340 3
70
+ #ENST00000215574 19 1 ENSE00000655679 537013 537147 4
71
+ #ENST00000215574 19 1 ENSE00000655680 541339 542084 5
72
+ #
73
+ #ENST00000315489 19 -1 ENSE00001215510 474621 474983 1
74
+ #ENST00000315489 19 -1 ENSE00001215495 472394 472501 2
75
+ #ENST00000315489 19 -1 ENSE00001215487 467649 467762 3
76
+ #ENST00000315489 19 -1 ENSE00001215506 463344 464364 4
77
+ def setup
78
+ # Transcript tr_fw is ENST00000215574
79
+ @tr_fw = Transcript.find_by_stable_id("ENST00000215574")
80
+ # Transcript tr_rev is ENST00000315489
81
+ @tr_rev = Transcript.find_by_stable_id("ENST00000315489")
82
+ end
83
+
84
+ def test_identify_exon
85
+ assert_equal("ENSE00000655679", @tr_fw.exon_for_cdna_position(601).stable_id)
86
+ assert_equal("ENSE00000655679", @tr_fw.exon_for_genomic_position(537052).stable_id)
87
+ assert_equal("ENSE00001215487", @tr_rev.exon_for_cdna_position(541).stable_id)
88
+ assert_equal("ENSE00001215487", @tr_rev.exon_for_genomic_position(467693).stable_id)
89
+ end
90
+
91
+ def test_cdna2genomic
92
+ assert_equal(537052, @tr_fw.cdna2genomic(601))
93
+ assert_equal(467693, @tr_rev.cdna2genomic(541))
94
+ end
95
+
96
+ def test_cds2genomic
97
+ assert_equal(537052, @tr_fw.cds2genomic(401))
98
+ assert_equal(467693, @tr_rev.cds2genomic(304))
99
+ end
100
+
101
+ def test_genomic2cdna
102
+ assert_equal(601, @tr_fw.genomic2cdna(537052))
103
+ assert_equal(541, @tr_rev.genomic2cdna(467693))
104
+ end
105
+
106
+ def test_genomic2cds
107
+ assert_equal(401, @tr_fw.genomic2cds(537052))
108
+ assert_equal(304, @tr_rev.genomic2cds(467693))
109
+ end
110
+ end
111
+
112
+ class TestIntron < Test::Unit::TestCase
113
+ def setup
114
+ @transcript = Transcript.find_by_stable_id("ENST00000215574")
115
+ @introns = @transcript.introns
116
+ end
117
+
118
+ def test_get_introns
119
+ assert_equal(4, @introns.length)
120
+ end
121
+
122
+ def test_intron_slices
123
+ assert_equal('chromosome:GRCh37:19:532109:535836:1', @introns[0].slice.to_s)
124
+ end
125
+ end
126
+