ruby-ensembl-api 0.9.6 → 1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (62) hide show
  1. data/TUTORIAL.rdoc +1 -1
  2. data/bin/variation_effect_predictor +106 -0
  3. data/lib/ensembl.rb +2 -2
  4. data/lib/ensembl/core/activerecord.rb +119 -225
  5. data/lib/ensembl/core/collection.rb +14 -10
  6. data/lib/ensembl/core/project.rb +6 -8
  7. data/lib/ensembl/core/slice.rb +87 -123
  8. data/lib/ensembl/core/transcript.rb +49 -65
  9. data/lib/ensembl/core/transform.rb +6 -8
  10. data/lib/ensembl/db_connection.rb +56 -72
  11. data/lib/ensembl/variation/activerecord.rb +138 -8
  12. data/lib/ensembl/variation/variation.rb +284 -46
  13. data/samples/ensembl_genomes_example.rb +60 -0
  14. data/samples/examples_perl_tutorial.rb +125 -0
  15. data/samples/small_example_ruby_api.rb +34 -0
  16. data/samples/variation_example.rb +67 -0
  17. data/test/unit/{release_56 → release_60}/core/test_gene.rb +6 -6
  18. data/test/unit/release_60/core/test_project_human.rb +38 -0
  19. data/test/unit/{release_56 → release_60}/core/test_slice.rb +1 -8
  20. data/test/unit/release_60/core/test_transcript.rb +126 -0
  21. data/test/unit/{release_53 → release_60}/core/test_transform.rb +21 -21
  22. data/test/unit/release_60/variation/test_activerecord.rb +213 -0
  23. data/test/unit/release_60/variation/test_consequence.rb +158 -0
  24. data/test/unit/{release_56 → release_60}/variation/test_variation.rb +18 -17
  25. data/test/unit/test_connection.rb +2 -2
  26. data/test/unit/test_releases.rb +8 -8
  27. metadata +27 -43
  28. data/test/unit/data/seq_c6qbl.fa +0 -10
  29. data/test/unit/data/seq_cso19_coding.fa +0 -16
  30. data/test/unit/data/seq_cso19_transcript.fa +0 -28
  31. data/test/unit/data/seq_drd3_gene.fa +0 -838
  32. data/test/unit/data/seq_drd3_transcript.fa +0 -22
  33. data/test/unit/data/seq_drd4_transcript.fa +0 -24
  34. data/test/unit/data/seq_forward_composite.fa +0 -1669
  35. data/test/unit/data/seq_par_boundary.fa +0 -169
  36. data/test/unit/data/seq_rnd3_transcript.fa +0 -47
  37. data/test/unit/data/seq_ub2r1_coding.fa +0 -13
  38. data/test/unit/data/seq_ub2r1_gene.fa +0 -174
  39. data/test/unit/data/seq_ub2r1_transcript.fa +0 -26
  40. data/test/unit/data/seq_y.fa +0 -2
  41. data/test/unit/ensembl_genomes/test_collection.rb +0 -51
  42. data/test/unit/ensembl_genomes/test_gene.rb +0 -52
  43. data/test/unit/ensembl_genomes/test_slice.rb +0 -71
  44. data/test/unit/ensembl_genomes/test_variation.rb +0 -17
  45. data/test/unit/release_50/core/test_project.rb +0 -215
  46. data/test/unit/release_50/core/test_project_human.rb +0 -58
  47. data/test/unit/release_50/core/test_relationships.rb +0 -66
  48. data/test/unit/release_50/core/test_sequence.rb +0 -175
  49. data/test/unit/release_50/core/test_slice.rb +0 -121
  50. data/test/unit/release_50/core/test_transcript.rb +0 -108
  51. data/test/unit/release_50/core/test_transform.rb +0 -223
  52. data/test/unit/release_50/variation/test_activerecord.rb +0 -143
  53. data/test/unit/release_50/variation/test_variation.rb +0 -84
  54. data/test/unit/release_53/core/test_gene.rb +0 -66
  55. data/test/unit/release_53/core/test_project.rb +0 -96
  56. data/test/unit/release_53/core/test_project_human.rb +0 -65
  57. data/test/unit/release_53/core/test_slice.rb +0 -47
  58. data/test/unit/release_53/variation/test_activerecord.rb +0 -145
  59. data/test/unit/release_53/variation/test_variation.rb +0 -71
  60. data/test/unit/release_56/core/test_project.rb +0 -96
  61. data/test/unit/release_56/core/test_transform.rb +0 -63
  62. data/test/unit/release_56/variation/test_activerecord.rb +0 -142
@@ -0,0 +1,60 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'lib/ensembl'
4
+ include Ensembl::Core
5
+
6
+ DBConnection.ensemblgenomes_connect('bacillus_cereus_ZK',7) # Connect to the Ensembl Genomes MySQL server and fetch the data for 'bacillus_cereus_ZK'
7
+ slice = Slice.fetch_by_region('chromosome',"Chromosome",4791870,4799024) # retrieve a slice for this species
8
+
9
+ puts "\nConnecting to 'bacillus_cereus_ZK' database..."
10
+ # show all the species inside the collection, as 'bacillus_cereus_ZK' genome is stored inside the bacillus_collection database
11
+ if Collection.check
12
+ puts "Is this a collection? #{Collection.check}"
13
+ puts "\nOther species present inside the collection:"
14
+ Collection.species.each do |s|
15
+ puts s
16
+ end
17
+ end
18
+
19
+ puts "\nSequence:"
20
+ # get the sequence
21
+ puts slice.seq
22
+
23
+ puts "\nGenes:"
24
+ # get all the genes overlapping the slice
25
+ genes = slice.genes
26
+ genes.each do |g|
27
+ print "#{g.stable_id} #{g.name}\n"
28
+ end
29
+
30
+ # CHANGE DATABASE
31
+
32
+ puts "\n########################\nConnecting to 'mycobacterium_collection' database..."
33
+ DBConnection.ensemblgenomes_connect('mycobacterium_collection',7) # connect directly to a collection database
34
+ slice = Slice.fetch_by_region('chromosome',"Chromosome",752908,759374,1,"Mycobacterium tuberculosis H37Rv") # manually set the species to fetch the slice from
35
+
36
+ # show all the species inside the collection
37
+ if Collection.check
38
+ puts "\nIs this a collection? #{Collection.check}"
39
+ puts "\nOther species present inside the collection:"
40
+ Collection.species.each do |s|
41
+ puts s
42
+ end
43
+ end
44
+
45
+ puts "\nSequence:"
46
+ # get the sequence
47
+ puts slice.seq
48
+
49
+ puts "\nGenes:"
50
+ # get all the genes overlapping the slice
51
+ genes = slice.genes
52
+ genes.each do |g|
53
+ print "#{g.stable_id} #{g.name}\n"
54
+ end
55
+
56
+
57
+
58
+
59
+
60
+
@@ -0,0 +1,125 @@
1
+ #!/usr/local/bin/ruby
2
+
3
+ require File.dirname(__FILE__) + '/../lib/ensembl.rb'
4
+ require 'yaml'
5
+ require 'progressbar'
6
+
7
+ include Ensembl::Core
8
+
9
+ ## Connecting to the Database
10
+ DBConnection.connect('homo_sapiens',45)
11
+
12
+ ## Object adaptors
13
+ # not necessary, ruby uses class methods instead
14
+
15
+ ## Slices
16
+ puts "== Some slices: =="
17
+ puts Slice.fetch_by_region('chromosome','X').to_yaml
18
+ puts Slice.fetch_by_region('clone','AL359765.6').to_yaml
19
+ puts Slice.fetch_by_region('supercontig','NT_011333').to_yaml
20
+ puts Slice.fetch_by_region('chromosome', '20', 1000000, 2000000).to_yaml
21
+ puts Slice.fetch_by_gene_stable_id('ENSG00000099889', 5000).to_yaml
22
+
23
+ puts "== All chromosomes: =="
24
+ Slice.fetch_all('chromosome', 'NCBI36').each do |chr|
25
+ puts chr.display_name
26
+ end
27
+
28
+ puts "== Number of clone slices: " + Slice.fetch_all('clone').length.to_s
29
+
30
+ puts "== Subslices of chromosome 19 (length = 10000000; overlap = 250): =="
31
+ Slice.fetch_by_region('chromosome','19').split(10000000, 250).each do |sub_slice|
32
+ puts sub_slice.display_name
33
+ end
34
+
35
+ puts "== Sequence of a very small slice: Chr19:112200..112250 =="
36
+ slice = Slice.fetch_by_region('chromosome','19',112200,112250)
37
+ puts slice.seq
38
+
39
+ puts "== Query a slice about itself =="
40
+ puts slice.to_yaml
41
+
42
+ puts "== Get genes for a slice and print exons and introns=="
43
+ slice = Slice.fetch_by_region('chromosome','19',112200,500000)
44
+ slice.genes.each do |gene|
45
+ puts gene.stable_id
46
+ puts 'Exons for first transcript:'
47
+ puts gene.transcripts[0].exons.sort_by{|e| e.start}.collect{|e| e.start.to_s + '->' + e.stop.to_s}.join("\t")
48
+ puts 'Introns for first transcript:'
49
+ puts gene.transcripts[0].introns.sort_by{|i| i.start}.collect{|i| i.start.to_s + '->' + i.stop.to_s}.join("\t")
50
+ puts
51
+ end
52
+
53
+ puts "== Get DNA alignment features for 20:80000..88000 =="
54
+ slice = Slice.fetch_by_region('chromosome','20',80000,88000)
55
+ slice.dna_align_features[0..2].each do |daf|
56
+ puts daf.to_yaml
57
+ end
58
+
59
+ puts "== Get sequence for transcript ENST00000383673 =="
60
+ transcript = Transcript.find_by_stable_id('ENST00000383673')
61
+ puts transcript.seq
62
+
63
+ puts "== Get synonyms for marker D9S1038E =="
64
+ marker = Ensembl::Core::Marker.find_by_name('D9S1038E')
65
+ marker.marker_synonyms[0..5].each do |ms|
66
+ puts ms.to_yaml
67
+ end
68
+
69
+ puts "== Get 5 features for this marker =="
70
+ marker = Ensembl::Core::Marker.find_by_name('D9S1038E')
71
+ marker.marker_features[0..5].each do |mf|
72
+ puts 'name: ' + marker.name
73
+ puts 'seq_region name: ' + mf.seq_region.name
74
+ puts 'start: ' + mf.seq_region_start.to_s
75
+ puts 'stop: ' + mf.seq_region_end.to_s
76
+ end
77
+
78
+ puts "== Get 5 features for chromosome 22 =="
79
+ slice = Ensembl::Core::Slice.fetch_by_region('chromosome', '22')
80
+ slice.marker_features.slice(0,5).each do |mf|
81
+ puts mf.marker.name + "\t" + mf.slice.display_name
82
+ end
83
+
84
+ puts "== Transcript: from cDNA to genomic positions =="
85
+ transcript = Ensembl::Core::Transcript.find(276333)
86
+ puts "Transcript is ENST00000215574"
87
+ puts "Genomic position 488053 is cDNA position: " + transcript.genomic2cdna(488053).to_s
88
+ puts "cDNA position 601 is genomic position: " + transcript.cdna2genomic(601).to_s
89
+ puts "Genomic position 488053 is CDS position: " + transcript.genomic2cds(488053).to_s
90
+ puts "CDS position 401 is genomic position: " + transcript.cds2genomic(401).to_s
91
+
92
+ puts "== Transcript: get pieces of DNA for a transcript =="
93
+ transcript = Ensembl::Core::Transcript.find_by_stable_id('ENST00000380593')
94
+ puts transcript.stable_id
95
+ puts "5'UTR: " + transcript.five_prime_utr_seq
96
+ puts "3'UTR: " + transcript.three_prime_utr_seq
97
+ puts "CDS: " + transcript.cds_seq
98
+ puts "protein: " + transcript.protein_seq
99
+
100
+ #### And now we'll do some stuff with cows.
101
+ CoreDBConnection.connection.disconnect!
102
+ CoreDBConnection.connect('bos_taurus')
103
+
104
+ puts "== Projecting a slice from component to assembly: =="
105
+ puts "== scaffold Chr4.003.105:42..2007 to chromosome level =="
106
+ source_slice = Slice.fetch_by_region('scaffold','Chr4.003.105', 42, 2007)
107
+ target_slices = source_slice.project('chromosome')
108
+ puts target_slices.collect{|s| s.display_name}.join("\n")
109
+
110
+ puts "== Projecting a slice from assembly to components: =="
111
+ puts "== chromosome slice chr4:329500..380000 to contig level =="
112
+ source_slice = Slice.fetch_by_region('chromosome', '4', 329500, 380000)
113
+ target_slices = source_slice.project('contig')
114
+ puts target_slices.collect{|s| s.display_name}.join("\n")
115
+
116
+ puts "== Transforming a gene from chromosome level to scaffold level =="
117
+ gene = Gene.find(2408)
118
+ cloned_gene = gene.transform('scaffold')
119
+ puts gene.slice.display_name
120
+ puts cloned_gene.slice.display_name
121
+
122
+ puts "== Relationships for Gene class =="
123
+ puts 'belongs to: ' + Gene.reflect_on_all_associations(:belongs_to).collect{|a| a.name.to_s}.join(',')
124
+ puts 'has many: ' + Gene.reflect_on_all_associations(:has_many).collect{|a| a.name.to_s}.join(',')
125
+
@@ -0,0 +1,34 @@
1
+ #!/usr/bin/ruby
2
+ require '../lib/ensembl'
3
+
4
+ include Ensembl::Core
5
+
6
+ DBConnection.connect('homo_sapiens')
7
+
8
+ puts "== Get a slice =="
9
+ slice = Slice.fetch_by_region('chromosome','4',10000,99999,-1)
10
+ puts slice.display_name
11
+
12
+ puts "== Print all gene for that slice (regardless of what coord_system genes are annotated on) =="
13
+ slice.genes.each do |gene|
14
+ puts gene.stable_id + "\t" + gene.status + "\t" + gene.slice.display_name
15
+ end
16
+
17
+ puts "== Get a transcript and print its 5'UTR, CDS and protein sequence =="
18
+ transcript = Transcript.find_by_stable_id('ENST00000380593')
19
+ puts "5'UTR: " + transcript.five_prime_utr_seq
20
+ puts "CDS: " + transcript.cds_seq
21
+ puts "peptide: " + transcript.protein_seq
22
+
23
+ DBConnection.connection.disconnect!
24
+ DBConnection.connect('bos_taurus',45)
25
+
26
+ puts "== Transforming a cow gene from chromosome level to scaffold level =="
27
+ gene = Gene.find(2408)
28
+ cloned_gene = gene.transform('scaffold')
29
+ puts "Original: " + gene.slice.display_name
30
+ puts "Now: " + cloned_gene.slice.display_name
31
+
32
+ puts "== What things are related to a 'gene' object? =="
33
+ puts 'Genes belong to: ' + Gene.reflect_on_all_associations(:belongs_to).collect{|a| a.name.to_s}.join(',')
34
+ puts 'Genes have many: ' + Gene.reflect_on_all_associations(:has_many).collect{|a| a.name.to_s}.join(',')
@@ -0,0 +1,67 @@
1
+ #!/usr/bin/ruby
2
+
3
+ # Based on Perl API tutorial
4
+ # http://www.ensembl.org/info/using/api/variation/variation_tutorial.html
5
+
6
+
7
+ require File.dirname(__FILE__) + '/../lib/ensembl.rb'
8
+ include Ensembl::Core
9
+ include Ensembl::Variation
10
+
11
+ Ensembl::Variation::DBConnection.connect('homo_sapiens',60)
12
+ # The connection with the Core database can be omitted. It is created automatically
13
+ # when needed, using Variation DB connection parameters. The database name is derived
14
+ # from Variation DB name. If you are using non-conventional DB names (e.g. for a local copy of an Ensembl database)
15
+ # an exception will be raised. Otherwise, if a Core DB connection is already
16
+ # present, that connection will be used by default, instead of creating a new one.
17
+
18
+ id = ['rs73514758','rs77811736']
19
+
20
+ id.each do |i|
21
+ v = Variation.find_by_name(i)
22
+ v.variation_features.each do |vf|
23
+
24
+ up_seq,down_seq = vf.flanking_seq # retrieve upstream and downstream flanking sequences
25
+
26
+ seq_region_name = vf.fetch_region.seq_region.name # fetch the genomic region of the Variation and get the region name.
27
+ # Automatically sets the connection with Core DB, if needed.
28
+
29
+ puts "\n== VARIATION FEATURE =="
30
+ print "NAME: #{vf.variation_name}\n ALLELE: #{vf.allele_string}\n UPSTREAM SEQ: #{up_seq.seq} \n DOWNSTREAM SEQ: #{down_seq.seq}\n SEQ REGION NAME :#{seq_region_name}\n"
31
+ vf.transcript_variations.each do |tv|
32
+ t = tv.transcript # retrieve Ensembl::Core::Transcript from Core DB. Automatically sets the connection, if necessary.
33
+ puts "== TRANSCRIPT VARIATION =="
34
+ print "ALLELE PEPTIDE STRING: #{tv.peptide_allele_string} TRANSCRIPT STABLE ID: #{t.stable_id} GENE STABLE ID: #{t.gene.stable_id}\n"
35
+ end
36
+ end
37
+ end
38
+
39
+ # Returns all Variations present on a genomic region
40
+
41
+ puts "\n== SEARCHING FOR VARIATIONS ON CHR:1:50000:51000 =="
42
+
43
+ # Even in this case, Variation DB connection can be set automatically by specific Slice methods
44
+
45
+ s = Slice.fetch_by_region('chromosome',1,50000,51000)
46
+ variation_features = s.get_variation_features # automatically sets the connection with Variation DB, if needed.
47
+ variation_features.each do |vf|
48
+ print "NAME: #{vf.variation_name} ALLELE: #{vf.allele_string} SEQ REGION NAME: #{vf.seq_region.name}\n"
49
+ end
50
+
51
+ puts "\n== GENOTYPED VARIATIONS =="
52
+
53
+ genotyped_variation_features = s.get_genotyped_variation_features # automatically sets the connection with Variation DB, if needed.
54
+ genotyped_variation_features.each do |gvf|
55
+ print "NAME: #{gvf.variation_name} ALLELE: #{gvf.allele_string} SEQ REGION NAME: #{gvf.seq_region.name}\n"
56
+ end
57
+
58
+ puts "\n== STRUCTURAL VARIATIONS ON CHR:11:60125:320837 =="
59
+ slice = Ensembl::Core::Slice.fetch_by_region('chromosome',11,60125,320837)
60
+ structural_variations = s.get_structural_variations
61
+ structural_variations.each do |sv|
62
+ print "NAME: #{sv.variation_name} SEQ REGION NAME: #{sv.seq_region.name}\n"
63
+ end
64
+
65
+ puts "\n== What things are related to a 'variation' object? =="
66
+ puts 'Variation belong to: ' + Variation.reflect_on_all_associations(:belongs_to).collect{|a| a.name.to_s}.join(',')
67
+ puts 'Variation have many: ' + Variation.reflect_on_all_associations(:has_many).collect{|a| a.name.to_s}.join(',')
@@ -1,5 +1,5 @@
1
1
  #
2
- # = test/unit/release_56/core/test_gene.rb - Unit test for Ensembl::Core
2
+ # = test/unit/release_60/core/test_gene.rb - Unit test for Ensembl::Core
3
3
  #
4
4
  # Copyright:: Copyright (C) 2009 Francesco Strozzi <francesco.strozzi@gmail.com>
5
5
  #
@@ -19,7 +19,7 @@ include Ensembl::Core
19
19
  class TestGene < Test::Unit::TestCase
20
20
 
21
21
  def setup
22
- DBConnection.connect('homo_sapiens', 56)
22
+ DBConnection.connect('homo_sapiens', 60)
23
23
  end
24
24
 
25
25
  def teardown
@@ -31,10 +31,10 @@ class TestGene < Test::Unit::TestCase
31
31
  assert_equal("ENSG00000006451",g.stable_id)
32
32
  assert_equal("7",g.seq_region.name)
33
33
  assert_equal(39663082,g.start)
34
- assert_equal(39747723 ,g.stop)
34
+ assert_equal(39747723,g.stop)
35
35
  assert_equal(1,g.strand)
36
36
  assert_equal(84642,g.seq.length)
37
- assert_equal("Ras-related protein Ral-A Precursor [Source:UniProtKB/Swiss-Prot;Acc:P11233]",g.description)
37
+ assert_equal("v-ral simian leukemia viral oncogene homolog A (ras related) [Source:HGNC Symbol;Acc:9839]",g.description)
38
38
  assert_equal("RALA",g.name)
39
39
  end
40
40
 
@@ -42,9 +42,9 @@ class TestGene < Test::Unit::TestCase
42
42
  g = Gene.find_by_stable_id("ENSG00000006451")
43
43
  t = g.transcripts
44
44
  assert_equal(5,t.size)
45
- assert_equal("ENST00000434466",t[0].stable_id)
45
+ assert_equal("ENST00000005257",t[0].stable_id)
46
46
  t = t[0]
47
- assert_equal(768,t.seq.length)
47
+ assert_equal(2880,t.seq.length)
48
48
  end
49
49
 
50
50
  def test_exons
@@ -0,0 +1,38 @@
1
+ #
2
+ # = test/unit/test_project.rb - Unit test for Ensembl::Core
3
+ #
4
+ # Copyright:: Copyright (C) 2009
5
+ # Jan Aerts <http://jandot.myopenid.com>
6
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
7
+ # License:: Ruby's
8
+ #
9
+ # $Id:
10
+ require 'pathname'
11
+ libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'lib')).cleanpath.to_s
12
+ $:.unshift(libpath) unless $:.include?(libpath)
13
+ require 'test/unit'
14
+ require 'lib/ensembl'
15
+
16
+ include Ensembl::Core
17
+
18
+ class AssemblyExceptions < Test::Unit::TestCase
19
+
20
+ def setup
21
+ DBConnection.connect('homo_sapiens', 60)
22
+ end
23
+
24
+ def teardown
25
+ DBConnection.remove_connection
26
+ end
27
+
28
+ def test_chr_x
29
+ source_slice = Slice.fetch_by_region('chromosome','X', 2709497, 2709520)
30
+ assert_equal('ctgaagaattgtgtttcttcccta', source_slice.seq)
31
+ end
32
+
33
+ def test_slice_overlapping_PAR_and_allosome
34
+ source_slice = Slice.fetch_by_region('chromosome','Y',2709500,2709540)
35
+ assert_equal('AGAAACTGAAAATGCTAAGAAATTCAGTTCCAGGATATGAA', source_slice.seq.upcase)
36
+ end
37
+
38
+ end
@@ -19,7 +19,7 @@ include Ensembl::Core
19
19
  class TestSlice < Test::Unit::TestCase
20
20
 
21
21
  def setup
22
- DBConnection.connect('homo_sapiens', 56)
22
+ DBConnection.connect('homo_sapiens', 60)
23
23
  end
24
24
 
25
25
  def teardown
@@ -44,11 +44,4 @@ class TestSlice < Test::Unit::TestCase
44
44
  assert_equal(31786816,s_rev.stop)
45
45
  end
46
46
 
47
- def test_slice_genes
48
- slice = Slice.fetch_by_region("chromosome","13",31773073,31909413,1)
49
- genes = slice.genes
50
- assert_equal("ENSG00000229427",genes[1].stable_id)
51
- assert_equal("ENSG00000187676",genes[0].stable_id)
52
- end
53
-
54
47
  end
@@ -0,0 +1,126 @@
1
+ #
2
+ # = test/unit/test_transcript.rb - Unit test for Ensembl::Core
3
+ #
4
+ # Copyright:: Copyright (C) 2007
5
+ # Jan Aerts <http://jandot.myopenid.com>
6
+ # License:: Ruby's
7
+ #
8
+ # $Id:
9
+ require 'pathname'
10
+ libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'lib')).cleanpath.to_s
11
+ $:.unshift(libpath) unless $:.include?(libpath)
12
+
13
+ require 'test/unit'
14
+
15
+ require 'lib/ensembl'
16
+
17
+ include Ensembl::Core
18
+
19
+ DBConnection.connect('homo_sapiens', 60)
20
+
21
+ class CodingPositions < Test::Unit::TestCase
22
+ def setup
23
+ # Transcript tr_fw is ENST00000215574
24
+ @tr_fw = Transcript.find_by_stable_id("ENST00000215574")
25
+ # Transcript tr_rev is ENST00000358041
26
+ @tr_rev = Transcript.find_by_stable_id("ENST00000358041")
27
+ end
28
+
29
+ def test_transcript_coords
30
+ assert_equal(531733, @tr_fw.seq_region_start)
31
+ assert_equal(542084, @tr_fw.seq_region_end)
32
+ assert_equal(644371, @tr_rev.seq_region_start)
33
+ assert_equal(647309, @tr_rev.seq_region_end)
34
+ end
35
+
36
+ def test_coding_regions_genomic_coords_of_fw
37
+ assert_equal(531932, @tr_fw.coding_region_genomic_start)
38
+ assert_equal(541552, @tr_fw.coding_region_genomic_end)
39
+ end
40
+
41
+ def test_coding_regions_genomic_coords_of_rev
42
+ assert_equal(646652, @tr_rev.coding_region_genomic_start)
43
+ assert_equal(647047, @tr_rev.coding_region_genomic_end)
44
+ end
45
+
46
+ def test_coding_regions_cdna_coords_of_fw
47
+ assert_equal(200, @tr_fw.coding_region_cdna_start)
48
+ assert_equal(910, @tr_fw.coding_region_cdna_end)
49
+ end
50
+
51
+ def test_coding_regions_cdna_coords_of_rev
52
+ assert_equal(263, @tr_rev.coding_region_cdna_start)
53
+ assert_equal(658, @tr_rev.coding_region_cdna_end)
54
+ end
55
+
56
+ end
57
+
58
+ class GenomicVsCDna < Test::Unit::TestCase
59
+ #From BioMart. Columns:
60
+ # Ensembl_Transcript_ID
61
+ # Chromosome
62
+ # Strand
63
+ # Ensembl_Exon_ID
64
+ # Exon_Chr_Start
65
+ # Exon_Chr_End
66
+ # Exon_Rank_in_Transcript
67
+ #ENST00000215574 19 1 ENSE00000655676 531733 532108 1
68
+ #ENST00000215574 19 1 ENSE00000655677 535837 535923 2
69
+ #ENST00000215574 19 1 ENSE00000655678 536243 536340 3
70
+ #ENST00000215574 19 1 ENSE00000655679 537013 537147 4
71
+ #ENST00000215574 19 1 ENSE00000655680 541339 542084 5
72
+ #
73
+ #ENST00000315489 19 -1 ENSE00001215510 474621 474983 1
74
+ #ENST00000315489 19 -1 ENSE00001215495 472394 472501 2
75
+ #ENST00000315489 19 -1 ENSE00001215487 467649 467762 3
76
+ #ENST00000315489 19 -1 ENSE00001215506 463344 464364 4
77
+ def setup
78
+ # Transcript tr_fw is ENST00000215574
79
+ @tr_fw = Transcript.find_by_stable_id("ENST00000215574")
80
+ # Transcript tr_rev is ENST00000315489
81
+ @tr_rev = Transcript.find_by_stable_id("ENST00000315489")
82
+ end
83
+
84
+ def test_identify_exon
85
+ assert_equal("ENSE00000655679", @tr_fw.exon_for_cdna_position(601).stable_id)
86
+ assert_equal("ENSE00000655679", @tr_fw.exon_for_genomic_position(537052).stable_id)
87
+ assert_equal("ENSE00001215487", @tr_rev.exon_for_cdna_position(541).stable_id)
88
+ assert_equal("ENSE00001215487", @tr_rev.exon_for_genomic_position(467693).stable_id)
89
+ end
90
+
91
+ def test_cdna2genomic
92
+ assert_equal(537052, @tr_fw.cdna2genomic(601))
93
+ assert_equal(467693, @tr_rev.cdna2genomic(541))
94
+ end
95
+
96
+ def test_cds2genomic
97
+ assert_equal(537052, @tr_fw.cds2genomic(401))
98
+ assert_equal(467693, @tr_rev.cds2genomic(304))
99
+ end
100
+
101
+ def test_genomic2cdna
102
+ assert_equal(601, @tr_fw.genomic2cdna(537052))
103
+ assert_equal(541, @tr_rev.genomic2cdna(467693))
104
+ end
105
+
106
+ def test_genomic2cds
107
+ assert_equal(401, @tr_fw.genomic2cds(537052))
108
+ assert_equal(304, @tr_rev.genomic2cds(467693))
109
+ end
110
+ end
111
+
112
+ class TestIntron < Test::Unit::TestCase
113
+ def setup
114
+ @transcript = Transcript.find_by_stable_id("ENST00000215574")
115
+ @introns = @transcript.introns
116
+ end
117
+
118
+ def test_get_introns
119
+ assert_equal(4, @introns.length)
120
+ end
121
+
122
+ def test_intron_slices
123
+ assert_equal('chromosome:GRCh37:19:532109:535836:1', @introns[0].slice.to_s)
124
+ end
125
+ end
126
+