bio-ensembl 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. data/.document +5 -0
  2. data/Gemfile +20 -0
  3. data/Gemfile.lock +40 -0
  4. data/LICENSE.txt +20 -0
  5. data/README.rdoc +19 -0
  6. data/Rakefile +71 -0
  7. data/VERSION +1 -0
  8. data/bin/ensembl +40 -0
  9. data/bin/variation_effect_predictor +106 -0
  10. data/bio-ensembl.gemspec +190 -0
  11. data/lib/bio-ensembl.rb +65 -0
  12. data/lib/bio-ensembl/core/activerecord.rb +1812 -0
  13. data/lib/bio-ensembl/core/collection.rb +64 -0
  14. data/lib/bio-ensembl/core/project.rb +262 -0
  15. data/lib/bio-ensembl/core/slice.rb +657 -0
  16. data/lib/bio-ensembl/core/transcript.rb +409 -0
  17. data/lib/bio-ensembl/core/transform.rb +95 -0
  18. data/lib/bio-ensembl/db_connection.rb +205 -0
  19. data/lib/bio-ensembl/variation/activerecord.rb +536 -0
  20. data/lib/bio-ensembl/variation/variation_feature.rb +376 -0
  21. data/lib/bio-ensembl/variation/variation_feature62.rb +444 -0
  22. data/samples/ensembl_genomes_example.rb +60 -0
  23. data/samples/examples_perl_tutorial.rb +125 -0
  24. data/samples/small_example_ruby_api.rb +34 -0
  25. data/samples/variation_effect_predictor_data.txt +4 -0
  26. data/samples/variation_example.rb +67 -0
  27. data/test/data/seq_c6qbl.fa +10 -0
  28. data/test/data/seq_cso19_coding.fa +16 -0
  29. data/test/data/seq_cso19_transcript.fa +28 -0
  30. data/test/data/seq_drd3_gene.fa +838 -0
  31. data/test/data/seq_drd3_transcript.fa +22 -0
  32. data/test/data/seq_drd4_transcript.fa +24 -0
  33. data/test/data/seq_forward_composite.fa +1669 -0
  34. data/test/data/seq_par_boundary.fa +169 -0
  35. data/test/data/seq_rnd3_transcript.fa +47 -0
  36. data/test/data/seq_ub2r1_coding.fa +13 -0
  37. data/test/data/seq_ub2r1_gene.fa +174 -0
  38. data/test/data/seq_ub2r1_transcript.fa +26 -0
  39. data/test/data/seq_y.fa +2 -0
  40. data/test/default/test_connection.rb +60 -0
  41. data/test/default/test_releases.rb +130 -0
  42. data/test/ensembl_genomes/test_collection.rb +122 -0
  43. data/test/ensembl_genomes/test_gene.rb +46 -0
  44. data/test/ensembl_genomes/test_slice.rb +65 -0
  45. data/test/ensembl_genomes/test_variation.rb +38 -0
  46. data/test/helper.rb +18 -0
  47. data/test/release_50/core/test_project.rb +210 -0
  48. data/test/release_50/core/test_project_human.rb +52 -0
  49. data/test/release_50/core/test_relationships.rb +72 -0
  50. data/test/release_50/core/test_sequence.rb +170 -0
  51. data/test/release_50/core/test_slice.rb +116 -0
  52. data/test/release_50/core/test_transcript.rb +125 -0
  53. data/test/release_50/core/test_transform.rb +217 -0
  54. data/test/release_50/variation/test_activerecord.rb +138 -0
  55. data/test/release_50/variation/test_variation.rb +79 -0
  56. data/test/release_53/core/test_gene.rb +61 -0
  57. data/test/release_53/core/test_project.rb +91 -0
  58. data/test/release_53/core/test_project_human.rb +61 -0
  59. data/test/release_53/core/test_slice.rb +42 -0
  60. data/test/release_53/core/test_transform.rb +57 -0
  61. data/test/release_53/variation/test_activerecord.rb +137 -0
  62. data/test/release_53/variation/test_variation.rb +66 -0
  63. data/test/release_56/core/test_gene.rb +61 -0
  64. data/test/release_56/core/test_project.rb +91 -0
  65. data/test/release_56/core/test_slice.rb +49 -0
  66. data/test/release_56/core/test_transform.rb +57 -0
  67. data/test/release_56/variation/test_activerecord.rb +141 -0
  68. data/test/release_56/variation/test_consequence.rb +131 -0
  69. data/test/release_56/variation/test_variation.rb +63 -0
  70. data/test/release_60/core/test_gene.rb +61 -0
  71. data/test/release_60/core/test_project_human.rb +34 -0
  72. data/test/release_60/core/test_slice.rb +42 -0
  73. data/test/release_60/core/test_transcript.rb +120 -0
  74. data/test/release_60/core/test_transform.rb +57 -0
  75. data/test/release_60/variation/test_activerecord.rb +216 -0
  76. data/test/release_60/variation/test_consequence.rb +153 -0
  77. data/test/release_60/variation/test_variation.rb +64 -0
  78. data/test/release_62/core/test_gene.rb +42 -0
  79. data/test/release_62/variation/test_activerecord.rb +86 -0
  80. data/test/release_62/variation/test_consequence.rb +191 -0
  81. metadata +287 -0
@@ -0,0 +1,61 @@
1
+ #
2
+ # = test/unit/release_56/core/test_gene.rb - Unit test for Ensembl::Core
3
+ #
4
+ # Copyright:: Copyright (C) 2009 Francesco Strozzi <francesco.strozzi@gmail.com>
5
+ #
6
+ # License:: Ruby's
7
+ #
8
+ # $Id:
9
+
10
+ require File.expand_path File.join(File.dirname(__FILE__),"../../helper.rb")
11
+
12
+ include Ensembl::Core
13
+
14
+ class TestGene < Test::Unit::TestCase # Checks Gene, Transcript and exon lookups against hard-coded expected values for homo_sapiens.
15
+
16
+ def setup # Opens the homo_sapiens connection; 56 is presumably the Ensembl release number.
17
+ DBConnection.connect('homo_sapiens', 56)
18
+ end
19
+
20
+ def teardown # Removes the connection opened in setup.
21
+ DBConnection.remove_connection
22
+ end
23
+
24
+ def test_gene # Gene ENSG00000006451: stable id, seq_region name, start/stop, strand, sequence length, description and name.
25
+ g = Gene.find_by_stable_id("ENSG00000006451")
26
+ assert_equal("ENSG00000006451",g.stable_id)
27
+ assert_equal("7",g.seq_region.name)
28
+ assert_equal(39663082,g.start)
29
+ assert_equal(39747723 ,g.stop)
30
+ assert_equal(1,g.strand)
31
+ assert_equal(84642,g.seq.length)
32
+ assert_equal("Ras-related protein Ral-A Precursor [Source:UniProtKB/Swiss-Prot;Acc:P11233]",g.description)
33
+ assert_equal("RALA",g.name)
34
+ end
35
+
36
+ def test_transcript # Transcripts of the same gene: count, stable id of the first one, and its sequence length.
37
+ g = Gene.find_by_stable_id("ENSG00000006451")
38
+ t = g.transcripts
39
+ assert_equal(5,t.size)
40
+ assert_equal("ENST00000434466",t[0].stable_id)
41
+ t = t[0] # from here on t is the first transcript, not the collection
42
+ assert_equal(768,t.seq.length)
43
+ end
44
+
45
+ def test_exons # Exons of transcript ENST00000005257: count, then stable id, upcased sequence and start/stop of the first two.
46
+ t = Transcript.find_by_stable_id("ENST00000005257")
47
+ e = t.exons
48
+ assert_equal(5,e.size)
49
+ assert_equal("ENSE00001829077",e[0].stable_id)
50
+ seq1 = "AGGCCCATGATCACCCTCCTCTCAGCCCACGGACAGGAAGTCGCTCCCCAGCTGCCCCGCCCCGCTCCCCAGCGCCCCGGAAGTGATCTGTGGCGGCTGCTGCAGAGCCGCCAGGAGGAGGGTGGATCTCCCCAGAGCAAAGCGTCGGAGTCCTCCTCCTCCTTCTCCTCCTCCTCCTCCTCCTCCTCCAGCCGCCCAGGCTCCCCCGCCACCCGTCAGACTCCTCCTTCGACCGCTCCCGGCGCGGGGCCTTCCAGGCGACAAGGACCGAGTACCCTCCGGCCGGAGCCACGCAGCCGCGGCTTCCGGAGCCCTCGGGGCGGCGGACTGGCTCGCGGTGCAG"
51
+ assert_equal(seq1,e[0].seq.upcase)
52
+ assert_equal(39663082,e[0].start)
53
+ assert_equal(39663424,e[0].stop)
54
+ assert_equal("ENSE00000832451",e[1].stable_id)
55
+ seq2 = "ATTCTTCTTAATCCTTTGGTGAAAACTGAGACACAAAATGGCTGCAAATAAGCCCAAGGGTCAGAATTCTTTGGCTTTACACAAAGTCATCATGGTGGGCAGTGGTGGCGTGGGCAAGTCAGCTCTGACTCTACAGTTCATGTACGATGAG"
56
+ assert_equal(seq2,e[1].seq.upcase)
57
+ assert_equal(39726230,e[1].start)
58
+ assert_equal(39726380,e[1].stop)
59
+ end
60
+
61
+ end
@@ -0,0 +1,91 @@
1
+ #
2
+ # = test/unit/release_56/core/test_project.rb - Unit test for Ensembl::Core
3
+ #
4
+ # Copyright:: Copyright (C) 2009
5
+ # Jan Aerts <http://jandot.myopenid.com>
6
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
7
+ # License:: Ruby's
8
+ #
9
+ # $Id:
10
+
11
+ require File.expand_path File.join(File.dirname(__FILE__),"../../helper.rb")
12
+
13
+ include Ensembl::Core
14
+
15
+ class SliceProjectFromAssemblyToComponentForwardStrands < Test::Unit::TestCase # Projects bos_taurus chromosome 20 slices onto 'contig' and checks the resulting slices and gaps.
16
+ def setup # Connects to bos_taurus (56 is presumably the release) and builds four source slices plus their 'contig' projections.
17
+ DBConnection.connect('bos_taurus', 56)
18
+ @source_slice_single_contig = Slice.fetch_by_region('chromosome', '20', 175000, 180000)
19
+ @target_slices_single_contig = @source_slice_single_contig.project('contig')
20
+
21
+ @source_slice_two_contigs = Slice.fetch_by_region('chromosome','20', 175000, 190000)
22
+ @target_slices_two_contigs = @source_slice_two_contigs.project('contig')
23
+
24
+ @source_slice_contigs_with_strand = Slice.fetch_by_region('chromosome', '20', 160000, 190000)
25
+ @target_slices_contigs_with_strand = @source_slice_contigs_with_strand.project('contig')
26
+
27
+ @source_slice_contigs_with_strand_ends_in_gaps = Slice.fetch_by_region('chromosome', '20', 170950, 196000)
28
+ @target_slices_contigs_with_strand_ends_in_gaps = @source_slice_contigs_with_strand_ends_in_gaps.project('contig')
29
+ end
30
+
31
+ def teardown # Removes the connection opened in setup.
32
+ DBConnection.remove_connection
33
+ end
34
+
35
+ # |-----------------> contig
36
+ # ^ ^
37
+ # | |
38
+ # |------------------------------------------> chromosome
39
+ def test_project_from_assembly_to_single_component
40
+ # Position 175000 on chr20 is position 4030 on contig, position 180000 is 9030
41
+ assert_equal('AAFC03028970', @target_slices_single_contig[0].seq_region.name)
42
+ assert_equal(4030, @target_slices_single_contig[0].start)
43
+ assert_equal(9030, @target_slices_single_contig[0].stop)
44
+ end
45
+
46
+ # |-----> |--------> contig
47
+ # ^ ^
48
+ # | |
49
+ # |------------------------------------------> chromosome
50
+ def test_project_from_assembly_to_two_components
51
+ # This chromosomal region is covered by contigs AAFC03028970, a gap and AAFC03028962
52
+ # * Position 175000 on chr 20 is position 4030 on contig AAFC03028970
53
+ # * Position 190000 on chr 20 is position 35 on contig AAFC03028962
54
+ assert_equal(3, @target_slices_two_contigs.length)
55
+ assert_equal('contig:Btau_4.0:AAFC03028970:4030:17365:1', @target_slices_two_contigs[0].display_name)
56
+ assert_equal(Gap, @target_slices_two_contigs[1].class)
57
+ assert_equal('contig:Btau_4.0:AAFC03028962:1:35:1', @target_slices_two_contigs[2].display_name)
58
+ end
59
+
60
+ # |-----> <-------| |-------> |-------> contig
61
+ # ^ ^
62
+ # | |
63
+ # |--------------------------------------------------> chromosome
64
+ def test_project_from_assembly_to_contigs_with_strand
65
+ # This chromosomal region is covered by 4 contigs and 3 gaps
66
+ # One of the contigs (AAFC03028959) is on the reverse strand.
67
+ assert_equal(7, @target_slices_contigs_with_strand.length)
68
+ assert_equal('contig:Btau_4.0:AAFC03028964:90:9214:1', @target_slices_contigs_with_strand[0].display_name)
69
+ assert_equal(Gap, @target_slices_contigs_with_strand[1].class)
70
+ assert_equal('contig:Btau_4.0:AAFC03028959:1:1746:-1', @target_slices_contigs_with_strand[2].display_name)
71
+ assert_equal(Gap, @target_slices_contigs_with_strand[3].class)
72
+ assert_equal('contig:Btau_4.0:AAFC03028970:1:17365:1', @target_slices_contigs_with_strand[4].display_name)
73
+ assert_equal(Gap, @target_slices_contigs_with_strand[5].class)
74
+ assert_equal('contig:Btau_4.0:AAFC03028962:1:35:1', @target_slices_contigs_with_strand[6].display_name)
75
+ end
76
+
77
+ # <--| |-----> contig
78
+ # ^ ^
79
+ # | |
80
+ # |--------------------------------------------------> chromosome
81
+ def test_project_from_assembly_to_contigs_with_strand_and_ending_in_gaps
82
+ # This chromosomal region is covered by 2 contigs and 3 gaps, starting and ending in a gap: GaCoGaCoGa
83
+ assert_equal(5, @target_slices_contigs_with_strand_ends_in_gaps.length)
84
+ assert_equal(Gap, @target_slices_contigs_with_strand_ends_in_gaps[0].class)
85
+ assert_equal('contig:Btau_4.0:AAFC03028970:1:17365:1', @target_slices_contigs_with_strand_ends_in_gaps[1].display_name)
86
+ assert_equal(Gap, @target_slices_contigs_with_strand_ends_in_gaps[2].class)
87
+ assert_equal('contig:Btau_4.0:AAFC03028962:1:5704:1', @target_slices_contigs_with_strand_ends_in_gaps[3].display_name)
88
+ assert_equal(Gap, @target_slices_contigs_with_strand_ends_in_gaps[4].class)
89
+ end
90
+
91
+ end
@@ -0,0 +1,49 @@
1
+ #
2
+ # = test/unit/release_56/core/test_slice.rb - Unit test for Ensembl::Core
3
+ #
4
+ # Copyright:: Copyright (C) 2009
5
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
6
+ # License:: Ruby's
7
+ #
8
+ # $Id:
9
+
10
+ require File.expand_path File.join(File.dirname(__FILE__),"../../helper.rb")
11
+
12
+ include Ensembl::Core
13
+
14
+ class TestSlice < Test::Unit::TestCase # Checks Slice.fetch_by_region on homo_sapiens chromosome 13 for both strands, plus the genes on a slice.
15
+
16
+ def setup # Opens the homo_sapiens connection; 56 is presumably the Ensembl release number.
17
+ DBConnection.connect('homo_sapiens', 56)
18
+ end
19
+
20
+ def teardown # Removes the connection opened in setup.
21
+ DBConnection.remove_connection
22
+ end
23
+
24
+ def test_forward # Strand argument 1: upcased sequence, seq_region name and start/stop of a 200 bp region.
25
+ seq1 = "GGCCAAGCTGGTCTCAAACTCCTGACCTCGTGATCCGCCCACCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGGGCCACTGGGTCCGGCCGCATGTCTGGCTAATTTTGTAGAGACAGGGCCTCCCTATACTGCCCAGGCTGGTTTCAAGCTCCTGGCTTCAAGGGGTCCTCCTGCCTTGGCCTCCCGAAGTGCTGG"
26
+ s = Slice.fetch_by_region("chromosome","13",31786617,31786816,1)
27
+ assert_equal(seq1,s.seq.upcase)
28
+ assert_equal("13",s.seq_region.name)
29
+ assert_equal(31786617,s.start)
30
+ assert_equal(31786816,s.stop)
31
+ end
32
+
33
+ def test_reverse # Same region with strand argument -1: a different expected sequence, same start/stop.
34
+ seq2 = "CCAGCACTTCGGGAGGCCAAGGCAGGAGGACCCCTTGAAGCCAGGAGCTTGAAACCAGCCTGGGCAGTATAGGGAGGCCCTGTCTCTACAAAATTAGCCAGACATGCGGCCGGACCCAGTGGCCCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGTGGGCGGATCACGAGGTCAGGAGTTTGAGACCAGCTTGGCC"
35
+ s_rev = Slice.fetch_by_region("chromosome","13",31786617,31786816,-1)
36
+ assert_equal(seq2,s_rev.seq.upcase)
37
+ assert_equal("13",s_rev.seq_region.name)
38
+ assert_equal(31786617,s_rev.start)
39
+ assert_equal(31786816,s_rev.stop)
40
+ end
41
+
42
+ def test_slice_genes # Stable ids of the first two genes returned for a larger chromosome 13 slice.
43
+ slice = Slice.fetch_by_region("chromosome","13",31773073,31909413,1)
44
+ genes = slice.genes
45
+ assert_equal("ENSG00000229427",genes[1].stable_id) # NOTE(review): relies on the order in which slice.genes returns rows - confirm it is stable
46
+ assert_equal("ENSG00000187676",genes[0].stable_id)
47
+ end
48
+
49
+ end
@@ -0,0 +1,57 @@
1
+ #
2
+ # = test/unit/release_56/core/test_transform.rb - Unit test for Ensembl::Core
3
+ #
4
+ # Copyright:: Copyright (C) 2009
5
+ # Jan Aerts <http://jandot.myopenid.com>
6
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
7
+ # License:: Ruby's
8
+ #
9
+ # $Id:
10
+
11
+ require File.expand_path File.join(File.dirname(__FILE__),"../../helper.rb")
12
+
13
+ include Ensembl::Core
14
+
15
+
16
+ # For all tests, the source (i.e. the seq_region that the feature is annotated
17
+ # on initially) remains forward.
18
+ #
19
+ # Same coordinate system: test names refer to direction of gene vs chromosome
20
+ class TransformOntoSameCoordinateSystem < Test::Unit::TestCase # Transforms genes to 'chromosome' and expects identical coordinates before and after.
21
+
22
+ def setup # Opens the homo_sapiens connection; 56 is presumably the Ensembl release number.
23
+ DBConnection.connect('homo_sapiens', 56)
24
+ end
25
+
26
+ def teardown # Removes the connection opened in setup.
27
+ DBConnection.remove_connection
28
+ end
29
+
30
+ def test_rev # Gene with seq_region_strand -1 on chromosome 10.
31
+ source_gene = Gene.find_by_stable_id("ENSG00000165322")
32
+ target_gene = source_gene.transform('chromosome')
33
+
34
+ assert_equal('10', source_gene.seq_region.name)
35
+ assert_equal(32094365, source_gene.seq_region_start)
36
+ assert_equal(32217770, source_gene.seq_region_end)
37
+ assert_equal(-1, source_gene.seq_region_strand)
38
+ assert_equal('10', target_gene.seq_region.name)
39
+ assert_equal(32094365, target_gene.seq_region_start)
40
+ assert_equal(32217770, target_gene.seq_region_end)
41
+ assert_equal(-1, target_gene.seq_region_strand)
42
+ end
43
+
44
+ def test_fw # Gene with seq_region_strand 1 on chromosome 5.
45
+ source_gene = Gene.find_by_stable_id("ENSG00000133401")
46
+ target_gene = source_gene.transform('chromosome')
47
+ assert_equal('5', source_gene.seq_region.name)
48
+ assert_equal(31639517, source_gene.seq_region_start)
49
+ assert_equal(32111037, source_gene.seq_region_end)
50
+ assert_equal(1, source_gene.seq_region_strand)
51
+ assert_equal('5', target_gene.seq_region.name)
52
+ assert_equal(31639517, target_gene.seq_region_start)
53
+ assert_equal(32111037, target_gene.seq_region_end)
54
+ assert_equal(1, target_gene.seq_region_strand)
55
+ end
56
+
57
+ end
@@ -0,0 +1,141 @@
1
+ #
2
+ # = test/unit/release_56/variation/test_activerecord.rb - Unit test for Ensembl::Variation
3
+ #
4
+ # Copyright:: Copyright (C) 2009
5
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
6
+ #
7
+ # License:: Ruby's
8
+ #
9
+ # $Id:
10
+ require File.expand_path File.join(File.dirname(__FILE__),"../../helper.rb")
11
+
12
+ include Ensembl::Variation
13
+
14
+ class ActiveRecordVariation < Test::Unit::TestCase # Checks row counts and associations of the Ensembl::Variation models against hard-coded expected values.
15
+
16
+ def setup # Opens the homo_sapiens connection; 56 is presumably the Ensembl release number.
17
+ DBConnection.connect('homo_sapiens',56)
18
+ end
19
+
20
+ def teardown # Removes the connection opened in setup.
21
+ DBConnection.remove_connection
22
+ end
23
+
24
+ def test_allele # Allele with id 1: allele string and frequency.
25
+ allele = Allele.find(1)
26
+ assert_equal('T', allele.allele)
27
+ assert_equal(0.04, allele.frequency) # NOTE(review): exact comparison; if frequency is a Float, assert_in_delta may be safer - confirm column type
28
+ end
29
+
30
+ def test_sample # Sample row count plus the individual, individual_genotype_multiple_bp and sample_synonym associations.
31
+ n = Sample.count(:all)
32
+ assert_equal(17822,n)
33
+ individual = Sample.find(12468).individual
34
+ assert_equal('Male',individual.gender)
35
+ i = Sample.find(13131).individual_genotype_multiple_bp
36
+ assert_equal(3,i.size)
37
+ assert_equal(17510,i[0].variation_id)
38
+ syn = Sample.find(21).sample_synonym
39
+ assert_equal('477',syn.name)
40
+ end
41
+
42
+ def test_individual # Individual row count.
43
+ n = Individual.count(:all)
44
+ assert_equal(10132,n)
45
+ end
46
+
47
+ def test_individual_genotype_multiple_bp # IndividualGenotypeMultipleBp row count.
48
+ n = IndividualGenotypeMultipleBp.count(:all)
49
+ assert_equal(712267,n)
50
+ end
51
+
52
+ def test_compressed_genotype_single_bp # CompressedGenotypeSingleBp row count.
53
+ n = CompressedGenotypeSingleBp.count(:all)
54
+ assert_equal(12736658,n)
55
+ end
56
+
57
+ def test_read_coverage # ReadCoverage row count.
58
+ n = ReadCoverage.count(:all)
59
+ assert_equal(6521608,n)
60
+ end
61
+
62
+ def test_population # Population row count.
63
+ n = Population.count(:all)
64
+ assert_equal(7690,n)
65
+ end
66
+
67
+ def test_variation # Variation row count, then the associations of several Variation records looked up by id.
68
+ n = Variation.count(:all)
69
+ assert_equal(18909925,n)
70
+
71
+ syn = Variation.find(712422).variation_synonyms
72
+ assert_equal(6,syn.size)
73
+ assert_equal('SNP_A-1507972',syn[0].name)
74
+
75
+ flanking = Variation.find(10000).flanking_sequence
76
+ assert_equal(3705521,flanking.up_seq_region_start)
77
+ assert_equal(3705770,flanking.up_seq_region_end)
78
+ assert_equal(3705772,flanking.down_seq_region_start)
79
+ assert_equal(3706021,flanking.down_seq_region_end)
80
+ assert_equal(27509,flanking.seq_region_id)
81
+ assert_equal(1,flanking.seq_region_strand)
82
+
83
+ ag = Variation.find(10000).allele_groups
84
+ assert_nil ag[0] # no first element is expected for this variation
85
+
86
+ pg = Variation.find(10000).population_genotypes
87
+ assert_equal(12,pg.size)
88
+ assert_equal('C',pg[0].allele_1)
89
+ assert_equal('C',pg[0].allele_2)
90
+ assert_equal(1,pg[0].frequency)
91
+
92
+ a = Variation.find(115).alleles
93
+ assert_equal(8,a.size)
94
+ assert_equal('C',a[0].allele)
95
+ assert_equal(0.733,a[0].frequency) # NOTE(review): exact comparison; if frequency is a Float, assert_in_delta may be safer - confirm column type
96
+
97
+ vf = Variation.find(5345540).variation_features[0]
98
+ assert_equal('T/A',vf.allele_string)
99
+ assert_equal('rs8189747',vf.variation_name)
100
+ assert_equal(27526,vf.seq_region_id)
101
+ assert_equal(24606076,vf.seq_region_start)
102
+ assert_equal(24606076,vf.seq_region_end)
103
+ assert_equal(1,vf.seq_region_strand)
104
+
105
+ vg = Variation.find(1352735).variation_groups
106
+ assert_nil vg[0] # no first element is expected for this variation
107
+
108
+ i = Variation.find(1533176).individual_genotype_multiple_bps
109
+ assert_equal(42,i.size)
110
+ end
111
+
112
+ def test_variation_feature # Samples of VariationFeature 38461: count and name of the first one.
113
+ vf_sample = VariationFeature.find(38461).samples
114
+ assert_equal(8,vf_sample.size)
115
+ assert_equal('PERLEGEN:AFD_EUR_PANEL',vf_sample[0].name)
116
+ end
117
+
118
+ def test_variation_transcript # Transcript variations of rs7671997's first feature, then the transcript and first exon behind t[0].
119
+ t = Variation.find_by_name('rs7671997').variation_features[0].transcript_variations
120
+ assert_equal(2,t.size)
121
+ assert_equal('INTRONIC',t[0].consequence_type)
122
+ assert_equal('5PRIME_UTR',t[1].consequence_type)
123
+ assert_equal(15008544,t[0].variation_feature_id)
124
+ transcript = t[0].transcript
125
+ assert_equal('protein_coding',transcript.biotype)
126
+ assert_equal(2230096,transcript.seq_region_start)
127
+ assert_equal(2243860,transcript.seq_region_end)
128
+ assert_equal('ENST00000243706',transcript.stable_id)
129
+ e = transcript.exons
130
+ assert_equal('CTCCCGTGAGGCAGTGCGAGGCGCGCGGGGCACGGAGGGCGGTGGCGGCGGGCTCCTGCGAGAAGCAAGCGGAACTTCCTGAG',e[0].seq.upcase)
131
+
132
+ end
133
+
134
+ def test_source # Source 1: number of sample synonyms, and no first allele group.
135
+ syn = Source.find(1).sample_synonyms
136
+ assert_equal(17806,syn.size)
137
+
138
+ ag = Source.find(1).allele_groups
139
+ assert_nil ag[0]
140
+ end
141
+ end
@@ -0,0 +1,131 @@
1
+ #
2
+ # = test/unit/release_56/variation/test_consequence.rb - Unit test for Ensembl::Variation
3
+ #
4
+ # Copyright:: Copyright (C) 2009
5
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
6
+ # License:: Ruby's
7
+ #
8
+ require File.expand_path File.join(File.dirname(__FILE__),"../../helper.rb")
9
+
10
+ include Ensembl::Variation
11
+
12
+ class TestVariation < Test::Unit::TestCase # Checks consequence types computed for hand-built VariationFeature objects, plus genomic<->cDNA coordinate conversion.
13
+
14
+ def setup # Opens the homo_sapiens connection; 56 is presumably the Ensembl release number.
15
+ DBConnection.connect('homo_sapiens',56)
16
+ end
17
+
18
+ def teardown # Removes the connection opened in setup.
19
+ DBConnection.remove_connection
20
+ end
21
+
22
+ def test_calculations # Each section builds a VariationFeature with .new and checks consequence_type at one index of transcript_variations.
23
+
24
+ # INTERGENIC
25
+ vf = VariationFeature.new(:seq_region_id => SeqRegion.find_by_name("X").seq_region_id, :seq_region_start => 23694, :seq_region_end => 23694, :seq_region_strand => 1, :allele_string => "A/T",:variation_name => "fake_SNP")
26
+ tv = vf.transcript_variations
27
+ assert_instance_of(TranscriptVariation,tv[0])
28
+ assert_equal("INTERGENIC",tv[0].consequence_type)
29
+
30
+
31
+ # 3PRIME_UTR
32
+ vf = VariationFeature.new(:seq_region_id => 27506, :seq_region_start => 96810688, :seq_region_end => 96810688, :seq_region_strand => 1, :allele_string => "G/C", :variation_name => "rs16869283")
33
+ tv = vf.transcript_variations
34
+ assert_equal("3PRIME_UTR", tv[1].consequence_type)
35
+
36
+ # 5PRIME_UTR
37
+ vf = VariationFeature.new(:seq_region_id => 27506, :seq_region_start => 158536411, :seq_region_end => 158536411, :seq_region_strand => 1, :allele_string => "T/C", :variation_name => "rs71547565")
38
+ tv = vf.transcript_variations
39
+ assert_equal("5PRIME_UTR", tv[3].consequence_type)
40
+
41
+ # UPSTREAM
42
+ vf = VariationFeature.new(:seq_region_id => 27506, :seq_region_start => 96831018, :seq_region_end => 96831018, :seq_region_strand => 1, :allele_string => "G/T", :variation_name => "rs6975185")
43
+ tv = vf.transcript_variations
44
+ assert_equal("UPSTREAM",tv[0].consequence_type)
45
+
46
+ # DOWNSTREAM
47
+ vf = VariationFeature.new(:seq_region_id => 27506, :seq_region_start => 105727321, :seq_region_end => 105727321, :seq_region_strand => 1, :allele_string => "G/T", :variation_name => "rs35113830")
48
+ tv = vf.transcript_variations
49
+ assert_equal("DOWNSTREAM",tv[0].consequence_type)
50
+
51
+ # WITHIN_MATURE_miRNA
52
+ vf = VariationFeature.new(:seq_region_id => 27518, :seq_region_start => 135895002, :seq_region_end => 135895002, :seq_region_strand => 1, :allele_string => "A/G", :variation_name => "rs11266800")
53
+ tv = vf.transcript_variations
54
+ assert_equal("WITHIN_MATURE_miRNA",tv[2].consequence_type)
55
+
56
+ # WITHIN_NON_CODING_GENE
57
+ vf = VariationFeature.new(:seq_region_id => 27506, :seq_region_start => 97601052, :seq_region_end => 97601052, :seq_region_strand => 1, :allele_string => "G/A", :variation_name => "rs13245475")
58
+ tv = vf.transcript_variations
59
+ assert_equal("WITHIN_NON_CODING_GENE",tv[1].consequence_type)
60
+
61
+ # COMPLEX_INDEL
62
+ vf = VariationFeature.new(:seq_region_id => 27752, :seq_region_start => 37529, :seq_region_end => 37535, :seq_region_strand => 1, :allele_string => "CCACCCA/ACACCCG", :variation_name => "rs71228679")
63
+ tv = vf.transcript_variations
64
+ assert_equal("COMPLEX_INDEL",tv[0].consequence_type)
65
+
66
+ # ESSENTIAL_SPLICE_SITE
67
+ vf = VariationFeature.new(:seq_region_id => 27511, :seq_region_start => 818059, :seq_region_end => 818059, :seq_region_strand => 1, :allele_string => "G/T", :variation_name => "rs3888067")
68
+ tv = vf.transcript_variations
69
+ assert_equal("ESSENTIAL_SPLICE_SITE",tv[0].consequence_type)
70
+
71
+ # SPLICE_SITE
72
+ vf = VariationFeature.new(:seq_region_id => 27506, :seq_region_start => 102301587, :seq_region_end => 102301587, :seq_region_strand => 1, :allele_string => "A/G", :variation_name => "rs434833")
73
+ tv = vf.transcript_variations
74
+ assert_equal("SPLICE_SITE",tv[5].consequence_type)
75
+
76
+ # INTRONIC
77
+ vf = VariationFeature.new(:seq_region_id => 27506, :seq_region_start => 101165365, :seq_region_end => 101165365, :seq_region_strand => 1, :allele_string => "T/C", :variation_name => "rs1859633")
78
+ tv = vf.transcript_variations
79
+ assert_equal("INTRONIC",tv[2].consequence_type)
80
+
81
+ # FRAMESHIFT_CODING
82
+ vf = VariationFeature.new(:seq_region_id => 27511, :seq_region_start => 78958619, :seq_region_end => 78958618, :seq_region_strand => 1, :allele_string => "-/G", :variation_name => "rs35065683")
83
+ tv = vf.transcript_variations
84
+ assert_equal("FRAMESHIFT_CODING",tv[0].consequence_type)
85
+
86
+ # STOP_GAINED
87
+ vf = VariationFeature.new(:seq_region_id => 27516, :seq_region_start => 38262908, :seq_region_end => 38262908, :seq_region_strand => 1, :allele_string => "G/A", :variation_name => "rs72556299")
88
+ tv = vf.transcript_variations
89
+ assert_equal("STOP_GAINED",tv[-1].consequence_type)
90
+
91
+ # STOP_LOST
92
+ vf = VariationFeature.new(:seq_region_id => 27511, :seq_region_start => 152770613, :seq_region_end => 152770613, :seq_region_strand => 1, :allele_string => "T/G", :variation_name => "rs41268500")
93
+ tv = vf.transcript_variations
94
+ assert_equal("STOP_LOST",tv[0].consequence_type)
95
+
96
+ # SYNONYMOUS_CODING
97
+ vf = VariationFeature.new(:seq_region_id => 27506, :seq_region_start => 99688238, :seq_region_end => 99688238, :seq_region_strand => 1, :allele_string => "C/T", :variation_name => "rs11550651")
98
+ tv = vf.transcript_variations
99
+ assert_equal("SYNONYMOUS_CODING",tv[0].consequence_type)
100
+
101
+ # NON_SYNONYMOUS_CODING
102
+ vf = VariationFeature.new(:seq_region_id => 27506, :seq_region_start => 99057720, :seq_region_end => 99057720, :seq_region_strand => 1, :allele_string => "G/A", :variation_name => "rs11545970")
103
+ tv = vf.transcript_variations
104
+ assert_equal("NON_SYNONYMOUS_CODING",tv[1].consequence_type)
105
+
106
+ end
107
+
108
+ # Checking CDNA coordinates calculation
109
+
110
+ def test_genomic2cdna_fw # forward strand (variation rs67960011)
111
+ t = Ensembl::Core::Transcript.find_by_stable_id("ENST00000039007")
112
+ assert_equal(573,t.genomic2cdna(38260562))
113
+ end
114
+
115
+ def test_cdna2genomic_fw # forward strand (variation rs67960011)
116
+ t = Ensembl::Core::Transcript.find_by_stable_id("ENST00000039007")
117
+ assert_equal(38260562,t.cdna2genomic(573))
118
+ end
119
+
120
+ def test_genomic2cdna_rev # reverse strand (variation rs11545970)
121
+ t = Ensembl::Core::Transcript.find_by_stable_id("ENST00000422429")
122
+ assert_equal(110,t.genomic2cdna(99057720))
123
+ end
124
+
125
+ def test_cdna2genomic_rev # reverse strand (variation rs11545970)
126
+ t = Ensembl::Core::Transcript.find_by_stable_id("ENST00000422429")
127
+ assert_equal(99057720,t.cdna2genomic(110))
128
+ end
129
+
130
+
131
+ end