bio-ensembl 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (81) hide show
  1. data/.document +5 -0
  2. data/Gemfile +20 -0
  3. data/Gemfile.lock +40 -0
  4. data/LICENSE.txt +20 -0
  5. data/README.rdoc +19 -0
  6. data/Rakefile +71 -0
  7. data/VERSION +1 -0
  8. data/bin/ensembl +40 -0
  9. data/bin/variation_effect_predictor +106 -0
  10. data/bio-ensembl.gemspec +190 -0
  11. data/lib/bio-ensembl.rb +65 -0
  12. data/lib/bio-ensembl/core/activerecord.rb +1812 -0
  13. data/lib/bio-ensembl/core/collection.rb +64 -0
  14. data/lib/bio-ensembl/core/project.rb +262 -0
  15. data/lib/bio-ensembl/core/slice.rb +657 -0
  16. data/lib/bio-ensembl/core/transcript.rb +409 -0
  17. data/lib/bio-ensembl/core/transform.rb +95 -0
  18. data/lib/bio-ensembl/db_connection.rb +205 -0
  19. data/lib/bio-ensembl/variation/activerecord.rb +536 -0
  20. data/lib/bio-ensembl/variation/variation_feature.rb +376 -0
  21. data/lib/bio-ensembl/variation/variation_feature62.rb +444 -0
  22. data/samples/ensembl_genomes_example.rb +60 -0
  23. data/samples/examples_perl_tutorial.rb +125 -0
  24. data/samples/small_example_ruby_api.rb +34 -0
  25. data/samples/variation_effect_predictor_data.txt +4 -0
  26. data/samples/variation_example.rb +67 -0
  27. data/test/data/seq_c6qbl.fa +10 -0
  28. data/test/data/seq_cso19_coding.fa +16 -0
  29. data/test/data/seq_cso19_transcript.fa +28 -0
  30. data/test/data/seq_drd3_gene.fa +838 -0
  31. data/test/data/seq_drd3_transcript.fa +22 -0
  32. data/test/data/seq_drd4_transcript.fa +24 -0
  33. data/test/data/seq_forward_composite.fa +1669 -0
  34. data/test/data/seq_par_boundary.fa +169 -0
  35. data/test/data/seq_rnd3_transcript.fa +47 -0
  36. data/test/data/seq_ub2r1_coding.fa +13 -0
  37. data/test/data/seq_ub2r1_gene.fa +174 -0
  38. data/test/data/seq_ub2r1_transcript.fa +26 -0
  39. data/test/data/seq_y.fa +2 -0
  40. data/test/default/test_connection.rb +60 -0
  41. data/test/default/test_releases.rb +130 -0
  42. data/test/ensembl_genomes/test_collection.rb +122 -0
  43. data/test/ensembl_genomes/test_gene.rb +46 -0
  44. data/test/ensembl_genomes/test_slice.rb +65 -0
  45. data/test/ensembl_genomes/test_variation.rb +38 -0
  46. data/test/helper.rb +18 -0
  47. data/test/release_50/core/test_project.rb +210 -0
  48. data/test/release_50/core/test_project_human.rb +52 -0
  49. data/test/release_50/core/test_relationships.rb +72 -0
  50. data/test/release_50/core/test_sequence.rb +170 -0
  51. data/test/release_50/core/test_slice.rb +116 -0
  52. data/test/release_50/core/test_transcript.rb +125 -0
  53. data/test/release_50/core/test_transform.rb +217 -0
  54. data/test/release_50/variation/test_activerecord.rb +138 -0
  55. data/test/release_50/variation/test_variation.rb +79 -0
  56. data/test/release_53/core/test_gene.rb +61 -0
  57. data/test/release_53/core/test_project.rb +91 -0
  58. data/test/release_53/core/test_project_human.rb +61 -0
  59. data/test/release_53/core/test_slice.rb +42 -0
  60. data/test/release_53/core/test_transform.rb +57 -0
  61. data/test/release_53/variation/test_activerecord.rb +137 -0
  62. data/test/release_53/variation/test_variation.rb +66 -0
  63. data/test/release_56/core/test_gene.rb +61 -0
  64. data/test/release_56/core/test_project.rb +91 -0
  65. data/test/release_56/core/test_slice.rb +49 -0
  66. data/test/release_56/core/test_transform.rb +57 -0
  67. data/test/release_56/variation/test_activerecord.rb +141 -0
  68. data/test/release_56/variation/test_consequence.rb +131 -0
  69. data/test/release_56/variation/test_variation.rb +63 -0
  70. data/test/release_60/core/test_gene.rb +61 -0
  71. data/test/release_60/core/test_project_human.rb +34 -0
  72. data/test/release_60/core/test_slice.rb +42 -0
  73. data/test/release_60/core/test_transcript.rb +120 -0
  74. data/test/release_60/core/test_transform.rb +57 -0
  75. data/test/release_60/variation/test_activerecord.rb +216 -0
  76. data/test/release_60/variation/test_consequence.rb +153 -0
  77. data/test/release_60/variation/test_variation.rb +64 -0
  78. data/test/release_62/core/test_gene.rb +42 -0
  79. data/test/release_62/variation/test_activerecord.rb +86 -0
  80. data/test/release_62/variation/test_consequence.rb +191 -0
  81. metadata +287 -0
@@ -0,0 +1,61 @@
1
+ #
2
+ # = test/unit/release_56/core/test_gene.rb - Unit test for Ensembl::Core
3
+ #
4
+ # Copyright:: Copyright (C) 2009 Francesco Strozzi <francesco.strozzi@gmail.com>
5
+ #
6
+ # License:: Ruby's
7
+ #
8
+ # $Id:
9
+
10
+ require File.expand_path File.join(File.dirname(__FILE__),"../../helper.rb")
11
+
12
+ include Ensembl::Core
13
+
14
+ class TestGene < Test::Unit::TestCase
15
+
16
+ def setup
17
+ DBConnection.connect('homo_sapiens', 56)
18
+ end
19
+
20
+ def teardown
21
+ DBConnection.remove_connection
22
+ end
23
+
24
+ def test_gene
25
+ g = Gene.find_by_stable_id("ENSG00000006451")
26
+ assert_equal("ENSG00000006451",g.stable_id)
27
+ assert_equal("7",g.seq_region.name)
28
+ assert_equal(39663082,g.start)
29
+ assert_equal(39747723 ,g.stop)
30
+ assert_equal(1,g.strand)
31
+ assert_equal(84642,g.seq.length)
32
+ assert_equal("Ras-related protein Ral-A Precursor [Source:UniProtKB/Swiss-Prot;Acc:P11233]",g.description)
33
+ assert_equal("RALA",g.name)
34
+ end
35
+
36
+ def test_transcript
37
+ g = Gene.find_by_stable_id("ENSG00000006451")
38
+ t = g.transcripts
39
+ assert_equal(5,t.size)
40
+ assert_equal("ENST00000434466",t[0].stable_id)
41
+ t = t[0]
42
+ assert_equal(768,t.seq.length)
43
+ end
44
+
45
+ def test_exons
46
+ t = Transcript.find_by_stable_id("ENST00000005257")
47
+ e = t.exons
48
+ assert_equal(5,e.size)
49
+ assert_equal("ENSE00001829077",e[0].stable_id)
50
+ seq1 = "AGGCCCATGATCACCCTCCTCTCAGCCCACGGACAGGAAGTCGCTCCCCAGCTGCCCCGCCCCGCTCCCCAGCGCCCCGGAAGTGATCTGTGGCGGCTGCTGCAGAGCCGCCAGGAGGAGGGTGGATCTCCCCAGAGCAAAGCGTCGGAGTCCTCCTCCTCCTTCTCCTCCTCCTCCTCCTCCTCCTCCAGCCGCCCAGGCTCCCCCGCCACCCGTCAGACTCCTCCTTCGACCGCTCCCGGCGCGGGGCCTTCCAGGCGACAAGGACCGAGTACCCTCCGGCCGGAGCCACGCAGCCGCGGCTTCCGGAGCCCTCGGGGCGGCGGACTGGCTCGCGGTGCAG"
51
+ assert_equal(seq1,e[0].seq.upcase)
52
+ assert_equal(39663082,e[0].start)
53
+ assert_equal(39663424,e[0].stop)
54
+ assert_equal("ENSE00000832451",e[1].stable_id)
55
+ seq2 = "ATTCTTCTTAATCCTTTGGTGAAAACTGAGACACAAAATGGCTGCAAATAAGCCCAAGGGTCAGAATTCTTTGGCTTTACACAAAGTCATCATGGTGGGCAGTGGTGGCGTGGGCAAGTCAGCTCTGACTCTACAGTTCATGTACGATGAG"
56
+ assert_equal(seq2,e[1].seq.upcase)
57
+ assert_equal(39726230,e[1].start)
58
+ assert_equal(39726380,e[1].stop)
59
+ end
60
+
61
+ end
@@ -0,0 +1,91 @@
1
+ #
2
+ # = test/unit/release_56/core/test_project.rb - Unit test for Ensembl::Core
3
+ #
4
+ # Copyright:: Copyright (C) 2009
5
+ # Jan Aerts <http://jandot.myopenid.com>
6
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
7
+ # License:: Ruby's
8
+ #
9
+ # $Id:
10
+
11
+ require File.expand_path File.join(File.dirname(__FILE__),"../../helper.rb")
12
+
13
+ include Ensembl::Core
14
+
15
+ class SliceProjectFromAssemblyToComponentForwardStrands < Test::Unit::TestCase
16
+ def setup
17
+ DBConnection.connect('bos_taurus', 56)
18
+ @source_slice_single_contig = Slice.fetch_by_region('chromosome', '20', 175000, 180000)
19
+ @target_slices_single_contig = @source_slice_single_contig.project('contig')
20
+
21
+ @source_slice_two_contigs = Slice.fetch_by_region('chromosome','20', 175000, 190000)
22
+ @target_slices_two_contigs = @source_slice_two_contigs.project('contig')
23
+
24
+ @source_slice_contigs_with_strand = Slice.fetch_by_region('chromosome', '20', 160000, 190000)
25
+ @target_slices_contigs_with_strand = @source_slice_contigs_with_strand.project('contig')
26
+
27
+ @source_slice_contigs_with_strand_ends_in_gaps = Slice.fetch_by_region('chromosome', '20', 170950, 196000)
28
+ @target_slices_contigs_with_strand_ends_in_gaps = @source_slice_contigs_with_strand_ends_in_gaps.project('contig')
29
+ end
30
+
31
+ def teardown
32
+ DBConnection.remove_connection
33
+ end
34
+
35
+ # |-----------------> contig
36
+ # ^ ^
37
+ # | |
38
+ # |------------------------------------------> chromosome
39
+ def test_project_from_assembly_to_single_component
40
+ # Position 175000 on chr20 is position 4030 on contig, position 180000 is 9030
41
+ assert_equal('AAFC03028970', @target_slices_single_contig[0].seq_region.name)
42
+ assert_equal(4030, @target_slices_single_contig[0].start)
43
+ assert_equal(9030, @target_slices_single_contig[0].stop)
44
+ end
45
+
46
+ # |-----> |--------> contig
47
+ # ^ ^
48
+ # | |
49
+ # |------------------------------------------> chromosome
50
+ def test_project_from_assembly_to_two_components
51
+ # This chromosomal region is covered by contigs AAFC03028970, a gap and AAFC03028962
52
+ # * Position 175000 on chr 20 is position 4030 on contig AAFC03028970
53
+ # * Position 190000 on chr 20 is position 35 on contig AAFC03028962
54
+ assert_equal(3, @target_slices_two_contigs.length)
55
+ assert_equal('contig:Btau_4.0:AAFC03028970:4030:17365:1', @target_slices_two_contigs[0].display_name)
56
+ assert_equal(Gap, @target_slices_two_contigs[1].class)
57
+ assert_equal('contig:Btau_4.0:AAFC03028962:1:35:1', @target_slices_two_contigs[2].display_name)
58
+ end
59
+
60
+ # |-----> <-------| |-------> |-------> contig
61
+ # ^ ^
62
+ # | |
63
+ # |--------------------------------------------------> chromosome
64
+ def test_project_from_assembly_to_contigs_with_strand
65
+ # This chromosomal region is covered by 4 contigs and 3 gaps
66
+ # One of the contigs is on the reverse strand.
67
+ assert_equal(7, @target_slices_contigs_with_strand.length)
68
+ assert_equal('contig:Btau_4.0:AAFC03028964:90:9214:1', @target_slices_contigs_with_strand[0].display_name)
69
+ assert_equal(Gap, @target_slices_contigs_with_strand[1].class)
70
+ assert_equal('contig:Btau_4.0:AAFC03028959:1:1746:-1', @target_slices_contigs_with_strand[2].display_name)
71
+ assert_equal(Gap, @target_slices_contigs_with_strand[3].class)
72
+ assert_equal('contig:Btau_4.0:AAFC03028970:1:17365:1', @target_slices_contigs_with_strand[4].display_name)
73
+ assert_equal(Gap, @target_slices_contigs_with_strand[5].class)
74
+ assert_equal('contig:Btau_4.0:AAFC03028962:1:35:1', @target_slices_contigs_with_strand[6].display_name)
75
+ end
76
+
77
+ # <--| |-----> contig
78
+ # ^ ^
79
+ # | |
80
+ # |--------------------------------------------------> chromosome
81
+ def test_project_from_assembly_to_contigs_with_strand_and_ending_in_gaps
82
+ # This chromosomal region is covered by 2 contigs and 3 gaps, with a gap at each end: GaCoGaCoGa
83
+ assert_equal(5, @target_slices_contigs_with_strand_ends_in_gaps.length)
84
+ assert_equal(Gap, @target_slices_contigs_with_strand_ends_in_gaps[0].class)
85
+ assert_equal('contig:Btau_4.0:AAFC03028970:1:17365:1', @target_slices_contigs_with_strand_ends_in_gaps[1].display_name)
86
+ assert_equal(Gap, @target_slices_contigs_with_strand_ends_in_gaps[2].class)
87
+ assert_equal('contig:Btau_4.0:AAFC03028962:1:5704:1', @target_slices_contigs_with_strand_ends_in_gaps[3].display_name)
88
+ assert_equal(Gap, @target_slices_contigs_with_strand_ends_in_gaps[4].class)
89
+ end
90
+
91
+ end
@@ -0,0 +1,49 @@
1
+ #
2
+ # = test/unit/release_56/core/test_slice.rb - Unit test for Ensembl::Core
3
+ #
4
+ # Copyright:: Copyright (C) 2009
5
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
6
+ # License:: Ruby's
7
+ #
8
+ # $Id:
9
+
10
+ require File.expand_path File.join(File.dirname(__FILE__),"../../helper.rb")
11
+
12
+ include Ensembl::Core
13
+
14
+ class TestSlice < Test::Unit::TestCase
15
+
16
+ def setup
17
+ DBConnection.connect('homo_sapiens', 56)
18
+ end
19
+
20
+ def teardown
21
+ DBConnection.remove_connection
22
+ end
23
+
24
+ def test_forward
25
+ seq1 = "GGCCAAGCTGGTCTCAAACTCCTGACCTCGTGATCCGCCCACCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGGGCCACTGGGTCCGGCCGCATGTCTGGCTAATTTTGTAGAGACAGGGCCTCCCTATACTGCCCAGGCTGGTTTCAAGCTCCTGGCTTCAAGGGGTCCTCCTGCCTTGGCCTCCCGAAGTGCTGG"
26
+ s = Slice.fetch_by_region("chromosome","13",31786617,31786816,1)
27
+ assert_equal(seq1,s.seq.upcase)
28
+ assert_equal("13",s.seq_region.name)
29
+ assert_equal(31786617,s.start)
30
+ assert_equal(31786816,s.stop)
31
+ end
32
+
33
+ def test_reverse
34
+ seq2 = "CCAGCACTTCGGGAGGCCAAGGCAGGAGGACCCCTTGAAGCCAGGAGCTTGAAACCAGCCTGGGCAGTATAGGGAGGCCCTGTCTCTACAAAATTAGCCAGACATGCGGCCGGACCCAGTGGCCCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGTGGGCGGATCACGAGGTCAGGAGTTTGAGACCAGCTTGGCC"
35
+ s_rev = Slice.fetch_by_region("chromosome","13",31786617,31786816,-1)
36
+ assert_equal(seq2,s_rev.seq.upcase)
37
+ assert_equal("13",s_rev.seq_region.name)
38
+ assert_equal(31786617,s_rev.start)
39
+ assert_equal(31786816,s_rev.stop)
40
+ end
41
+
42
+ def test_slice_genes
43
+ slice = Slice.fetch_by_region("chromosome","13",31773073,31909413,1)
44
+ genes = slice.genes
45
+ assert_equal("ENSG00000229427",genes[1].stable_id)
46
+ assert_equal("ENSG00000187676",genes[0].stable_id)
47
+ end
48
+
49
+ end
@@ -0,0 +1,57 @@
1
+ #
2
+ # = test/unit/release_56/core/test_transform.rb - Unit test for Ensembl::Core
3
+ #
4
+ # Copyright:: Copyright (C) 2009
5
+ # Jan Aerts <http://jandot.myopenid.com>
6
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
7
+ # License:: Ruby's
8
+ #
9
+ # $Id:
10
+
11
+ require File.expand_path File.join(File.dirname(__FILE__),"../../helper.rb")
12
+
13
+ include Ensembl::Core
14
+
15
+
16
+ # For all tests, the source (i.e. the seq_region that the feature is annotated
17
+ # on initially) remains forward.
18
+ #
19
+ # Same coordinate system: test names refer to direction of gene vs chromosome
20
+ class TransformOntoSameCoordinateSystem < Test::Unit::TestCase
21
+
22
+ def setup
23
+ DBConnection.connect('homo_sapiens', 56)
24
+ end
25
+
26
+ def teardown
27
+ DBConnection.remove_connection
28
+ end
29
+
30
+ def test_rev
31
+ source_gene = Gene.find_by_stable_id("ENSG00000165322")
32
+ target_gene = source_gene.transform('chromosome')
33
+
34
+ assert_equal('10', source_gene.seq_region.name)
35
+ assert_equal(32094365, source_gene.seq_region_start)
36
+ assert_equal(32217770, source_gene.seq_region_end)
37
+ assert_equal(-1, source_gene.seq_region_strand)
38
+ assert_equal('10', target_gene.seq_region.name)
39
+ assert_equal(32094365, target_gene.seq_region_start)
40
+ assert_equal(32217770, target_gene.seq_region_end)
41
+ assert_equal(-1, target_gene.seq_region_strand)
42
+ end
43
+
44
+ def test_fw
45
+ source_gene = Gene.find_by_stable_id("ENSG00000133401")
46
+ target_gene = source_gene.transform('chromosome')
47
+ assert_equal('5', source_gene.seq_region.name)
48
+ assert_equal(31639517, source_gene.seq_region_start)
49
+ assert_equal(32111037, source_gene.seq_region_end)
50
+ assert_equal(1, source_gene.seq_region_strand)
51
+ assert_equal('5', target_gene.seq_region.name)
52
+ assert_equal(31639517, target_gene.seq_region_start)
53
+ assert_equal(32111037, target_gene.seq_region_end)
54
+ assert_equal(1, target_gene.seq_region_strand)
55
+ end
56
+
57
+ end
@@ -0,0 +1,141 @@
1
+ #
2
+ # = test/unit/release_56/variation/test_activerecord.rb - Unit test for Ensembl::Variation
3
+ #
4
+ # Copyright:: Copyright (C) 2009
5
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
6
+ #
7
+ # License:: Ruby's
8
+ #
9
+ # $Id:
10
+ require File.expand_path File.join(File.dirname(__FILE__),"../../helper.rb")
11
+
12
+ include Ensembl::Variation
13
+
14
+ class ActiveRecordVariation < Test::Unit::TestCase
15
+
16
+ def setup
17
+ DBConnection.connect('homo_sapiens',56)
18
+ end
19
+
20
+ def teardown
21
+ DBConnection.remove_connection
22
+ end
23
+
24
+ def test_allele
25
+ allele = Allele.find(1)
26
+ assert_equal('T', allele.allele)
27
+ assert_equal(0.04, allele.frequency)
28
+ end
29
+
30
+ def test_sample
31
+ n = Sample.count(:all)
32
+ assert_equal(17822,n)
33
+ individual = Sample.find(12468).individual
34
+ assert_equal('Male',individual.gender)
35
+ i = Sample.find(13131).individual_genotype_multiple_bp
36
+ assert_equal(3,i.size)
37
+ assert_equal(17510,i[0].variation_id)
38
+ syn = Sample.find(21).sample_synonym
39
+ assert_equal('477',syn.name)
40
+ end
41
+
42
+ def test_individual
43
+ n = Individual.count(:all)
44
+ assert_equal(10132,n)
45
+ end
46
+
47
+ def test_individual_genotype_multiple_bp
48
+ n = IndividualGenotypeMultipleBp.count(:all)
49
+ assert_equal(712267,n)
50
+ end
51
+
52
+ def test_compressed_genotype_single_bp
53
+ n = CompressedGenotypeSingleBp.count(:all)
54
+ assert_equal(12736658,n)
55
+ end
56
+
57
+ def test_read_coverage
58
+ n = ReadCoverage.count(:all)
59
+ assert_equal(6521608,n)
60
+ end
61
+
62
+ def test_population
63
+ n = Population.count(:all)
64
+ assert_equal(7690,n)
65
+ end
66
+
67
+ def test_variation
68
+ n = Variation.count(:all)
69
+ assert_equal(18909925,n)
70
+
71
+ syn = Variation.find(712422).variation_synonyms
72
+ assert_equal(6,syn.size)
73
+ assert_equal('SNP_A-1507972',syn[0].name)
74
+
75
+ flanking = Variation.find(10000).flanking_sequence
76
+ assert_equal(3705521,flanking.up_seq_region_start)
77
+ assert_equal(3705770,flanking.up_seq_region_end)
78
+ assert_equal(3705772,flanking.down_seq_region_start)
79
+ assert_equal(3706021,flanking.down_seq_region_end)
80
+ assert_equal(27509,flanking.seq_region_id)
81
+ assert_equal(1,flanking.seq_region_strand)
82
+
83
+ ag = Variation.find(10000).allele_groups
84
+ assert_nil ag[0]
85
+
86
+ pg = Variation.find(10000).population_genotypes
87
+ assert_equal(12,pg.size)
88
+ assert_equal('C',pg[0].allele_1)
89
+ assert_equal('C',pg[0].allele_2)
90
+ assert_equal(1,pg[0].frequency)
91
+
92
+ a = Variation.find(115).alleles
93
+ assert_equal(8,a.size)
94
+ assert_equal('C',a[0].allele)
95
+ assert_equal(0.733,a[0].frequency)
96
+
97
+ vf = Variation.find(5345540).variation_features[0]
98
+ assert_equal('T/A',vf.allele_string)
99
+ assert_equal('rs8189747',vf.variation_name)
100
+ assert_equal(27526,vf.seq_region_id)
101
+ assert_equal(24606076,vf.seq_region_start)
102
+ assert_equal(24606076,vf.seq_region_end)
103
+ assert_equal(1,vf.seq_region_strand)
104
+
105
+ vg = Variation.find(1352735).variation_groups
106
+ assert_nil vg[0]
107
+
108
+ i = Variation.find(1533176).individual_genotype_multiple_bps
109
+ assert_equal(42,i.size)
110
+ end
111
+
112
+ def test_variation_feature
113
+ vf_sample = VariationFeature.find(38461).samples
114
+ assert_equal(8,vf_sample.size)
115
+ assert_equal('PERLEGEN:AFD_EUR_PANEL',vf_sample[0].name)
116
+ end
117
+
118
+ def test_variation_transcript
119
+ t = Variation.find_by_name('rs7671997').variation_features[0].transcript_variations
120
+ assert_equal(2,t.size)
121
+ assert_equal('INTRONIC',t[0].consequence_type)
122
+ assert_equal('5PRIME_UTR',t[1].consequence_type)
123
+ assert_equal(15008544,t[0].variation_feature_id)
124
+ transcript = t[0].transcript
125
+ assert_equal('protein_coding',transcript.biotype)
126
+ assert_equal(2230096,transcript.seq_region_start)
127
+ assert_equal(2243860,transcript.seq_region_end)
128
+ assert_equal('ENST00000243706',transcript.stable_id)
129
+ e = transcript.exons
130
+ assert_equal('CTCCCGTGAGGCAGTGCGAGGCGCGCGGGGCACGGAGGGCGGTGGCGGCGGGCTCCTGCGAGAAGCAAGCGGAACTTCCTGAG',e[0].seq.upcase)
131
+
132
+ end
133
+
134
+ def test_source
135
+ syn = Source.find(1).sample_synonyms
136
+ assert_equal(17806,syn.size)
137
+
138
+ ag = Source.find(1).allele_groups
139
+ assert_nil ag[0]
140
+ end
141
+ end
@@ -0,0 +1,131 @@
1
+ #
2
+ # = test/unit/release_56/variation/test_consequence.rb - Unit test for Ensembl::Variation
3
+ #
4
+ # Copyright:: Copyright (C) 2009
5
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
6
+ # License:: Ruby's
7
+ #
8
+ require File.expand_path File.join(File.dirname(__FILE__),"../../helper.rb")
9
+
10
+ include Ensembl::Variation
11
+
12
+ class TestVariation < Test::Unit::TestCase
13
+
14
+ def setup
15
+ DBConnection.connect('homo_sapiens',56)
16
+ end
17
+
18
+ def teardown
19
+ DBConnection.remove_connection
20
+ end
21
+
22
+ def test_calculations
23
+
24
+ # INTERGENIC
25
+ vf = VariationFeature.new(:seq_region_id => SeqRegion.find_by_name("X").seq_region_id, :seq_region_start => 23694, :seq_region_end => 23694, :seq_region_strand => 1, :allele_string => "A/T",:variation_name => "fake_SNP")
26
+ tv = vf.transcript_variations
27
+ assert_instance_of(TranscriptVariation,tv[0])
28
+ assert_equal("INTERGENIC",tv[0].consequence_type)
29
+
30
+
31
+ # 3PRIME_UTR
32
+ vf = VariationFeature.new(:seq_region_id => 27506, :seq_region_start => 96810688, :seq_region_end => 96810688, :seq_region_strand => 1, :allele_string => "G/C", :variation_name => "rs16869283")
33
+ tv = vf.transcript_variations
34
+ assert_equal("3PRIME_UTR", tv[1].consequence_type)
35
+
36
+ # 5PRIME_UTR
37
+ vf = VariationFeature.new(:seq_region_id => 27506, :seq_region_start => 158536411, :seq_region_end => 158536411, :seq_region_strand => 1, :allele_string => "T/C", :variation_name => "rs71547565")
38
+ tv = vf.transcript_variations
39
+ assert_equal("5PRIME_UTR", tv[3].consequence_type)
40
+
41
+ # UPSTREAM
42
+ vf = VariationFeature.new(:seq_region_id => 27506, :seq_region_start => 96831018, :seq_region_end => 96831018, :seq_region_strand => 1, :allele_string => "G/T", :variation_name => "rs6975185")
43
+ tv = vf.transcript_variations
44
+ assert_equal("UPSTREAM",tv[0].consequence_type)
45
+
46
+ # DOWNSTREAM
47
+ vf = VariationFeature.new(:seq_region_id => 27506, :seq_region_start => 105727321, :seq_region_end => 105727321, :seq_region_strand => 1, :allele_string => "G/T", :variation_name => "rs35113830")
48
+ tv = vf.transcript_variations
49
+ assert_equal("DOWNSTREAM",tv[0].consequence_type)
50
+
51
+ # WITHIN_MATURE_miRNA
52
+ vf = VariationFeature.new(:seq_region_id => 27518, :seq_region_start => 135895002, :seq_region_end => 135895002, :seq_region_strand => 1, :allele_string => "A/G", :variation_name => "rs11266800")
53
+ tv = vf.transcript_variations
54
+ assert_equal("WITHIN_MATURE_miRNA",tv[2].consequence_type)
55
+
56
+ # WITHIN_NON_CODING_GENE
57
+ vf = VariationFeature.new(:seq_region_id => 27506, :seq_region_start => 97601052, :seq_region_end => 97601052, :seq_region_strand => 1, :allele_string => "G/A", :variation_name => "rs13245475")
58
+ tv = vf.transcript_variations
59
+ assert_equal("WITHIN_NON_CODING_GENE",tv[1].consequence_type)
60
+
61
+ # COMPLEX_INDEL
62
+ vf = VariationFeature.new(:seq_region_id => 27752, :seq_region_start => 37529, :seq_region_end => 37535, :seq_region_strand => 1, :allele_string => "CCACCCA/ACACCCG", :variation_name => "rs71228679")
63
+ tv = vf.transcript_variations
64
+ assert_equal("COMPLEX_INDEL",tv[0].consequence_type)
65
+
66
+ # ESSENTIAL_SPLICE_SITE
67
+ vf = VariationFeature.new(:seq_region_id => 27511, :seq_region_start => 818059, :seq_region_end => 818059, :seq_region_strand => 1, :allele_string => "G/T", :variation_name => "rs3888067")
68
+ tv = vf.transcript_variations
69
+ assert_equal("ESSENTIAL_SPLICE_SITE",tv[0].consequence_type)
70
+
71
+ # SPLICE_SITE
72
+ vf = VariationFeature.new(:seq_region_id => 27506, :seq_region_start => 102301587, :seq_region_end => 102301587, :seq_region_strand => 1, :allele_string => "A/G", :variation_name => "rs434833")
73
+ tv = vf.transcript_variations
74
+ assert_equal("SPLICE_SITE",tv[5].consequence_type)
75
+
76
+ # INTRONIC
77
+ vf = VariationFeature.new(:seq_region_id => 27506, :seq_region_start => 101165365, :seq_region_end => 101165365, :seq_region_strand => 1, :allele_string => "T/C", :variation_name => "rs1859633")
78
+ tv = vf.transcript_variations
79
+ assert_equal("INTRONIC",tv[2].consequence_type)
80
+
81
+ # FRAMESHIFT_CODING
82
+ vf = VariationFeature.new(:seq_region_id => 27511, :seq_region_start => 78958619, :seq_region_end => 78958618, :seq_region_strand => 1, :allele_string => "-/G", :variation_name => "rs35065683")
83
+ tv = vf.transcript_variations
84
+ assert_equal("FRAMESHIFT_CODING",tv[0].consequence_type)
85
+
86
+ # STOP_GAINED
87
+ vf = VariationFeature.new(:seq_region_id => 27516, :seq_region_start => 38262908, :seq_region_end => 38262908, :seq_region_strand => 1, :allele_string => "G/A", :variation_name => "rs72556299")
88
+ tv = vf.transcript_variations
89
+ assert_equal("STOP_GAINED",tv[-1].consequence_type)
90
+
91
+ # STOP_LOST
92
+ vf = VariationFeature.new(:seq_region_id => 27511, :seq_region_start => 152770613, :seq_region_end => 152770613, :seq_region_strand => 1, :allele_string => "T/G", :variation_name => "rs41268500")
93
+ tv = vf.transcript_variations
94
+ assert_equal("STOP_LOST",tv[0].consequence_type)
95
+
96
+ # SYNONYMOUS_CODING
97
+ vf = VariationFeature.new(:seq_region_id => 27506, :seq_region_start => 99688238, :seq_region_end => 99688238, :seq_region_strand => 1, :allele_string => "C/T", :variation_name => "rs11550651")
98
+ tv = vf.transcript_variations
99
+ assert_equal("SYNONYMOUS_CODING",tv[0].consequence_type)
100
+
101
+ # NON_SYNONYMOUS_CODING
102
+ vf = VariationFeature.new(:seq_region_id => 27506, :seq_region_start => 99057720, :seq_region_end => 99057720, :seq_region_strand => 1, :allele_string => "G/A", :variation_name => "rs11545970")
103
+ tv = vf.transcript_variations
104
+ assert_equal("NON_SYNONYMOUS_CODING",tv[1].consequence_type)
105
+
106
+ end
107
+
108
+ # Checking cDNA coordinates calculation
109
+
110
+ def test_genomic2cdna_fw # forward strand (variation rs67960011)
111
+ t = Ensembl::Core::Transcript.find_by_stable_id("ENST00000039007")
112
+ assert_equal(573,t.genomic2cdna(38260562))
113
+ end
114
+
115
+ def test_cdna2genomic_fw # forward strand (variation rs67960011)
116
+ t = Ensembl::Core::Transcript.find_by_stable_id("ENST00000039007")
117
+ assert_equal(38260562,t.cdna2genomic(573))
118
+ end
119
+
120
+ def test_genomic2cdna_rev # reverse strand (variation rs11545970)
121
+ t = Ensembl::Core::Transcript.find_by_stable_id("ENST00000422429")
122
+ assert_equal(110,t.genomic2cdna(99057720))
123
+ end
124
+
125
+ def test_cdna2genomic_rev # reverse strand (variation rs11545970)
126
+ t = Ensembl::Core::Transcript.find_by_stable_id("ENST00000422429")
127
+ assert_equal(99057720,t.cdna2genomic(110))
128
+ end
129
+
130
+
131
+ end