bio-ensembl 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (81) hide show
  1. data/.document +5 -0
  2. data/Gemfile +20 -0
  3. data/Gemfile.lock +40 -0
  4. data/LICENSE.txt +20 -0
  5. data/README.rdoc +19 -0
  6. data/Rakefile +71 -0
  7. data/VERSION +1 -0
  8. data/bin/ensembl +40 -0
  9. data/bin/variation_effect_predictor +106 -0
  10. data/bio-ensembl.gemspec +190 -0
  11. data/lib/bio-ensembl.rb +65 -0
  12. data/lib/bio-ensembl/core/activerecord.rb +1812 -0
  13. data/lib/bio-ensembl/core/collection.rb +64 -0
  14. data/lib/bio-ensembl/core/project.rb +262 -0
  15. data/lib/bio-ensembl/core/slice.rb +657 -0
  16. data/lib/bio-ensembl/core/transcript.rb +409 -0
  17. data/lib/bio-ensembl/core/transform.rb +95 -0
  18. data/lib/bio-ensembl/db_connection.rb +205 -0
  19. data/lib/bio-ensembl/variation/activerecord.rb +536 -0
  20. data/lib/bio-ensembl/variation/variation_feature.rb +376 -0
  21. data/lib/bio-ensembl/variation/variation_feature62.rb +444 -0
  22. data/samples/ensembl_genomes_example.rb +60 -0
  23. data/samples/examples_perl_tutorial.rb +125 -0
  24. data/samples/small_example_ruby_api.rb +34 -0
  25. data/samples/variation_effect_predictor_data.txt +4 -0
  26. data/samples/variation_example.rb +67 -0
  27. data/test/data/seq_c6qbl.fa +10 -0
  28. data/test/data/seq_cso19_coding.fa +16 -0
  29. data/test/data/seq_cso19_transcript.fa +28 -0
  30. data/test/data/seq_drd3_gene.fa +838 -0
  31. data/test/data/seq_drd3_transcript.fa +22 -0
  32. data/test/data/seq_drd4_transcript.fa +24 -0
  33. data/test/data/seq_forward_composite.fa +1669 -0
  34. data/test/data/seq_par_boundary.fa +169 -0
  35. data/test/data/seq_rnd3_transcript.fa +47 -0
  36. data/test/data/seq_ub2r1_coding.fa +13 -0
  37. data/test/data/seq_ub2r1_gene.fa +174 -0
  38. data/test/data/seq_ub2r1_transcript.fa +26 -0
  39. data/test/data/seq_y.fa +2 -0
  40. data/test/default/test_connection.rb +60 -0
  41. data/test/default/test_releases.rb +130 -0
  42. data/test/ensembl_genomes/test_collection.rb +122 -0
  43. data/test/ensembl_genomes/test_gene.rb +46 -0
  44. data/test/ensembl_genomes/test_slice.rb +65 -0
  45. data/test/ensembl_genomes/test_variation.rb +38 -0
  46. data/test/helper.rb +18 -0
  47. data/test/release_50/core/test_project.rb +210 -0
  48. data/test/release_50/core/test_project_human.rb +52 -0
  49. data/test/release_50/core/test_relationships.rb +72 -0
  50. data/test/release_50/core/test_sequence.rb +170 -0
  51. data/test/release_50/core/test_slice.rb +116 -0
  52. data/test/release_50/core/test_transcript.rb +125 -0
  53. data/test/release_50/core/test_transform.rb +217 -0
  54. data/test/release_50/variation/test_activerecord.rb +138 -0
  55. data/test/release_50/variation/test_variation.rb +79 -0
  56. data/test/release_53/core/test_gene.rb +61 -0
  57. data/test/release_53/core/test_project.rb +91 -0
  58. data/test/release_53/core/test_project_human.rb +61 -0
  59. data/test/release_53/core/test_slice.rb +42 -0
  60. data/test/release_53/core/test_transform.rb +57 -0
  61. data/test/release_53/variation/test_activerecord.rb +137 -0
  62. data/test/release_53/variation/test_variation.rb +66 -0
  63. data/test/release_56/core/test_gene.rb +61 -0
  64. data/test/release_56/core/test_project.rb +91 -0
  65. data/test/release_56/core/test_slice.rb +49 -0
  66. data/test/release_56/core/test_transform.rb +57 -0
  67. data/test/release_56/variation/test_activerecord.rb +141 -0
  68. data/test/release_56/variation/test_consequence.rb +131 -0
  69. data/test/release_56/variation/test_variation.rb +63 -0
  70. data/test/release_60/core/test_gene.rb +61 -0
  71. data/test/release_60/core/test_project_human.rb +34 -0
  72. data/test/release_60/core/test_slice.rb +42 -0
  73. data/test/release_60/core/test_transcript.rb +120 -0
  74. data/test/release_60/core/test_transform.rb +57 -0
  75. data/test/release_60/variation/test_activerecord.rb +216 -0
  76. data/test/release_60/variation/test_consequence.rb +153 -0
  77. data/test/release_60/variation/test_variation.rb +64 -0
  78. data/test/release_62/core/test_gene.rb +42 -0
  79. data/test/release_62/variation/test_activerecord.rb +86 -0
  80. data/test/release_62/variation/test_consequence.rb +191 -0
  81. metadata +287 -0
@@ -0,0 +1,153 @@
1
+ #
2
+ # = test/unit/release_60/variation/test_consequence.rb - Unit test for Ensembl::Variation
3
+ #
4
+ # Copyright:: Copyright (C) 2009
5
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
6
+ # License:: Ruby's
7
+ #
8
+ require File.expand_path File.join(File.dirname(__FILE__),"../../helper.rb")
9
+
10
+ include Ensembl::Variation
11
+ DBConnection.connect('homo_sapiens',60)
12
+
13
+ class TestVariation < Test::Unit::TestCase
14
+
15
+ def test_intergenic
16
+ # INTERGENIC
17
+ vf = VariationFeature.new(:seq_region_id => SeqRegion.find_by_name("X").seq_region_id, :seq_region_start => 23694, :seq_region_end => 23694, :seq_region_strand => 1, :allele_string => "A/T",:variation_name => "fake_SNP")
18
+ tv = vf.transcript_variations
19
+ assert_instance_of(TranscriptVariation,tv[0])
20
+ assert_equal("INTERGENIC",tv[0].consequence_type)
21
+ end
22
+
23
+ def test_3prime
24
+ # 3PRIME_UTR
25
+ vf = VariationFeature.new(:seq_region_id => 27506, :seq_region_start => 96810688, :seq_region_end => 96810688, :seq_region_strand => 1, :allele_string => "G/C", :variation_name => "rs16869283")
26
+ tv = vf.transcript_variations
27
+ assert_equal("3PRIME_UTR", tv[0].consequence_type)
28
+ end
29
+
30
+ def test_5prime
31
+ # 5PRIME_UTR
32
+ vf = VariationFeature.new(:seq_region_id => 27506, :seq_region_start => 158536411, :seq_region_end => 158536411, :seq_region_strand => 1, :allele_string => "T/C", :variation_name => "rs71547565")
33
+ tv = vf.transcript_variations
34
+ assert_equal("5PRIME_UTR", tv[3].consequence_type)
35
+ end
36
+
37
+ def test_upstream
38
+ # UPSTREAM
39
+ vf = VariationFeature.new(:seq_region_id => 27506, :seq_region_start => 96831018, :seq_region_end => 96831018, :seq_region_strand => 1, :allele_string => "G/T", :variation_name => "rs6975185")
40
+ tv = vf.transcript_variations
41
+ assert_equal("UPSTREAM",tv[0].consequence_type)
42
+ end
43
+
44
+ def test_downstream
45
+ # DOWNSTREAM
46
+ vf = VariationFeature.new(:seq_region_id => 27506, :seq_region_start => 105727321, :seq_region_end => 105727321, :seq_region_strand => 1, :allele_string => "G/T", :variation_name => "rs35113830")
47
+ tv = vf.transcript_variations
48
+ assert_equal("DOWNSTREAM",tv[-1].consequence_type)
49
+ end
50
+
51
+ def test_mirna
52
+ # WITHIN_MATURE_miRNA
53
+ vf = VariationFeature.new(:seq_region_id => 27527, :seq_region_start => 175878848, :seq_region_end => 175878848, :seq_region_strand => 1, :allele_string => "C/T", :variation_name => "rs12716316")
54
+ tv = vf.transcript_variations
55
+ assert_equal("WITHIN_MATURE_miRNA",tv[-1].consequence_type)
56
+ end
57
+
58
+
59
+ def test_non_coding
60
+ # WITHIN_NON_CODING_GENE
61
+ vf = VariationFeature.new(:seq_region_id => 27506, :seq_region_start => 97601052, :seq_region_end => 97601052, :seq_region_strand => 1, :allele_string => "G/A", :variation_name => "rs13245475")
62
+ tv = vf.transcript_variations
63
+ assert_equal("WITHIN_NON_CODING_GENE",tv[1].consequence_type)
64
+ end
65
+
66
+ def test_complex_indel
67
+ # COMPLEX_INDEL
68
+ vf = VariationFeature.new(:seq_region_id => 27515, :seq_region_start => 31902068, :seq_region_end => 31902095, :seq_region_strand => 1, :allele_string => "GTGGACAGGGTCAGGAATCAGGAGTCTG/-", :variation_name => "rs9332736")
69
+ tv = vf.transcript_variations
70
+ assert_equal("COMPLEX_INDEL",tv[6].consequence_type)
71
+ end
72
+
73
+ def test_essential_splice
74
+ # ESSENTIAL_SPLICE_SITE
75
+ vf = VariationFeature.new(:seq_region_id => 27515, :seq_region_start => 33385862, :seq_region_end => 33385862, :seq_region_strand => 1, :allele_string => "A/G", :variation_name => "GA005718")
76
+ tv = vf.transcript_variations
77
+ assert_equal("ESSENTIAL_SPLICE_SITE",tv[21].consequence_type)
78
+ end
79
+
80
+ def test_splice_site
81
+ # SPLICE_SITE
82
+ vf = VariationFeature.new(:seq_region_id => 27506, :seq_region_start => 102301587, :seq_region_end => 102301587, :seq_region_strand => 1, :allele_string => "A/G", :variation_name => "rs434833")
83
+ tv = vf.transcript_variations
84
+ assert_equal("SPLICE_SITE",tv[5].consequence_type)
85
+ end
86
+
87
+ def test_intronic
88
+ # INTRONIC
89
+ vf = VariationFeature.new(:seq_region_id => 27515, :seq_region_start => 31902068, :seq_region_end => 31902095, :seq_region_strand => 1, :allele_string => "GTGGACAGGGTCAGGAATCAGGAGTCTG/-", :variation_name => "rs9332736")
90
+ tv = vf.transcript_variations
91
+ assert_equal("INTRONIC",tv[3].consequence_type)
92
+ end
93
+
94
+ def test_frameshift
95
+ # FRAMESHIFT_CODING
96
+ vf = VariationFeature.new(:seq_region_id => 27511, :seq_region_start => 78958619, :seq_region_end => 78958618, :seq_region_strand => 1, :allele_string => "-/G", :variation_name => "rs35065683")
97
+ tv = vf.transcript_variations
98
+ assert_equal("FRAMESHIFT_CODING",tv[1].consequence_type)
99
+ end
100
+
101
+ def test_stop_gained
102
+ # STOP_GAINED
103
+ vf = VariationFeature.new(:seq_region_id => 27516, :seq_region_start => 38262908, :seq_region_end => 38262908, :seq_region_strand => 1, :allele_string => "G/A", :variation_name => "rs72556299")
104
+ tv = vf.transcript_variations
105
+ assert_equal("STOP_GAINED",tv[-1].consequence_type)
106
+ end
107
+
108
+ def test_stop_lost
109
+ # STOP_LOST
110
+ vf = VariationFeature.new(:seq_region_id => 27511, :seq_region_start => 152770613, :seq_region_end => 152770613, :seq_region_strand => 1, :allele_string => "T/G", :variation_name => "rs41268500")
111
+ tv = vf.transcript_variations
112
+ assert_equal("STOP_LOST",tv[0].consequence_type)
113
+ end
114
+
115
+ def test_synonymous
116
+ # SYNONYMOUS_CODING
117
+ vf = VariationFeature.new(:seq_region_id => 27506, :seq_region_start => 99688238, :seq_region_end => 99688238, :seq_region_strand => 1, :allele_string => "C/T", :variation_name => "rs11550651")
118
+ tv = vf.transcript_variations
119
+ assert_equal("SYNONYMOUS_CODING",tv[5].consequence_type)
120
+ end
121
+
122
+ def test_non_synonymous
123
+ # NON_SYNONYMOUS_CODING
124
+ vf = VariationFeature.new(:seq_region_id => 27506, :seq_region_start => 99057720, :seq_region_end => 99057720, :seq_region_strand => 1, :allele_string => "G/A", :variation_name => "rs11545970")
125
+ tv = vf.transcript_variations
126
+ assert_equal("NON_SYNONYMOUS_CODING",tv[9].consequence_type)
127
+ assert_equal("A/V",tv[9].peptide_allele_string)
128
+ end
129
+
130
+ # Checking CDNA coordinates calculation
131
+
132
+ def test_genomic2cdna_fw # forward strand (variation rs67960011)
133
+ t = Ensembl::Core::Transcript.find_by_stable_id("ENST00000039007")
134
+ assert_equal(573,t.genomic2cdna(38260562))
135
+ end
136
+
137
+ def test_cdna2genomic_fw # forward strand (variation rs67960011)
138
+ t = Ensembl::Core::Transcript.find_by_stable_id("ENST00000039007")
139
+ assert_equal(38260562,t.cdna2genomic(573))
140
+ end
141
+
142
+ def test_genomic2cdna_rev # reverse strand (variation rs11545970)
143
+ t = Ensembl::Core::Transcript.find_by_stable_id("ENST00000422429")
144
+ assert_equal(110,t.genomic2cdna(99057720))
145
+ end
146
+
147
+ def test_cdna2genomic_rev # reverse strand (variation rs11545970)
148
+ t = Ensembl::Core::Transcript.find_by_stable_id("ENST00000422429")
149
+ assert_equal(99057720,t.cdna2genomic(110))
150
+ end
151
+
152
+
153
+ end
@@ -0,0 +1,64 @@
1
+ #
2
+ # = test/unit/release_60/variation/test_variation.rb - Unit test for Ensembl::Variation
3
+ #
4
+ # Copyright:: Copyright (C) 2009
5
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
6
+ # License:: Ruby's
7
+ #
8
+ require File.expand_path File.join(File.dirname(__FILE__),"../../helper.rb")
9
+
10
+ include Ensembl::Variation
11
+
12
+ class TestVariation < Test::Unit::TestCase
13
+
14
+ def setup
15
+ DBConnection.connect('homo_sapiens',60)
16
+ end
17
+
18
+ def teardown
19
+ DBConnection.remove_connection
20
+ end
21
+
22
+ def test_fetch_region
23
+ vf = Variation.find_by_name('rs2076175').variation_features[0]
24
+ slice = vf.fetch_region
25
+ assert_equal(29708370,slice.start)
26
+ assert_equal(29718370,slice.stop)
27
+ assert_equal('6',slice.seq_region.name)
28
+ slice = vf.fetch_region(30,30)
29
+ assert_equal(29713340,slice.start)
30
+ assert_equal(29713400,slice.stop)
31
+ assert_equal('CTCCCAGGACTGCTTCTGCCCACTGTCCCCGGGGCCCTGCCCTGCCTTTCTGCCTGTCACA',slice.seq.upcase)
32
+ end
33
+
34
+ def test_flanking_seq
35
+ vf = Variation.find_by_name('rs2076175').variation_features[0]
36
+ up,down = vf.flanking_seq
37
+ assert_equal(29712970,up.start)
38
+ assert_equal(29713369,up.stop)
39
+ assert_equal(29713371,down.start)
40
+ assert_equal(29713770,down.stop)
41
+ assert_equal('TCCTGATCTCACAAACCCTAATCTCCTGGAGGGAATGCAAGGCTGCCTGCCCCTACCCAGCAGTGACTTCTCCATTCCAGTCCAAGTGAGGAACTCGGACCAGGAAGGACCCCTCCCTGGCCCTCTTCCATCCCTCCCTGTGTGGGCTGAGCCCCGCTGAGCACCATTCCTCACCCCTACTCACAGCCAAATCCAGTGGGAAGAGACAGGTCCTGCTCTCTGCCCCCAACTCTCCTGGAAAAGGCCTCTCCCATTACTCTTGCCCACTGCCCACTCTCACCTCCTTTCTGGCCCTTGATATGAGCCAGGGTCCTCCTGAGCTCCTGCCCATTCTCTGTCAAGTCTTCAGTCTCTGTGTCCCAGGTCTCAGCTCCCAGGACTGCTTCTGCCCACTGTCCCC',
42
+ up.seq.upcase)
43
+ assert_equal('GGGCCCTGCCCTGCCTTTCTGCCTGTCACAGAGCAGGAAGAGCTGACCATCCAGATGTCCCTCAGCGAGAAACCCTGACTGCACAGATCCATCCTGGGACAGCACCGTGAGGTTGTAACAAAGACTGTGGGGCTCTGGGGAAGAGGAAATCACAGATGAAACTTCTTCCTGGAAGTAACTTCACATCAATGTTTAACACACAGGTCTGCTGTCCCGACCTTCCTGAGGAGGCAGGAAATGCACACGGGCAAAGGGACAAGAATGAGGATTTCAGACGCAAGGAAAACTGGGAAGGTGGGAGGATAGAGGAGGGGACTGAGGAACAGAAGAAGGGGGAATGGGGATGGCAAACTTGTAGGCCAGGTGCCAGGGCAGGGCAGCCACAGGCCCCCTCAGGATA',
44
+ down.seq.upcase)
45
+
46
+ end
47
+
48
+ def test_slice_variation
49
+ slice = Ensembl::Core::Slice.fetch_by_region('chromosome',1,100834,101331)
50
+ variations = slice.get_variation_features
51
+ assert_equal(1,variations.size)
52
+ assert_equal('rs78180088',variations[0].variation_name)
53
+ end
54
+
55
+ def test_slice_structural_variation
56
+ slice = Ensembl::Core::Slice.fetch_by_region('chromosome',11,60125,320837)
57
+ sv = slice.get_structural_variations
58
+ assert_equal(16,sv.size)
59
+ assert_equal('nsv8753',sv[0].variation_name)
60
+ end
61
+
62
+
63
+
64
+ end
@@ -0,0 +1,42 @@
1
+ #
2
+ # = test/unit/release_62/core/test_gene.rb - Unit test for Ensembl::Core
3
+ #
4
+ # Copyright:: Copyright (C) 2009 Francesco Strozzi <francesco.strozzi@gmail.com>
5
+ #
6
+ # License:: Ruby's
7
+ #
8
+ # $Id:
9
+
10
+ require File.expand_path File.join(File.dirname(__FILE__),"../../helper.rb")
11
+
12
+ include Ensembl::Core
13
+
14
+ class TestGene < Test::Unit::TestCase
15
+
16
+ def setup
17
+ DBConnection.connect('homo_sapiens', 62)
18
+ end
19
+
20
+ def teardown
21
+ DBConnection.remove_connection
22
+ end
23
+
24
+ def test_stable_id
25
+ ids = %w(ENSG00000243485 ENSG00000221311 ENSG00000237613 ENSG00000240361 ENSG00000186092)
26
+ genes = Gene.find_by_stable_id(ids)
27
+ assert_equal(5,genes.size)
28
+ assert_equal("ENSG00000186092",genes[0].stable_id)
29
+ assert_equal(65882,genes[0].seq_region_start)
30
+ assert_equal(70008,genes[0].seq_region_end)
31
+ assert_equal("Olfactory receptor 4F4 [Source:UniProtKB/Swiss-Prot;Acc:Q96R69]",genes[0].description)
32
+
33
+ gene = Gene.find_by_stable_id("ENSG00000186092")
34
+ genes = Gene.find_by_stable_id(ids)
35
+ assert_equal("ENSG00000186092",gene.stable_id)
36
+ assert_equal(65882,gene.seq_region_start)
37
+ assert_equal(70008,gene.seq_region_end)
38
+ assert_equal("Olfactory receptor 4F4 [Source:UniProtKB/Swiss-Prot;Acc:Q96R69]",gene.description)
39
+ end
40
+
41
+
42
+ end
@@ -0,0 +1,86 @@
1
+ #
2
+ # = test/unit/release_62/variation/test_activerecord.rb - Unit test for Ensembl::Variation
3
+ #
4
+ # Copyright:: Copyright (C) 2009
5
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
6
+ #
7
+ # License:: Ruby's
8
+ #
9
+ # $Id:
10
+ require File.expand_path File.join(File.dirname(__FILE__),"../../helper.rb")
11
+
12
+ include Ensembl::Variation
13
+
14
+ class ActiveRecordVariation < Test::Unit::TestCase
15
+
16
+ def setup
17
+ DBConnection.connect('homo_sapiens',62)
18
+ end
19
+
20
+ def teardown
21
+ DBConnection.remove_connection
22
+ end
23
+
24
+ def test_study
25
+ s = Study.find(134)
26
+ assert_equal(13,s.source_id)
27
+ assert_equal("Genome-wide association study of 14,000 cases of seven common diseases and 3,000 shared controls.",s.description)
28
+ assert_equal("pubmed/17554300",s.external_reference)
29
+ assert_equal("GWAS",s.study_type)
30
+ a = s.associate_studies
31
+ assert_equal(17,a.size)
32
+
33
+ study = Study.find_by_name "estd19"
34
+ assert_equal("Ahn 2009 \"The first Korean genome sequence and analysis: full genome sequencing for a socio-ethnic group.\" PMID:19470904 [remapped from build NCBI36]",study.description)
35
+ assert_equal("ftp://ftp.ebi.ac.uk/pub/databases/dgva/estd19_Ahn_et_al_2009",study.url)
36
+ assert_equal("pubmed/19470904",study.external_reference)
37
+ struct = study.structural_variations
38
+ assert_equal(4281,struct.size)
39
+ assert_equal("esv9167",struct[0].variation_name)
40
+ assert_equal("SV",struct[0].sv_class)
41
+ assert_equal(27515,struct[0].seq_region_id)
42
+ assert_equal(3478018,struct[0].seq_region_start)
43
+ assert_equal(3478196,struct[0].seq_region_end)
44
+
45
+ study = Study.find(33)
46
+ ann = study.variation_annotations
47
+ assert_equal(17,ann.size)
48
+ assert_equal(30235485,ann[0].variation_id)
49
+ assert_equal("Intergenic",ann[0].associated_gene)
50
+ assert_equal("rs11206801-A",ann[0].associated_variant_risk_allele)
51
+ assert_equal("6E-8",ann[0].p_value)
52
+
53
+ end
54
+
55
+ def test_protein_info
56
+ i = ProteinInfo.find_by_transcript_stable_id "ENST00000358183"
57
+ position = i.protein_positions
58
+ assert_equal(1195,position.size)
59
+ assert_equal(1,position[0].position)
60
+ assert_equal("M",position[0].amino_acid)
61
+ assert_equal(4.32,position[0].sift_median_conservation)
62
+ assert_equal(12,position[0].sift_num_sequences_represented)
63
+ end
64
+
65
+ def test_predictions
66
+ i = ProteinInfo.find_by_transcript_stable_id "ENST00000228777"
67
+ position = i.protein_positions[3]
68
+ sift = position.sift_predictions[0]
69
+ assert_equal("A",sift.amino_acid)
70
+ assert_equal("tolerated",sift.prediction)
71
+ assert_equal(0.05,sift.score)
72
+
73
+ polyphen = position.polyphen_predictions[0]
74
+ assert_equal("A",polyphen.amino_acid)
75
+ assert_equal("benign",polyphen.prediction)
76
+ assert_equal(0.006,polyphen.probability)
77
+ end
78
+
79
+ def test_structural_variation
80
+ s = StructuralVariation.find_by_variation_name "esv9167"
81
+ supp = s.supporting_structural_variations
82
+ assert_equal(1,supp.size)
83
+ assert_equal("essv31608",supp[0].name)
84
+ end
85
+
86
+ end
@@ -0,0 +1,191 @@
1
+ #
2
+ # = test/unit/release_62/variation/test_consequence.rb - Unit test for Ensembl::Variation
3
+ #
4
+ # Copyright:: Copyright (C) 2011
5
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
6
+ # License:: Ruby's
7
+ #
8
+ require File.expand_path File.join(File.dirname(__FILE__),"../../helper.rb")
9
+
10
+ include Ensembl::Variation
11
+ DBConnection.connect('homo_sapiens',62)
12
+
13
+ class TestVariation < Test::Unit::TestCase
14
+
15
+ def test_3prime
16
+ vf = VariationFeature.new(:seq_region_id => 27519, :seq_region_start => 299504, :seq_region_end => 299504, :seq_region_strand => 1, :allele_string => "T/C", :variation_name => "rs78048681")
17
+ tv = vf.transcript_variations
18
+ assert_equal("3_prime_UTR_variant", tv[0].consequence_types)
19
+ end
20
+
21
+ def test_intergenic
22
+ vf = VariationFeature.new(:seq_region_id => SeqRegion.find_by_name("X").seq_region_id, :seq_region_start => 23694, :seq_region_end => 23694, :seq_region_strand => 1, :allele_string => "A/T",:variation_name => "fake_SNP")
23
+ tv = vf.transcript_variations
24
+ assert_instance_of(TranscriptVariation,tv[0])
25
+ assert_equal("intergenic_variant",tv[0].consequence_types)
26
+ end
27
+
28
+ def test_splice_acceptor
29
+ vf = VariationFeature.new(:seq_region_id => 27511, :seq_region_start => 1248331, :seq_region_end => 1248331, :seq_region_strand => 1, :allele_string => "T/A", :variation_name => "rs113769441")
30
+ tv = vf.transcript_variations
31
+ assert_equal("splice_acceptor_variant", tv[34].consequence_types)
32
+ assert_equal("ENST00000527098",tv[43].feature_stable_id)
33
+ end
34
+
35
+ def test_splice_donor
36
+ vf = VariationFeature.new(:seq_region_id => 27511, :seq_region_start => 1247605, :seq_region_end => 1247605, :seq_region_strand => 1, :allele_string => "C/G", :variation_name => "rs113643330")
37
+ tv = vf.transcript_variations
38
+ assert_equal("splice_donor_variant", tv[34].consequence_types)
39
+ assert_equal("ENST00000545578",tv[34].feature_stable_id)
40
+ end
41
+
42
+ def test_complex_change_in_transcript
43
+ vf = VariationFeature.new(:seq_region_id => 27509, :seq_region_start => 4837133, :seq_region_end => 4837210, :seq_region_strand => 1, :allele_string => "-/GAGCCCACCTCAGAGCCCGCCCCCAGCCCGACCACCCCG", :variation_name => "rs41439349")
44
+ tv = vf.transcript_variations
45
+ assert_equal("complex_change_in_transcript", tv[0].consequence_types)
46
+ assert_equal("ENST00000438881",tv[0].feature_stable_id)
47
+ end
48
+
49
+ def test_stop_lost
50
+ vf = VariationFeature.new(:seq_region_id => 27509, :seq_region_start => 4701716, :seq_region_end => 4701716, :seq_region_strand => 1, :allele_string => "G/T", :variation_name => "rs1804248")
51
+ tv = vf.transcript_variations
52
+ assert_equal("stop_lost", tv[0].consequence_types)
53
+ assert_equal("ENST00000270586",tv[0].feature_stable_id)
54
+ end
55
+
56
+ def test_coding_sequence
57
+ vf = VariationFeature.new(:seq_region_id => 27509, :seq_region_start => 4836003, :seq_region_end => 4836003, :seq_region_strand => 1, :allele_string => "A/HGMD_MUTATION", :variation_name => "rs1804248")
58
+ tv = vf.transcript_variations
59
+ assert_equal("coding_sequence_variant", tv[0].consequence_types)
60
+ assert_equal("ENST00000438881",tv[0].feature_stable_id)
61
+ end
62
+
63
+ def test_non_synonymous_codon
64
+ vf = VariationFeature.new(:seq_region_id => 27519, :seq_region_start => 313785, :seq_region_end => 313785, :seq_region_strand => 1, :allele_string => "T/C", :variation_name => "rs112982618")
65
+ tv = vf.transcript_variations
66
+ assert_equal("non_synonymous_codon", tv[12].consequence_types)
67
+ assert_equal("ENST00000535347",tv[12].feature_stable_id)
68
+ assert_equal("N/S",tv[12].pep_allele_string)
69
+ end
70
+
71
+ def test_stop_gained
72
+ vf = VariationFeature.new(:seq_region_id => 27509, :seq_region_start => 4836976, :seq_region_end => 4836976, :seq_region_strand => 1, :allele_string => "G/A", :variation_name => "rs121908061")
73
+ tv = vf.transcript_variations
74
+ assert_equal("stop_gained", tv[1].consequence_types)
75
+ assert_equal("ENST00000329125",tv[1].feature_stable_id)
76
+ end
77
+
78
+ def test_synonymous_codon
79
+ vf = VariationFeature.new(:seq_region_id => 27519, :seq_region_start => 313751, :seq_region_end => 313751, :seq_region_strand => 1, :allele_string => "T/C", :variation_name => "rs11609900")
80
+ tv = vf.transcript_variations
81
+ assert_equal("synonymous_codon", tv[12].consequence_types)
82
+ assert_equal("ENST00000535347",tv[12].feature_stable_id)
83
+ end
84
+
85
+ def test_frameshift_variant
86
+ vf = VariationFeature.new(:seq_region_id => 27527, :seq_region_start => 156589990, :seq_region_end => 156589989, :seq_region_strand => 1, :allele_string => "-/C", :variation_name => "rs35703155")
87
+ tv = vf.transcript_variations
88
+ assert_equal("frameshift_variant", tv[-1].consequence_types)
89
+ assert_equal("ENST00000302938",tv[-1].feature_stable_id)
90
+ end
91
+
92
+ def test_nc_transcript
93
+ vf = VariationFeature.new(:seq_region_id => 27523, :seq_region_start => 43139829, :seq_region_end => 43139829, :seq_region_strand => 1, :allele_string => "G/T", :variation_name => "rs4736847")
94
+ tv = vf.transcript_variations
95
+ assert_equal("nc_transcript_variant", tv[0].consequence_types)
96
+ assert_equal("ENST00000522985",tv[0].feature_stable_id)
97
+ end
98
+
99
+ def test_mature_miRNA
100
+ vf = VariationFeature.new(:seq_region_id => 27504, :seq_region_start => 18204679, :seq_region_end => 18204679, :seq_region_strand => 1, :allele_string => "T/C", :variation_name => "rs10832898")
101
+ tv = vf.transcript_variations
102
+ assert_equal("mature_miRNA_variant", tv[1].consequence_types)
103
+ assert_equal("ENST00000408110",tv[1].feature_stable_id)
104
+ end
105
+
106
+ def test_5prime
107
+ vf = VariationFeature.new(:seq_region_id => 27509, :seq_region_start => 4835627, :seq_region_end => 4835627, :seq_region_strand => 1, :allele_string => "C/T", :variation_name => "rs56337033")
108
+ tv = vf.transcript_variations
109
+ assert_equal("5_prime_UTR_variant", tv[1].consequence_types)
110
+ assert_equal("ENST00000329125",tv[1].feature_stable_id)
111
+ end
112
+
113
+ def test_incomplete_terminal_codon
114
+ vf = VariationFeature.new(:seq_region_id => 27525, :seq_region_start => 118397884, :seq_region_end => 118397884, :seq_region_strand => 1, :allele_string => "A/G", :variation_name => "rs4751995")
115
+ tv = vf.transcript_variations
116
+ assert_equal("incomplete_terminal_codon_variant", tv[4].consequence_types)
117
+ assert_equal("ENST00000433618",tv[4].feature_stable_id)
118
+ end
119
+
120
+ #def test_splice_region # THE EXAMPLES WITHIN HUMAN DATABASE CAN'T BE USED AS TEST
121
+ # vf = VariationFeature.new(:seq_region_id => 27505, :seq_region_start => 14743754, :seq_region_end => 14743754, :seq_region_strand => 1, :allele_string => "C/T", :variation_name => "rs74734987")
122
+ # tv = vf.transcript_variations
123
+ # tv.each {|var| puts var.consequence_types+" "+var.feature_stable_id}
124
+ # assert_equal("splice_region_variant", tv[0].consequence_types)
125
+ # assert_equal("ENST00000540061",tv[0].feature_stable_id)
126
+ #end
127
+
128
+ def test_intron_variant
129
+ vf = VariationFeature.new(:seq_region_id => 27519, :seq_region_start => 312127, :seq_region_end => 312127, :seq_region_strand => 1, :allele_string => "G/T", :variation_name => "rs75380489")
130
+ tv = vf.transcript_variations
131
+ assert_equal("intron_variant", tv[0].consequence_types)
132
+ assert_equal("ENST00000228777",tv[0].feature_stable_id)
133
+ end
134
+
135
+ def test_5KB_downstream_variant
136
+ vf = VariationFeature.new(:seq_region_id => 27519, :seq_region_start => 300500, :seq_region_end => 300500, :seq_region_strand => 1, :allele_string => "T/C", :variation_name => "rs10128942")
137
+ tv = vf.transcript_variations
138
+ assert_equal("5KB_downstream_variant", tv[15].consequence_types)
139
+ assert_equal("ENST00000535498",tv[15].feature_stable_id)
140
+ end
141
+
142
+ def test_2KB_upstream_variant
143
+ vf = VariationFeature.new(:seq_region_id => 27519, :seq_region_start => 300500, :seq_region_end => 300500, :seq_region_strand => 1, :allele_string => "T/C", :variation_name => "rs10128942")
144
+ tv = vf.transcript_variations
145
+ assert_equal("2KB_upstream_variant", tv[-1].consequence_types)
146
+ assert_equal("ENST00000544067",tv[-1].feature_stable_id)
147
+ end
148
+
149
+ def test_5KB_upstream_variant
150
+ vf = VariationFeature.new(:seq_region_id => 27511, :seq_region_start => 242503860, :seq_region_end => 242503860, :seq_region_strand => 1, :allele_string => "G/T", :variation_name => "rs12727465")
151
+ tv = vf.transcript_variations
152
+ assert_equal("5KB_upstream_variant", tv[-1].consequence_types)
153
+ assert_equal("ENST00000447710",tv[-1].feature_stable_id)
154
+ end
155
+
156
+ def test_500B_downstream_variant
157
+ vf = VariationFeature.new(:seq_region_id => 27523, :seq_region_start => 43139379, :seq_region_end => 43139379, :seq_region_strand => 1, :allele_string => "T/A", :variation_name => "rs114568988")
158
+ tv = vf.transcript_variations
159
+ assert_equal("500B_downstream_variant", tv[0].consequence_types)
160
+ assert_equal("ENST00000522985",tv[0].feature_stable_id)
161
+ end
162
+
163
+ def test_initiator_codon_change
164
+ vf = VariationFeature.new(:seq_region_id => 27509, :seq_region_start => 3450007, :seq_region_end => 3450007, :seq_region_strand => 1, :allele_string => "C/T", :variation_name => "rs390804")
165
+ tv = vf.transcript_variations
166
+ assert_equal("initiator_codon_change", tv[1].consequence_types)
167
+ assert_equal("ENST00000430263",tv[1].feature_stable_id)
168
+ end
169
+
170
+ def test_stop_retained
171
+ vf = VariationFeature.new(:seq_region_id => 27515, :seq_region_start => 138202334, :seq_region_end => 138202334, :seq_region_strand => 1, :allele_string => "G/A", :variation_name => "COSM35908")
172
+ tv = vf.transcript_variations
173
+ assert_equal("stop_retained_variant", tv[0].consequence_types)
174
+ assert_equal("ENST00000535574",tv[0].feature_stable_id)
175
+ end
176
+
177
+ def test_inframe_codon_gain
178
+ vf = VariationFeature.new(:seq_region_id => 27509, :seq_region_start => 636321, :seq_region_end => 636320, :seq_region_strand => 1, :allele_string => "-/CTC", :variation_name => "rs111405529")
179
+ tv = vf.transcript_variations
180
+ assert_equal("inframe_codon_gain", tv[1].consequence_types)
181
+ assert_equal("ENST00000451373",tv[1].feature_stable_id)
182
+ end
183
+
184
+ def test_inframe_codon_loss
185
+ vf = VariationFeature.new(:seq_region_id => 27509, :seq_region_start => 4837133, :seq_region_end => 4837210, :seq_region_strand => 1, :allele_string => "GAGCCCACCTCAGAGCCCGCCCCCAGCCCGACCACCCCGGAGCCCACCTCAGAGCCCGCCCCCAGCCCGACCACCCCA/-", :variation_name => "rs41439349")
186
+ tv = vf.transcript_variations
187
+ assert_equal("inframe_codon_loss", tv[1].consequence_types)
188
+ assert_equal("ENST00000329125",tv[1].feature_stable_id)
189
+ end
190
+
191
+ end