bio-ensembl 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. data/.document +5 -0
  2. data/Gemfile +20 -0
  3. data/Gemfile.lock +40 -0
  4. data/LICENSE.txt +20 -0
  5. data/README.rdoc +19 -0
  6. data/Rakefile +71 -0
  7. data/VERSION +1 -0
  8. data/bin/ensembl +40 -0
  9. data/bin/variation_effect_predictor +106 -0
  10. data/bio-ensembl.gemspec +190 -0
  11. data/lib/bio-ensembl.rb +65 -0
  12. data/lib/bio-ensembl/core/activerecord.rb +1812 -0
  13. data/lib/bio-ensembl/core/collection.rb +64 -0
  14. data/lib/bio-ensembl/core/project.rb +262 -0
  15. data/lib/bio-ensembl/core/slice.rb +657 -0
  16. data/lib/bio-ensembl/core/transcript.rb +409 -0
  17. data/lib/bio-ensembl/core/transform.rb +95 -0
  18. data/lib/bio-ensembl/db_connection.rb +205 -0
  19. data/lib/bio-ensembl/variation/activerecord.rb +536 -0
  20. data/lib/bio-ensembl/variation/variation_feature.rb +376 -0
  21. data/lib/bio-ensembl/variation/variation_feature62.rb +444 -0
  22. data/samples/ensembl_genomes_example.rb +60 -0
  23. data/samples/examples_perl_tutorial.rb +125 -0
  24. data/samples/small_example_ruby_api.rb +34 -0
  25. data/samples/variation_effect_predictor_data.txt +4 -0
  26. data/samples/variation_example.rb +67 -0
  27. data/test/data/seq_c6qbl.fa +10 -0
  28. data/test/data/seq_cso19_coding.fa +16 -0
  29. data/test/data/seq_cso19_transcript.fa +28 -0
  30. data/test/data/seq_drd3_gene.fa +838 -0
  31. data/test/data/seq_drd3_transcript.fa +22 -0
  32. data/test/data/seq_drd4_transcript.fa +24 -0
  33. data/test/data/seq_forward_composite.fa +1669 -0
  34. data/test/data/seq_par_boundary.fa +169 -0
  35. data/test/data/seq_rnd3_transcript.fa +47 -0
  36. data/test/data/seq_ub2r1_coding.fa +13 -0
  37. data/test/data/seq_ub2r1_gene.fa +174 -0
  38. data/test/data/seq_ub2r1_transcript.fa +26 -0
  39. data/test/data/seq_y.fa +2 -0
  40. data/test/default/test_connection.rb +60 -0
  41. data/test/default/test_releases.rb +130 -0
  42. data/test/ensembl_genomes/test_collection.rb +122 -0
  43. data/test/ensembl_genomes/test_gene.rb +46 -0
  44. data/test/ensembl_genomes/test_slice.rb +65 -0
  45. data/test/ensembl_genomes/test_variation.rb +38 -0
  46. data/test/helper.rb +18 -0
  47. data/test/release_50/core/test_project.rb +210 -0
  48. data/test/release_50/core/test_project_human.rb +52 -0
  49. data/test/release_50/core/test_relationships.rb +72 -0
  50. data/test/release_50/core/test_sequence.rb +170 -0
  51. data/test/release_50/core/test_slice.rb +116 -0
  52. data/test/release_50/core/test_transcript.rb +125 -0
  53. data/test/release_50/core/test_transform.rb +217 -0
  54. data/test/release_50/variation/test_activerecord.rb +138 -0
  55. data/test/release_50/variation/test_variation.rb +79 -0
  56. data/test/release_53/core/test_gene.rb +61 -0
  57. data/test/release_53/core/test_project.rb +91 -0
  58. data/test/release_53/core/test_project_human.rb +61 -0
  59. data/test/release_53/core/test_slice.rb +42 -0
  60. data/test/release_53/core/test_transform.rb +57 -0
  61. data/test/release_53/variation/test_activerecord.rb +137 -0
  62. data/test/release_53/variation/test_variation.rb +66 -0
  63. data/test/release_56/core/test_gene.rb +61 -0
  64. data/test/release_56/core/test_project.rb +91 -0
  65. data/test/release_56/core/test_slice.rb +49 -0
  66. data/test/release_56/core/test_transform.rb +57 -0
  67. data/test/release_56/variation/test_activerecord.rb +141 -0
  68. data/test/release_56/variation/test_consequence.rb +131 -0
  69. data/test/release_56/variation/test_variation.rb +63 -0
  70. data/test/release_60/core/test_gene.rb +61 -0
  71. data/test/release_60/core/test_project_human.rb +34 -0
  72. data/test/release_60/core/test_slice.rb +42 -0
  73. data/test/release_60/core/test_transcript.rb +120 -0
  74. data/test/release_60/core/test_transform.rb +57 -0
  75. data/test/release_60/variation/test_activerecord.rb +216 -0
  76. data/test/release_60/variation/test_consequence.rb +153 -0
  77. data/test/release_60/variation/test_variation.rb +64 -0
  78. data/test/release_62/core/test_gene.rb +42 -0
  79. data/test/release_62/variation/test_activerecord.rb +86 -0
  80. data/test/release_62/variation/test_consequence.rb +191 -0
  81. metadata +287 -0
@@ -0,0 +1,153 @@
1
+ #
2
+ # = test/unit/release_60/variation/test_consequence.rb - Unit test for Ensembl::Variation
3
+ #
4
+ # Copyright:: Copyright (C) 2009
5
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
6
+ # License:: Ruby's
7
+ #
8
+ require File.expand_path File.join(File.dirname(__FILE__),"../../helper.rb")
9
+
10
+ include Ensembl::Variation
11
+ DBConnection.connect('homo_sapiens',60)
12
+
13
+ class TestVariation < Test::Unit::TestCase
14
+
15
+ def test_intergenic
16
+ # INTERGENIC
17
+ vf = VariationFeature.new(:seq_region_id => SeqRegion.find_by_name("X").seq_region_id, :seq_region_start => 23694, :seq_region_end => 23694, :seq_region_strand => 1, :allele_string => "A/T",:variation_name => "fake_SNP")
18
+ tv = vf.transcript_variations
19
+ assert_instance_of(TranscriptVariation,tv[0])
20
+ assert_equal("INTERGENIC",tv[0].consequence_type)
21
+ end
22
+
23
+ def test_3prime
24
+ # 3PRIME_UTR
25
+ vf = VariationFeature.new(:seq_region_id => 27506, :seq_region_start => 96810688, :seq_region_end => 96810688, :seq_region_strand => 1, :allele_string => "G/C", :variation_name => "rs16869283")
26
+ tv = vf.transcript_variations
27
+ assert_equal("3PRIME_UTR", tv[0].consequence_type)
28
+ end
29
+
30
+ def test_5prime
31
+ # 5PRIME_UTR
32
+ vf = VariationFeature.new(:seq_region_id => 27506, :seq_region_start => 158536411, :seq_region_end => 158536411, :seq_region_strand => 1, :allele_string => "T/C", :variation_name => "rs71547565")
33
+ tv = vf.transcript_variations
34
+ assert_equal("5PRIME_UTR", tv[3].consequence_type)
35
+ end
36
+
37
+ def test_upstream
38
+ # UPSTREAM
39
+ vf = VariationFeature.new(:seq_region_id => 27506, :seq_region_start => 96831018, :seq_region_end => 96831018, :seq_region_strand => 1, :allele_string => "G/T", :variation_name => "rs6975185")
40
+ tv = vf.transcript_variations
41
+ assert_equal("UPSTREAM",tv[0].consequence_type)
42
+ end
43
+
44
+ def test_downstream
45
+ # DOWNSTREAM
46
+ vf = VariationFeature.new(:seq_region_id => 27506, :seq_region_start => 105727321, :seq_region_end => 105727321, :seq_region_strand => 1, :allele_string => "G/T", :variation_name => "rs35113830")
47
+ tv = vf.transcript_variations
48
+ assert_equal("DOWNSTREAM",tv[-1].consequence_type)
49
+ end
50
+
51
+ def test_mirna
52
+ # WITHIN_MATURE_miRNA
53
+ vf = VariationFeature.new(:seq_region_id => 27527, :seq_region_start => 175878848, :seq_region_end => 175878848, :seq_region_strand => 1, :allele_string => "C/T", :variation_name => "rs12716316")
54
+ tv = vf.transcript_variations
55
+ assert_equal("WITHIN_MATURE_miRNA",tv[-1].consequence_type)
56
+ end
57
+
58
+
59
+ def test_non_coding
60
+ # WITHIN_NON_CODING_GENE
61
+ vf = VariationFeature.new(:seq_region_id => 27506, :seq_region_start => 97601052, :seq_region_end => 97601052, :seq_region_strand => 1, :allele_string => "G/A", :variation_name => "rs13245475")
62
+ tv = vf.transcript_variations
63
+ assert_equal("WITHIN_NON_CODING_GENE",tv[1].consequence_type)
64
+ end
65
+
66
+ def test_complex_indel
67
+ # COMPLEX_INDEL
68
+ vf = VariationFeature.new(:seq_region_id => 27515, :seq_region_start => 31902068, :seq_region_end => 31902095, :seq_region_strand => 1, :allele_string => "GTGGACAGGGTCAGGAATCAGGAGTCTG/-", :variation_name => "rs9332736")
69
+ tv = vf.transcript_variations
70
+ assert_equal("COMPLEX_INDEL",tv[6].consequence_type)
71
+ end
72
+
73
+ def test_essential_splice
74
+ # ESSENTIAL_SPLICE_SITE
75
+ vf = VariationFeature.new(:seq_region_id => 27515, :seq_region_start => 33385862, :seq_region_end => 33385862, :seq_region_strand => 1, :allele_string => "A/G", :variation_name => "GA005718")
76
+ tv = vf.transcript_variations
77
+ assert_equal("ESSENTIAL_SPLICE_SITE",tv[21].consequence_type)
78
+ end
79
+
80
+ def test_splice_site
81
+ # SPLICE_SITE
82
+ vf = VariationFeature.new(:seq_region_id => 27506, :seq_region_start => 102301587, :seq_region_end => 102301587, :seq_region_strand => 1, :allele_string => "A/G", :variation_name => "rs434833")
83
+ tv = vf.transcript_variations
84
+ assert_equal("SPLICE_SITE",tv[5].consequence_type)
85
+ end
86
+
87
+ def test_intronic
88
+ # INTRONIC
89
+ vf = VariationFeature.new(:seq_region_id => 27515, :seq_region_start => 31902068, :seq_region_end => 31902095, :seq_region_strand => 1, :allele_string => "GTGGACAGGGTCAGGAATCAGGAGTCTG/-", :variation_name => "rs9332736")
90
+ tv = vf.transcript_variations
91
+ assert_equal("INTRONIC",tv[3].consequence_type)
92
+ end
93
+
94
+ def test_frameshift
95
+ # FRAMESHIFT_CODING
96
+ vf = VariationFeature.new(:seq_region_id => 27511, :seq_region_start => 78958619, :seq_region_end => 78958618, :seq_region_strand => 1, :allele_string => "-/G", :variation_name => "rs35065683")
97
+ tv = vf.transcript_variations
98
+ assert_equal("FRAMESHIFT_CODING",tv[1].consequence_type)
99
+ end
100
+
101
+ def test_stop_gained
102
+ # STOP_GAINED
103
+ vf = VariationFeature.new(:seq_region_id => 27516, :seq_region_start => 38262908, :seq_region_end => 38262908, :seq_region_strand => 1, :allele_string => "G/A", :variation_name => "rs72556299")
104
+ tv = vf.transcript_variations
105
+ assert_equal("STOP_GAINED",tv[-1].consequence_type)
106
+ end
107
+
108
+ def test_stop_lost
109
+ # STOP_LOST
110
+ vf = VariationFeature.new(:seq_region_id => 27511, :seq_region_start => 152770613, :seq_region_end => 152770613, :seq_region_strand => 1, :allele_string => "T/G", :variation_name => "rs41268500")
111
+ tv = vf.transcript_variations
112
+ assert_equal("STOP_LOST",tv[0].consequence_type)
113
+ end
114
+
115
+ def test_synonymous
116
+ # SYNONYMOUS_CODING
117
+ vf = VariationFeature.new(:seq_region_id => 27506, :seq_region_start => 99688238, :seq_region_end => 99688238, :seq_region_strand => 1, :allele_string => "C/T", :variation_name => "rs11550651")
118
+ tv = vf.transcript_variations
119
+ assert_equal("SYNONYMOUS_CODING",tv[5].consequence_type)
120
+ end
121
+
122
+ def test_non_synonymous
123
+ # NON_SYNONYMOUS_CODING
124
+ vf = VariationFeature.new(:seq_region_id => 27506, :seq_region_start => 99057720, :seq_region_end => 99057720, :seq_region_strand => 1, :allele_string => "G/A", :variation_name => "rs11545970")
125
+ tv = vf.transcript_variations
126
+ assert_equal("NON_SYNONYMOUS_CODING",tv[9].consequence_type)
127
+ assert_equal("A/V",tv[9].peptide_allele_string)
128
+ end
129
+
130
+ # Checking CDNA coordinates calculation
131
+
132
+ def test_genomic2cdna_fw # forward strand (variation rs67960011)
133
+ t = Ensembl::Core::Transcript.find_by_stable_id("ENST00000039007")
134
+ assert_equal(573,t.genomic2cdna(38260562))
135
+ end
136
+
137
+ def test_cdna2genomic_fw # forward strand (variation rs67960011)
138
+ t = Ensembl::Core::Transcript.find_by_stable_id("ENST00000039007")
139
+ assert_equal(38260562,t.cdna2genomic(573))
140
+ end
141
+
142
+ def test_genomic2cdna_rev # reverse strand (variation rs11545970)
143
+ t = Ensembl::Core::Transcript.find_by_stable_id("ENST00000422429")
144
+ assert_equal(110,t.genomic2cdna(99057720))
145
+ end
146
+
147
+ def test_cdna2genomic_rev # reverse strand (variation rs11545970)
148
+ t = Ensembl::Core::Transcript.find_by_stable_id("ENST00000422429")
149
+ assert_equal(99057720,t.cdna2genomic(110))
150
+ end
151
+
152
+
153
+ end
@@ -0,0 +1,64 @@
1
+ #
2
+ # = test/unit/release_60/variation/test_variation.rb - Unit test for Ensembl::Variation
3
+ #
4
+ # Copyright:: Copyright (C) 2009
5
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
6
+ # License:: Ruby's
7
+ #
8
+ require File.expand_path File.join(File.dirname(__FILE__),"../../helper.rb")
9
+
10
+ include Ensembl::Variation
11
+
12
+ class TestVariation < Test::Unit::TestCase
13
+
14
+ def setup
15
+ DBConnection.connect('homo_sapiens',60)
16
+ end
17
+
18
+ def teardown
19
+ DBConnection.remove_connection
20
+ end
21
+
22
+ def test_fetch_region
23
+ vf = Variation.find_by_name('rs2076175').variation_features[0]
24
+ slice = vf.fetch_region
25
+ assert_equal(29708370,slice.start)
26
+ assert_equal(29718370,slice.stop)
27
+ assert_equal('6',slice.seq_region.name)
28
+ slice = vf.fetch_region(30,30)
29
+ assert_equal(29713340,slice.start)
30
+ assert_equal(29713400,slice.stop)
31
+ assert_equal('CTCCCAGGACTGCTTCTGCCCACTGTCCCCGGGGCCCTGCCCTGCCTTTCTGCCTGTCACA',slice.seq.upcase)
32
+ end
33
+
34
+ def test_flanking_seq
35
+ vf = Variation.find_by_name('rs2076175').variation_features[0]
36
+ up,down = vf.flanking_seq
37
+ assert_equal(29712970,up.start)
38
+ assert_equal(29713369,up.stop)
39
+ assert_equal(29713371,down.start)
40
+ assert_equal(29713770,down.stop)
41
+ assert_equal('TCCTGATCTCACAAACCCTAATCTCCTGGAGGGAATGCAAGGCTGCCTGCCCCTACCCAGCAGTGACTTCTCCATTCCAGTCCAAGTGAGGAACTCGGACCAGGAAGGACCCCTCCCTGGCCCTCTTCCATCCCTCCCTGTGTGGGCTGAGCCCCGCTGAGCACCATTCCTCACCCCTACTCACAGCCAAATCCAGTGGGAAGAGACAGGTCCTGCTCTCTGCCCCCAACTCTCCTGGAAAAGGCCTCTCCCATTACTCTTGCCCACTGCCCACTCTCACCTCCTTTCTGGCCCTTGATATGAGCCAGGGTCCTCCTGAGCTCCTGCCCATTCTCTGTCAAGTCTTCAGTCTCTGTGTCCCAGGTCTCAGCTCCCAGGACTGCTTCTGCCCACTGTCCCC',
42
+ up.seq.upcase)
43
+ assert_equal('GGGCCCTGCCCTGCCTTTCTGCCTGTCACAGAGCAGGAAGAGCTGACCATCCAGATGTCCCTCAGCGAGAAACCCTGACTGCACAGATCCATCCTGGGACAGCACCGTGAGGTTGTAACAAAGACTGTGGGGCTCTGGGGAAGAGGAAATCACAGATGAAACTTCTTCCTGGAAGTAACTTCACATCAATGTTTAACACACAGGTCTGCTGTCCCGACCTTCCTGAGGAGGCAGGAAATGCACACGGGCAAAGGGACAAGAATGAGGATTTCAGACGCAAGGAAAACTGGGAAGGTGGGAGGATAGAGGAGGGGACTGAGGAACAGAAGAAGGGGGAATGGGGATGGCAAACTTGTAGGCCAGGTGCCAGGGCAGGGCAGCCACAGGCCCCCTCAGGATA',
44
+ down.seq.upcase)
45
+
46
+ end
47
+
48
+ def test_slice_variation
49
+ slice = Ensembl::Core::Slice.fetch_by_region('chromosome',1,100834,101331)
50
+ variations = slice.get_variation_features
51
+ assert_equal(1,variations.size)
52
+ assert_equal('rs78180088',variations[0].variation_name)
53
+ end
54
+
55
+ def test_slice_structural_variation
56
+ slice = Ensembl::Core::Slice.fetch_by_region('chromosome',11,60125,320837)
57
+ sv = slice.get_structural_variations
58
+ assert_equal(16,sv.size)
59
+ assert_equal('nsv8753',sv[0].variation_name)
60
+ end
61
+
62
+
63
+
64
+ end
@@ -0,0 +1,42 @@
1
+ #
2
+ # = test/unit/release_62/core/test_gene.rb - Unit test for Ensembl::Core
3
+ #
4
+ # Copyright:: Copyright (C) 2009 Francesco Strozzi <francesco.strozzi@gmail.com>
5
+ #
6
+ # License:: Ruby's
7
+ #
8
+ # $Id:
9
+
10
+ require File.expand_path File.join(File.dirname(__FILE__),"../../helper.rb")
11
+
12
+ include Ensembl::Core
13
+
14
+ class TestGene < Test::Unit::TestCase
15
+
16
+ def setup
17
+ DBConnection.connect('homo_sapiens', 62)
18
+ end
19
+
20
+ def teardown
21
+ DBConnection.remove_connection
22
+ end
23
+
24
+ def test_stable_id
25
+ ids = %w(ENSG00000243485 ENSG00000221311 ENSG00000237613 ENSG00000240361 ENSG00000186092)
26
+ genes = Gene.find_by_stable_id(ids)
27
+ assert_equal(5,genes.size)
28
+ assert_equal("ENSG00000186092",genes[0].stable_id)
29
+ assert_equal(65882,genes[0].seq_region_start)
30
+ assert_equal(70008,genes[0].seq_region_end)
31
+ assert_equal("Olfactory receptor 4F4 [Source:UniProtKB/Swiss-Prot;Acc:Q96R69]",genes[0].description)
32
+
33
+ gene = Gene.find_by_stable_id("ENSG00000186092")
34
+ genes = Gene.find_by_stable_id(ids)
35
+ assert_equal("ENSG00000186092",gene.stable_id)
36
+ assert_equal(65882,gene.seq_region_start)
37
+ assert_equal(70008,gene.seq_region_end)
38
+ assert_equal("Olfactory receptor 4F4 [Source:UniProtKB/Swiss-Prot;Acc:Q96R69]",gene.description)
39
+ end
40
+
41
+
42
+ end
@@ -0,0 +1,86 @@
1
+ #
2
+ # = test/unit/release_62/variation/test_activerecord.rb - Unit test for Ensembl::Variation
3
+ #
4
+ # Copyright:: Copyright (C) 2009
5
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
6
+ #
7
+ # License:: Ruby's
8
+ #
9
+ # $Id:
10
+ require File.expand_path File.join(File.dirname(__FILE__),"../../helper.rb")
11
+
12
+ include Ensembl::Variation
13
+
14
+ class ActiveRecordVariation < Test::Unit::TestCase
15
+
16
+ def setup
17
+ DBConnection.connect('homo_sapiens',62)
18
+ end
19
+
20
+ def teardown
21
+ DBConnection.remove_connection
22
+ end
23
+
24
+ def test_study
25
+ s = Study.find(134)
26
+ assert_equal(13,s.source_id)
27
+ assert_equal("Genome-wide association study of 14,000 cases of seven common diseases and 3,000 shared controls.",s.description)
28
+ assert_equal("pubmed/17554300",s.external_reference)
29
+ assert_equal("GWAS",s.study_type)
30
+ a = s.associate_studies
31
+ assert_equal(17,a.size)
32
+
33
+ study = Study.find_by_name "estd19"
34
+ assert_equal("Ahn 2009 \"The first Korean genome sequence and analysis: full genome sequencing for a socio-ethnic group.\" PMID:19470904 [remapped from build NCBI36]",study.description)
35
+ assert_equal("ftp://ftp.ebi.ac.uk/pub/databases/dgva/estd19_Ahn_et_al_2009",study.url)
36
+ assert_equal("pubmed/19470904",study.external_reference)
37
+ struct = study.structural_variations
38
+ assert_equal(4281,struct.size)
39
+ assert_equal("esv9167",struct[0].variation_name)
40
+ assert_equal("SV",struct[0].sv_class)
41
+ assert_equal(27515,struct[0].seq_region_id)
42
+ assert_equal(3478018,struct[0].seq_region_start)
43
+ assert_equal(3478196,struct[0].seq_region_end)
44
+
45
+ study = Study.find(33)
46
+ ann = study.variation_annotations
47
+ assert_equal(17,ann.size)
48
+ assert_equal(30235485,ann[0].variation_id)
49
+ assert_equal("Intergenic",ann[0].associated_gene)
50
+ assert_equal("rs11206801-A",ann[0].associated_variant_risk_allele)
51
+ assert_equal("6E-8",ann[0].p_value)
52
+
53
+ end
54
+
55
+ def test_protein_info
56
+ i = ProteinInfo.find_by_transcript_stable_id "ENST00000358183"
57
+ position = i.protein_positions
58
+ assert_equal(1195,position.size)
59
+ assert_equal(1,position[0].position)
60
+ assert_equal("M",position[0].amino_acid)
61
+ assert_equal(4.32,position[0].sift_median_conservation)
62
+ assert_equal(12,position[0].sift_num_sequences_represented)
63
+ end
64
+
65
+ def test_predictions
66
+ i = ProteinInfo.find_by_transcript_stable_id "ENST00000228777"
67
+ position = i.protein_positions[3]
68
+ sift = position.sift_predictions[0]
69
+ assert_equal("A",sift.amino_acid)
70
+ assert_equal("tolerated",sift.prediction)
71
+ assert_equal(0.05,sift.score)
72
+
73
+ polyphen = position.polyphen_predictions[0]
74
+ assert_equal("A",polyphen.amino_acid)
75
+ assert_equal("benign",polyphen.prediction)
76
+ assert_equal(0.006,polyphen.probability)
77
+ end
78
+
79
+ def test_structural_variation
80
+ s = StructuralVariation.find_by_variation_name "esv9167"
81
+ supp = s.supporting_structural_variations
82
+ assert_equal(1,supp.size)
83
+ assert_equal("essv31608",supp[0].name)
84
+ end
85
+
86
+ end
@@ -0,0 +1,191 @@
1
+ #
2
+ # = test/unit/release_62/variation/test_consequence.rb - Unit test for Ensembl::Variation
3
+ #
4
+ # Copyright:: Copyright (C) 2011
5
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
6
+ # License:: Ruby's
7
+ #
8
+ require File.expand_path File.join(File.dirname(__FILE__),"../../helper.rb")
9
+
10
+ include Ensembl::Variation
11
+ DBConnection.connect('homo_sapiens',62)
12
+
13
+ class TestVariation < Test::Unit::TestCase
14
+
15
+ def test_3prime
16
+ vf = VariationFeature.new(:seq_region_id => 27519, :seq_region_start => 299504, :seq_region_end => 299504, :seq_region_strand => 1, :allele_string => "T/C", :variation_name => "rs78048681")
17
+ tv = vf.transcript_variations
18
+ assert_equal("3_prime_UTR_variant", tv[0].consequence_types)
19
+ end
20
+
21
+ def test_intergenic
22
+ vf = VariationFeature.new(:seq_region_id => SeqRegion.find_by_name("X").seq_region_id, :seq_region_start => 23694, :seq_region_end => 23694, :seq_region_strand => 1, :allele_string => "A/T",:variation_name => "fake_SNP")
23
+ tv = vf.transcript_variations
24
+ assert_instance_of(TranscriptVariation,tv[0])
25
+ assert_equal("intergenic_variant",tv[0].consequence_types)
26
+ end
27
+
28
+ def test_splice_acceptor
29
+ vf = VariationFeature.new(:seq_region_id => 27511, :seq_region_start => 1248331, :seq_region_end => 1248331, :seq_region_strand => 1, :allele_string => "T/A", :variation_name => "rs113769441")
30
+ tv = vf.transcript_variations
31
+ assert_equal("splice_acceptor_variant", tv[34].consequence_types)
32
+ assert_equal("ENST00000527098",tv[43].feature_stable_id)
33
+ end
34
+
35
+ def test_splice_donor
36
+ vf = VariationFeature.new(:seq_region_id => 27511, :seq_region_start => 1247605, :seq_region_end => 1247605, :seq_region_strand => 1, :allele_string => "C/G", :variation_name => "rs113643330")
37
+ tv = vf.transcript_variations
38
+ assert_equal("splice_donor_variant", tv[34].consequence_types)
39
+ assert_equal("ENST00000545578",tv[34].feature_stable_id)
40
+ end
41
+
42
+ def test_complex_change_in_transcript
43
+ vf = VariationFeature.new(:seq_region_id => 27509, :seq_region_start => 4837133, :seq_region_end => 4837210, :seq_region_strand => 1, :allele_string => "-/GAGCCCACCTCAGAGCCCGCCCCCAGCCCGACCACCCCG", :variation_name => "rs41439349")
44
+ tv = vf.transcript_variations
45
+ assert_equal("complex_change_in_transcript", tv[0].consequence_types)
46
+ assert_equal("ENST00000438881",tv[0].feature_stable_id)
47
+ end
48
+
49
+ def test_stop_lost
50
+ vf = VariationFeature.new(:seq_region_id => 27509, :seq_region_start => 4701716, :seq_region_end => 4701716, :seq_region_strand => 1, :allele_string => "G/T", :variation_name => "rs1804248")
51
+ tv = vf.transcript_variations
52
+ assert_equal("stop_lost", tv[0].consequence_types)
53
+ assert_equal("ENST00000270586",tv[0].feature_stable_id)
54
+ end
55
+
56
+ def test_coding_sequence
57
+ vf = VariationFeature.new(:seq_region_id => 27509, :seq_region_start => 4836003, :seq_region_end => 4836003, :seq_region_strand => 1, :allele_string => "A/HGMD_MUTATION", :variation_name => "rs1804248")
58
+ tv = vf.transcript_variations
59
+ assert_equal("coding_sequence_variant", tv[0].consequence_types)
60
+ assert_equal("ENST00000438881",tv[0].feature_stable_id)
61
+ end
62
+
63
+ def test_non_synonymous_codon
64
+ vf = VariationFeature.new(:seq_region_id => 27519, :seq_region_start => 313785, :seq_region_end => 313785, :seq_region_strand => 1, :allele_string => "T/C", :variation_name => "rs112982618")
65
+ tv = vf.transcript_variations
66
+ assert_equal("non_synonymous_codon", tv[12].consequence_types)
67
+ assert_equal("ENST00000535347",tv[12].feature_stable_id)
68
+ assert_equal("N/S",tv[12].pep_allele_string)
69
+ end
70
+
71
+ def test_stop_gained
72
+ vf = VariationFeature.new(:seq_region_id => 27509, :seq_region_start => 4836976, :seq_region_end => 4836976, :seq_region_strand => 1, :allele_string => "G/A", :variation_name => "rs121908061")
73
+ tv = vf.transcript_variations
74
+ assert_equal("stop_gained", tv[1].consequence_types)
75
+ assert_equal("ENST00000329125",tv[1].feature_stable_id)
76
+ end
77
+
78
+ def test_synonymous_codon
79
+ vf = VariationFeature.new(:seq_region_id => 27519, :seq_region_start => 313751, :seq_region_end => 313751, :seq_region_strand => 1, :allele_string => "T/C", :variation_name => "rs11609900")
80
+ tv = vf.transcript_variations
81
+ assert_equal("synonymous_codon", tv[12].consequence_types)
82
+ assert_equal("ENST00000535347",tv[12].feature_stable_id)
83
+ end
84
+
85
+ def test_frameshift_variant
86
+ vf = VariationFeature.new(:seq_region_id => 27527, :seq_region_start => 156589990, :seq_region_end => 156589989, :seq_region_strand => 1, :allele_string => "-/C", :variation_name => "rs35703155")
87
+ tv = vf.transcript_variations
88
+ assert_equal("frameshift_variant", tv[-1].consequence_types)
89
+ assert_equal("ENST00000302938",tv[-1].feature_stable_id)
90
+ end
91
+
92
+ def test_nc_transcript
93
+ vf = VariationFeature.new(:seq_region_id => 27523, :seq_region_start => 43139829, :seq_region_end => 43139829, :seq_region_strand => 1, :allele_string => "G/T", :variation_name => "rs4736847")
94
+ tv = vf.transcript_variations
95
+ assert_equal("nc_transcript_variant", tv[0].consequence_types)
96
+ assert_equal("ENST00000522985",tv[0].feature_stable_id)
97
+ end
98
+
99
+ def test_mature_miRNA
100
+ vf = VariationFeature.new(:seq_region_id => 27504, :seq_region_start => 18204679, :seq_region_end => 18204679, :seq_region_strand => 1, :allele_string => "T/C", :variation_name => "rs10832898")
101
+ tv = vf.transcript_variations
102
+ assert_equal("mature_miRNA_variant", tv[1].consequence_types)
103
+ assert_equal("ENST00000408110",tv[1].feature_stable_id)
104
+ end
105
+
106
+ def test_5prime
107
+ vf = VariationFeature.new(:seq_region_id => 27509, :seq_region_start => 4835627, :seq_region_end => 4835627, :seq_region_strand => 1, :allele_string => "C/T", :variation_name => "rs56337033")
108
+ tv = vf.transcript_variations
109
+ assert_equal("5_prime_UTR_variant", tv[1].consequence_types)
110
+ assert_equal("ENST00000329125",tv[1].feature_stable_id)
111
+ end
112
+
113
+ def test_incomplete_terminal_codon
114
+ vf = VariationFeature.new(:seq_region_id => 27525, :seq_region_start => 118397884, :seq_region_end => 118397884, :seq_region_strand => 1, :allele_string => "A/G", :variation_name => "rs4751995")
115
+ tv = vf.transcript_variations
116
+ assert_equal("incomplete_terminal_codon_variant", tv[4].consequence_types)
117
+ assert_equal("ENST00000433618",tv[4].feature_stable_id)
118
+ end
119
+
120
+ #def test_splice_region # THE EXAMPLES WITHIN HUMAN DATABASE CAN'T BE USED AS TEST
121
+ # vf = VariationFeature.new(:seq_region_id => 27505, :seq_region_start => 14743754, :seq_region_end => 14743754, :seq_region_strand => 1, :allele_string => "C/T", :variation_name => "rs74734987")
122
+ # tv = vf.transcript_variations
123
+ # tv.each {|var| puts var.consequence_types+" "+var.feature_stable_id}
124
+ # assert_equal("splice_region_variant", tv[0].consequence_types)
125
+ # assert_equal("ENST00000540061",tv[0].feature_stable_id)
126
+ #end
127
+
128
+ def test_intron_variant
129
+ vf = VariationFeature.new(:seq_region_id => 27519, :seq_region_start => 312127, :seq_region_end => 312127, :seq_region_strand => 1, :allele_string => "G/T", :variation_name => "rs75380489")
130
+ tv = vf.transcript_variations
131
+ assert_equal("intron_variant", tv[0].consequence_types)
132
+ assert_equal("ENST00000228777",tv[0].feature_stable_id)
133
+ end
134
+
135
+ def test_5KB_downstream_variant
136
+ vf = VariationFeature.new(:seq_region_id => 27519, :seq_region_start => 300500, :seq_region_end => 300500, :seq_region_strand => 1, :allele_string => "T/C", :variation_name => "rs10128942")
137
+ tv = vf.transcript_variations
138
+ assert_equal("5KB_downstream_variant", tv[15].consequence_types)
139
+ assert_equal("ENST00000535498",tv[15].feature_stable_id)
140
+ end
141
+
142
+ def test_2KB_upstream_variant
143
+ vf = VariationFeature.new(:seq_region_id => 27519, :seq_region_start => 300500, :seq_region_end => 300500, :seq_region_strand => 1, :allele_string => "T/C", :variation_name => "rs10128942")
144
+ tv = vf.transcript_variations
145
+ assert_equal("2KB_upstream_variant", tv[-1].consequence_types)
146
+ assert_equal("ENST00000544067",tv[-1].feature_stable_id)
147
+ end
148
+
149
+ def test_5KB_upstream_variant
150
+ vf = VariationFeature.new(:seq_region_id => 27511, :seq_region_start => 242503860, :seq_region_end => 242503860, :seq_region_strand => 1, :allele_string => "G/T", :variation_name => "rs12727465")
151
+ tv = vf.transcript_variations
152
+ assert_equal("5KB_upstream_variant", tv[-1].consequence_types)
153
+ assert_equal("ENST00000447710",tv[-1].feature_stable_id)
154
+ end
155
+
156
+ def test_500B_downstream_variant
157
+ vf = VariationFeature.new(:seq_region_id => 27523, :seq_region_start => 43139379, :seq_region_end => 43139379, :seq_region_strand => 1, :allele_string => "T/A", :variation_name => "rs114568988")
158
+ tv = vf.transcript_variations
159
+ assert_equal("500B_downstream_variant", tv[0].consequence_types)
160
+ assert_equal("ENST00000522985",tv[0].feature_stable_id)
161
+ end
162
+
163
+ def test_initiator_codon_change
164
+ vf = VariationFeature.new(:seq_region_id => 27509, :seq_region_start => 3450007, :seq_region_end => 3450007, :seq_region_strand => 1, :allele_string => "C/T", :variation_name => "rs390804")
165
+ tv = vf.transcript_variations
166
+ assert_equal("initiator_codon_change", tv[1].consequence_types)
167
+ assert_equal("ENST00000430263",tv[1].feature_stable_id)
168
+ end
169
+
170
+ def test_stop_retained
171
+ vf = VariationFeature.new(:seq_region_id => 27515, :seq_region_start => 138202334, :seq_region_end => 138202334, :seq_region_strand => 1, :allele_string => "G/A", :variation_name => "COSM35908")
172
+ tv = vf.transcript_variations
173
+ assert_equal("stop_retained_variant", tv[0].consequence_types)
174
+ assert_equal("ENST00000535574",tv[0].feature_stable_id)
175
+ end
176
+
177
+ def test_inframe_codon_gain
178
+ vf = VariationFeature.new(:seq_region_id => 27509, :seq_region_start => 636321, :seq_region_end => 636320, :seq_region_strand => 1, :allele_string => "-/CTC", :variation_name => "rs111405529")
179
+ tv = vf.transcript_variations
180
+ assert_equal("inframe_codon_gain", tv[1].consequence_types)
181
+ assert_equal("ENST00000451373",tv[1].feature_stable_id)
182
+ end
183
+
184
+ def test_inframe_codon_loss
185
+ vf = VariationFeature.new(:seq_region_id => 27509, :seq_region_start => 4837133, :seq_region_end => 4837210, :seq_region_strand => 1, :allele_string => "GAGCCCACCTCAGAGCCCGCCCCCAGCCCGACCACCCCGGAGCCCACCTCAGAGCCCGCCCCCAGCCCGACCACCCCA/-", :variation_name => "rs41439349")
186
+ tv = vf.transcript_variations
187
+ assert_equal("inframe_codon_loss", tv[1].consequence_types)
188
+ assert_equal("ENST00000329125",tv[1].feature_stable_id)
189
+ end
190
+
191
+ end