bio-ensembl 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. data/.document +5 -0
  2. data/Gemfile +20 -0
  3. data/Gemfile.lock +40 -0
  4. data/LICENSE.txt +20 -0
  5. data/README.rdoc +19 -0
  6. data/Rakefile +71 -0
  7. data/VERSION +1 -0
  8. data/bin/ensembl +40 -0
  9. data/bin/variation_effect_predictor +106 -0
  10. data/bio-ensembl.gemspec +190 -0
  11. data/lib/bio-ensembl.rb +65 -0
  12. data/lib/bio-ensembl/core/activerecord.rb +1812 -0
  13. data/lib/bio-ensembl/core/collection.rb +64 -0
  14. data/lib/bio-ensembl/core/project.rb +262 -0
  15. data/lib/bio-ensembl/core/slice.rb +657 -0
  16. data/lib/bio-ensembl/core/transcript.rb +409 -0
  17. data/lib/bio-ensembl/core/transform.rb +95 -0
  18. data/lib/bio-ensembl/db_connection.rb +205 -0
  19. data/lib/bio-ensembl/variation/activerecord.rb +536 -0
  20. data/lib/bio-ensembl/variation/variation_feature.rb +376 -0
  21. data/lib/bio-ensembl/variation/variation_feature62.rb +444 -0
  22. data/samples/ensembl_genomes_example.rb +60 -0
  23. data/samples/examples_perl_tutorial.rb +125 -0
  24. data/samples/small_example_ruby_api.rb +34 -0
  25. data/samples/variation_effect_predictor_data.txt +4 -0
  26. data/samples/variation_example.rb +67 -0
  27. data/test/data/seq_c6qbl.fa +10 -0
  28. data/test/data/seq_cso19_coding.fa +16 -0
  29. data/test/data/seq_cso19_transcript.fa +28 -0
  30. data/test/data/seq_drd3_gene.fa +838 -0
  31. data/test/data/seq_drd3_transcript.fa +22 -0
  32. data/test/data/seq_drd4_transcript.fa +24 -0
  33. data/test/data/seq_forward_composite.fa +1669 -0
  34. data/test/data/seq_par_boundary.fa +169 -0
  35. data/test/data/seq_rnd3_transcript.fa +47 -0
  36. data/test/data/seq_ub2r1_coding.fa +13 -0
  37. data/test/data/seq_ub2r1_gene.fa +174 -0
  38. data/test/data/seq_ub2r1_transcript.fa +26 -0
  39. data/test/data/seq_y.fa +2 -0
  40. data/test/default/test_connection.rb +60 -0
  41. data/test/default/test_releases.rb +130 -0
  42. data/test/ensembl_genomes/test_collection.rb +122 -0
  43. data/test/ensembl_genomes/test_gene.rb +46 -0
  44. data/test/ensembl_genomes/test_slice.rb +65 -0
  45. data/test/ensembl_genomes/test_variation.rb +38 -0
  46. data/test/helper.rb +18 -0
  47. data/test/release_50/core/test_project.rb +210 -0
  48. data/test/release_50/core/test_project_human.rb +52 -0
  49. data/test/release_50/core/test_relationships.rb +72 -0
  50. data/test/release_50/core/test_sequence.rb +170 -0
  51. data/test/release_50/core/test_slice.rb +116 -0
  52. data/test/release_50/core/test_transcript.rb +125 -0
  53. data/test/release_50/core/test_transform.rb +217 -0
  54. data/test/release_50/variation/test_activerecord.rb +138 -0
  55. data/test/release_50/variation/test_variation.rb +79 -0
  56. data/test/release_53/core/test_gene.rb +61 -0
  57. data/test/release_53/core/test_project.rb +91 -0
  58. data/test/release_53/core/test_project_human.rb +61 -0
  59. data/test/release_53/core/test_slice.rb +42 -0
  60. data/test/release_53/core/test_transform.rb +57 -0
  61. data/test/release_53/variation/test_activerecord.rb +137 -0
  62. data/test/release_53/variation/test_variation.rb +66 -0
  63. data/test/release_56/core/test_gene.rb +61 -0
  64. data/test/release_56/core/test_project.rb +91 -0
  65. data/test/release_56/core/test_slice.rb +49 -0
  66. data/test/release_56/core/test_transform.rb +57 -0
  67. data/test/release_56/variation/test_activerecord.rb +141 -0
  68. data/test/release_56/variation/test_consequence.rb +131 -0
  69. data/test/release_56/variation/test_variation.rb +63 -0
  70. data/test/release_60/core/test_gene.rb +61 -0
  71. data/test/release_60/core/test_project_human.rb +34 -0
  72. data/test/release_60/core/test_slice.rb +42 -0
  73. data/test/release_60/core/test_transcript.rb +120 -0
  74. data/test/release_60/core/test_transform.rb +57 -0
  75. data/test/release_60/variation/test_activerecord.rb +216 -0
  76. data/test/release_60/variation/test_consequence.rb +153 -0
  77. data/test/release_60/variation/test_variation.rb +64 -0
  78. data/test/release_62/core/test_gene.rb +42 -0
  79. data/test/release_62/variation/test_activerecord.rb +86 -0
  80. data/test/release_62/variation/test_consequence.rb +191 -0
  81. metadata +287 -0
@@ -0,0 +1,130 @@
1
+ #
2
+ # = test/default/test_releases.rb - Unit test for Ensembl::Core
3
+ #
4
+ # Copyright:: Copyright (C) 2009
5
+ # Jan Aerts <http://jandot.myopenid.com>
6
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
7
+ # License:: Ruby's
8
+ #
9
+ # $Id:
10
+
11
+ require File.expand_path File.join(File.dirname(__FILE__),"../helper.rb")
12
+
13
+ include Ensembl::Core
14
+
15
+ class TestRelease53 < Test::Unit::TestCase
16
+
17
+ def teardown
18
+ DBConnection.remove_connection
19
+ end
20
+
21
+ def test_gene_stable_id_human
22
+ DBConnection.connect('homo_sapiens', 53)
23
+ slice = Slice.fetch_by_region('chromosome','1',1000,100000)
24
+ assert_equal(["ENSG00000146556","ENSG00000177693","ENSG00000197490","ENSG00000205292","ENSG00000219789","ENSG00000221311","ENSG00000222003","ENSG00000222027"], slice.genes.collect{|g| g.stable_id}.sort)
25
+ end
26
+ end
27
+
28
+ class TestRelease50 < Test::Unit::TestCase
29
+
30
+ def teardown
31
+ DBConnection.remove_connection
32
+ end
33
+
34
+ def test_gene_stable_id
35
+ DBConnection.connect('homo_sapiens', 50)
36
+ slice = Slice.fetch_by_region('chromosome','1',1000,100000)
37
+ assert_equal(["ENSG00000146556", "ENSG00000177693", "ENSG00000197194", "ENSG00000197490", "ENSG00000205292", "ENSG00000219789", "ENSG00000221311"], slice.genes.collect{|g| g.stable_id}.sort)
38
+ end
39
+ end
40
+
41
+ class TestRelease49 < Test::Unit::TestCase
42
+
43
+ def teardown
44
+ DBConnection.remove_connection
45
+ end
46
+
47
+ def test_slice_and_genes_mouse
48
+ DBConnection.connect('mus_musculus',49)
49
+
50
+ slice = Slice.fetch_by_region('chromosome',"19",52571924,52572023)
51
+ assert_equal("AAGGTTGTATTCTAGTTTGCTCTCTGTTATTGTGACAAAGACAGGACCAAAGAAACTTGAGTAGGAAATGGTTGATAAAATCTTACAAGTTAGAAGGCAG",slice.seq.upcase)
52
+
53
+ gene = Gene.find_by_stable_id("ENSMUSG00000017167")
54
+ assert_equal(101037431, gene.start)
55
+ assert_equal(101052034, gene.stop)
56
+ assert_equal(1,gene.transcripts.size)
57
+ assert_equal("ENSMUST00000103109",gene.transcripts[0].stable_id)
58
+ end
59
+
60
+ end
61
+
62
+ class TestRelease47 < Test::Unit::TestCase
63
+
64
+ def teardown
65
+ DBConnection.remove_connection
66
+ end
67
+
68
+ def test_slice_and_genes_mouse
69
+ DBConnection.connect('mus_musculus',47)
70
+
71
+ slice = Slice.fetch_by_region('chromosome',"5",123840876,123912619)
72
+ genes = slice.genes
73
+ assert_equal("ENSMUSG00000038342",genes[0].stable_id)
74
+ slice = Slice.fetch_by_region('chromosome',"5",123840876,123840975)
75
+ assert_equal("TCTCAGTTCAGGTTCTATGGGGGGGAGGGGAGGGAATGAAAAGGATGTTAACAATCACCATCACCAGGGGGGACCAATTTGAAGATCTGATCGCCGGTGT",slice.seq.upcase)
76
+
77
+ gene = Gene.find_by_stable_id("ENSMUSG00000017167")
78
+ assert_equal(101037431, gene.start)
79
+ assert_equal(101052034, gene.stop)
80
+ assert_equal(1,gene.transcripts.size)
81
+ assert_equal("ENSMUST00000103109",gene.transcripts[0].stable_id)
82
+ end
83
+
84
+ end
85
+
86
+ class TestRelease45 < Test::Unit::TestCase
87
+
88
+ def teardown
89
+ DBConnection.remove_connection
90
+ end
91
+
92
+ def test_gene_stable_id_human
93
+ DBConnection.connect('homo_sapiens', 45)
94
+ slice = Slice.fetch_by_region('chromosome','1',1000,100000)
95
+ assert_equal(["ENSG00000146556", "ENSG00000177693", "ENSG00000197194", "ENSG00000197490", "ENSG00000205292"], slice.genes.collect{|g| g.stable_id}.sort)
96
+ end
97
+
98
+ def test_slice_and_genes_mouse
99
+ DBConnection.connect('mus_musculus',45)
100
+
101
+ slice = Slice.fetch_by_region('chromosome',"11",101037431,101037530)
102
+ assert_equal("ACTCTATCCAACTGAAACTGGAGATTAGTAACAGGGAAAAACAAACTCAACTGACAGCTGCTCCCAGTACAGTTCTTATGGTACAGGGAGCGTGGGAGTG",slice.seq.upcase)
103
+
104
+ gene = Gene.find_by_stable_id("ENSMUSG00000017167")
105
+ assert_equal(100992131,gene.start)
106
+ assert_equal(101006814,gene.stop)
107
+ assert_equal("11",gene.seq_region.name)
108
+ assert_equal(2,gene.transcripts.size)
109
+ end
110
+
111
+ end
112
+
113
+ class TestRelease37 < Test::Unit::TestCase
114
+
115
+ def teardown
116
+ DBConnection.remove_connection
117
+ end
118
+
119
+ def test_gene_stable_id_human
120
+ DBConnection.connect('homo_sapiens', 37)
121
+ slice = Slice.fetch_by_region('chromosome','1',1000,100000)
122
+ assert_equal(["ENSG00000146556", "ENSG00000177693", "ENSG00000197194", "ENSG00000197490"], slice.genes.collect{|g| g.stable_id}.sort)
123
+ end
124
+ end
125
+
126
+
127
+
128
+
129
+
130
+
@@ -0,0 +1,122 @@
1
+ #
2
+ #
3
+ # Copyright:: Copyright (C) 2011
4
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
5
+ # License:: Ruby's
6
+ #
7
+ # $Id:
8
+
9
+ require File.expand_path File.join(File.dirname(__FILE__),"../helper.rb")
10
+
11
+ include Ensembl::Core
12
+
13
+
14
+ class TestCollection < Test::Unit::TestCase
15
+
16
+ def teardown
17
+ DBConnection.remove_connection
18
+ end
19
+
20
+ def test_check_collection
21
+ DBConnection.ensemblgenomes_connect('bacillus_collection',8)
22
+ assert_equal(true,Collection.check)
23
+ DBConnection.connect('homo_sapiens',56)
24
+ assert_equal(false,Collection.check)
25
+ end
26
+
27
+ def test_check_species
28
+ DBConnection.ensemblgenomes_connect('bacillus_collection',8)
29
+ assert_equal(["b_subtilis",
30
+ "b_amyloliquefaciens",
31
+ "b_anthracis_ames",
32
+ "b_anthracis_ames_ancestor",
33
+ "b_anthracis_sterne",
34
+ "b_cereus_atcc_10987",
35
+ "b_cereus_atcc_14579",
36
+ "b_cereus_cytotoxis",
37
+ "b_cereus_zk",
38
+ "b_clausii",
39
+ "b_halodurans",
40
+ "b_licheniformis_goettingen",
41
+ "b_licheniformis_novozymes",
42
+ "b_pumilus",
43
+ "b_thuringiensis",
44
+ "b_thuringiensis_konkukian",
45
+ "b_weihenstephanensis",
46
+ "b_cereus_ah820",
47
+ "b_cereus_ah187",
48
+ "b_cereus_03bb102",
49
+ "b_cereus_q1",
50
+ "b_cereus_g9842",
51
+ "b_cereus_b4264",
52
+ "b_anthracis_cdc_684",
53
+ "b_anthracis_a0248",
54
+ "b_thuringiensis_bt407",
55
+ "b_thuringiensis_ibl200",
56
+ "b_mycoides_rock1_4",
57
+ "b_thuringiensis_bgsc_4ba1_pondicheri",
58
+ "b_cereus_f65185",
59
+ "b_thuringiensis_bgsc_4bd1_huazhong",
60
+ "b_selenitireducens",
61
+ "b_cereus_r309803",
62
+ "b_cereus_bdrd_st196",
63
+ "b_thuringiensis_bgsc_4cc1_pulsiensis",
64
+ "b_cereus_95_8201",
65
+ "b_cereus_bdrd_st24",
66
+ "b_cereus_ah676",
67
+ "b_cereus_bdrd_st26",
68
+ "b_cereus_m1293",
69
+ "b_thuringiensis_atcc_10792",
70
+ "b_cereus_rock1_15",
71
+ "b_cereus_ah1273",
72
+ "b_thuringiensis_bgsc_4y1_tochigiensis",
73
+ "b_thuringiensis_t13001_pakistani",
74
+ "b_cereus_ah621",
75
+ "b_mycoides_dsm_2048",
76
+ "b_cereus_rock3_44",
77
+ "b_cereus_ah603",
78
+ "b_cereus_172560w",
79
+ "b_cereus_rock3_29",
80
+ "b_cereus_mm3",
81
+ "b_cereus_ah1272",
82
+ "b_pseudofirmus",
83
+ "b_cereus_rock4_18",
84
+ "b_thuringiensis_bgsc_4aw1_andalous",
85
+ "b_cereus_rock3_28",
86
+ "b_pseudomycoides",
87
+ "b_mycoides_rock3_17",
88
+ "b_thuringiensis_t01001",
89
+ "b_thuringiensis_t04001_sotto",
90
+ "b_cereus_bdrd_bcer4",
91
+ "b_thuringiensis_t03a001_kurstaki",
92
+ "b_thuringiensis_ibl4222",
93
+ "b_cereus_mm1550",
94
+ "b_cereus_bgsc_6e1",
95
+ "b_thuringiensis_bmb171",
96
+ "b_cereus_var_anthracis",
97
+ "b_cereus_atcc_10876",
98
+ "b_cereus_ah1271",
99
+ "b_cereus_atcc_4342",
100
+ "b_megaterium_atcc_12872",
101
+ "b_thuringiensis_bgsc_4aj1",
102
+ "b_cereus_rock4_2",
103
+ "b_cereus_rock3_42",
104
+ "b_cereus_rock1_3",
105
+ "b_tusciae",
106
+ "b_megaterium_dsm_319"],Collection.species)
107
+ end
108
+
109
+ def test_get_species_id
110
+ DBConnection.ensemblgenomes_connect('bacillus_collection',8)
111
+ assert_equal(9,Collection.get_species_id("Bacillus cereus ZK"))
112
+ assert_nil(Collection.get_species_id("Dummy specie"))
113
+ end
114
+
115
+ def test_connection_with_a_species
116
+ assert_nothing_raised do
117
+ DBConnection.ensemblgenomes_connect('b_licheniformis_goettingen',8)
118
+ end
119
+ assert_equal('b_licheniformis_goettingen', Ensembl::SESSION.collection_species)
120
+ end
121
+
122
+ end
@@ -0,0 +1,46 @@
1
+ #
2
+ #
3
+ # Copyright:: Copyright (C) 2011
4
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
5
+ # License:: Ruby's
6
+ #
7
+ # $Id:
8
+
9
+ require File.expand_path File.join(File.dirname(__FILE__),"../helper.rb")
10
+
11
+ include Ensembl::Core
12
+
13
+ class TestGene < Test::Unit::TestCase
14
+
15
+ def setup
16
+ DBConnection.ensemblgenomes_connect('pyrococcus_collection',8)
17
+ end
18
+
19
+ def teardown
20
+ DBConnection.remove_connection
21
+ end
22
+
23
+ def test_find_gene
24
+ g = Gene.find_by_stable_id("EBPYRG00000005609")
25
+ assert_equal("EBPYRG00000005609",g.stable_id)
26
+ assert_equal(1195302,g.start)
27
+ assert_equal(1196675,g.stop)
28
+ assert_equal("Chromosome",g.seq_region.name)
29
+ assert_equal("ATGAATAGGAGCTTGTACTTGATTTTTATAATTGTAGGATATACTTTGGGAATATGGACA",g.seq.slice(0,60).upcase)
30
+ end
31
+
32
+ def test_find_transcript
33
+ g = Gene.find_by_stable_id("EBPYRG00000005609")
34
+ t = g.transcripts
35
+ assert_equal("EBPYRT00000005610",t[0].stable_id)
36
+ end
37
+
38
+ def test_find_exons
39
+ g = Gene.find_by_stable_id("EBPYRG00000005609")
40
+ t = g.transcripts
41
+ e = t[0].exons
42
+ assert_equal("EBPYRE00000005617",e[0].stable_id)
43
+ assert_equal("ATGAATAGGAGCTTGTACTTGATTTTTATAATTGTAGGATATACTTTGGGAATATGGACA",e[0].seq.slice(0,60).upcase)
44
+ end
45
+
46
+ end
@@ -0,0 +1,65 @@
1
+ #
2
+ #
3
+ # Copyright:: Copyright (C) 2011
4
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
5
+ # License:: Ruby's
6
+ #
7
+ # $Id:
8
+
9
+ require File.expand_path File.join(File.dirname(__FILE__),"../helper.rb")
10
+
11
+ include Ensembl::Core
12
+
13
+ class TestSlice < Test::Unit::TestCase
14
+
15
+ def teardown
16
+ DBConnection.remove_connection
17
+ end
18
+
19
+ def test_fetch_by_region
20
+ DBConnection.ensemblgenomes_connect('mycobacterium_collection',8)
21
+ slice = Slice.fetch_by_region('chromosome',"Chromosome",183617,183716,1,"Mycobacterium tuberculosis H37Rv")
22
+ assert_equal("GCGCCATGACAGATCCGCAGACGCAGAGCACCAGGGTCGGGGTGGTTGCCGAGTCGGGGCCCGACGAACGACGGGTCGCGCTGGTTCCCAAGGCGGTCGC",slice.seq.upcase)
23
+
24
+ slice = Slice.fetch_by_region('chromosome',"Chromosome",4285422,4285521,1,"Mycobacterium paratuberculosis")
25
+ assert_equal("GGTGTTAACGGCCGAAAGGTGGTTGAAAGATCGGCGGAATCGGGCGCACCCGGGTGGTCGTCGACGCCGCGCTGGTGGTGCTCGGCTGCGCCGTCGTGGT",slice.seq.upcase)
26
+
27
+ slice = Slice.fetch_by_region('chromosome',"Chromosome",2667164,2667263,1,"Mycobacterium paratuberculosis")
28
+ assert_equal("GTTCCACCTGCCGATCGTCTTCCTCGCCGATAACCCGGGCATGCTGCCCGGCAGCCGGTCCGAACGCAGCGGTGTGCTGCGCGCCGGCGCGCGGATGTTC",slice.seq.upcase)
29
+ end
30
+
31
+ def test_fetch_genes_from_slice
32
+ DBConnection.ensemblgenomes_connect('mycobacterium_collection',8)
33
+ slice = Slice.fetch_by_region('chromosome',"Chromosome",620900,622130 ,1,"Mycobacterium tuberculosis H37Rv")
34
+ genes = slice.genes
35
+ assert_equal("EBMYCG00000001929",genes[0].stable_id)
36
+
37
+ slice = Slice.fetch_by_region('chromosome',"Chromosome",923890,925120 ,1,"Mycobacterium paratuberculosis")
38
+ genes = slice.genes
39
+ assert_equal("EBMYCG00000037956",genes[0].stable_id)
40
+ end
41
+
42
+ def test_new_db_and_reverse_slice
43
+ DBConnection.ensemblgenomes_connect('escherichia_coli_K12',8)
44
+ slice = Slice.fetch_by_region('chromosome',"Chromosome",831691,831790,-1)
45
+ assert_equal("AAACGATGCTTACTGGGGAGACGGTGGTCATGGTAAGGGCAAGAATCGACTGGGCTACCTTTTAATGGAGTTGCGCGAACAATTGGCTATAGAGAAGTAA",slice.seq.upcase)
46
+
47
+ slice = Slice.fetch_by_region('chromosome',"Chromosome",831690,832175,-1)
48
+ genes = slice.genes
49
+ assert_equal("EBESCG00000001341",genes[0].stable_id)
50
+ end
51
+
52
+ def test_fetch_all
53
+ DBConnection.ensemblgenomes_connect('b_anthracis_Sterne',8)
54
+ slices = Slice.fetch_all('chromosome')
55
+ assert_equal(5228663,slices[0].length)
56
+ end
57
+
58
+ def test_error_species
59
+ DBConnection.ensemblgenomes_connect('bacillus_collection',8)
60
+ assert_raise ArgumentError do
61
+ Slice.fetch_by_region('chromosome',"Chromosome",831690,832175,1,"Wrong specie name")
62
+ end
63
+ end
64
+
65
+ end
@@ -0,0 +1,38 @@
1
+ #
2
+ #
3
+ # Copyright:: Copyright (C) 2011
4
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
5
+ # License:: Ruby's
6
+ #
7
+ # $Id:
8
+
9
+ require File.expand_path File.join(File.dirname(__FILE__),"../helper.rb")
10
+
11
+ include Ensembl::Variation
12
+
13
+
14
+ class TestVariation < Test::Unit::TestCase
15
+
16
+ def setup
17
+ DBConnection.ensemblgenomes_connect('vitis_vinifera',8)
18
+ end
19
+
20
+ def teardown
21
+ DBConnection.remove_connection
22
+ end
23
+
24
+ def test_fetch_region
25
+ vf = Variation.find_by_name('ENSVVVI00603004').variation_features[0]
26
+ assert_equal(8789,vf.seq_region_start)
27
+ assert_equal(8789,vf.seq_region_end)
28
+ assert_equal('INTRONIC',vf.consequence_type)
29
+ assert_equal('T/A',vf.allele_string)
30
+ tv = vf.transcript_variations
31
+ t = tv[0].transcript
32
+ assert_equal("GSVIVT01004799001",t.stable_id)
33
+ end
34
+
35
+
36
+
37
+
38
+ end
@@ -0,0 +1,18 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
+ begin
4
+ Bundler.setup(:default, :development)
5
+ rescue Bundler::BundlerError => e
6
+ $stderr.puts e.message
7
+ $stderr.puts "Run `bundle install` to install missing gems"
8
+ exit e.status_code
9
+ end
10
+ require 'test/unit'
11
+ require 'shoulda'
12
+
13
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
14
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
15
+ require 'bio-ensembl'
16
+
17
+ class Test::Unit::TestCase
18
+ end
@@ -0,0 +1,210 @@
1
+ #
2
+ # = test/release_50/core/test_project.rb - Unit test for Ensembl::Core
3
+ #
4
+ # Copyright:: Copyright (C) 2007
5
+ # Jan Aerts <http://jandot.myopenid.com>
6
+ # License:: Ruby's
7
+ #
8
+ # $Id:
9
+ require File.expand_path File.join(File.dirname(__FILE__),"../../helper.rb")
10
+
11
+ include Ensembl::Core
12
+
13
+ DBConnection.connect('bos_taurus', 50)
14
+
15
+ class CoordinateMappingsTestSimple < Test::Unit::TestCase
16
+ # First see if the relationships work
17
+ def test_assemblies
18
+ # Contig AAFC03055291 should only be a component of chromosome 20
19
+ contig_coord_system = CoordSystem.find_by_name('contig')
20
+ aafc03055291 = SeqRegion.find_by_name_and_coord_system_id('AAFC03055291', contig_coord_system.id)
21
+ assert_equal(1, aafc03055291.assembled_seq_regions.length)
22
+
23
+ # Chromosome 20 has 2970 components
24
+ chr_coord_system = CoordSystem.find_by_name('chromosome')
25
+ chr20 = SeqRegion.find_by_name_and_coord_system_id('20', chr_coord_system.id)
26
+ assert_equal(2970, chr20.component_seq_regions.length)
27
+
28
+ # Chromosome 20 has 2970 contigs
29
+ assert_equal(2970, chr20.component_seq_regions('contig').length)
30
+
31
+ # Positions of the link between Chr20 and AAFC03055291
32
+ # * Contig AAFC03055291 starts at position 13970982 on chromosome Chr20
33
+ assert_equal(13970982, aafc03055291.assembly_links_as_component(chr_coord_system)[0].asm_start)
34
+ end
35
+ end
36
+
37
+ class Sequences < Test::Unit::TestCase
38
+ def setup
39
+ @seq_region = SeqRegion.find(92594)
40
+ end
41
+
42
+ def test_simple
43
+ assert_equal('AGCTATTTTATGACTT', @seq_region.seq.slice(4,16))
44
+ end
45
+
46
+ def test_subseq
47
+ assert_equal('AGCTATTTTATGACTT', @seq_region.subseq(5,20))
48
+ end
49
+ end
50
+
51
+ #class SliceProjectFromComponentToAssembly < Test::Unit::TestCase
52
+ # # |------------------------------------------> chromosome
53
+ # # ^ ^
54
+ # # | |
55
+ # # |-----------------> scaffold
56
+ # def test_project_from_whole_component_to_assembly
57
+ # source_slice = Slice.fetch_by_region('contig','AAFC03055291')
58
+ # target_slices = source_slice.project('chromosome')
59
+ #
60
+ # # Start and stop of contig AAFC03055291 on Chr20
61
+ # assert_equal(13970982, target_slices[0].start)
62
+ # assert_equal(13982069, target_slices[0].stop)
63
+ # end
64
+ #
65
+ # # |------------------------------------------> chromosome
66
+ # # ^ ^
67
+ # # | |
68
+ # # |-----------------> scaffold
69
+ # def test_project_from_component_to_assembly_with_positions
70
+ # source_slice = Slice.fetch_by_region('scaffold','Chr4.003.105', 42, 2007)
71
+ # target_slices = source_slice.project('chromosome')
72
+ #
73
+ # # Position 42 on chr4_105 is position 96652152, position 2007 is 96654117
74
+ # assert_equal(96652152, target_slices[0].start)
75
+ # assert_equal(96654117, target_slices[0].stop)
76
+ # end
77
+ #
78
+ # # |------------------------------------------> scaffold
79
+ # # ^ ^
80
+ # # | |
81
+ # # ----------------> contig
82
+ # # /
83
+ # # |--
84
+ # def test_project_from_component_to_assembly_with_positions_and_cmp_start_not_1
85
+ # source_slice = Slice.fetch_by_region('contig', 'AAFC03020247', 42, 2007)
86
+ # target_slices = source_slice.project('scaffold')
87
+ #
88
+ # # Position 42 on AAFC03020247 is position 6570 on ChrUn.003.3522, position 2007 is 8535
89
+ # assert_equal(6570, target_slices[0].start)
90
+ # assert_equal(8535, target_slices[0].stop)
91
+ # end
92
+ #
93
+ # # |------------------------------------------> scaffold
94
+ # # ^ ^
95
+ # # | |
96
+ # # <-----------------| contig
97
+ # def test_project_from_component_to_assembly_with_strand
98
+ # source_slice_fw = Slice.fetch_by_region('contig', 'AAFC03020247')
99
+ # target_slices_fw = source_slice_fw.project('scaffold')
100
+ #
101
+ # assert_equal(1, target_slices_fw[0].strand)
102
+ #
103
+ # source_slice_rev = Slice.fetch_by_region('contig', 'AAFC03061502')
104
+ # target_slices_rev = source_slice_rev.project('scaffold')
105
+ #
106
+ # assert_equal(-1, target_slices_rev[0].strand)
107
+ # end
108
+ #end
109
+
110
+ #class SliceProjectFromComponentToAssemblyUsingTopLevel < Test::Unit::TestCase
111
+ # # |------------------------------------------> chromosome
112
+ # # ^ ^
113
+ # # | |
114
+ # # |-----------------> scaffold
115
+ # def test_project_from_whole_component_to_assembly
116
+ # source_slice = Slice.fetch_by_region('scaffold','Chr4.003.105')
117
+ # target_slices = source_slice.project('toplevel')
118
+ #
119
+ # # Start and stop of chr4_105 on Chr4
120
+ # assert_equal(96652111, target_slices[0].start)
121
+ # assert_equal(97251689, target_slices[0].stop)
122
+ # end
123
+ #
124
+ # # |------------------------------------------> chromosome
125
+ # # ^ ^
126
+ # # | |
127
+ # # |-----------------> scaffold
128
+ # def test_project_from_component_to_assembly_with_positions
129
+ # source_slice = Slice.fetch_by_region('scaffold','Chr4.003.105', 42, 2007)
130
+ # target_slices = source_slice.project('toplevel')
131
+ #
132
+ # # Position 42 on chr4_105 is position 96652152, position 2007 is 96654117
133
+ # assert_equal(96652152, target_slices[0].start)
134
+ # assert_equal(96654117, target_slices[0].stop)
135
+ # end
136
+ #end
137
+
138
+ class SliceProjectFromAssemblyToComponentForwardStrands < Test::Unit::TestCase
139
+ def setup
140
+ @source_slice_single_contig = Slice.fetch_by_region('chromosome', '20', 175000, 180000)
141
+ @target_slices_single_contig = @source_slice_single_contig.project('contig')
142
+
143
+ @source_slice_two_contigs = Slice.fetch_by_region('chromosome','20', 175000, 190000)
144
+ @target_slices_two_contigs = @source_slice_two_contigs.project('contig')
145
+
146
+ @source_slice_contigs_with_strand = Slice.fetch_by_region('chromosome', '20', 160000, 190000)
147
+ @target_slices_contigs_with_strand = @source_slice_contigs_with_strand.project('contig')
148
+
149
+ @source_slice_contigs_with_strand_ends_in_gaps = Slice.fetch_by_region('chromosome', '20', 170950, 196000)
150
+ @target_slices_contigs_with_strand_ends_in_gaps = @source_slice_contigs_with_strand_ends_in_gaps.project('contig')
151
+ end
152
+
153
+ # |-----------------> contig
154
+ # ^ ^
155
+ # | |
156
+ # |------------------------------------------> chromosome
157
+ def test_project_from_assembly_to_single_component
158
+ # Position 175000 on chr20 is position 4030 on contig, position 180000 is 9030
159
+ assert_equal('AAFC03028970', @target_slices_single_contig[0].seq_region.name)
160
+ assert_equal(4030, @target_slices_single_contig[0].start)
161
+ assert_equal(9030, @target_slices_single_contig[0].stop)
162
+ end
163
+
164
+ # |-----> |--------> contig
165
+ # ^ ^
166
+ # | |
167
+ # |------------------------------------------> chromosome
168
+ def test_project_from_assembly_to_two_components
169
+ # This chromosomal region is covered by contigs AAFC03028970, a gap and AAFC03028962
170
+ # * Position 175000 on chr 20 is position 4030 on contig AAFC03028970
171
+ # * Position 190000 on chr 20 is position 35 on contig AAFC03028962
172
+ assert_equal(3, @target_slices_two_contigs.length)
173
+ assert_equal('contig:Btau_4.0:AAFC03028970:4030:17365:1', @target_slices_two_contigs[0].display_name)
174
+ assert_equal(Gap, @target_slices_two_contigs[1].class)
175
+ assert_equal('contig:Btau_4.0:AAFC03028962:1:35:1', @target_slices_two_contigs[2].display_name)
176
+ end
177
+
178
+ # |-----> <-------| |-------> |-------> contig
179
+ # ^ ^
180
+ # | |
181
+ # |--------------------------------------------------> chromosome
182
+ def test_project_from_assembly_to_contigs_with_strand
183
+ # This chromosomal region is covered by 4 contigs and 3 gaps
184
+ # One of the contigs is on the reverse strand.
185
+ assert_equal(7, @target_slices_contigs_with_strand.length)
186
+ assert_equal('contig:Btau_4.0:AAFC03028964:90:9214:1', @target_slices_contigs_with_strand[0].display_name)
187
+ assert_equal(Gap, @target_slices_contigs_with_strand[1].class)
188
+ assert_equal('contig:Btau_4.0:AAFC03028959:1:1746:-1', @target_slices_contigs_with_strand[2].display_name)
189
+ assert_equal(Gap, @target_slices_contigs_with_strand[3].class)
190
+ assert_equal('contig:Btau_4.0:AAFC03028970:1:17365:1', @target_slices_contigs_with_strand[4].display_name)
191
+ assert_equal(Gap, @target_slices_contigs_with_strand[5].class)
192
+ assert_equal('contig:Btau_4.0:AAFC03028962:1:35:1', @target_slices_contigs_with_strand[6].display_name)
193
+ end
194
+
195
+ # <--| |-----> contig
196
+ # ^ ^
197
+ # | |
198
+ # |--------------------------------------------------> chromosome
199
+ def test_project_from_assembly_to_contigs_with_strand_and_ending_in_gaps
200
+ # This chromosomal region is covered by 2 contigs and 3 gaps, starting and ending in a gap: GaCoGaCoGa
201
+ assert_equal(5, @target_slices_contigs_with_strand_ends_in_gaps.length)
202
+ assert_equal(Gap, @target_slices_contigs_with_strand_ends_in_gaps[0].class)
203
+ assert_equal('contig:Btau_4.0:AAFC03028970:1:17365:1', @target_slices_contigs_with_strand_ends_in_gaps[1].display_name)
204
+ assert_equal(Gap, @target_slices_contigs_with_strand_ends_in_gaps[2].class)
205
+ assert_equal('contig:Btau_4.0:AAFC03028962:1:5704:1', @target_slices_contigs_with_strand_ends_in_gaps[3].display_name)
206
+ assert_equal(Gap, @target_slices_contigs_with_strand_ends_in_gaps[4].class)
207
+ end
208
+
209
+
210
+ end