bio-ensembl 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (81) hide show
  1. data/.document +5 -0
  2. data/Gemfile +20 -0
  3. data/Gemfile.lock +40 -0
  4. data/LICENSE.txt +20 -0
  5. data/README.rdoc +19 -0
  6. data/Rakefile +71 -0
  7. data/VERSION +1 -0
  8. data/bin/ensembl +40 -0
  9. data/bin/variation_effect_predictor +106 -0
  10. data/bio-ensembl.gemspec +190 -0
  11. data/lib/bio-ensembl.rb +65 -0
  12. data/lib/bio-ensembl/core/activerecord.rb +1812 -0
  13. data/lib/bio-ensembl/core/collection.rb +64 -0
  14. data/lib/bio-ensembl/core/project.rb +262 -0
  15. data/lib/bio-ensembl/core/slice.rb +657 -0
  16. data/lib/bio-ensembl/core/transcript.rb +409 -0
  17. data/lib/bio-ensembl/core/transform.rb +95 -0
  18. data/lib/bio-ensembl/db_connection.rb +205 -0
  19. data/lib/bio-ensembl/variation/activerecord.rb +536 -0
  20. data/lib/bio-ensembl/variation/variation_feature.rb +376 -0
  21. data/lib/bio-ensembl/variation/variation_feature62.rb +444 -0
  22. data/samples/ensembl_genomes_example.rb +60 -0
  23. data/samples/examples_perl_tutorial.rb +125 -0
  24. data/samples/small_example_ruby_api.rb +34 -0
  25. data/samples/variation_effect_predictor_data.txt +4 -0
  26. data/samples/variation_example.rb +67 -0
  27. data/test/data/seq_c6qbl.fa +10 -0
  28. data/test/data/seq_cso19_coding.fa +16 -0
  29. data/test/data/seq_cso19_transcript.fa +28 -0
  30. data/test/data/seq_drd3_gene.fa +838 -0
  31. data/test/data/seq_drd3_transcript.fa +22 -0
  32. data/test/data/seq_drd4_transcript.fa +24 -0
  33. data/test/data/seq_forward_composite.fa +1669 -0
  34. data/test/data/seq_par_boundary.fa +169 -0
  35. data/test/data/seq_rnd3_transcript.fa +47 -0
  36. data/test/data/seq_ub2r1_coding.fa +13 -0
  37. data/test/data/seq_ub2r1_gene.fa +174 -0
  38. data/test/data/seq_ub2r1_transcript.fa +26 -0
  39. data/test/data/seq_y.fa +2 -0
  40. data/test/default/test_connection.rb +60 -0
  41. data/test/default/test_releases.rb +130 -0
  42. data/test/ensembl_genomes/test_collection.rb +122 -0
  43. data/test/ensembl_genomes/test_gene.rb +46 -0
  44. data/test/ensembl_genomes/test_slice.rb +65 -0
  45. data/test/ensembl_genomes/test_variation.rb +38 -0
  46. data/test/helper.rb +18 -0
  47. data/test/release_50/core/test_project.rb +210 -0
  48. data/test/release_50/core/test_project_human.rb +52 -0
  49. data/test/release_50/core/test_relationships.rb +72 -0
  50. data/test/release_50/core/test_sequence.rb +170 -0
  51. data/test/release_50/core/test_slice.rb +116 -0
  52. data/test/release_50/core/test_transcript.rb +125 -0
  53. data/test/release_50/core/test_transform.rb +217 -0
  54. data/test/release_50/variation/test_activerecord.rb +138 -0
  55. data/test/release_50/variation/test_variation.rb +79 -0
  56. data/test/release_53/core/test_gene.rb +61 -0
  57. data/test/release_53/core/test_project.rb +91 -0
  58. data/test/release_53/core/test_project_human.rb +61 -0
  59. data/test/release_53/core/test_slice.rb +42 -0
  60. data/test/release_53/core/test_transform.rb +57 -0
  61. data/test/release_53/variation/test_activerecord.rb +137 -0
  62. data/test/release_53/variation/test_variation.rb +66 -0
  63. data/test/release_56/core/test_gene.rb +61 -0
  64. data/test/release_56/core/test_project.rb +91 -0
  65. data/test/release_56/core/test_slice.rb +49 -0
  66. data/test/release_56/core/test_transform.rb +57 -0
  67. data/test/release_56/variation/test_activerecord.rb +141 -0
  68. data/test/release_56/variation/test_consequence.rb +131 -0
  69. data/test/release_56/variation/test_variation.rb +63 -0
  70. data/test/release_60/core/test_gene.rb +61 -0
  71. data/test/release_60/core/test_project_human.rb +34 -0
  72. data/test/release_60/core/test_slice.rb +42 -0
  73. data/test/release_60/core/test_transcript.rb +120 -0
  74. data/test/release_60/core/test_transform.rb +57 -0
  75. data/test/release_60/variation/test_activerecord.rb +216 -0
  76. data/test/release_60/variation/test_consequence.rb +153 -0
  77. data/test/release_60/variation/test_variation.rb +64 -0
  78. data/test/release_62/core/test_gene.rb +42 -0
  79. data/test/release_62/variation/test_activerecord.rb +86 -0
  80. data/test/release_62/variation/test_consequence.rb +191 -0
  81. metadata +287 -0
@@ -0,0 +1,130 @@
1
+ #
2
+ # = test/default/test_releases.rb - Unit test for Ensembl::Core
3
+ #
4
+ # Copyright:: Copyright (C) 2009
5
+ # Jan Aerts <http://jandot.myopenid.com>
6
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
7
+ # License:: Ruby's
8
+ #
9
+ # $Id:
10
+
11
+ require File.expand_path File.join(File.dirname(__FILE__),"../helper.rb")
12
+
13
+ include Ensembl::Core
14
+
15
+ class TestRelease53 < Test::Unit::TestCase
16
+
17
+ def teardown
18
+ DBConnection.remove_connection
19
+ end
20
+
21
+ def test_gene_stable_id_human
22
+ DBConnection.connect('homo_sapiens', 53)
23
+ slice = Slice.fetch_by_region('chromosome','1',1000,100000)
24
+ assert_equal(["ENSG00000146556","ENSG00000177693","ENSG00000197490","ENSG00000205292","ENSG00000219789","ENSG00000221311","ENSG00000222003","ENSG00000222027"], slice.genes.collect{|g| g.stable_id}.sort)
25
+ end
26
+ end
27
+
28
+ class TestRelease50 < Test::Unit::TestCase
29
+
30
+ def teardown
31
+ DBConnection.remove_connection
32
+ end
33
+
34
+ def test_gene_stable_id
35
+ DBConnection.connect('homo_sapiens', 50)
36
+ slice = Slice.fetch_by_region('chromosome','1',1000,100000)
37
+ assert_equal(["ENSG00000146556", "ENSG00000177693", "ENSG00000197194", "ENSG00000197490", "ENSG00000205292", "ENSG00000219789", "ENSG00000221311"], slice.genes.collect{|g| g.stable_id}.sort)
38
+ end
39
+ end
40
+
41
+ class TestRelease49 < Test::Unit::TestCase
42
+
43
+ def teardown
44
+ DBConnection.remove_connection
45
+ end
46
+
47
+ def test_slice_and_genes_mouse
48
+ DBConnection.connect('mus_musculus',49)
49
+
50
+ slice = Slice.fetch_by_region('chromosome',"19",52571924,52572023)
51
+ assert_equal("AAGGTTGTATTCTAGTTTGCTCTCTGTTATTGTGACAAAGACAGGACCAAAGAAACTTGAGTAGGAAATGGTTGATAAAATCTTACAAGTTAGAAGGCAG",slice.seq.upcase)
52
+
53
+ gene = Gene.find_by_stable_id("ENSMUSG00000017167")
54
+ assert_equal(101037431, gene.start)
55
+ assert_equal(101052034, gene.stop)
56
+ assert_equal(1,gene.transcripts.size)
57
+ assert_equal("ENSMUST00000103109",gene.transcripts[0].stable_id)
58
+ end
59
+
60
+ end
61
+
62
+ class TestRelease47 < Test::Unit::TestCase
63
+
64
+ def teardown
65
+ DBConnection.remove_connection
66
+ end
67
+
68
+ def test_slice_and_genes_mouse
69
+ DBConnection.connect('mus_musculus',47)
70
+
71
+ slice = Slice.fetch_by_region('chromosome',"5",123840876,123912619)
72
+ genes = slice.genes
73
+ assert_equal("ENSMUSG00000038342",genes[0].stable_id)
74
+ slice = Slice.fetch_by_region('chromosome',"5",123840876,123840975)
75
+ assert_equal("TCTCAGTTCAGGTTCTATGGGGGGGAGGGGAGGGAATGAAAAGGATGTTAACAATCACCATCACCAGGGGGGACCAATTTGAAGATCTGATCGCCGGTGT",slice.seq.upcase)
76
+
77
+ gene = Gene.find_by_stable_id("ENSMUSG00000017167")
78
+ assert_equal(101037431, gene.start)
79
+ assert_equal(101052034, gene.stop)
80
+ assert_equal(1,gene.transcripts.size)
81
+ assert_equal("ENSMUST00000103109",gene.transcripts[0].stable_id)
82
+ end
83
+
84
+ end
85
+
86
+ class TestRelease45 < Test::Unit::TestCase
87
+
88
+ def teardown
89
+ DBConnection.remove_connection
90
+ end
91
+
92
+ def test_gene_stable_id_human
93
+ DBConnection.connect('homo_sapiens', 45)
94
+ slice = Slice.fetch_by_region('chromosome','1',1000,100000)
95
+ assert_equal(["ENSG00000146556", "ENSG00000177693", "ENSG00000197194", "ENSG00000197490", "ENSG00000205292"], slice.genes.collect{|g| g.stable_id}.sort)
96
+ end
97
+
98
+ def test_slice_and_genes_mouse
99
+ DBConnection.connect('mus_musculus',45)
100
+
101
+ slice = Slice.fetch_by_region('chromosome',"11",101037431,101037530)
102
+ assert_equal("ACTCTATCCAACTGAAACTGGAGATTAGTAACAGGGAAAAACAAACTCAACTGACAGCTGCTCCCAGTACAGTTCTTATGGTACAGGGAGCGTGGGAGTG",slice.seq.upcase)
103
+
104
+ gene = Gene.find_by_stable_id("ENSMUSG00000017167")
105
+ assert_equal(100992131,gene.start)
106
+ assert_equal(101006814,gene.stop)
107
+ assert_equal("11",gene.seq_region.name)
108
+ assert_equal(2,gene.transcripts.size)
109
+ end
110
+
111
+ end
112
+
113
+ class TestRelease37 < Test::Unit::TestCase
114
+
115
+ def teardown
116
+ DBConnection.remove_connection
117
+ end
118
+
119
+ def test_gene_stable_id_human
120
+ DBConnection.connect('homo_sapiens', 37)
121
+ slice = Slice.fetch_by_region('chromosome','1',1000,100000)
122
+ assert_equal(["ENSG00000146556", "ENSG00000177693", "ENSG00000197194", "ENSG00000197490"], slice.genes.collect{|g| g.stable_id}.sort)
123
+ end
124
+ end
125
+
126
+
127
+
128
+
129
+
130
+
@@ -0,0 +1,122 @@
1
+ #
2
+ #
3
+ # Copyright:: Copyright (C) 2011
4
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
5
+ # License:: Ruby's
6
+ #
7
+ # $Id:
8
+
9
+ require File.expand_path File.join(File.dirname(__FILE__),"../helper.rb")
10
+
11
+ include Ensembl::Core
12
+
13
+
14
+ class TestCollection < Test::Unit::TestCase
15
+
16
+ def teardown
17
+ DBConnection.remove_connection
18
+ end
19
+
20
+ def test_check_collection
21
+ DBConnection.ensemblgenomes_connect('bacillus_collection',8)
22
+ assert_equal(true,Collection.check)
23
+ DBConnection.connect('homo_sapiens',56)
24
+ assert_equal(false,Collection.check)
25
+ end
26
+
27
+ def test_check_species
28
+ DBConnection.ensemblgenomes_connect('bacillus_collection',8)
29
+ assert_equal(["b_subtilis",
30
+ "b_amyloliquefaciens",
31
+ "b_anthracis_ames",
32
+ "b_anthracis_ames_ancestor",
33
+ "b_anthracis_sterne",
34
+ "b_cereus_atcc_10987",
35
+ "b_cereus_atcc_14579",
36
+ "b_cereus_cytotoxis",
37
+ "b_cereus_zk",
38
+ "b_clausii",
39
+ "b_halodurans",
40
+ "b_licheniformis_goettingen",
41
+ "b_licheniformis_novozymes",
42
+ "b_pumilus",
43
+ "b_thuringiensis",
44
+ "b_thuringiensis_konkukian",
45
+ "b_weihenstephanensis",
46
+ "b_cereus_ah820",
47
+ "b_cereus_ah187",
48
+ "b_cereus_03bb102",
49
+ "b_cereus_q1",
50
+ "b_cereus_g9842",
51
+ "b_cereus_b4264",
52
+ "b_anthracis_cdc_684",
53
+ "b_anthracis_a0248",
54
+ "b_thuringiensis_bt407",
55
+ "b_thuringiensis_ibl200",
56
+ "b_mycoides_rock1_4",
57
+ "b_thuringiensis_bgsc_4ba1_pondicheri",
58
+ "b_cereus_f65185",
59
+ "b_thuringiensis_bgsc_4bd1_huazhong",
60
+ "b_selenitireducens",
61
+ "b_cereus_r309803",
62
+ "b_cereus_bdrd_st196",
63
+ "b_thuringiensis_bgsc_4cc1_pulsiensis",
64
+ "b_cereus_95_8201",
65
+ "b_cereus_bdrd_st24",
66
+ "b_cereus_ah676",
67
+ "b_cereus_bdrd_st26",
68
+ "b_cereus_m1293",
69
+ "b_thuringiensis_atcc_10792",
70
+ "b_cereus_rock1_15",
71
+ "b_cereus_ah1273",
72
+ "b_thuringiensis_bgsc_4y1_tochigiensis",
73
+ "b_thuringiensis_t13001_pakistani",
74
+ "b_cereus_ah621",
75
+ "b_mycoides_dsm_2048",
76
+ "b_cereus_rock3_44",
77
+ "b_cereus_ah603",
78
+ "b_cereus_172560w",
79
+ "b_cereus_rock3_29",
80
+ "b_cereus_mm3",
81
+ "b_cereus_ah1272",
82
+ "b_pseudofirmus",
83
+ "b_cereus_rock4_18",
84
+ "b_thuringiensis_bgsc_4aw1_andalous",
85
+ "b_cereus_rock3_28",
86
+ "b_pseudomycoides",
87
+ "b_mycoides_rock3_17",
88
+ "b_thuringiensis_t01001",
89
+ "b_thuringiensis_t04001_sotto",
90
+ "b_cereus_bdrd_bcer4",
91
+ "b_thuringiensis_t03a001_kurstaki",
92
+ "b_thuringiensis_ibl4222",
93
+ "b_cereus_mm1550",
94
+ "b_cereus_bgsc_6e1",
95
+ "b_thuringiensis_bmb171",
96
+ "b_cereus_var_anthracis",
97
+ "b_cereus_atcc_10876",
98
+ "b_cereus_ah1271",
99
+ "b_cereus_atcc_4342",
100
+ "b_megaterium_atcc_12872",
101
+ "b_thuringiensis_bgsc_4aj1",
102
+ "b_cereus_rock4_2",
103
+ "b_cereus_rock3_42",
104
+ "b_cereus_rock1_3",
105
+ "b_tusciae",
106
+ "b_megaterium_dsm_319"],Collection.species)
107
+ end
108
+
109
+ def test_get_species_id
110
+ DBConnection.ensemblgenomes_connect('bacillus_collection',8)
111
+ assert_equal(9,Collection.get_species_id("Bacillus cereus ZK"))
112
+ assert_nil(Collection.get_species_id("Dummy specie"))
113
+ end
114
+
115
+ def test_connection_with_a_species
116
+ assert_nothing_raised do
117
+ DBConnection.ensemblgenomes_connect('b_licheniformis_goettingen',8)
118
+ end
119
+ assert_equal('b_licheniformis_goettingen', Ensembl::SESSION.collection_species)
120
+ end
121
+
122
+ end
@@ -0,0 +1,46 @@
1
+ #
2
+ #
3
+ # Copyright:: Copyright (C) 2011
4
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
5
+ # License:: Ruby's
6
+ #
7
+ # $Id:
8
+
9
+ require File.expand_path File.join(File.dirname(__FILE__),"../helper.rb")
10
+
11
+ include Ensembl::Core
12
+
13
+ class TestGene < Test::Unit::TestCase
14
+
15
+ def setup
16
+ DBConnection.ensemblgenomes_connect('pyrococcus_collection',8)
17
+ end
18
+
19
+ def teardown
20
+ DBConnection.remove_connection
21
+ end
22
+
23
+ def test_find_gene
24
+ g = Gene.find_by_stable_id("EBPYRG00000005609")
25
+ assert_equal("EBPYRG00000005609",g.stable_id)
26
+ assert_equal(1195302,g.start)
27
+ assert_equal(1196675,g.stop)
28
+ assert_equal("Chromosome",g.seq_region.name)
29
+ assert_equal("ATGAATAGGAGCTTGTACTTGATTTTTATAATTGTAGGATATACTTTGGGAATATGGACA",g.seq.slice(0,60).upcase)
30
+ end
31
+
32
+ def test_find_transcript
33
+ g = Gene.find_by_stable_id("EBPYRG00000005609")
34
+ t = g.transcripts
35
+ assert_equal("EBPYRT00000005610",t[0].stable_id)
36
+ end
37
+
38
+ def test_find_exons
39
+ g = Gene.find_by_stable_id("EBPYRG00000005609")
40
+ t = g.transcripts
41
+ e = t[0].exons
42
+ assert_equal("EBPYRE00000005617",e[0].stable_id)
43
+ assert_equal("ATGAATAGGAGCTTGTACTTGATTTTTATAATTGTAGGATATACTTTGGGAATATGGACA",e[0].seq.slice(0,60).upcase)
44
+ end
45
+
46
+ end
@@ -0,0 +1,65 @@
1
+ #
2
+ #
3
+ # Copyright:: Copyright (C) 2011
4
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
5
+ # License:: Ruby's
6
+ #
7
+ # $Id:
8
+
9
+ require File.expand_path File.join(File.dirname(__FILE__),"../helper.rb")
10
+
11
+ include Ensembl::Core
12
+
13
+ class TestSlice < Test::Unit::TestCase
14
+
15
+ def teardown
16
+ DBConnection.remove_connection
17
+ end
18
+
19
+ def test_fetch_by_region
20
+ DBConnection.ensemblgenomes_connect('mycobacterium_collection',8)
21
+ slice = Slice.fetch_by_region('chromosome',"Chromosome",183617,183716,1,"Mycobacterium tuberculosis H37Rv")
22
+ assert_equal("GCGCCATGACAGATCCGCAGACGCAGAGCACCAGGGTCGGGGTGGTTGCCGAGTCGGGGCCCGACGAACGACGGGTCGCGCTGGTTCCCAAGGCGGTCGC",slice.seq.upcase)
23
+
24
+ slice = Slice.fetch_by_region('chromosome',"Chromosome",4285422,4285521,1,"Mycobacterium paratuberculosis")
25
+ assert_equal("GGTGTTAACGGCCGAAAGGTGGTTGAAAGATCGGCGGAATCGGGCGCACCCGGGTGGTCGTCGACGCCGCGCTGGTGGTGCTCGGCTGCGCCGTCGTGGT",slice.seq.upcase)
26
+
27
+ slice = Slice.fetch_by_region('chromosome',"Chromosome",2667164,2667263,1,"Mycobacterium paratuberculosis")
28
+ assert_equal("GTTCCACCTGCCGATCGTCTTCCTCGCCGATAACCCGGGCATGCTGCCCGGCAGCCGGTCCGAACGCAGCGGTGTGCTGCGCGCCGGCGCGCGGATGTTC",slice.seq.upcase)
29
+ end
30
+
31
+ def test_fetch_genes_from_slice
32
+ DBConnection.ensemblgenomes_connect('mycobacterium_collection',8)
33
+ slice = Slice.fetch_by_region('chromosome',"Chromosome",620900,622130 ,1,"Mycobacterium tuberculosis H37Rv")
34
+ genes = slice.genes
35
+ assert_equal("EBMYCG00000001929",genes[0].stable_id)
36
+
37
+ slice = Slice.fetch_by_region('chromosome',"Chromosome",923890,925120 ,1,"Mycobacterium paratuberculosis")
38
+ genes = slice.genes
39
+ assert_equal("EBMYCG00000037956",genes[0].stable_id)
40
+ end
41
+
42
+ def test_new_db_and_reverse_slice
43
+ DBConnection.ensemblgenomes_connect('escherichia_coli_K12',8)
44
+ slice = Slice.fetch_by_region('chromosome',"Chromosome",831691,831790,-1)
45
+ assert_equal("AAACGATGCTTACTGGGGAGACGGTGGTCATGGTAAGGGCAAGAATCGACTGGGCTACCTTTTAATGGAGTTGCGCGAACAATTGGCTATAGAGAAGTAA",slice.seq.upcase)
46
+
47
+ slice = Slice.fetch_by_region('chromosome',"Chromosome",831690,832175,-1)
48
+ genes = slice.genes
49
+ assert_equal("EBESCG00000001341",genes[0].stable_id)
50
+ end
51
+
52
+ def test_fetch_all
53
+ DBConnection.ensemblgenomes_connect('b_anthracis_Sterne',8)
54
+ slices = Slice.fetch_all('chromosome')
55
+ assert_equal(5228663,slices[0].length)
56
+ end
57
+
58
+ def test_error_species
59
+ DBConnection.ensemblgenomes_connect('bacillus_collection',8)
60
+ assert_raise ArgumentError do
61
+ Slice.fetch_by_region('chromosome',"Chromosome",831690,832175,1,"Wrong specie name")
62
+ end
63
+ end
64
+
65
+ end
@@ -0,0 +1,38 @@
1
+ #
2
+ #
3
+ # Copyright:: Copyright (C) 2011
4
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
5
+ # License:: Ruby's
6
+ #
7
+ # $Id:
8
+
9
+ require File.expand_path File.join(File.dirname(__FILE__),"../helper.rb")
10
+
11
+ include Ensembl::Variation
12
+
13
+
14
+ class TestVariation < Test::Unit::TestCase
15
+
16
+ def setup
17
+ DBConnection.ensemblgenomes_connect('vitis_vinifera',8)
18
+ end
19
+
20
+ def teardown
21
+ DBConnection.remove_connection
22
+ end
23
+
24
+ def test_fetch_region
25
+ vf = Variation.find_by_name('ENSVVVI00603004').variation_features[0]
26
+ assert_equal(8789,vf.seq_region_start)
27
+ assert_equal(8789,vf.seq_region_end)
28
+ assert_equal('INTRONIC',vf.consequence_type)
29
+ assert_equal('T/A',vf.allele_string)
30
+ tv = vf.transcript_variations
31
+ t = tv[0].transcript
32
+ assert_equal("GSVIVT01004799001",t.stable_id)
33
+ end
34
+
35
+
36
+
37
+
38
+ end
@@ -0,0 +1,18 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
+ begin
4
+ Bundler.setup(:default, :development)
5
+ rescue Bundler::BundlerError => e
6
+ $stderr.puts e.message
7
+ $stderr.puts "Run `bundle install` to install missing gems"
8
+ exit e.status_code
9
+ end
10
+ require 'test/unit'
11
+ require 'shoulda'
12
+
13
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
14
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
15
+ require 'bio-ensembl'
16
+
17
+ class Test::Unit::TestCase
18
+ end
@@ -0,0 +1,210 @@
1
+ #
2
+ # = test/release_50/core/test_project.rb - Unit test for Ensembl::Core
3
+ #
4
+ # Copyright:: Copyright (C) 2007
5
+ # Jan Aerts <http://jandot.myopenid.com>
6
+ # License:: Ruby's
7
+ #
8
+ # $Id:
9
+ require File.expand_path File.join(File.dirname(__FILE__),"../../helper.rb")
10
+
11
+ include Ensembl::Core
12
+
13
+ DBConnection.connect('bos_taurus', 50)
14
+
15
+ class CoordinateMappingsTestSimple < Test::Unit::TestCase
16
+ # First see if the relationships work
17
+ def test_assemblies
18
+ # Contig AAFC03055291 should only be a component of chromosome 20
19
+ contig_coord_system = CoordSystem.find_by_name('contig')
20
+ aafc03055291 = SeqRegion.find_by_name_and_coord_system_id('AAFC03055291', contig_coord_system.id)
21
+ assert_equal(1, aafc03055291.assembled_seq_regions.length)
22
+
23
+ # Chromosome 20 has 2970 components
24
+ chr_coord_system = CoordSystem.find_by_name('chromosome')
25
+ chr20 = SeqRegion.find_by_name_and_coord_system_id('20', chr_coord_system.id)
26
+ assert_equal(2970, chr20.component_seq_regions.length)
27
+
28
+ # Chromosome 20 has 2970 contigs
29
+ assert_equal(2970, chr20.component_seq_regions('contig').length)
30
+
31
+ # Positions of the link between Chr20 and AAFC03055291
32
+ # * Contig AAFC03055291 starts at position 13970982 on chromosome Chr20
33
+ assert_equal(13970982, aafc03055291.assembly_links_as_component(chr_coord_system)[0].asm_start)
34
+ end
35
+ end
36
+
37
+ class Sequences < Test::Unit::TestCase
38
+ def setup
39
+ @seq_region = SeqRegion.find(92594)
40
+ end
41
+
42
+ def test_simple
43
+ assert_equal('AGCTATTTTATGACTT', @seq_region.seq.slice(4,16))
44
+ end
45
+
46
+ def test_subseq
47
+ assert_equal('AGCTATTTTATGACTT', @seq_region.subseq(5,20))
48
+ end
49
+ end
50
+
51
+ #class SliceProjectFromComponentToAssembly < Test::Unit::TestCase
52
+ # # |------------------------------------------> chromosome
53
+ # # ^ ^
54
+ # # | |
55
+ # # |-----------------> scaffold
56
+ # def test_project_from_whole_component_to_assembly
57
+ # source_slice = Slice.fetch_by_region('contig','AAFC03055291')
58
+ # target_slices = source_slice.project('chromosome')
59
+ #
60
+ # # Start and stop of AAFC03055291 on Chr20
61
+ # assert_equal(13970982, target_slices[0].start)
62
+ # assert_equal(13982069, target_slices[0].stop)
63
+ # end
64
+ #
65
+ # # |------------------------------------------> chromosome
66
+ # # ^ ^
67
+ # # | |
68
+ # # |-----------------> scaffold
69
+ # def test_project_from_component_to_assembly_with_positions
70
+ # source_slice = Slice.fetch_by_region('scaffold','Chr4.003.105', 42, 2007)
71
+ # target_slices = source_slice.project('chromosome')
72
+ #
73
+ # # Position 42 on chr4_105 is position 96652152, position 2007 is 96654117
74
+ # assert_equal(96652152, target_slices[0].start)
75
+ # assert_equal(96654117, target_slices[0].stop)
76
+ # end
77
+ #
78
+ # # |------------------------------------------> scaffold
79
+ # # ^ ^
80
+ # # | |
81
+ # # ----------------> contig
82
+ # # /
83
+ # # |--
84
+ # def test_project_from_component_to_assembly_with_positions_and_cmp_start_not_1
85
+ # source_slice = Slice.fetch_by_region('contig', 'AAFC03020247', 42, 2007)
86
+ # target_slices = source_slice.project('scaffold')
87
+ #
88
+ # # Position 42 on AAFC03020247 is position 6570 on ChrUn.003.3522, position 2007 is 8535
89
+ # assert_equal(6570, target_slices[0].start)
90
+ # assert_equal(8535, target_slices[0].stop)
91
+ # end
92
+ #
93
+ # # |------------------------------------------> scaffold
94
+ # # ^ ^
95
+ # # | |
96
+ # # <-----------------| contig
97
+ # def test_project_from_component_to_assembly_with_strand
98
+ # source_slice_fw = Slice.fetch_by_region('contig', 'AAFC03020247')
99
+ # target_slices_fw = source_slice_fw.project('scaffold')
100
+ #
101
+ # assert_equal(1, target_slices_fw[0].strand)
102
+ #
103
+ # source_slice_rev = Slice.fetch_by_region('contig', 'AAFC03061502')
104
+ # target_slices_rev = source_slice_rev.project('scaffold')
105
+ #
106
+ # assert_equal(-1, target_slices_rev[0].strand)
107
+ # end
108
+ #end
109
+
110
+ #class SliceProjectFromComponentToAssemblyUsingTopLevel < Test::Unit::TestCase
111
+ # # |------------------------------------------> chromosome
112
+ # # ^ ^
113
+ # # | |
114
+ # # |-----------------> scaffold
115
+ # def test_project_from_whole_component_to_assembly
116
+ # source_slice = Slice.fetch_by_region('scaffold','Chr4.003.105')
117
+ # target_slices = source_slice.project('toplevel')
118
+ #
119
+ # # Start and stop of chr4_105 on Chr4
120
+ # assert_equal(96652111, target_slices[0].start)
121
+ # assert_equal(97251689, target_slices[0].stop)
122
+ # end
123
+ #
124
+ # # |------------------------------------------> chromosome
125
+ # # ^ ^
126
+ # # | |
127
+ # # |-----------------> scaffold
128
+ # def test_project_from_component_to_assembly_with_positions
129
+ # source_slice = Slice.fetch_by_region('scaffold','Chr4.003.105', 42, 2007)
130
+ # target_slices = source_slice.project('toplevel')
131
+ #
132
+ # # Position 42 on chr4_105 is position 96652152, position 2007 is 96654117
133
+ # assert_equal(96652152, target_slices[0].start)
134
+ # assert_equal(96654117, target_slices[0].stop)
135
+ # end
136
+ #end
137
+
138
+ class SliceProjectFromAssemblyToComponentForwardStrands < Test::Unit::TestCase
139
+ def setup
140
+ @source_slice_single_contig = Slice.fetch_by_region('chromosome', '20', 175000, 180000)
141
+ @target_slices_single_contig = @source_slice_single_contig.project('contig')
142
+
143
+ @source_slice_two_contigs = Slice.fetch_by_region('chromosome','20', 175000, 190000)
144
+ @target_slices_two_contigs = @source_slice_two_contigs.project('contig')
145
+
146
+ @source_slice_contigs_with_strand = Slice.fetch_by_region('chromosome', '20', 160000, 190000)
147
+ @target_slices_contigs_with_strand = @source_slice_contigs_with_strand.project('contig')
148
+
149
+ @source_slice_contigs_with_strand_ends_in_gaps = Slice.fetch_by_region('chromosome', '20', 170950, 196000)
150
+ @target_slices_contigs_with_strand_ends_in_gaps = @source_slice_contigs_with_strand_ends_in_gaps.project('contig')
151
+ end
152
+
153
+ # |-----------------> contig
154
+ # ^ ^
155
+ # | |
156
+ # |------------------------------------------> chromosome
157
+ def test_project_from_assembly_to_single_component
158
+ # Position 175000 on chr20 is position 4030 on contig, position 180000 is 9030
159
+ assert_equal('AAFC03028970', @target_slices_single_contig[0].seq_region.name)
160
+ assert_equal(4030, @target_slices_single_contig[0].start)
161
+ assert_equal(9030, @target_slices_single_contig[0].stop)
162
+ end
163
+
164
+ # |-----> |--------> contig
165
+ # ^ ^
166
+ # | |
167
+ # |------------------------------------------> chromosome
168
+ def test_project_from_assembly_to_two_components
169
+ # This chromosomal region is covered by contigs AAFC03028970, a gap and AAFC03028962
170
+ # * Position 175000 on chr 20 is position 4030 on contig AAFC03028970
171
+ # * Position 190000 on chr 20 is position 35 on contig AAFC03028962
172
+ assert_equal(3, @target_slices_two_contigs.length)
173
+ assert_equal('contig:Btau_4.0:AAFC03028970:4030:17365:1', @target_slices_two_contigs[0].display_name)
174
+ assert_equal(Gap, @target_slices_two_contigs[1].class)
175
+ assert_equal('contig:Btau_4.0:AAFC03028962:1:35:1', @target_slices_two_contigs[2].display_name)
176
+ end
177
+
178
+ # |-----> <-------| |-------> |-------> contig
179
+ # ^ ^
180
+ # | |
181
+ # |--------------------------------------------------> chromosome
182
+ def test_project_from_assembly_to_contigs_with_strand
183
+ # This chromosomal region is covered by 4 contigs and 3 gaps
184
+ # One of the contigs is on the reverse strand.
185
+ assert_equal(7, @target_slices_contigs_with_strand.length)
186
+ assert_equal('contig:Btau_4.0:AAFC03028964:90:9214:1', @target_slices_contigs_with_strand[0].display_name)
187
+ assert_equal(Gap, @target_slices_contigs_with_strand[1].class)
188
+ assert_equal('contig:Btau_4.0:AAFC03028959:1:1746:-1', @target_slices_contigs_with_strand[2].display_name)
189
+ assert_equal(Gap, @target_slices_contigs_with_strand[3].class)
190
+ assert_equal('contig:Btau_4.0:AAFC03028970:1:17365:1', @target_slices_contigs_with_strand[4].display_name)
191
+ assert_equal(Gap, @target_slices_contigs_with_strand[5].class)
192
+ assert_equal('contig:Btau_4.0:AAFC03028962:1:35:1', @target_slices_contigs_with_strand[6].display_name)
193
+ end
194
+
195
+ # <--| |-----> contig
196
+ # ^ ^
197
+ # | |
198
+ # |--------------------------------------------------> chromosome
199
+ def test_project_from_assembly_to_contigs_with_strand_and_ending_in_gaps
200
+ # This chromosomal region is covered by 2 contigs and 3 gaps, with a gap at each end: GaCoGaCoGa
201
+ assert_equal(5, @target_slices_contigs_with_strand_ends_in_gaps.length)
202
+ assert_equal(Gap, @target_slices_contigs_with_strand_ends_in_gaps[0].class)
203
+ assert_equal('contig:Btau_4.0:AAFC03028970:1:17365:1', @target_slices_contigs_with_strand_ends_in_gaps[1].display_name)
204
+ assert_equal(Gap, @target_slices_contigs_with_strand_ends_in_gaps[2].class)
205
+ assert_equal('contig:Btau_4.0:AAFC03028962:1:5704:1', @target_slices_contigs_with_strand_ends_in_gaps[3].display_name)
206
+ assert_equal(Gap, @target_slices_contigs_with_strand_ends_in_gaps[4].class)
207
+ end
208
+
209
+
210
+ end