jandot-ruby-ensembl-api 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. data/TUTORIAL +623 -0
  2. data/bin/ensembl +39 -0
  3. data/lib/ensembl/core/activerecord.rb +1847 -0
  4. data/lib/ensembl/core/project.rb +248 -0
  5. data/lib/ensembl/core/slice.rb +627 -0
  6. data/lib/ensembl/core/transcript.rb +425 -0
  7. data/lib/ensembl/core/transform.rb +97 -0
  8. data/lib/ensembl/db_connection.rb +148 -0
  9. data/lib/ensembl/variation/activerecord.rb +308 -0
  10. data/lib/ensembl.rb +23 -0
  11. data/samples/examples_perl_tutorial.rb +120 -0
  12. data/samples/small_example_ruby_api.rb +34 -0
  13. data/test/unit/release_45/core/run_tests.rb +12 -0
  14. data/test/unit/release_45/core/test_project.rb +235 -0
  15. data/test/unit/release_45/core/test_project_human.rb +58 -0
  16. data/test/unit/release_45/core/test_relationships.rb +61 -0
  17. data/test/unit/release_45/core/test_sequence.rb +175 -0
  18. data/test/unit/release_45/core/test_slice.rb +56 -0
  19. data/test/unit/release_45/core/test_transcript.rb +94 -0
  20. data/test/unit/release_45/core/test_transform.rb +223 -0
  21. data/test/unit/release_45/variation/test_activerecord.rb +32 -0
  22. data/test/unit/release_50/core/run_tests.rb +12 -0
  23. data/test/unit/release_50/core/test_project.rb +215 -0
  24. data/test/unit/release_50/core/test_project_human.rb +58 -0
  25. data/test/unit/release_50/core/test_relationships.rb +66 -0
  26. data/test/unit/release_50/core/test_sequence.rb +175 -0
  27. data/test/unit/release_50/core/test_slice.rb +121 -0
  28. data/test/unit/release_50/core/test_transcript.rb +108 -0
  29. data/test/unit/release_50/core/test_transform.rb +223 -0
  30. data/test/unit/release_50/variation/test_activerecord.rb +136 -0
  31. data/test/unit/test_connection.rb +58 -0
  32. data/test/unit/test_releases.rb +40 -0
  33. metadata +243 -0
data/lib/ensembl.rb ADDED
@@ -0,0 +1,23 @@
1
+ module Ensembl
2
+ ENSEMBL_RELEASE = 50
3
+ end
4
+
5
+ begin
6
+ require 'rubygems'
7
+ require 'bio'
8
+ rescue LoadError
9
+ raise LoadError, "You must have bioruby installed"
10
+ end
11
+
12
+ # Database connection
13
+ require File.dirname(__FILE__) + '/ensembl/db_connection.rb'
14
+
15
+ # Core modules
16
+ require File.dirname(__FILE__) + '/ensembl/core/activerecord.rb'
17
+ require File.dirname(__FILE__) + '/ensembl/core/transcript.rb'
18
+ require File.dirname(__FILE__) + '/ensembl/core/slice.rb'
19
+ require File.dirname(__FILE__) + '/ensembl/core/project.rb'
20
+ require File.dirname(__FILE__) + '/ensembl/core/transform.rb'
21
+
22
+ # Variation modules
23
+ require File.dirname(__FILE__) + '/ensembl/variation/activerecord.rb'
@@ -0,0 +1,120 @@
1
+ #!/usr/local/bin/ruby
2
+
3
+ require File.dirname(__FILE__) + '/../lib/ensembl.rb'
4
+ require 'yaml'
5
+ require 'progressbar'
6
+
7
+ include Ensembl::Core
8
+
9
+ ## Connecting to the Database
10
+ DBConnection.connect('homo_sapiens')
11
+
12
+ ## Object adaptors
13
+ # not necessary, ruby uses class methods instead
14
+
15
+ ## Slices
16
+ puts "== Some slices: =="
17
+ puts Slice.fetch_by_region('chromosome','X').to_yaml
18
+ puts Slice.fetch_by_region('clone','AL359765.6').to_yaml
19
+ puts Slice.fetch_by_region('supercontig','NT_011333').to_yaml
20
+ puts Slice.fetch_by_region('chromosome', '20', 1000000, 2000000).to_yaml
21
+ puts Slice.fetch_by_gene_stable_id('ENSG00000099889', 5000).to_yaml
22
+
23
+ puts "== All chromosomes: =="
24
+ Slice.fetch_all('chromosome', 'NCBI36').each do |chr|
25
+ puts chr.display_name
26
+ end
27
+
28
+ puts "== Number of clone slices: " + Slice.fetch_all('clone').length.to_s
29
+
30
+ puts "== Subslices of chromosome 19 (length = 10000000; overlap = 250): =="
31
+ Slice.fetch_by_region('chromosome','19').split(10000000, 250).each do |sub_slice|
32
+ puts sub_slice.display_name
33
+ end
34
+
35
+ puts "== Sequence of a very small slice: Chr19:112200..112250 =="
36
+ slice = Slice.fetch_by_region('chromosome','19',112200,112250)
37
+ puts slice.seq
38
+
39
+ puts "== Query a slice about itself =="
40
+ puts slice.to_yaml
41
+
42
+ puts "== Get genes for a slice =="
43
+ slice = Slice.fetch_by_region('chromosome','19',112200,1122000)
44
+ slice.genes.each do |gene|
45
+ puts gene.stable_id
46
+ end
47
+
48
+ puts "== Get DNA alignment features for 20:80000..88000 =="
49
+ slice = Slice.fetch_by_region('chromosome','20',80000,88000)
50
+ slice.dna_align_features[0..2].each do |daf|
51
+ puts daf.to_yaml
52
+ end
53
+
54
+ puts "== Get sequence for transcript ENST00000383673 =="
55
+ transcript = Transcript.find_by_stable_id('ENST00000383673')
56
+ puts transcript.seq
57
+
58
+ puts "== Get synonyms for marker D9S1038E =="
59
+ marker = Ensembl::Core::Marker.find_by_name('D9S1038E')
60
+ marker.marker_synonyms[0..5].each do |ms|
61
+ puts ms.to_yaml
62
+ end
63
+
64
+ puts "== Get 5 features for this marker =="
65
+ marker = Ensembl::Core::Marker.find_by_name('D9S1038E')
66
+ marker.marker_features[0..5].each do |mf|
67
+ puts 'name: ' + marker.name
68
+ puts 'seq_region name: ' + mf.seq_region.name
69
+ puts 'start: ' + mf.seq_region_start.to_s
70
+ puts 'stop: ' + mf.seq_region_end.to_s
71
+ end
72
+
73
+ puts "== Get 5 features for chromosome 22 =="
74
+ slice = Ensembl::Core::Slice.fetch_by_region('chromosome', '22')
75
+ slice.marker_features.slice(0,5).each do |mf|
76
+ puts mf.marker.name + "\t" + mf.slice.display_name
77
+ end
78
+
79
+ puts "== Transcript: from cDNA to genomic positions =="
80
+ transcript = Ensembl::Core::Transcript.find(276333)
81
+ puts "Transcript is ENST00000215574"
82
+ puts "Genomic position 488053 is cDNA position: " + transcript.genomic2cdna(488053).to_s
83
+ puts "cDNA position 601 is genomic position: " + transcript.cdna2genomic(601).to_s
84
+ puts "Genomic position 488053 is CDS position: " + transcript.genomic2cds(488053).to_s
85
+ puts "CDS position 401 is genomic position: " + transcript.cds2genomic(401).to_s
86
+
87
+ puts "== Transcript: get pieces of DNA for a transcript =="
88
+ transcript = Ensembl::Core::Transcript.find_by_stable_id('ENST00000380593')
89
+ puts transcript.stable_id
90
+ puts "5'UTR: " + transcript.five_prime_utr_seq
91
+ puts "3'UTR: " + transcript.three_prime_utr_seq
92
+ puts "CDS: " + transcript.cds_seq
93
+ puts "protein: " + transcript.protein_seq
94
+
95
+ #### And now we'll do some stuff with cows: drop the human connection opened via DBConnection above and reconnect to the cow database.
96
+ DBConnection.connection.disconnect!
97
+ DBConnection.connect('bos_taurus')
98
+
99
+ puts "== Projecting a slice from component to assembly: =="
100
+ puts "== scaffold Chr4.003.105:42..2007 to chromosome level =="
101
+ source_slice = Slice.fetch_by_region('scaffold','Chr4.003.105', 42, 2007)
102
+ target_slices = source_slice.project('chromosome')
103
+ puts target_slices.collect{|s| s.display_name}.join("\n")
104
+
105
+ puts "== Projecting a slice from assembly to components: =="
106
+ puts "== chromosome slice chr4:329500..380000 to contig level =="
107
+ source_slice = Slice.fetch_by_region('chromosome', '4', 329500, 380000)
108
+ target_slices = source_slice.project('contig')
109
+ puts target_slices.collect{|s| s.display_name}.join("\n")
110
+
111
+ puts "== Transforming a gene from chromosome level to scaffold level =="
112
+ gene = Gene.find(2408)
113
+ cloned_gene = gene.transform('scaffold')
114
+ puts gene.slice.display_name
115
+ puts cloned_gene.slice.display_name
116
+
117
+ puts "== Relationships for Gene class =="
118
+ puts 'belongs to: ' + Gene.reflect_on_all_associations(:belongs_to).collect{|a| a.name.to_s}.join(',')
119
+ puts 'has many: ' + Gene.reflect_on_all_associations(:has_many).collect{|a| a.name.to_s}.join(',')
120
+
@@ -0,0 +1,34 @@
1
+ #!/usr/bin/ruby
2
+ require File.dirname(__FILE__) + '/../lib/ensembl.rb' # resolve relative to this script, not the current working directory
3
+
4
+ include Ensembl::Core
5
+
6
+ DBConnection.connect('homo_sapiens')
7
+
8
+ puts "== Get a slice =="
9
+ slice = Slice.fetch_by_region('chromosome','4',10000,99999,-1)
10
+ puts slice.display_name
11
+
12
+ puts "== Print all gene for that slice (regardless of what coord_system genes are annotated on) =="
13
+ slice.genes.each do |gene|
14
+ puts gene.stable_id + "\t" + gene.status + "\t" + gene.slice.display_name
15
+ end
16
+
17
+ puts "== Get a transcript and print its 5'UTR, CDS and protein sequence =="
18
+ transcript = Transcript.find_by_stable_id('ENST00000380593')
19
+ puts "5'UTR: " + transcript.five_prime_utr_seq
20
+ puts "CDS: " + transcript.cds_seq
21
+ puts "peptide: " + transcript.protein_seq
22
+
23
+ DBConnection.connection.disconnect!
24
+ DBConnection.connect('bos_taurus',45)
25
+
26
+ puts "== Transforming a cow gene from chromosome level to scaffold level =="
27
+ gene = Gene.find(2408)
28
+ cloned_gene = gene.transform('scaffold')
29
+ puts "Original: " + gene.slice.display_name
30
+ puts "Now: " + cloned_gene.slice.display_name
31
+
32
+ puts "== What things are related to a 'gene' object? =="
33
+ puts 'Genes belong to: ' + Gene.reflect_on_all_associations(:belongs_to).collect{|a| a.name.to_s}.join(',')
34
+ puts 'Genes have many: ' + Gene.reflect_on_all_associations(:has_many).collect{|a| a.name.to_s}.join(',')
@@ -0,0 +1,12 @@
1
+ require 'test/unit'
2
+
3
+ # Either run the tests that use Bos taurus
4
+ #require 'test_project'
5
+ #require 'test_transform'
6
+ #require 'test_slice'
7
+
8
+ # Or the ones using Homo sapiens
9
+ require 'test_relationships'
10
+ require 'test_project_human'
11
+ require 'test_sequence'
12
+ require 'test_transcript'
@@ -0,0 +1,235 @@
1
+ #
2
+ # = test/unit/release_45/core/test_project.rb - Unit test for Ensembl::Core
3
+ #
4
+ # Copyright:: Copyright (C) 2007
5
+ # Jan Aerts <http://jandot.myopenid.com>
6
+ # License:: Ruby's
7
+ #
8
+ # $Id:
9
+ require 'pathname'
10
+ libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'lib')).cleanpath.to_s
11
+ $:.unshift(libpath) unless $:.include?(libpath)
12
+
13
+ require 'test/unit'
14
+ require 'ensembl'
15
+
16
+ include Ensembl::Core
17
+
18
+ DBConnection.connect('bos_taurus')
19
+
20
+ class CoordinateMappingsTestSimple < Test::Unit::TestCase
21
+ # First see if the relationships work
22
+ def test_assemblies
23
+ # Scaffold Chr4.003.105 should only be a component of chromosome 4
24
+ scaffold_coord_system = CoordSystem.find_by_name('scaffold')
25
+ chr4_105 = SeqRegion.find_by_name_and_coord_system_id('Chr4.003.105', scaffold_coord_system.id)
26
+ assert_equal(1, chr4_105.assembled_seq_regions.length)
27
+
28
+ # Chromosome 4 has 4118 components (127 scaffolds and 3991 contigs)
29
+ chr_coord_system = CoordSystem.find_by_name('chromosome')
30
+ chr4 = SeqRegion.find_by_name_and_coord_system_id('4', chr_coord_system.id)
31
+ assert_equal(4118, chr4.component_seq_regions.length)
32
+
33
+ # Chromosome 4 has 127 scaffolds
34
+ assert_equal(127, chr4.component_seq_regions('scaffold').length)
35
+
36
+ # Positions of the link between Chr4 and Chr4.003.105
37
+ # * Scaffold Chr4.003.105 starts at position 96652111 on chromosome Chr4
38
+ # * Scaffold Chr4.003.105 does not have links as assembly with coord_system 'chromosome'
39
+ assert_equal(96652111, chr4_105.assembly_links_as_component('chromosome')[0].asm_start)
40
+ assert_equal(nil, chr4_105.assembly_links_as_assembly('chromosome')[0])
41
+ end
42
+ end
43
+
44
+ class Sequences < Test::Unit::TestCase
45
+ def setup
46
+ @seq_region = SeqRegion.find(92594)
47
+ end
48
+
49
+ def test_simple
50
+ assert_equal('AGCTATTTTATGACTT', @seq_region.seq.slice(4,16))
51
+ end
52
+
53
+ def test_subseq
54
+ assert_equal('AGCTATTTTATGACTT', @seq_region.subseq(5,20))
55
+ end
56
+ end
57
+
58
+ class SliceProjectFromComponentToAssembly < Test::Unit::TestCase
59
+ # |------------------------------------------> chromosome
60
+ # ^ ^
61
+ # | |
62
+ # |-----------------> scaffold
63
+ def test_project_from_whole_component_to_assembly
64
+ source_slice = Slice.fetch_by_region('scaffold','Chr4.003.105')
65
+ target_slices = source_slice.project('chromosome')
66
+
67
+ # Start and stop of chr4_105 on Chr4
68
+ assert_equal(96652111, target_slices[0].start)
69
+ assert_equal(97251689, target_slices[0].stop)
70
+ end
71
+
72
+ # |------------------------------------------> chromosome
73
+ # ^ ^
74
+ # | |
75
+ # |-----------------> scaffold
76
+ def test_project_from_component_to_assembly_with_positions
77
+ source_slice = Slice.fetch_by_region('scaffold','Chr4.003.105', 42, 2007)
78
+ target_slices = source_slice.project('chromosome')
79
+
80
+ # Position 42 on chr4_105 is position 96652152, position 2007 is 96654117
81
+ assert_equal(96652152, target_slices[0].start)
82
+ assert_equal(96654117, target_slices[0].stop)
83
+ end
84
+
85
+ # |------------------------------------------> scaffold
86
+ # ^ ^
87
+ # | |
88
+ # ----------------> contig
89
+ # /
90
+ # |--
91
+ def test_project_from_component_to_assembly_with_positions_and_cmp_start_not_1
92
+ source_slice = Slice.fetch_by_region('contig', 'AAFC03020247', 42, 2007)
93
+ target_slices = source_slice.project('scaffold')
94
+
95
+ # Position 42 on AAFC03020247 is position 6570 on ChrUn.003.3522, position 2007 is 8535
96
+ assert_equal(6570, target_slices[0].start)
97
+ assert_equal(8535, target_slices[0].stop)
98
+ end
99
+
100
+ # |------------------------------------------> scaffold
101
+ # ^ ^
102
+ # | |
103
+ # <-----------------| contig
104
+ def test_project_from_component_to_assembly_with_strand
105
+ source_slice_fw = Slice.fetch_by_region('contig', 'AAFC03020247')
106
+ target_slices_fw = source_slice_fw.project('scaffold')
107
+
108
+ assert_equal(1, target_slices_fw[0].strand)
109
+
110
+ source_slice_rev = Slice.fetch_by_region('contig', 'AAFC03061502')
111
+ target_slices_rev = source_slice_rev.project('scaffold')
112
+
113
+ assert_equal(-1, target_slices_rev[0].strand)
114
+ end
115
+ end
116
+
117
+ class SliceProjectFromComponentToAssemblyUsingTopLevel < Test::Unit::TestCase
118
+ # |------------------------------------------> chromosome
119
+ # ^ ^
120
+ # | |
121
+ # |-----------------> scaffold
122
+ def test_project_from_whole_component_to_assembly
123
+ source_slice = Slice.fetch_by_region('scaffold','Chr4.003.105')
124
+ target_slices = source_slice.project('toplevel')
125
+
126
+ # Start and stop of chr4_105 on Chr4
127
+ assert_equal(96652111, target_slices[0].start)
128
+ assert_equal(97251689, target_slices[0].stop)
129
+ end
130
+
131
+ # |------------------------------------------> chromosome
132
+ # ^ ^
133
+ # | |
134
+ # |-----------------> scaffold
135
+ def test_project_from_component_to_assembly_with_positions
136
+ source_slice = Slice.fetch_by_region('scaffold','Chr4.003.105', 42, 2007)
137
+ target_slices = source_slice.project('toplevel')
138
+
139
+ # Position 42 on chr4_105 is position 96652152, position 2007 is 96654117
140
+ assert_equal(96652152, target_slices[0].start)
141
+ assert_equal(96654117, target_slices[0].stop)
142
+ end
143
+ end
144
+
145
+ class SliceProjectFromAssemblyToComponentForwardStrands < Test::Unit::TestCase
146
+ def setup
147
+ @source_slice_single_scaffold = Slice.fetch_by_region('chromosome', '4', 96652152, 96654117)
148
+ @target_slices_single_scaffold = @source_slice_single_scaffold.project('scaffold')
149
+
150
+ @source_slice_two_scaffolds = Slice.fetch_by_region('chromosome','4', 96652152, 98000000)
151
+ @target_slices_two_scaffolds = @source_slice_two_scaffolds.project('scaffold')
152
+
153
+ @source_slice_four_scaffolds = Slice.fetch_by_region('chromosome', '4', 96652152, 99000000)
154
+ @target_slices_four_scaffolds = @source_slice_four_scaffolds.project('scaffold')
155
+
156
+ @source_slice_contigs_with_strand = Slice.fetch_by_region('chromosome', '4', 329500, 380000)
157
+ @target_slices_contigs_with_strand = @source_slice_contigs_with_strand.project('contig')
158
+
159
+ @source_slice_contigs_with_strand_ends_in_gaps = Slice.fetch_by_region('chromosome', '4', 345032, 388626)
160
+ @target_slices_contigs_with_strand_ends_in_gaps = @source_slice_contigs_with_strand_ends_in_gaps.project('contig')
161
+ end
162
+
163
+ # |-----------------> scaffold
164
+ # ^ ^
165
+ # | |
166
+ # |------------------------------------------> chromosome
167
+ def test_project_from_assembly_to_single_component
168
+ # Position 96652152 on chr4 is position 42 on scaffold, position 96654117 is 2007
169
+ assert_equal('Chr4.003.105', @target_slices_single_scaffold[0].seq_region.name)
170
+ assert_equal(42, @target_slices_single_scaffold[0].start)
171
+ assert_equal(2007, @target_slices_single_scaffold[0].stop)
172
+ end
173
+
174
+ # |-----> |--------> scaffold
175
+ # ^ ^
176
+ # | |
177
+ # |------------------------------------------> chromosome
178
+ def test_project_from_assembly_to_two_components
179
+ # This chromosomal region is covered by scaffold Chr4.003.105, a gap and scaffold Chr4.003.106
180
+ # * Position 96652152 on chr 4 is position 42 on scaffold Chr4.003.105
181
+ # * Position 98000000 on chr 4 is position 738311 on scaffold Chr4.003.106
182
+ assert_equal(3, @target_slices_two_scaffolds.length)
183
+ assert_equal('scaffold:Btau_3.1:Chr4.003.105:42:599579:1', @target_slices_two_scaffolds[0].display_name)
184
+ assert_equal(Gap, @target_slices_two_scaffolds[1].class)
185
+ assert_equal('scaffold:Btau_3.1:Chr4.003.106:1:738311:1', @target_slices_two_scaffolds[2].display_name)
186
+ end
187
+
188
+ # |-----> |--------> |---> |-------> scaffold
189
+ # ^ ^
190
+ # | |
191
+ # |--------------------------------------------------> chromosome
192
+ def test_project_from_assembly_to_four_components
193
+ # This chromosomal region is covered by scaffolds Chr4.003.105 through Chr4.003.108, each separated by a gap
194
+ assert_equal(7, @target_slices_four_scaffolds.length)
195
+ assert_equal('scaffold:Btau_3.1:Chr4.003.105:42:599579:1', @target_slices_four_scaffolds[0].display_name)
196
+ assert_equal(Gap, @target_slices_four_scaffolds[1].class)
197
+ assert_equal('scaffold:Btau_3.1:Chr4.003.106:1:1009889:1', @target_slices_four_scaffolds[2].display_name)
198
+ assert_equal(Gap, @target_slices_four_scaffolds[3].class)
199
+ assert_equal('scaffold:Btau_3.1:Chr4.003.107:1:608924:1', @target_slices_four_scaffolds[4].display_name)
200
+ assert_equal(Gap, @target_slices_four_scaffolds[5].class)
201
+ assert_equal('scaffold:Btau_3.1:Chr4.003.108:1:99498:1', @target_slices_four_scaffolds[6].display_name)
202
+ end
203
+
204
+ # |-----> |--------> <---| <--| |-----> contig
205
+ # ^ ^
206
+ # | |
207
+ # |--------------------------------------------------> chromosome
208
+ def test_project_from_assembly_to_contigs_with_strand
209
+ # This chromosomal region is covered by 5 contigs and 1 gap: CoCoCoGaCoCo
210
+ # Two of the contigs are on the reverse strand.
211
+ assert_equal(6, @target_slices_contigs_with_strand.length)
212
+ assert_equal('contig::AAFC03092598:60948:61145:1', @target_slices_contigs_with_strand[0].display_name)
213
+ assert_equal('contig::AAFC03118261:25411:37082:1', @target_slices_contigs_with_strand[1].display_name)
214
+ assert_equal('contig::AAFC03092594:1:3622:-1', @target_slices_contigs_with_strand[2].display_name)
215
+ assert_equal(Gap, @target_slices_contigs_with_strand[3].class)
216
+ assert_equal('contig::AAFC03092597:820:35709:-1', @target_slices_contigs_with_strand[4].display_name)
217
+ assert_equal('contig::AAFC03032210:13347:13415:1', @target_slices_contigs_with_strand[5].display_name)
218
+ end
219
+
220
+ # <--| |-----> contig
221
+ # ^ ^
222
+ # | |
223
+ # |--------------------------------------------------> chromosome
224
+ def test_project_from_assembly_to_contigs_with_strand_and_ending_in_gaps
225
+ # This chromosomal region is covered by 2 contigs with a gap at each end: GaCoCoGa
226
+ # One of the two contigs is on the reverse strand.
227
+ assert_equal(4, @target_slices_contigs_with_strand_ends_in_gaps.length)
228
+ assert_equal(Gap, @target_slices_contigs_with_strand_ends_in_gaps[0].class)
229
+ assert_equal('contig::AAFC03092597:820:35709:-1', @target_slices_contigs_with_strand_ends_in_gaps[1].display_name)
230
+ assert_equal('contig::AAFC03032210:13347:22036:1', @target_slices_contigs_with_strand_ends_in_gaps[2].display_name)
231
+ assert_equal(Gap, @target_slices_contigs_with_strand_ends_in_gaps[3].class)
232
+ end
233
+
234
+
235
+ end
@@ -0,0 +1,58 @@
1
+ #
2
+ # = test/unit/release_45/core/test_project_human.rb - Unit test for Ensembl::Core
3
+ #
4
+ # Copyright:: Copyright (C) 2007
5
+ # Jan Aerts <http://jandot.myopenid.com>
6
+ # License:: Ruby's
7
+ #
8
+ # $Id:
9
+ require 'pathname'
10
+ libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'lib')).cleanpath.to_s
11
+ $:.unshift(libpath) unless $:.include?(libpath)
12
+
13
+ require 'test/unit'
14
+ require 'ensembl'
15
+
16
+ include Ensembl::Core
17
+
18
+ DBConnection.connect('homo_sapiens')
19
+
20
+ class AssemblyExceptions < Test::Unit::TestCase
21
+ def test_chr_x
22
+ source_slice = Slice.fetch_by_region('chromosome','X', 2709497, 2709520)
23
+ assert_equal('tagttatagattaaaagaagttaa', source_slice.seq)
24
+ end
25
+
26
+ def test_slice_overlapping_PAR_and_allosome
27
+ source_slice = Slice.fetch_by_region('chromosome','Y',2709500,2709540)
28
+ target_slices = source_slice.project('contig')
29
+ assert_equal('contig::AC006209.25.1.141759:23323:23343:-1', target_slices[0].display_name)
30
+ assert_equal('contig::AC006040.3.1.186504:57272:57291:1', target_slices[1].display_name)
31
+ end
32
+
33
+ def test_seq_slice_overlapping_PAR
34
+ seq = ''
35
+ File.open('../../data/seq_y.fa').reject{|l| l=~/^>/}.each do |line|
36
+ line.chomp!
37
+ seq += line
38
+ end
39
+ seq.downcase!
40
+
41
+ source_slice = Slice.fetch_by_region('chromosome', 'Y', 2709497, 2709542)
42
+ assert_equal(seq.downcase, source_slice.seq)
43
+ end
44
+
45
+ # The MHC haplotypes for human are not implemented yet, so we raise an error
46
+ # in the code.
47
+ def test_seq_slice_overlapping_HAP
48
+ seq = ''
49
+ File.open('../../data/seq_c6qbl.fa').reject{|l| l=~/^>/}.each do |line|
50
+ line.chomp!
51
+ seq += line
52
+ end
53
+ seq.downcase!
54
+
55
+ source_slice = Slice.fetch_by_region('chromosome', 'c6_QBL', 33451191, 33451690)
56
+ assert_raise(NotImplementedError) {source_slice.seq}
57
+ end
58
+ end