jandot-ruby-ensembl-api 0.9.2
Sign up to get free protection for your applications and to get access to all the features.
- data/TUTORIAL +623 -0
- data/bin/ensembl +39 -0
- data/lib/ensembl/core/activerecord.rb +1847 -0
- data/lib/ensembl/core/project.rb +248 -0
- data/lib/ensembl/core/slice.rb +627 -0
- data/lib/ensembl/core/transcript.rb +425 -0
- data/lib/ensembl/core/transform.rb +97 -0
- data/lib/ensembl/db_connection.rb +148 -0
- data/lib/ensembl/variation/activerecord.rb +308 -0
- data/lib/ensembl.rb +23 -0
- data/samples/examples_perl_tutorial.rb +120 -0
- data/samples/small_example_ruby_api.rb +34 -0
- data/test/unit/release_45/core/run_tests.rb +12 -0
- data/test/unit/release_45/core/test_project.rb +235 -0
- data/test/unit/release_45/core/test_project_human.rb +58 -0
- data/test/unit/release_45/core/test_relationships.rb +61 -0
- data/test/unit/release_45/core/test_sequence.rb +175 -0
- data/test/unit/release_45/core/test_slice.rb +56 -0
- data/test/unit/release_45/core/test_transcript.rb +94 -0
- data/test/unit/release_45/core/test_transform.rb +223 -0
- data/test/unit/release_45/variation/test_activerecord.rb +32 -0
- data/test/unit/release_50/core/run_tests.rb +12 -0
- data/test/unit/release_50/core/test_project.rb +215 -0
- data/test/unit/release_50/core/test_project_human.rb +58 -0
- data/test/unit/release_50/core/test_relationships.rb +66 -0
- data/test/unit/release_50/core/test_sequence.rb +175 -0
- data/test/unit/release_50/core/test_slice.rb +121 -0
- data/test/unit/release_50/core/test_transcript.rb +108 -0
- data/test/unit/release_50/core/test_transform.rb +223 -0
- data/test/unit/release_50/variation/test_activerecord.rb +136 -0
- data/test/unit/test_connection.rb +58 -0
- data/test/unit/test_releases.rb +40 -0
- metadata +243 -0
data/lib/ensembl.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# Top-level namespace of the Ensembl API.
module Ensembl
  # Ensembl release number this version of the library is written against.
  # NOTE(review): presumably used as the default release when connecting
  # to a database -- confirm in ensembl/db_connection.rb.
  ENSEMBL_RELEASE = 50
end
|
4
|
+
|
5
|
+
# BioRuby is a hard dependency. Any failure to load it (or rubygems) is
# re-raised as a single LoadError with an actionable message.
begin
  %w[rubygems bio].each { |dependency| require dependency }
rescue LoadError
  raise LoadError, "You must have bioruby installed"
end

# Library files, required relative to this file and in this order:
# the database connection, then the core modules, then the variation modules.
[
  '/ensembl/db_connection.rb',        # Database connection
  '/ensembl/core/activerecord.rb',    # Core modules
  '/ensembl/core/transcript.rb',
  '/ensembl/core/slice.rb',
  '/ensembl/core/project.rb',
  '/ensembl/core/transform.rb',
  '/ensembl/variation/activerecord.rb' # Variation modules
].each do |relative_path|
  require File.dirname(__FILE__) + relative_path
end
|
@@ -0,0 +1,120 @@
|
|
1
|
+
#!/usr/local/bin/ruby
|
2
|
+
|
3
|
+
require File.dirname(__FILE__) + '/../lib/ensembl.rb'
|
4
|
+
require 'yaml'
|
5
|
+
require 'progressbar'
|
6
|
+
|
7
|
+
# Ruby counterpart of the examples in the Ensembl Perl API tutorial.
# Every section prints a "== ... ==" banner followed by the result of one
# kind of query. A reachable Ensembl database is required.
include Ensembl::Core

## Connecting to the Database
DBConnection.connect('homo_sapiens')

## Object adaptors
# not necessary, ruby uses class methods instead

## Slices
puts "== Some slices: =="
puts Slice.fetch_by_region('chromosome','X').to_yaml
puts Slice.fetch_by_region('clone','AL359765.6').to_yaml
puts Slice.fetch_by_region('supercontig','NT_011333').to_yaml
puts Slice.fetch_by_region('chromosome', '20', 1000000, 2000000).to_yaml
puts Slice.fetch_by_gene_stable_id('ENSG00000099889', 5000).to_yaml

puts "== All chromosomes: =="
Slice.fetch_all('chromosome', 'NCBI36').each do |chr|
  puts chr.display_name
end

puts "== Number of clone slices: " + Slice.fetch_all('clone').length.to_s

puts "== Subslices of chromosome 19 (length = 10000000; overlap = 250): =="
Slice.fetch_by_region('chromosome','19').split(10000000, 250).each do |sub_slice|
  puts sub_slice.display_name
end

puts "== Sequence of a very small slice: Chr19:112200..112250 =="
slice = Slice.fetch_by_region('chromosome','19',112200,112250)
puts slice.seq

puts "== Query a slice about itself =="
puts slice.to_yaml

puts "== Get genes for a slice =="
slice = Slice.fetch_by_region('chromosome','19',112200,1122000)
slice.genes.each do |gene|
  puts gene.stable_id
end

puts "== Get DNA alignment features for 20:80000..88000 =="
slice = Slice.fetch_by_region('chromosome','20',80000,88000)
slice.dna_align_features[0..2].each do |daf|
  puts daf.to_yaml
end

puts "== Get sequence for transcript ENST00000383673 =="
transcript = Transcript.find_by_stable_id('ENST00000383673')
puts transcript.seq

puts "== Get synonyms for marker D9S1038E =="
marker = Ensembl::Core::Marker.find_by_name('D9S1038E')
marker.marker_synonyms[0..5].each do |ms|
  puts ms.to_yaml
end

puts "== Get 5 features for this marker =="
marker = Ensembl::Core::Marker.find_by_name('D9S1038E')
# Exclusive range: exactly five features, matching the banner above
# (an inclusive 0..5 would print six).
marker.marker_features[0...5].each do |mf|
  puts 'name: ' + marker.name
  puts 'seq_region name: ' + mf.seq_region.name
  puts 'start: ' + mf.seq_region_start.to_s
  puts 'stop: ' + mf.seq_region_end.to_s
end

puts "== Get 5 features for chromosome 22 =="
slice = Ensembl::Core::Slice.fetch_by_region('chromosome', '22')
slice.marker_features.slice(0,5).each do |mf|
  puts mf.marker.name + "\t" + mf.slice.display_name
end

puts "== Transcript: from cDNA to genomic positions =="
transcript = Ensembl::Core::Transcript.find(276333)
puts "Transcript is ENST00000215574"
puts "Genomic position 488053 is cDNA position: " + transcript.genomic2cdna(488053).to_s
puts "cDNA position 601 is genomic position: " + transcript.cdna2genomic(601).to_s
puts "Genomic position 488053 is CDS position: " + transcript.genomic2cds(488053).to_s
puts "CDS position 401 is genomic position: " + transcript.cds2genomic(401).to_s

puts "== Transcript: get pieces of DNA for a transcript =="
transcript = Ensembl::Core::Transcript.find_by_stable_id('ENST00000380593')
puts transcript.stable_id
puts "5'UTR: " + transcript.five_prime_utr_seq
puts "3'UTR: " + transcript.three_prime_utr_seq
puts "CDS: " + transcript.cds_seq
puts "protein: " + transcript.protein_seq

#### And now we'll do some stuff with cows.
# Drop the human connection and reconnect to the cow database, using the
# same DBConnection class as the initial connect above.
DBConnection.connection.disconnect!
DBConnection.connect('bos_taurus')

puts "== Projecting a slice from component to assembly: =="
puts "== scaffold Chr4.003.105:42..2007 to chromosome level =="
source_slice = Slice.fetch_by_region('scaffold','Chr4.003.105', 42, 2007)
target_slices = source_slice.project('chromosome')
puts target_slices.collect{|s| s.display_name}.join("\n")

puts "== Projecting a slice from assembly to components: =="
puts "== chromosome slice chr4:329500..380000 to contig level =="
source_slice = Slice.fetch_by_region('chromosome', '4', 329500, 380000)
target_slices = source_slice.project('contig')
puts target_slices.collect{|s| s.display_name}.join("\n")

puts "== Transforming a gene from chromosome level to scaffold level =="
gene = Gene.find(2408)
cloned_gene = gene.transform('scaffold')
puts gene.slice.display_name
puts cloned_gene.slice.display_name

puts "== Relationships for Gene class =="
puts 'belongs to: ' + Gene.reflect_on_all_associations(:belongs_to).collect{|a| a.name.to_s}.join(',')
puts 'has many: ' + Gene.reflect_on_all_associations(:has_many).collect{|a| a.name.to_s}.join(',')
|
120
|
+
|
@@ -0,0 +1,34 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
# Load the library relative to this file rather than the current working
# directory, so the sample runs from anywhere.
require File.dirname(__FILE__) + '/../lib/ensembl.rb'

include Ensembl::Core

DBConnection.connect('homo_sapiens')

puts "== Get a slice =="
# Trailing -1 is the fifth argument to fetch_by_region.
# NOTE(review): presumably the strand (reverse) -- confirm in core/slice.rb.
slice = Slice.fetch_by_region('chromosome','4',10000,99999,-1)
puts slice.display_name

puts "== Print all gene for that slice (regardless of what coord_system genes are annotated on) =="
slice.genes.each do |gene|
  puts gene.stable_id + "\t" + gene.status + "\t" + gene.slice.display_name
end

puts "== Get a transcript and print its 5'UTR, CDS and protein sequence =="
transcript = Transcript.find_by_stable_id('ENST00000380593')
puts "5'UTR: " + transcript.five_prime_utr_seq
puts "CDS: " + transcript.cds_seq
puts "peptide: " + transcript.protein_seq

# Switch from the human database to the cow database (explicitly release 45).
DBConnection.connection.disconnect!
DBConnection.connect('bos_taurus',45)

puts "== Transforming a cow gene from chromosome level to scaffold level =="
gene = Gene.find(2408)
cloned_gene = gene.transform('scaffold')
puts "Original: " + gene.slice.display_name
puts "Now: " + cloned_gene.slice.display_name

puts "== What things are related to a 'gene' object? =="
puts 'Genes belong to: ' + Gene.reflect_on_all_associations(:belongs_to).collect{|a| a.name.to_s}.join(',')
puts 'Genes have many: ' + Gene.reflect_on_all_associations(:has_many).collect{|a| a.name.to_s}.join(',')
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'test/unit'

# The test files below are required by bare name. Ruby 1.9.2 and later no
# longer put the current directory on the load path, so add this file's
# directory explicitly.
test_dir = File.dirname(__FILE__)
$:.unshift(test_dir) unless $:.include?(test_dir)

# Only one of the two groups can be active at a time.
# NOTE(review): presumably because each test file connects to its species
# database when it is loaded -- confirm before enabling both groups.

# Either run the tests that use Bos taurus
#require 'test_project'
#require 'test_transform'
#require 'test_slice'

# Or the ones using Homo sapiens
require 'test_relationships'
require 'test_project_human'
require 'test_sequence'
require 'test_transcript'
|
@@ -0,0 +1,235 @@
|
|
1
|
+
#
|
2
|
+
# = test/unit/test_project.rb - Unit test for Ensembl::Core
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2007
|
5
|
+
# Jan Aerts <http://jandot.myopenid.com>
|
6
|
+
# License:: Ruby's
|
7
|
+
#
|
8
|
+
# $Id:
|
9
|
+
require 'pathname'

# Put the lib directory found four levels above this file on the load path
# (once), so that `require 'ensembl'` picks up this checkout.
relative_lib = File.join(File.dirname(__FILE__), ['..'] * 4, 'lib')
libpath = Pathname.new(relative_lib).cleanpath.to_s
$LOAD_PATH.unshift(libpath) unless $LOAD_PATH.include?(libpath)

require 'test/unit'
require 'ensembl'

include Ensembl::Core

# Every test case in this file queries the Bos taurus database.
DBConnection.connect('bos_taurus')
|
19
|
+
|
20
|
+
# Sanity checks on the assembly relationships between seq_regions, run
# against the Bos taurus database, before any projection is tested.
class CoordinateMappingsTestSimple < Test::Unit::TestCase
  # First see if the relationships work
  def test_assemblies
    # Scaffold Chr4.003.105 should only be a component of chromosome 4
    scaffold_coord_system = CoordSystem.find_by_name('scaffold')
    chr4_105 = SeqRegion.find_by_name_and_coord_system_id('Chr4.003.105', scaffold_coord_system.id)
    assert_equal(1, chr4_105.assembled_seq_regions.length)

    # Chromosome 4 has 4118 components (127 scaffolds and 3991 contigs)
    chr_coord_system = CoordSystem.find_by_name('chromosome')
    chr4 = SeqRegion.find_by_name_and_coord_system_id('4', chr_coord_system.id)
    assert_equal(4118, chr4.component_seq_regions.length)

    # Chromosome 4 has 127 scaffolds
    assert_equal(127, chr4.component_seq_regions('scaffold').length)

    # Positions of the link between Chr4 and Chr4.003.105
    # * Scaffold Chr4.003.105 starts at position 96652111 on chromosome Chr4
    # * Scaffold Chr4.003.105 does not have links as assembly with coord_system 'chromosome'
    assert_equal(96652111, chr4_105.assembly_links_as_component('chromosome')[0].asm_start)
    # assert_nil states the intent directly and gives a clearer failure
    # message than comparing against nil with assert_equal.
    assert_nil(chr4_105.assembly_links_as_assembly('chromosome')[0])
  end
end
|
43
|
+
|
44
|
+
# Checks that the same 16 bases of seq_region 92594 come back both from
# slicing the full sequence and from SeqRegion#subseq.
class Sequences < Test::Unit::TestCase
  def setup
    @seq_region = SeqRegion.find(92594)
  end

  # String#slice takes a 0-based offset and a length.
  def test_simple
    bases = @seq_region.seq.slice(4,16)
    assert_equal('AGCTATTTTATGACTT', bases)
  end

  # subseq(5,20) is expected to return the same 16 bases as slice(4,16).
  def test_subseq
    bases = @seq_region.subseq(5,20)
    assert_equal('AGCTATTTTATGACTT', bases)
  end
end
|
57
|
+
|
58
|
+
# Projects slices from a component coordinate system (scaffold, contig) up
# to the coordinate system they are assembled into, and checks the
# resulting positions and strand.
class SliceProjectFromComponentToAssembly < Test::Unit::TestCase
  #  |------------------------------------------> chromosome
  #          ^                 ^
  #          |                 |
  #          |-----------------> scaffold
  def test_project_from_whole_component_to_assembly
    source_slice = Slice.fetch_by_region('scaffold','Chr4.003.105')
    target_slices = source_slice.project('chromosome')

    # Start and stop of chr4_105 on Chr4
    assert_equal(96652111, target_slices[0].start)
    assert_equal(97251689, target_slices[0].stop)
  end

  #  |------------------------------------------> chromosome
  #             ^       ^
  #             |       |
  #          |-----------------> scaffold
  def test_project_from_component_to_assembly_with_positions
    source_slice = Slice.fetch_by_region('scaffold','Chr4.003.105', 42, 2007)
    target_slices = source_slice.project('chromosome')

    # Position 42 on chr4_105 is position 96652152, position 2007 is 96654117
    assert_equal(96652152, target_slices[0].start)
    assert_equal(96654117, target_slices[0].stop)
  end

  #  |------------------------------------------> scaffold
  #             ^       ^
  #             |       |
  #          ----------------> contig
  #         /
  #      |--
  def test_project_from_component_to_assembly_with_positions_and_cmp_start_not_1
    source_slice = Slice.fetch_by_region('contig', 'AAFC03020247', 42, 2007)
    target_slices = source_slice.project('scaffold')

    # Position 42 on AAFC03020247 is position 6570 on ChrUn.003.3522,
    # position 2007 is 8535 (6570 + (2007 - 42))
    assert_equal(6570, target_slices[0].start)
    assert_equal(8535, target_slices[0].stop)
  end

  #  |------------------------------------------> scaffold
  #          ^                 ^
  #          |                 |
  #          <-----------------| contig
  def test_project_from_component_to_assembly_with_strand
    source_slice_fw = Slice.fetch_by_region('contig', 'AAFC03020247')
    target_slices_fw = source_slice_fw.project('scaffold')

    assert_equal(1, target_slices_fw[0].strand)

    source_slice_rev = Slice.fetch_by_region('contig', 'AAFC03061502')
    target_slices_rev = source_slice_rev.project('scaffold')

    assert_equal(-1, target_slices_rev[0].strand)
  end
end
|
116
|
+
|
117
|
+
# Same scaffold-to-chromosome projections as the component-to-assembly
# tests, but asking for the 'toplevel' coordinate system instead of naming
# 'chromosome' explicitly. The expected positions are identical.
class SliceProjectFromComponentToAssemblyUsingTopLevel < Test::Unit::TestCase
  #  |------------------------------------------> chromosome
  #          ^                 ^
  #          |                 |
  #          |-----------------> scaffold
  def test_project_from_whole_component_to_assembly
    whole_scaffold = Slice.fetch_by_region('scaffold','Chr4.003.105')
    first_target = whole_scaffold.project('toplevel')[0]

    # Start and stop of chr4_105 on Chr4
    assert_equal(96652111, first_target.start)
    assert_equal(97251689, first_target.stop)
  end

  #  |------------------------------------------> chromosome
  #             ^       ^
  #             |       |
  #          |-----------------> scaffold
  def test_project_from_component_to_assembly_with_positions
    scaffold_part = Slice.fetch_by_region('scaffold','Chr4.003.105', 42, 2007)
    first_target = scaffold_part.project('toplevel')[0]

    # Position 42 on chr4_105 is position 96652152, position 2007 is 96654117
    assert_equal(96652152, first_target.start)
    assert_equal(96654117, first_target.stop)
  end
end
|
144
|
+
|
145
|
+
# Projects regions of cow chromosome 4 down to the scaffolds or contigs
# they are assembled from, and checks the resulting slices and gaps.
#
# Each test performs only the projection it needs. A shared setup would run
# every projection before every test, multiplying the database work and
# making one failing projection fail all tests.
class SliceProjectFromAssemblyToComponentForwardStrands < Test::Unit::TestCase
  #          |-----------------> scaffold
  #             ^       ^
  #             |       |
  #  |------------------------------------------> chromosome
  def test_project_from_assembly_to_single_component
    target_slices = project_chr4(96652152, 96654117, 'scaffold')

    # Position 96652152 on chr4 is position 42 on scaffold, position 96654117 is 2007
    assert_equal('Chr4.003.105', target_slices[0].seq_region.name)
    assert_equal(42, target_slices[0].start)
    assert_equal(2007, target_slices[0].stop)
  end

  #      |----->   |-------->                     scaffold
  #         ^          ^
  #         |          |
  #  |------------------------------------------> chromosome
  def test_project_from_assembly_to_two_components
    target_slices = project_chr4(96652152, 98000000, 'scaffold')

    # This chromosomal region is covered by scaffold Chr4.003.105, a gap
    # and scaffold Chr4.003.106
    # * Position 96652152 on chr 4 is position 42 on scaffold Chr4.003.105
    # * Position 98000000 on chr 4 is position 738311 on scaffold Chr4.003.106
    assert_equal(3, target_slices.length)
    assert_equal('scaffold:Btau_3.1:Chr4.003.105:42:599579:1', target_slices[0].display_name)
    assert_equal(Gap, target_slices[1].class)
    assert_equal('scaffold:Btau_3.1:Chr4.003.106:1:738311:1', target_slices[2].display_name)
  end

  #      |----->   |-------->   |--->   |------->          scaffold
  #         ^                              ^
  #         |                              |
  #  |--------------------------------------------------> chromosome
  def test_project_from_assembly_to_four_components
    target_slices = project_chr4(96652152, 99000000, 'scaffold')

    # This chromosomal region is covered by scaffolds Chr4.003.105 to
    # Chr4.003.108, each separated from the next by a gap
    assert_equal(7, target_slices.length)
    assert_equal('scaffold:Btau_3.1:Chr4.003.105:42:599579:1', target_slices[0].display_name)
    assert_equal(Gap, target_slices[1].class)
    assert_equal('scaffold:Btau_3.1:Chr4.003.106:1:1009889:1', target_slices[2].display_name)
    assert_equal(Gap, target_slices[3].class)
    assert_equal('scaffold:Btau_3.1:Chr4.003.107:1:608924:1', target_slices[4].display_name)
    assert_equal(Gap, target_slices[5].class)
    assert_equal('scaffold:Btau_3.1:Chr4.003.108:1:99498:1', target_slices[6].display_name)
  end

  #      |----->  |-------->  <---|    <--|  |----->       contig
  #         ^                                   ^
  #         |                                   |
  #  |--------------------------------------------------> chromosome
  def test_project_from_assembly_to_contigs_with_strand
    target_slices = project_chr4(329500, 380000, 'contig')

    # This chromosomal region is covered by 5 contigs and 1 gap: CoCoCoGaCoCo
    # Two of the contigs are on the reverse strand.
    assert_equal(6, target_slices.length)
    assert_equal('contig::AAFC03092598:60948:61145:1', target_slices[0].display_name)
    assert_equal('contig::AAFC03118261:25411:37082:1', target_slices[1].display_name)
    assert_equal('contig::AAFC03092594:1:3622:-1', target_slices[2].display_name)
    assert_equal(Gap, target_slices[3].class)
    assert_equal('contig::AAFC03092597:820:35709:-1', target_slices[4].display_name)
    assert_equal('contig::AAFC03032210:13347:13415:1', target_slices[5].display_name)
  end

  #            <--|  |----->                               contig
  #         ^                 ^
  #         |                 |
  #  |--------------------------------------------------> chromosome
  def test_project_from_assembly_to_contigs_with_strand_and_ending_in_gaps
    target_slices = project_chr4(345032, 388626, 'contig')

    # This chromosomal region is covered by 2 contigs with a gap at each end: GaCoCoGa
    # One of the contigs is on the reverse strand.
    assert_equal(4, target_slices.length)
    assert_equal(Gap, target_slices[0].class)
    assert_equal('contig::AAFC03092597:820:35709:-1', target_slices[1].display_name)
    assert_equal('contig::AAFC03032210:13347:22036:1', target_slices[2].display_name)
    assert_equal(Gap, target_slices[3].class)
  end

  private

  # Fetches chromosome 4 from +start+ to +stop+ and returns its projection
  # onto the coordinate system named +coord_system_name+.
  def project_chr4(start, stop, coord_system_name)
    Slice.fetch_by_region('chromosome', '4', start, stop).project(coord_system_name)
  end
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
#
|
2
|
+
# = test/unit/test_project_human.rb - Unit test for Ensembl::Core
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2007
|
5
|
+
# Jan Aerts <http://jandot.myopenid.com>
|
6
|
+
# License:: Ruby's
|
7
|
+
#
|
8
|
+
# $Id:
|
9
|
+
require 'pathname'

# Put the lib directory found four levels above this file on the load path
# (once), so that `require 'ensembl'` picks up this checkout.
relative_lib = File.join(File.dirname(__FILE__), ['..'] * 4, 'lib')
libpath = Pathname.new(relative_lib).cleanpath.to_s
$LOAD_PATH.unshift(libpath) unless $LOAD_PATH.include?(libpath)

require 'test/unit'
require 'ensembl'

include Ensembl::Core

# Every test case in this file queries the Homo sapiens database.
DBConnection.connect('homo_sapiens')
|
19
|
+
|
20
|
+
# Sequence and projection tests for human regions on chromosomes X and Y
# and on the c6_QBL haplotype, per the PAR/HAP test names below.
class AssemblyExceptions < Test::Unit::TestCase
  def test_chr_x
    source_slice = Slice.fetch_by_region('chromosome','X', 2709497, 2709520)
    assert_equal('tagttatagattaaaagaagttaa', source_slice.seq)
  end

  def test_slice_overlapping_PAR_and_allosome
    source_slice = Slice.fetch_by_region('chromosome','Y',2709500,2709540)
    target_slices = source_slice.project('contig')
    assert_equal('contig::AC006209.25.1.141759:23323:23343:-1', target_slices[0].display_name)
    assert_equal('contig::AC006040.3.1.186504:57272:57291:1', target_slices[1].display_name)
  end

  def test_seq_slice_overlapping_PAR
    expected = read_fasta_sequence('seq_y.fa')

    source_slice = Slice.fetch_by_region('chromosome', 'Y', 2709497, 2709542)
    assert_equal(expected, source_slice.seq)
  end

  # The MHC haplotypes for human are not implemented yet, so we raise an error
  # in the code.
  # NOTE(review): data/seq_c6qbl.fa presumably holds the expected sequence
  # for this region; compare against it once haplotypes are supported. It
  # is not read here because the assertion below never used it.
  def test_seq_slice_overlapping_HAP
    source_slice = Slice.fetch_by_region('chromosome', 'c6_QBL', 33451191, 33451690)
    assert_raise(NotImplementedError) {source_slice.seq}
  end

  private

  # Returns the lower-cased sequence stored in the FASTA file +filename+,
  # with header lines (starting with '>') dropped and all lines joined.
  # The file is looked up in the data directory two levels above this
  # file, independent of the current working directory. File.readlines
  # closes the file handle when it is done.
  def read_fasta_sequence(filename)
    path = File.join(File.dirname(__FILE__), '..', '..', 'data', filename)
    sequence_lines = File.readlines(path).reject { |line| line =~ /^>/ }
    sequence_lines.collect { |line| line.chomp }.join.downcase
  end
end
|