jandot-ruby-ensembl-api 0.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/TUTORIAL +623 -0
- data/bin/ensembl +39 -0
- data/lib/ensembl/core/activerecord.rb +1847 -0
- data/lib/ensembl/core/project.rb +248 -0
- data/lib/ensembl/core/slice.rb +627 -0
- data/lib/ensembl/core/transcript.rb +425 -0
- data/lib/ensembl/core/transform.rb +97 -0
- data/lib/ensembl/db_connection.rb +148 -0
- data/lib/ensembl/variation/activerecord.rb +308 -0
- data/lib/ensembl.rb +23 -0
- data/samples/examples_perl_tutorial.rb +120 -0
- data/samples/small_example_ruby_api.rb +34 -0
- data/test/unit/release_45/core/run_tests.rb +12 -0
- data/test/unit/release_45/core/test_project.rb +235 -0
- data/test/unit/release_45/core/test_project_human.rb +58 -0
- data/test/unit/release_45/core/test_relationships.rb +61 -0
- data/test/unit/release_45/core/test_sequence.rb +175 -0
- data/test/unit/release_45/core/test_slice.rb +56 -0
- data/test/unit/release_45/core/test_transcript.rb +94 -0
- data/test/unit/release_45/core/test_transform.rb +223 -0
- data/test/unit/release_45/variation/test_activerecord.rb +32 -0
- data/test/unit/release_50/core/run_tests.rb +12 -0
- data/test/unit/release_50/core/test_project.rb +215 -0
- data/test/unit/release_50/core/test_project_human.rb +58 -0
- data/test/unit/release_50/core/test_relationships.rb +66 -0
- data/test/unit/release_50/core/test_sequence.rb +175 -0
- data/test/unit/release_50/core/test_slice.rb +121 -0
- data/test/unit/release_50/core/test_transcript.rb +108 -0
- data/test/unit/release_50/core/test_transform.rb +223 -0
- data/test/unit/release_50/variation/test_activerecord.rb +136 -0
- data/test/unit/test_connection.rb +58 -0
- data/test/unit/test_releases.rb +40 -0
- metadata +243 -0
data/lib/ensembl.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
module Ensembl
|
2
|
+
ENSEMBL_RELEASE = 50
|
3
|
+
end
|
4
|
+
|
5
|
+
begin
|
6
|
+
require 'rubygems'
|
7
|
+
require 'bio'
|
8
|
+
rescue LoadError
|
9
|
+
raise LoadError, "You must have bioruby installed"
|
10
|
+
end
|
11
|
+
|
12
|
+
# Database connection
|
13
|
+
require File.dirname(__FILE__) + '/ensembl/db_connection.rb'
|
14
|
+
|
15
|
+
# Core modules
|
16
|
+
require File.dirname(__FILE__) + '/ensembl/core/activerecord.rb'
|
17
|
+
require File.dirname(__FILE__) + '/ensembl/core/transcript.rb'
|
18
|
+
require File.dirname(__FILE__) + '/ensembl/core/slice.rb'
|
19
|
+
require File.dirname(__FILE__) + '/ensembl/core/project.rb'
|
20
|
+
require File.dirname(__FILE__) + '/ensembl/core/transform.rb'
|
21
|
+
|
22
|
+
# Variation modules
|
23
|
+
require File.dirname(__FILE__) + '/ensembl/variation/activerecord.rb'
|
@@ -0,0 +1,120 @@
|
|
1
|
+
#!/usr/local/bin/ruby
|
2
|
+
|
3
|
+
require File.dirname(__FILE__) + '/../lib/ensembl.rb'
|
4
|
+
require 'yaml'
|
5
|
+
require 'progressbar'
|
6
|
+
|
7
|
+
include Ensembl::Core
|
8
|
+
|
9
|
+
## Connecting to the Database
|
10
|
+
DBConnection.connect('homo_sapiens')
|
11
|
+
|
12
|
+
## Object adaptors
|
13
|
+
# not necessary, ruby uses class methods instead
|
14
|
+
|
15
|
+
## Slices
|
16
|
+
puts "== Some slices: =="
|
17
|
+
puts Slice.fetch_by_region('chromosome','X').to_yaml
|
18
|
+
puts Slice.fetch_by_region('clone','AL359765.6').to_yaml
|
19
|
+
puts Slice.fetch_by_region('supercontig','NT_011333').to_yaml
|
20
|
+
puts Slice.fetch_by_region('chromosome', '20', 1000000, 2000000).to_yaml
|
21
|
+
puts Slice.fetch_by_gene_stable_id('ENSG00000099889', 5000).to_yaml
|
22
|
+
|
23
|
+
puts "== All chromosomes: =="
|
24
|
+
Slice.fetch_all('chromosome', 'NCBI36').each do |chr|
|
25
|
+
puts chr.display_name
|
26
|
+
end
|
27
|
+
|
28
|
+
puts "== Number of clone slices: " + Slice.fetch_all('clone').length.to_s
|
29
|
+
|
30
|
+
puts "== Subslices of chromosome 19 (length = 10000000; overlap = 250): =="
|
31
|
+
Slice.fetch_by_region('chromosome','19').split(10000000, 250).each do |sub_slice|
|
32
|
+
puts sub_slice.display_name
|
33
|
+
end
|
34
|
+
|
35
|
+
puts "== Sequence of a very small slice: Chr19:112200..112250 =="
|
36
|
+
slice = Slice.fetch_by_region('chromosome','19',112200,112250)
|
37
|
+
puts slice.seq
|
38
|
+
|
39
|
+
puts "== Query a slice about itself =="
|
40
|
+
puts slice.to_yaml
|
41
|
+
|
42
|
+
puts "== Get genes for a slice =="
|
43
|
+
slice = Slice.fetch_by_region('chromosome','19',112200,1122000)
|
44
|
+
slice.genes.each do |gene|
|
45
|
+
puts gene.stable_id
|
46
|
+
end
|
47
|
+
|
48
|
+
puts "== Get DNA alignment features for 20:80000..88000 =="
|
49
|
+
slice = Slice.fetch_by_region('chromosome','20',80000,88000)
|
50
|
+
slice.dna_align_features[0..2].each do |daf|
|
51
|
+
puts daf.to_yaml
|
52
|
+
end
|
53
|
+
|
54
|
+
puts "== Get sequence for transcript ENST00000383673 =="
|
55
|
+
transcript = Transcript.find_by_stable_id('ENST00000383673')
|
56
|
+
puts transcript.seq
|
57
|
+
|
58
|
+
puts "== Get synonyms for marker D9S1038E =="
|
59
|
+
marker = Ensembl::Core::Marker.find_by_name('D9S1038E')
|
60
|
+
marker.marker_synonyms[0..5].each do |ms|
|
61
|
+
puts ms.to_yaml
|
62
|
+
end
|
63
|
+
|
64
|
+
puts "== Get 5 features for this marker =="
|
65
|
+
marker = Ensembl::Core::Marker.find_by_name('D9S1038E')
|
66
|
+
marker.marker_features[0..5].each do |mf|
|
67
|
+
puts 'name: ' + marker.name
|
68
|
+
puts 'seq_region name: ' + mf.seq_region.name
|
69
|
+
puts 'start: ' + mf.seq_region_start.to_s
|
70
|
+
puts 'stop: ' + mf.seq_region_end.to_s
|
71
|
+
end
|
72
|
+
|
73
|
+
puts "== Get 5 features for chromosome 22 =="
|
74
|
+
slice = Ensembl::Core::Slice.fetch_by_region('chromosome', '22')
|
75
|
+
slice.marker_features.slice(0,5).each do |mf|
|
76
|
+
puts mf.marker.name + "\t" + mf.slice.display_name
|
77
|
+
end
|
78
|
+
|
79
|
+
puts "== Transcript: from cDNA to genomic positions =="
|
80
|
+
transcript = Ensembl::Core::Transcript.find(276333)
|
81
|
+
puts "Transcript is ENST00000215574"
|
82
|
+
puts "Genomic position 488053 is cDNA position: " + transcript.genomic2cdna(488053).to_s
|
83
|
+
puts "cDNA position 601 is genomic position: " + transcript.cdna2genomic(601).to_s
|
84
|
+
puts "Genomic position 488053 is CDS position: " + transcript.genomic2cds(488053).to_s
|
85
|
+
puts "CDS position 401 is genomic position: " + transcript.cds2genomic(401).to_s
|
86
|
+
|
87
|
+
puts "== Transcript: get pieces of DNA for a transcript =="
|
88
|
+
transcript = Ensembl::Core::Transcript.find_by_stable_id('ENST00000380593')
|
89
|
+
puts transcript.stable_id
|
90
|
+
puts "5'UTR: " + transcript.five_prime_utr_seq
|
91
|
+
puts "3'UTR: " + transcript.three_prime_utr_seq
|
92
|
+
puts "CDS: " + transcript.cds_seq
|
93
|
+
puts "protein: " + transcript.protein_seq
|
94
|
+
|
95
|
+
#### And now we'll do some stuff with cows.
|
96
|
+
CoreDBConnection.connection.disconnect!
|
97
|
+
CoreDBConnection.connect('bos_taurus')
|
98
|
+
|
99
|
+
puts "== Projecting a slice from component to assembly: =="
|
100
|
+
puts "== scaffold Chr4.003.105:42..2007 to chromosome level =="
|
101
|
+
source_slice = Slice.fetch_by_region('scaffold','Chr4.003.105', 42, 2007)
|
102
|
+
target_slices = source_slice.project('chromosome')
|
103
|
+
puts target_slices.collect{|s| s.display_name}.join("\n")
|
104
|
+
|
105
|
+
puts "== Projecting a slice from assembly to components: =="
|
106
|
+
puts "== chromosome slice chr4:329500..380000 to contig level =="
|
107
|
+
source_slice = Slice.fetch_by_region('chromosome', '4', 329500, 380000)
|
108
|
+
target_slices = source_slice.project('contig')
|
109
|
+
puts target_slices.collect{|s| s.display_name}.join("\n")
|
110
|
+
|
111
|
+
puts "== Transforming a gene from chromosome level to scaffold level =="
|
112
|
+
gene = Gene.find(2408)
|
113
|
+
cloned_gene = gene.transform('scaffold')
|
114
|
+
puts gene.slice.display_name
|
115
|
+
puts cloned_gene.slice.display_name
|
116
|
+
|
117
|
+
puts "== Relationships for Gene class =="
|
118
|
+
puts 'belongs to: ' + Gene.reflect_on_all_associations(:belongs_to).collect{|a| a.name.to_s}.join(',')
|
119
|
+
puts 'has many: ' + Gene.reflect_on_all_associations(:has_many).collect{|a| a.name.to_s}.join(',')
|
120
|
+
|
@@ -0,0 +1,34 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
require File.dirname(__FILE__) + '/../lib/ensembl' # anchor on this script's location so it runs from any working directory
|
3
|
+
|
4
|
+
include Ensembl::Core
|
5
|
+
|
6
|
+
DBConnection.connect('homo_sapiens')
|
7
|
+
|
8
|
+
puts "== Get a slice =="
|
9
|
+
slice = Slice.fetch_by_region('chromosome','4',10000,99999,-1)
|
10
|
+
puts slice.display_name
|
11
|
+
|
12
|
+
puts "== Print all gene for that slice (regardless of what coord_system genes are annotated on) =="
|
13
|
+
slice.genes.each do |gene|
|
14
|
+
puts gene.stable_id + "\t" + gene.status + "\t" + gene.slice.display_name
|
15
|
+
end
|
16
|
+
|
17
|
+
puts "== Get a transcript and print its 5'UTR, CDS and protein sequence =="
|
18
|
+
transcript = Transcript.find_by_stable_id('ENST00000380593')
|
19
|
+
puts "5'UTR: " + transcript.five_prime_utr_seq
|
20
|
+
puts "CDS: " + transcript.cds_seq
|
21
|
+
puts "peptide: " + transcript.protein_seq
|
22
|
+
|
23
|
+
DBConnection.connection.disconnect!
|
24
|
+
DBConnection.connect('bos_taurus',45)
|
25
|
+
|
26
|
+
puts "== Transforming a cow gene from chromosome level to scaffold level =="
|
27
|
+
gene = Gene.find(2408)
|
28
|
+
cloned_gene = gene.transform('scaffold')
|
29
|
+
puts "Original: " + gene.slice.display_name
|
30
|
+
puts "Now: " + cloned_gene.slice.display_name
|
31
|
+
|
32
|
+
puts "== What things are related to a 'gene' object? =="
|
33
|
+
puts 'Genes belong to: ' + Gene.reflect_on_all_associations(:belongs_to).collect{|a| a.name.to_s}.join(',')
|
34
|
+
puts 'Genes have many: ' + Gene.reflect_on_all_associations(:has_many).collect{|a| a.name.to_s}.join(',')
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
|
3
|
+
# Either run the tests that use Bos taurus
|
4
|
+
#require 'test_project'
|
5
|
+
#require 'test_transform'
|
6
|
+
#require 'test_slice'
|
7
|
+
|
8
|
+
# Or the ones using Homo sapiens
|
9
|
+
require 'test_relationships'
|
10
|
+
require 'test_project_human'
|
11
|
+
require 'test_sequence'
|
12
|
+
require 'test_transcript'
|
@@ -0,0 +1,235 @@
|
|
1
|
+
#
|
2
|
+
# = test/unit/test_project.rb - Unit test for Ensembl::Core
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2007
|
5
|
+
# Jan Aerts <http://jandot.myopenid.com>
|
6
|
+
# License:: Ruby's
|
7
|
+
#
|
8
|
+
# $Id:
|
9
|
+
require 'pathname'
|
10
|
+
libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'lib')).cleanpath.to_s
|
11
|
+
$:.unshift(libpath) unless $:.include?(libpath)
|
12
|
+
|
13
|
+
require 'test/unit'
|
14
|
+
require 'ensembl'
|
15
|
+
|
16
|
+
include Ensembl::Core
|
17
|
+
|
18
|
+
DBConnection.connect('bos_taurus')
|
19
|
+
|
20
|
+
class CoordinateMappingsTestSimple < Test::Unit::TestCase
|
21
|
+
# First see if the relationships work
|
22
|
+
def test_assemblies
|
23
|
+
# Scaffold Chr4.003.105 should only be a component of chromosome 4
|
24
|
+
scaffold_coord_system = CoordSystem.find_by_name('scaffold')
|
25
|
+
chr4_105 = SeqRegion.find_by_name_and_coord_system_id('Chr4.003.105', scaffold_coord_system.id)
|
26
|
+
assert_equal(1, chr4_105.assembled_seq_regions.length)
|
27
|
+
|
28
|
+
# Chromosome 4 has 4118 components (127 scaffolds and 3991 contigs)
|
29
|
+
chr_coord_system = CoordSystem.find_by_name('chromosome')
|
30
|
+
chr4 = SeqRegion.find_by_name_and_coord_system_id('4', chr_coord_system.id)
|
31
|
+
assert_equal(4118, chr4.component_seq_regions.length)
|
32
|
+
|
33
|
+
# Chromosome 4 has 127 scaffolds
|
34
|
+
assert_equal(127, chr4.component_seq_regions('scaffold').length)
|
35
|
+
|
36
|
+
# Positions of the link between Chr4 and Chr4.003.105
|
37
|
+
# * Scaffold Chr4.003.105 starts at position 96652111 on chromosome Chr4
|
38
|
+
# * Scaffold Chr4.003.105 does not have links as assembly with coord_system 'chromosome'
|
39
|
+
assert_equal(96652111, chr4_105.assembly_links_as_component('chromosome')[0].asm_start)
|
40
|
+
assert_nil(chr4_105.assembly_links_as_assembly('chromosome')[0]) # no assembly links expected, so the first element is nil
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
class Sequences < Test::Unit::TestCase
|
45
|
+
def setup
|
46
|
+
@seq_region = SeqRegion.find(92594)
|
47
|
+
end
|
48
|
+
|
49
|
+
def test_simple
|
50
|
+
assert_equal('AGCTATTTTATGACTT', @seq_region.seq.slice(4,16))
|
51
|
+
end
|
52
|
+
|
53
|
+
def test_subseq
|
54
|
+
assert_equal('AGCTATTTTATGACTT', @seq_region.subseq(5,20))
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
class SliceProjectFromComponentToAssembly < Test::Unit::TestCase
|
59
|
+
# |------------------------------------------> chromosome
|
60
|
+
# ^ ^
|
61
|
+
# | |
|
62
|
+
# |-----------------> scaffold
|
63
|
+
def test_project_from_whole_component_to_assembly
|
64
|
+
source_slice = Slice.fetch_by_region('scaffold','Chr4.003.105')
|
65
|
+
target_slices = source_slice.project('chromosome')
|
66
|
+
|
67
|
+
# Start and stop of chr4_105 on Chr4
|
68
|
+
assert_equal(96652111, target_slices[0].start)
|
69
|
+
assert_equal(97251689, target_slices[0].stop)
|
70
|
+
end
|
71
|
+
|
72
|
+
# |------------------------------------------> chromosome
|
73
|
+
# ^ ^
|
74
|
+
# | |
|
75
|
+
# |-----------------> scaffold
|
76
|
+
def test_project_from_component_to_assembly_with_positions
|
77
|
+
source_slice = Slice.fetch_by_region('scaffold','Chr4.003.105', 42, 2007)
|
78
|
+
target_slices = source_slice.project('chromosome')
|
79
|
+
|
80
|
+
# Position 42 on chr4_105 is position 96652152, position 2007 is 96654117
|
81
|
+
assert_equal(96652152, target_slices[0].start)
|
82
|
+
assert_equal(96654117, target_slices[0].stop)
|
83
|
+
end
|
84
|
+
|
85
|
+
# |------------------------------------------> scaffold
|
86
|
+
# ^ ^
|
87
|
+
# | |
|
88
|
+
# ----------------> contig
|
89
|
+
# /
|
90
|
+
# |--
|
91
|
+
def test_project_from_component_to_assembly_with_positions_and_cmp_start_not_1
|
92
|
+
source_slice = Slice.fetch_by_region('contig', 'AAFC03020247', 42, 2007)
|
93
|
+
target_slices = source_slice.project('scaffold')
|
94
|
+
|
95
|
+
# Position 42 on AAFC03020247 is position 6570 on ChrUn.003.3522, position 2007 is 8535
|
96
|
+
assert_equal(6570, target_slices[0].start)
|
97
|
+
assert_equal(8535, target_slices[0].stop)
|
98
|
+
end
|
99
|
+
|
100
|
+
# |------------------------------------------> scaffold
|
101
|
+
# ^ ^
|
102
|
+
# | |
|
103
|
+
# <-----------------| contig
|
104
|
+
def test_project_from_component_to_assembly_with_strand
|
105
|
+
source_slice_fw = Slice.fetch_by_region('contig', 'AAFC03020247')
|
106
|
+
target_slices_fw = source_slice_fw.project('scaffold')
|
107
|
+
|
108
|
+
assert_equal(1, target_slices_fw[0].strand)
|
109
|
+
|
110
|
+
source_slice_rev = Slice.fetch_by_region('contig', 'AAFC03061502')
|
111
|
+
target_slices_rev = source_slice_rev.project('scaffold')
|
112
|
+
|
113
|
+
assert_equal(-1, target_slices_rev[0].strand)
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
class SliceProjectFromComponentToAssemblyUsingTopLevel < Test::Unit::TestCase
|
118
|
+
# |------------------------------------------> chromosome
|
119
|
+
# ^ ^
|
120
|
+
# | |
|
121
|
+
# |-----------------> scaffold
|
122
|
+
def test_project_from_whole_component_to_assembly
|
123
|
+
source_slice = Slice.fetch_by_region('scaffold','Chr4.003.105')
|
124
|
+
target_slices = source_slice.project('toplevel')
|
125
|
+
|
126
|
+
# Start and stop of chr4_105 on Chr4
|
127
|
+
assert_equal(96652111, target_slices[0].start)
|
128
|
+
assert_equal(97251689, target_slices[0].stop)
|
129
|
+
end
|
130
|
+
|
131
|
+
# |------------------------------------------> chromosome
|
132
|
+
# ^ ^
|
133
|
+
# | |
|
134
|
+
# |-----------------> scaffold
|
135
|
+
def test_project_from_component_to_assembly_with_positions
|
136
|
+
source_slice = Slice.fetch_by_region('scaffold','Chr4.003.105', 42, 2007)
|
137
|
+
target_slices = source_slice.project('toplevel')
|
138
|
+
|
139
|
+
# Position 42 on chr4_105 is position 96652152, position 2007 is 96654117
|
140
|
+
assert_equal(96652152, target_slices[0].start)
|
141
|
+
assert_equal(96654117, target_slices[0].stop)
|
142
|
+
end
|
143
|
+
end
|
144
|
+
|
145
|
+
class SliceProjectFromAssemblyToComponentForwardStrands < Test::Unit::TestCase
|
146
|
+
def setup
|
147
|
+
@source_slice_single_scaffold = Slice.fetch_by_region('chromosome', '4', 96652152, 96654117)
|
148
|
+
@target_slices_single_scaffold = @source_slice_single_scaffold.project('scaffold')
|
149
|
+
|
150
|
+
@source_slice_two_scaffolds = Slice.fetch_by_region('chromosome','4', 96652152, 98000000)
|
151
|
+
@target_slices_two_scaffolds = @source_slice_two_scaffolds.project('scaffold')
|
152
|
+
|
153
|
+
@source_slice_four_scaffolds = Slice.fetch_by_region('chromosome', '4', 96652152, 99000000)
|
154
|
+
@target_slices_four_scaffolds = @source_slice_four_scaffolds.project('scaffold')
|
155
|
+
|
156
|
+
@source_slice_contigs_with_strand = Slice.fetch_by_region('chromosome', '4', 329500, 380000)
|
157
|
+
@target_slices_contigs_with_strand = @source_slice_contigs_with_strand.project('contig')
|
158
|
+
|
159
|
+
@source_slice_contigs_with_strand_ends_in_gaps = Slice.fetch_by_region('chromosome', '4', 345032, 388626)
|
160
|
+
@target_slices_contigs_with_strand_ends_in_gaps = @source_slice_contigs_with_strand_ends_in_gaps.project('contig')
|
161
|
+
end
|
162
|
+
|
163
|
+
# |-----------------> scaffold
|
164
|
+
# ^ ^
|
165
|
+
# | |
|
166
|
+
# |------------------------------------------> chromosome
|
167
|
+
def test_project_from_assembly_to_single_component
|
168
|
+
# Position 96652152 on chr4 is position 42 on scaffold, position 96654117 is 2007
|
169
|
+
assert_equal('Chr4.003.105', @target_slices_single_scaffold[0].seq_region.name)
|
170
|
+
assert_equal(42, @target_slices_single_scaffold[0].start)
|
171
|
+
assert_equal(2007, @target_slices_single_scaffold[0].stop)
|
172
|
+
end
|
173
|
+
|
174
|
+
# |-----> |--------> scaffold
|
175
|
+
# ^ ^
|
176
|
+
# | |
|
177
|
+
# |------------------------------------------> chromosome
|
178
|
+
def test_project_from_assembly_to_two_components
|
179
|
+
# This chromosomal region is covered by scaffold Chr4.003.105, a gap and scaffold Chr4.003.106
|
180
|
+
# * Position 96652152 on chr 4 is position 42 on scaffold Chr4.105
|
181
|
+
# * Position 98000000 on chr 4 is position 738311 on scaffold Chr4.106
|
182
|
+
assert_equal(3, @target_slices_two_scaffolds.length)
|
183
|
+
assert_equal('scaffold:Btau_3.1:Chr4.003.105:42:599579:1', @target_slices_two_scaffolds[0].display_name)
|
184
|
+
assert_equal(Gap, @target_slices_two_scaffolds[1].class)
|
185
|
+
assert_equal('scaffold:Btau_3.1:Chr4.003.106:1:738311:1', @target_slices_two_scaffolds[2].display_name)
|
186
|
+
end
|
187
|
+
|
188
|
+
# |-----> |--------> |---> |-------> scaffold
|
189
|
+
# ^ ^
|
190
|
+
# | |
|
191
|
+
# |--------------------------------------------------> chromosome
|
192
|
+
def test_project_from_assembly_to_four_components
|
193
|
+
# This chromosomal region is covered by scaffolds Chr4.003.105 to Chr4.003.108, each separated by a gap
|
194
|
+
assert_equal(7, @target_slices_four_scaffolds.length)
|
195
|
+
assert_equal('scaffold:Btau_3.1:Chr4.003.105:42:599579:1', @target_slices_four_scaffolds[0].display_name)
|
196
|
+
assert_equal(Gap, @target_slices_four_scaffolds[1].class)
|
197
|
+
assert_equal('scaffold:Btau_3.1:Chr4.003.106:1:1009889:1', @target_slices_four_scaffolds[2].display_name)
|
198
|
+
assert_equal(Gap, @target_slices_four_scaffolds[3].class)
|
199
|
+
assert_equal('scaffold:Btau_3.1:Chr4.003.107:1:608924:1', @target_slices_four_scaffolds[4].display_name)
|
200
|
+
assert_equal(Gap, @target_slices_four_scaffolds[5].class)
|
201
|
+
assert_equal('scaffold:Btau_3.1:Chr4.003.108:1:99498:1', @target_slices_four_scaffolds[6].display_name)
|
202
|
+
end
|
203
|
+
|
204
|
+
# |-----> |--------> <---| <--| |-----> contig
|
205
|
+
# ^ ^
|
206
|
+
# | |
|
207
|
+
# |--------------------------------------------------> chromosome
|
208
|
+
def test_project_from_assembly_to_contigs_with_strand
|
209
|
+
# This chromosomal region is covered by 5 contigs and 1 gap: CoCoCoGaCoCo
|
210
|
+
# Two of the contigs are on the reverse strand.
|
211
|
+
assert_equal(6, @target_slices_contigs_with_strand.length)
|
212
|
+
assert_equal('contig::AAFC03092598:60948:61145:1', @target_slices_contigs_with_strand[0].display_name)
|
213
|
+
assert_equal('contig::AAFC03118261:25411:37082:1', @target_slices_contigs_with_strand[1].display_name)
|
214
|
+
assert_equal('contig::AAFC03092594:1:3622:-1', @target_slices_contigs_with_strand[2].display_name)
|
215
|
+
assert_equal(Gap, @target_slices_contigs_with_strand[3].class)
|
216
|
+
assert_equal('contig::AAFC03092597:820:35709:-1', @target_slices_contigs_with_strand[4].display_name)
|
217
|
+
assert_equal('contig::AAFC03032210:13347:13415:1', @target_slices_contigs_with_strand[5].display_name)
|
218
|
+
end
|
219
|
+
|
220
|
+
# <--| |-----> contig
|
221
|
+
# ^ ^
|
222
|
+
# | |
|
223
|
+
# |--------------------------------------------------> chromosome
|
224
|
+
def test_project_from_assembly_to_contigs_with_strand_and_ending_in_gaps
|
225
|
+
# This chromosomal region is covered by 2 contigs and 2 gaps at the end: GaCoCoGa
|
226
|
+
# One of the contigs is on the reverse strand.
|
227
|
+
assert_equal(4, @target_slices_contigs_with_strand_ends_in_gaps.length)
|
228
|
+
assert_equal(Gap, @target_slices_contigs_with_strand_ends_in_gaps[0].class)
|
229
|
+
assert_equal('contig::AAFC03092597:820:35709:-1', @target_slices_contigs_with_strand_ends_in_gaps[1].display_name)
|
230
|
+
assert_equal('contig::AAFC03032210:13347:22036:1', @target_slices_contigs_with_strand_ends_in_gaps[2].display_name)
|
231
|
+
assert_equal(Gap, @target_slices_contigs_with_strand_ends_in_gaps[3].class)
|
232
|
+
end
|
233
|
+
|
234
|
+
|
235
|
+
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
#
|
2
|
+
# = test/unit/release_45/core/test_project_human.rb - Unit test for Ensembl::Core
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2007
|
5
|
+
# Jan Aerts <http://jandot.myopenid.com>
|
6
|
+
# License:: Ruby's
|
7
|
+
#
|
8
|
+
# $Id:
|
9
|
+
require 'pathname'
|
10
|
+
libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'lib')).cleanpath.to_s
|
11
|
+
$:.unshift(libpath) unless $:.include?(libpath)
|
12
|
+
|
13
|
+
require 'test/unit'
|
14
|
+
require 'ensembl'
|
15
|
+
|
16
|
+
include Ensembl::Core
|
17
|
+
|
18
|
+
DBConnection.connect('homo_sapiens')
|
19
|
+
|
20
|
+
class AssemblyExceptions < Test::Unit::TestCase
|
21
|
+
def test_chr_x
|
22
|
+
source_slice = Slice.fetch_by_region('chromosome','X', 2709497, 2709520)
|
23
|
+
assert_equal('tagttatagattaaaagaagttaa', source_slice.seq)
|
24
|
+
end
|
25
|
+
|
26
|
+
def test_slice_overlapping_PAR_and_allosome
|
27
|
+
source_slice = Slice.fetch_by_region('chromosome','Y',2709500,2709540)
|
28
|
+
target_slices = source_slice.project('contig')
|
29
|
+
assert_equal('contig::AC006209.25.1.141759:23323:23343:-1', target_slices[0].display_name)
|
30
|
+
assert_equal('contig::AC006040.3.1.186504:57272:57291:1', target_slices[1].display_name)
|
31
|
+
end
|
32
|
+
|
33
|
+
def test_seq_slice_overlapping_PAR
|
34
|
+
seq = ''
|
35
|
+
File.open('../../data/seq_y.fa').reject{|l| l=~/^>/}.each do |line|
|
36
|
+
line.chomp!
|
37
|
+
seq += line
|
38
|
+
end
|
39
|
+
seq.downcase!
|
40
|
+
|
41
|
+
source_slice = Slice.fetch_by_region('chromosome', 'Y', 2709497, 2709542)
|
42
|
+
assert_equal(seq.downcase, source_slice.seq)
|
43
|
+
end
|
44
|
+
|
45
|
+
# The MHC haplotypes for human are not implemented yet, so we raise an error
|
46
|
+
# in the code.
|
47
|
+
def test_seq_slice_overlapping_HAP
|
48
|
+
seq = ''
|
49
|
+
File.open('../../data/seq_c6qbl.fa').reject{|l| l=~/^>/}.each do |line|
|
50
|
+
line.chomp!
|
51
|
+
seq += line
|
52
|
+
end
|
53
|
+
seq.downcase!
|
54
|
+
|
55
|
+
source_slice = Slice.fetch_by_region('chromosome', 'c6_QBL', 33451191, 33451690)
|
56
|
+
assert_raise(NotImplementedError) {source_slice.seq}
|
57
|
+
end
|
58
|
+
end
|