ruby-ensembl-api 0.9.6 → 1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/TUTORIAL.rdoc +1 -1
- data/bin/variation_effect_predictor +106 -0
- data/lib/ensembl.rb +2 -2
- data/lib/ensembl/core/activerecord.rb +119 -225
- data/lib/ensembl/core/collection.rb +14 -10
- data/lib/ensembl/core/project.rb +6 -8
- data/lib/ensembl/core/slice.rb +87 -123
- data/lib/ensembl/core/transcript.rb +49 -65
- data/lib/ensembl/core/transform.rb +6 -8
- data/lib/ensembl/db_connection.rb +56 -72
- data/lib/ensembl/variation/activerecord.rb +138 -8
- data/lib/ensembl/variation/variation.rb +284 -46
- data/samples/ensembl_genomes_example.rb +60 -0
- data/samples/examples_perl_tutorial.rb +125 -0
- data/samples/small_example_ruby_api.rb +34 -0
- data/samples/variation_example.rb +67 -0
- data/test/unit/{release_56 → release_60}/core/test_gene.rb +6 -6
- data/test/unit/release_60/core/test_project_human.rb +38 -0
- data/test/unit/{release_56 → release_60}/core/test_slice.rb +1 -8
- data/test/unit/release_60/core/test_transcript.rb +126 -0
- data/test/unit/{release_53 → release_60}/core/test_transform.rb +21 -21
- data/test/unit/release_60/variation/test_activerecord.rb +213 -0
- data/test/unit/release_60/variation/test_consequence.rb +158 -0
- data/test/unit/{release_56 → release_60}/variation/test_variation.rb +18 -17
- data/test/unit/test_connection.rb +2 -2
- data/test/unit/test_releases.rb +8 -8
- metadata +27 -43
- data/test/unit/data/seq_c6qbl.fa +0 -10
- data/test/unit/data/seq_cso19_coding.fa +0 -16
- data/test/unit/data/seq_cso19_transcript.fa +0 -28
- data/test/unit/data/seq_drd3_gene.fa +0 -838
- data/test/unit/data/seq_drd3_transcript.fa +0 -22
- data/test/unit/data/seq_drd4_transcript.fa +0 -24
- data/test/unit/data/seq_forward_composite.fa +0 -1669
- data/test/unit/data/seq_par_boundary.fa +0 -169
- data/test/unit/data/seq_rnd3_transcript.fa +0 -47
- data/test/unit/data/seq_ub2r1_coding.fa +0 -13
- data/test/unit/data/seq_ub2r1_gene.fa +0 -174
- data/test/unit/data/seq_ub2r1_transcript.fa +0 -26
- data/test/unit/data/seq_y.fa +0 -2
- data/test/unit/ensembl_genomes/test_collection.rb +0 -51
- data/test/unit/ensembl_genomes/test_gene.rb +0 -52
- data/test/unit/ensembl_genomes/test_slice.rb +0 -71
- data/test/unit/ensembl_genomes/test_variation.rb +0 -17
- data/test/unit/release_50/core/test_project.rb +0 -215
- data/test/unit/release_50/core/test_project_human.rb +0 -58
- data/test/unit/release_50/core/test_relationships.rb +0 -66
- data/test/unit/release_50/core/test_sequence.rb +0 -175
- data/test/unit/release_50/core/test_slice.rb +0 -121
- data/test/unit/release_50/core/test_transcript.rb +0 -108
- data/test/unit/release_50/core/test_transform.rb +0 -223
- data/test/unit/release_50/variation/test_activerecord.rb +0 -143
- data/test/unit/release_50/variation/test_variation.rb +0 -84
- data/test/unit/release_53/core/test_gene.rb +0 -66
- data/test/unit/release_53/core/test_project.rb +0 -96
- data/test/unit/release_53/core/test_project_human.rb +0 -65
- data/test/unit/release_53/core/test_slice.rb +0 -47
- data/test/unit/release_53/variation/test_activerecord.rb +0 -145
- data/test/unit/release_53/variation/test_variation.rb +0 -71
- data/test/unit/release_56/core/test_project.rb +0 -96
- data/test/unit/release_56/core/test_transform.rb +0 -63
- data/test/unit/release_56/variation/test_activerecord.rb +0 -142
@@ -0,0 +1,60 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
require File.dirname(__FILE__) + '/../lib/ensembl.rb' # resolve the library relative to this script, not the current directory
|
4
|
+
include Ensembl::Core
|
5
|
+
|
6
|
+
DBConnection.ensemblgenomes_connect('bacillus_cereus_ZK',7) # Connect to the Ensembl Genomes MySQL server and fetch the data for 'bacillus_cereus_ZK'
|
7
|
+
slice = Slice.fetch_by_region('chromosome',"Chromosome",4791870,4799024) # retrieve a slice for this species
|
8
|
+
|
9
|
+
puts "\nConnecting to 'bacillus_cereus_ZK' database..."
|
10
|
+
# show all the species inside the collection, as 'bacillus_cereus_ZK' genome is stored inside the bacillus_collection database
|
11
|
+
if Collection.check
|
12
|
+
puts "Is this a collection? #{Collection.check}"
|
13
|
+
puts "\nOther species present inside the collection:"
|
14
|
+
Collection.species.each do |s|
|
15
|
+
puts s
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
puts "\nSequence:"
|
20
|
+
# get the sequence
|
21
|
+
puts slice.seq
|
22
|
+
|
23
|
+
puts "\nGenes:"
|
24
|
+
# get all the genes overlapping the slice
|
25
|
+
genes = slice.genes
|
26
|
+
genes.each do |g|
|
27
|
+
print "#{g.stable_id} #{g.name}\n"
|
28
|
+
end
|
29
|
+
|
30
|
+
# CHANGE DATABASE
|
31
|
+
|
32
|
+
puts "\n########################\nConnecting to 'mycobacterium_collection' database..."
|
33
|
+
DBConnection.ensemblgenomes_connect('mycobacterium_collection',7) # connect directly to a collection database
|
34
|
+
slice = Slice.fetch_by_region('chromosome',"Chromosome",752908,759374,1,"Mycobacterium tuberculosis H37Rv") # manually set the species to fetch the slice from
|
35
|
+
|
36
|
+
# show all the species inside the collection
|
37
|
+
if Collection.check
|
38
|
+
puts "\nIs this a collection? #{Collection.check}"
|
39
|
+
puts "\nOther species present inside the collection:"
|
40
|
+
Collection.species.each do |s|
|
41
|
+
puts s
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
puts "\nSequence:"
|
46
|
+
# get the sequence
|
47
|
+
puts slice.seq
|
48
|
+
|
49
|
+
puts "\nGenes:"
|
50
|
+
# get all the genes overlapping the slice
|
51
|
+
genes = slice.genes
|
52
|
+
genes.each do |g|
|
53
|
+
print "#{g.stable_id} #{g.name}\n"
|
54
|
+
end
|
55
|
+
|
56
|
+
|
57
|
+
|
58
|
+
|
59
|
+
|
60
|
+
|
@@ -0,0 +1,125 @@
|
|
1
|
+
#!/usr/local/bin/ruby
|
2
|
+
|
3
|
+
require File.dirname(__FILE__) + '/../lib/ensembl.rb'
|
4
|
+
require 'yaml'
|
5
|
+
require 'progressbar'
|
6
|
+
|
7
|
+
include Ensembl::Core
|
8
|
+
|
9
|
+
## Connecting to the Database
|
10
|
+
DBConnection.connect('homo_sapiens',45)
|
11
|
+
|
12
|
+
## Object adaptors
|
13
|
+
# not necessary, ruby uses class methods instead
|
14
|
+
|
15
|
+
## Slices
|
16
|
+
puts "== Some slices: =="
|
17
|
+
puts Slice.fetch_by_region('chromosome','X').to_yaml
|
18
|
+
puts Slice.fetch_by_region('clone','AL359765.6').to_yaml
|
19
|
+
puts Slice.fetch_by_region('supercontig','NT_011333').to_yaml
|
20
|
+
puts Slice.fetch_by_region('chromosome', '20', 1000000, 2000000).to_yaml
|
21
|
+
puts Slice.fetch_by_gene_stable_id('ENSG00000099889', 5000).to_yaml
|
22
|
+
|
23
|
+
puts "== All chromosomes: =="
|
24
|
+
Slice.fetch_all('chromosome', 'NCBI36').each do |chr|
|
25
|
+
puts chr.display_name
|
26
|
+
end
|
27
|
+
|
28
|
+
puts "== Number of clone slices: " + Slice.fetch_all('clone').length.to_s
|
29
|
+
|
30
|
+
puts "== Subslices of chromosome 19 (length = 10000000; overlap = 250): =="
|
31
|
+
Slice.fetch_by_region('chromosome','19').split(10000000, 250).each do |sub_slice|
|
32
|
+
puts sub_slice.display_name
|
33
|
+
end
|
34
|
+
|
35
|
+
puts "== Sequence of a very small slice: Chr19:112200..112250 =="
|
36
|
+
slice = Slice.fetch_by_region('chromosome','19',112200,112250)
|
37
|
+
puts slice.seq
|
38
|
+
|
39
|
+
puts "== Query a slice about itself =="
|
40
|
+
puts slice.to_yaml
|
41
|
+
|
42
|
+
puts "== Get genes for a slice and print exons and introns=="
|
43
|
+
slice = Slice.fetch_by_region('chromosome','19',112200,500000)
|
44
|
+
slice.genes.each do |gene|
|
45
|
+
puts gene.stable_id
|
46
|
+
puts 'Exons for first transcript:'
|
47
|
+
puts gene.transcripts[0].exons.sort_by{|e| e.start}.collect{|e| e.start.to_s + '->' + e.stop.to_s}.join("\t")
|
48
|
+
puts 'Introns for first transcript:'
|
49
|
+
puts gene.transcripts[0].introns.sort_by{|i| i.start}.collect{|i| i.start.to_s + '->' + i.stop.to_s}.join("\t")
|
50
|
+
puts
|
51
|
+
end
|
52
|
+
|
53
|
+
puts "== Get DNA alignment features for 20:80000..88000 =="
|
54
|
+
slice = Slice.fetch_by_region('chromosome','20',80000,88000)
|
55
|
+
slice.dna_align_features[0..2].each do |daf|
|
56
|
+
puts daf.to_yaml
|
57
|
+
end
|
58
|
+
|
59
|
+
puts "== Get sequence for transcript ENST00000383673 =="
|
60
|
+
transcript = Transcript.find_by_stable_id('ENST00000383673')
|
61
|
+
puts transcript.seq
|
62
|
+
|
63
|
+
puts "== Get synonyms for marker D9S1038E =="
|
64
|
+
marker = Ensembl::Core::Marker.find_by_name('D9S1038E')
|
65
|
+
marker.marker_synonyms[0..5].each do |ms|
|
66
|
+
puts ms.to_yaml
|
67
|
+
end
|
68
|
+
|
69
|
+
puts "== Get 5 features for this marker =="
|
70
|
+
marker = Ensembl::Core::Marker.find_by_name('D9S1038E')
|
71
|
+
marker.marker_features[0..5].each do |mf|
|
72
|
+
puts 'name: ' + marker.name
|
73
|
+
puts 'seq_region name: ' + mf.seq_region.name
|
74
|
+
puts 'start: ' + mf.seq_region_start.to_s
|
75
|
+
puts 'stop: ' + mf.seq_region_end.to_s
|
76
|
+
end
|
77
|
+
|
78
|
+
puts "== Get 5 features for chromosome 22 =="
|
79
|
+
slice = Ensembl::Core::Slice.fetch_by_region('chromosome', '22')
|
80
|
+
slice.marker_features.slice(0,5).each do |mf|
|
81
|
+
puts mf.marker.name + "\t" + mf.slice.display_name
|
82
|
+
end
|
83
|
+
|
84
|
+
puts "== Transcript: from cDNA to genomic positions =="
|
85
|
+
transcript = Ensembl::Core::Transcript.find(276333)
|
86
|
+
puts "Transcript is ENST00000215574"
|
87
|
+
puts "Genomic position 488053 is cDNA position: " + transcript.genomic2cdna(488053).to_s
|
88
|
+
puts "cDNA position 601 is genomic position: " + transcript.cdna2genomic(601).to_s
|
89
|
+
puts "Genomic position 488053 is CDS position: " + transcript.genomic2cds(488053).to_s
|
90
|
+
puts "CDS position 401 is genomic position: " + transcript.cds2genomic(401).to_s
|
91
|
+
|
92
|
+
puts "== Transcript: get pieces of DNA for a transcript =="
|
93
|
+
transcript = Ensembl::Core::Transcript.find_by_stable_id('ENST00000380593')
|
94
|
+
puts transcript.stable_id
|
95
|
+
puts "5'UTR: " + transcript.five_prime_utr_seq
|
96
|
+
puts "3'UTR: " + transcript.three_prime_utr_seq
|
97
|
+
puts "CDS: " + transcript.cds_seq
|
98
|
+
puts "protein: " + transcript.protein_seq
|
99
|
+
|
100
|
+
#### And now we'll do some stuff with cows.
|
101
|
+
DBConnection.connection.disconnect! # same connection class this script used to connect to homo_sapiens
|
102
|
+
DBConnection.connect('bos_taurus') # connect through the same class this script used for homo_sapiens
|
103
|
+
|
104
|
+
puts "== Projecting a slice from component to assembly: =="
|
105
|
+
puts "== scaffold Chr4.003.105:42..2007 to chromosome level =="
|
106
|
+
source_slice = Slice.fetch_by_region('scaffold','Chr4.003.105', 42, 2007)
|
107
|
+
target_slices = source_slice.project('chromosome')
|
108
|
+
puts target_slices.collect{|s| s.display_name}.join("\n")
|
109
|
+
|
110
|
+
puts "== Projecting a slice from assembly to components: =="
|
111
|
+
puts "== chromosome slice chr4:329500..380000 to contig level =="
|
112
|
+
source_slice = Slice.fetch_by_region('chromosome', '4', 329500, 380000)
|
113
|
+
target_slices = source_slice.project('contig')
|
114
|
+
puts target_slices.collect{|s| s.display_name}.join("\n")
|
115
|
+
|
116
|
+
puts "== Transforming a gene from chromosome level to scaffold level =="
|
117
|
+
gene = Gene.find(2408)
|
118
|
+
cloned_gene = gene.transform('scaffold')
|
119
|
+
puts gene.slice.display_name
|
120
|
+
puts cloned_gene.slice.display_name
|
121
|
+
|
122
|
+
puts "== Relationships for Gene class =="
|
123
|
+
puts 'belongs to: ' + Gene.reflect_on_all_associations(:belongs_to).collect{|a| a.name.to_s}.join(',')
|
124
|
+
puts 'has many: ' + Gene.reflect_on_all_associations(:has_many).collect{|a| a.name.to_s}.join(',')
|
125
|
+
|
@@ -0,0 +1,34 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
require File.dirname(__FILE__) + '/../lib/ensembl.rb' # resolve the library relative to this script, not the current directory
|
3
|
+
|
4
|
+
include Ensembl::Core
|
5
|
+
|
6
|
+
DBConnection.connect('homo_sapiens')
|
7
|
+
|
8
|
+
puts "== Get a slice =="
|
9
|
+
slice = Slice.fetch_by_region('chromosome','4',10000,99999,-1)
|
10
|
+
puts slice.display_name
|
11
|
+
|
12
|
+
puts "== Print all gene for that slice (regardless of what coord_system genes are annotated on) =="
|
13
|
+
slice.genes.each do |gene|
|
14
|
+
puts gene.stable_id + "\t" + gene.status + "\t" + gene.slice.display_name
|
15
|
+
end
|
16
|
+
|
17
|
+
puts "== Get a transcript and print its 5'UTR, CDS and protein sequence =="
|
18
|
+
transcript = Transcript.find_by_stable_id('ENST00000380593')
|
19
|
+
puts "5'UTR: " + transcript.five_prime_utr_seq
|
20
|
+
puts "CDS: " + transcript.cds_seq
|
21
|
+
puts "peptide: " + transcript.protein_seq
|
22
|
+
|
23
|
+
DBConnection.connection.disconnect!
|
24
|
+
DBConnection.connect('bos_taurus',45)
|
25
|
+
|
26
|
+
puts "== Transforming a cow gene from chromosome level to scaffold level =="
|
27
|
+
gene = Gene.find(2408)
|
28
|
+
cloned_gene = gene.transform('scaffold')
|
29
|
+
puts "Original: " + gene.slice.display_name
|
30
|
+
puts "Now: " + cloned_gene.slice.display_name
|
31
|
+
|
32
|
+
puts "== What things are related to a 'gene' object? =="
|
33
|
+
puts 'Genes belong to: ' + Gene.reflect_on_all_associations(:belongs_to).collect{|a| a.name.to_s}.join(',')
|
34
|
+
puts 'Genes have many: ' + Gene.reflect_on_all_associations(:has_many).collect{|a| a.name.to_s}.join(',')
|
@@ -0,0 +1,67 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
# Based on Perl API tutorial
|
4
|
+
# http://www.ensembl.org/info/using/api/variation/variation_tutorial.html
|
5
|
+
|
6
|
+
|
7
|
+
require File.dirname(__FILE__) + '/../lib/ensembl.rb'
|
8
|
+
include Ensembl::Core
|
9
|
+
include Ensembl::Variation
|
10
|
+
|
11
|
+
Ensembl::Variation::DBConnection.connect('homo_sapiens',60)
|
12
|
+
# The connection with the Core database can be omitted. It is created automatically
|
13
|
+
# when needed, using Variation DB connection parameters. The database name is derived
|
14
|
+
# from Variation DB name. If you are using non conventional DB names (i.e. for a local copy of Ensembl database)
|
15
|
+
# an exception will be raised. Otherwise, if a Core DB connection is already
|
16
|
+
# present, that connection will be used by default, instead of creating a new one.
|
17
|
+
|
18
|
+
id = ['rs73514758','rs77811736']
|
19
|
+
|
20
|
+
id.each do |i|
|
21
|
+
v = Variation.find_by_name(i)
|
22
|
+
v.variation_features.each do |vf|
|
23
|
+
|
24
|
+
up_seq,down_seq = vf.flanking_seq # retrieve upstream and downstream flanking sequences
|
25
|
+
|
26
|
+
seq_region_name = vf.fetch_region.seq_region.name # fetch the genomic region of the Variation and get the region name.
|
27
|
+
# Automatically sets the connection with Core DB, if needed.
|
28
|
+
|
29
|
+
puts "\n== VARIATION FEATURE =="
|
30
|
+
print "NAME: #{vf.variation_name}\n ALLELE: #{vf.allele_string}\n UPSTREAM SEQ: #{up_seq.seq} \n DOWNSTREAM SEQ: #{down_seq.seq}\n SEQ REGION NAME :#{seq_region_name}\n"
|
31
|
+
vf.transcript_variations.each do |tv|
|
32
|
+
t = tv.transcript # retrieve Ensembl::Core::Transcript from Core DB. Automatically sets the connection, if necessary.
|
33
|
+
puts "== TRANSCRIPT VARIATION =="
|
34
|
+
print "ALLELE PEPTIDE STRING: #{tv.peptide_allele_string} TRANSCRIPT STABLE ID: #{t.stable_id} GENE STABLE ID: #{t.gene.stable_id}\n"
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
# Returns all Variations present on a genomic region
|
40
|
+
|
41
|
+
puts "\n== SEARCHING FOR VARIATIONS ON CHR:1:50000:51000 =="
|
42
|
+
|
43
|
+
# Even in this case, Variation DB connection can be set automatically by specific Slice methods
|
44
|
+
|
45
|
+
s = Slice.fetch_by_region('chromosome',1,50000,51000)
|
46
|
+
variation_features = s.get_variation_features # automatically sets the connection with Variation DB, if needed.
|
47
|
+
variation_features.each do |vf|
|
48
|
+
print "NAME: #{vf.variation_name} ALLELE: #{vf.allele_string} SEQ REGION NAME: #{vf.seq_region.name}\n"
|
49
|
+
end
|
50
|
+
|
51
|
+
puts "\n== GENOTYPED VARIATIONS =="
|
52
|
+
|
53
|
+
genotyped_variation_features = s.get_genotyped_variation_features # automatically sets the connection with Variation DB, if needed.
|
54
|
+
genotyped_variation_features.each do |gvf|
|
55
|
+
print "NAME: #{gvf.variation_name} ALLELE: #{gvf.allele_string} SEQ REGION NAME: #{gvf.seq_region.name}\n"
|
56
|
+
end
|
57
|
+
|
58
|
+
puts "\n== STRUCTURAL VARIATIONS ON CHR:11:60125:320837 =="
|
59
|
+
slice = Ensembl::Core::Slice.fetch_by_region('chromosome',11,60125,320837)
|
60
|
+
structural_variations = slice.get_structural_variations # query the chromosome 11 slice fetched just above, as the heading states
|
61
|
+
structural_variations.each do |sv|
|
62
|
+
print "NAME: #{sv.variation_name} SEQ REGION NAME: #{sv.seq_region.name}\n"
|
63
|
+
end
|
64
|
+
|
65
|
+
puts "\n== What things are related to a 'variation' object? =="
|
66
|
+
puts 'Variation belong to: ' + Variation.reflect_on_all_associations(:belongs_to).collect{|a| a.name.to_s}.join(',')
|
67
|
+
puts 'Variation have many: ' + Variation.reflect_on_all_associations(:has_many).collect{|a| a.name.to_s}.join(',')
|
@@ -1,5 +1,5 @@
|
|
1
1
|
#
|
2
|
-
# = test/unit/
|
2
|
+
# = test/unit/release_60/core/test_gene.rb - Unit test for Ensembl::Core
|
3
3
|
#
|
4
4
|
# Copyright:: Copyright (C) 2009 Francesco Strozzi <francesco.strozzi@gmail.com>
|
5
5
|
#
|
@@ -19,7 +19,7 @@ include Ensembl::Core
|
|
19
19
|
class TestGene < Test::Unit::TestCase
|
20
20
|
|
21
21
|
def setup
|
22
|
-
DBConnection.connect('homo_sapiens',
|
22
|
+
DBConnection.connect('homo_sapiens', 60)
|
23
23
|
end
|
24
24
|
|
25
25
|
def teardown
|
@@ -31,10 +31,10 @@ class TestGene < Test::Unit::TestCase
|
|
31
31
|
assert_equal("ENSG00000006451",g.stable_id)
|
32
32
|
assert_equal("7",g.seq_region.name)
|
33
33
|
assert_equal(39663082,g.start)
|
34
|
-
assert_equal(39747723
|
34
|
+
assert_equal(39747723,g.stop)
|
35
35
|
assert_equal(1,g.strand)
|
36
36
|
assert_equal(84642,g.seq.length)
|
37
|
-
assert_equal("
|
37
|
+
assert_equal("v-ral simian leukemia viral oncogene homolog A (ras related) [Source:HGNC Symbol;Acc:9839]",g.description)
|
38
38
|
assert_equal("RALA",g.name)
|
39
39
|
end
|
40
40
|
|
@@ -42,9 +42,9 @@ class TestGene < Test::Unit::TestCase
|
|
42
42
|
g = Gene.find_by_stable_id("ENSG00000006451")
|
43
43
|
t = g.transcripts
|
44
44
|
assert_equal(5,t.size)
|
45
|
-
assert_equal("
|
45
|
+
assert_equal("ENST00000005257",t[0].stable_id)
|
46
46
|
t = t[0]
|
47
|
-
assert_equal(
|
47
|
+
assert_equal(2880,t.seq.length)
|
48
48
|
end
|
49
49
|
|
50
50
|
def test_exons
|
@@ -0,0 +1,38 @@
|
|
1
|
+
#
|
2
|
+
# = test/unit/release_60/core/test_project_human.rb - Unit test for Ensembl::Core
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2009
|
5
|
+
# Jan Aerts <http://jandot.myopenid.com>
|
6
|
+
# Francesco Strozzi <francesco.strozzi@gmail.com>
|
7
|
+
# License:: Ruby's
|
8
|
+
#
|
9
|
+
# $Id:
|
10
|
+
require 'pathname'
|
11
|
+
libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'lib')).cleanpath.to_s
|
12
|
+
$:.unshift(libpath) unless $:.include?(libpath)
|
13
|
+
require 'test/unit'
|
14
|
+
require 'ensembl' # resolved through libpath, which was prepended to $: above
|
15
|
+
|
16
|
+
include Ensembl::Core
|
17
|
+
|
18
|
+
class AssemblyExceptions < Test::Unit::TestCase
|
19
|
+
|
20
|
+
def setup
|
21
|
+
DBConnection.connect('homo_sapiens', 60)
|
22
|
+
end
|
23
|
+
|
24
|
+
def teardown
|
25
|
+
DBConnection.remove_connection
|
26
|
+
end
|
27
|
+
|
28
|
+
def test_chr_x
|
29
|
+
source_slice = Slice.fetch_by_region('chromosome','X', 2709497, 2709520)
|
30
|
+
assert_equal('ctgaagaattgtgtttcttcccta', source_slice.seq)
|
31
|
+
end
|
32
|
+
|
33
|
+
def test_slice_overlapping_PAR_and_allosome
|
34
|
+
source_slice = Slice.fetch_by_region('chromosome','Y',2709500,2709540)
|
35
|
+
assert_equal('AGAAACTGAAAATGCTAAGAAATTCAGTTCCAGGATATGAA', source_slice.seq.upcase)
|
36
|
+
end
|
37
|
+
|
38
|
+
end
|
@@ -19,7 +19,7 @@ include Ensembl::Core
|
|
19
19
|
class TestSlice < Test::Unit::TestCase
|
20
20
|
|
21
21
|
def setup
|
22
|
-
DBConnection.connect('homo_sapiens',
|
22
|
+
DBConnection.connect('homo_sapiens', 60)
|
23
23
|
end
|
24
24
|
|
25
25
|
def teardown
|
@@ -44,11 +44,4 @@ class TestSlice < Test::Unit::TestCase
|
|
44
44
|
assert_equal(31786816,s_rev.stop)
|
45
45
|
end
|
46
46
|
|
47
|
-
def test_slice_genes
|
48
|
-
slice = Slice.fetch_by_region("chromosome","13",31773073,31909413,1)
|
49
|
-
genes = slice.genes
|
50
|
-
assert_equal("ENSG00000229427",genes[1].stable_id)
|
51
|
-
assert_equal("ENSG00000187676",genes[0].stable_id)
|
52
|
-
end
|
53
|
-
|
54
47
|
end
|
@@ -0,0 +1,126 @@
|
|
1
|
+
#
|
2
|
+
# = test/unit/release_60/core/test_transcript.rb - Unit test for Ensembl::Core
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2007
|
5
|
+
# Jan Aerts <http://jandot.myopenid.com>
|
6
|
+
# License:: Ruby's
|
7
|
+
#
|
8
|
+
# $Id:
|
9
|
+
require 'pathname'
|
10
|
+
libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'lib')).cleanpath.to_s
|
11
|
+
$:.unshift(libpath) unless $:.include?(libpath)
|
12
|
+
|
13
|
+
require 'test/unit'
|
14
|
+
|
15
|
+
require 'ensembl' # resolved through libpath, which was prepended to $: above
|
16
|
+
|
17
|
+
include Ensembl::Core
|
18
|
+
|
19
|
+
DBConnection.connect('homo_sapiens', 60)
|
20
|
+
|
21
|
+
class CodingPositions < Test::Unit::TestCase
|
22
|
+
def setup
|
23
|
+
# Transcript tr_fw is ENST00000215574
|
24
|
+
@tr_fw = Transcript.find_by_stable_id("ENST00000215574")
|
25
|
+
# Transcript tr_rev is ENST00000358041
|
26
|
+
@tr_rev = Transcript.find_by_stable_id("ENST00000358041")
|
27
|
+
end
|
28
|
+
|
29
|
+
def test_transcript_coords
|
30
|
+
assert_equal(531733, @tr_fw.seq_region_start)
|
31
|
+
assert_equal(542084, @tr_fw.seq_region_end)
|
32
|
+
assert_equal(644371, @tr_rev.seq_region_start)
|
33
|
+
assert_equal(647309, @tr_rev.seq_region_end)
|
34
|
+
end
|
35
|
+
|
36
|
+
def test_coding_regions_genomic_coords_of_fw
|
37
|
+
assert_equal(531932, @tr_fw.coding_region_genomic_start)
|
38
|
+
assert_equal(541552, @tr_fw.coding_region_genomic_end)
|
39
|
+
end
|
40
|
+
|
41
|
+
def test_coding_regions_genomic_coords_of_rev
|
42
|
+
assert_equal(646652, @tr_rev.coding_region_genomic_start)
|
43
|
+
assert_equal(647047, @tr_rev.coding_region_genomic_end)
|
44
|
+
end
|
45
|
+
|
46
|
+
def test_coding_regions_cdna_coords_of_fw
|
47
|
+
assert_equal(200, @tr_fw.coding_region_cdna_start)
|
48
|
+
assert_equal(910, @tr_fw.coding_region_cdna_end)
|
49
|
+
end
|
50
|
+
|
51
|
+
def test_coding_regions_cdna_coords_of_rev
|
52
|
+
assert_equal(263, @tr_rev.coding_region_cdna_start)
|
53
|
+
assert_equal(658, @tr_rev.coding_region_cdna_end)
|
54
|
+
end
|
55
|
+
|
56
|
+
end
|
57
|
+
|
58
|
+
class GenomicVsCDna < Test::Unit::TestCase
|
59
|
+
#From BioMart. Columns:
|
60
|
+
# Ensembl_Transcript_ID
|
61
|
+
# Chromosome
|
62
|
+
# Strand
|
63
|
+
# Ensembl_Exon_ID
|
64
|
+
# Exon_Chr_Start
|
65
|
+
# Exon_Chr_End
|
66
|
+
# Exon_Rank_in_Transcript
|
67
|
+
#ENST00000215574 19 1 ENSE00000655676 531733 532108 1
|
68
|
+
#ENST00000215574 19 1 ENSE00000655677 535837 535923 2
|
69
|
+
#ENST00000215574 19 1 ENSE00000655678 536243 536340 3
|
70
|
+
#ENST00000215574 19 1 ENSE00000655679 537013 537147 4
|
71
|
+
#ENST00000215574 19 1 ENSE00000655680 541339 542084 5
|
72
|
+
#
|
73
|
+
#ENST00000315489 19 -1 ENSE00001215510 474621 474983 1
|
74
|
+
#ENST00000315489 19 -1 ENSE00001215495 472394 472501 2
|
75
|
+
#ENST00000315489 19 -1 ENSE00001215487 467649 467762 3
|
76
|
+
#ENST00000315489 19 -1 ENSE00001215506 463344 464364 4
|
77
|
+
def setup
|
78
|
+
# Transcript tr_fw is ENST00000215574
|
79
|
+
@tr_fw = Transcript.find_by_stable_id("ENST00000215574")
|
80
|
+
# Transcript tr_rev is ENST00000315489
|
81
|
+
@tr_rev = Transcript.find_by_stable_id("ENST00000315489")
|
82
|
+
end
|
83
|
+
|
84
|
+
def test_identify_exon
|
85
|
+
assert_equal("ENSE00000655679", @tr_fw.exon_for_cdna_position(601).stable_id)
|
86
|
+
assert_equal("ENSE00000655679", @tr_fw.exon_for_genomic_position(537052).stable_id)
|
87
|
+
assert_equal("ENSE00001215487", @tr_rev.exon_for_cdna_position(541).stable_id)
|
88
|
+
assert_equal("ENSE00001215487", @tr_rev.exon_for_genomic_position(467693).stable_id)
|
89
|
+
end
|
90
|
+
|
91
|
+
def test_cdna2genomic
|
92
|
+
assert_equal(537052, @tr_fw.cdna2genomic(601))
|
93
|
+
assert_equal(467693, @tr_rev.cdna2genomic(541))
|
94
|
+
end
|
95
|
+
|
96
|
+
def test_cds2genomic
|
97
|
+
assert_equal(537052, @tr_fw.cds2genomic(401))
|
98
|
+
assert_equal(467693, @tr_rev.cds2genomic(304))
|
99
|
+
end
|
100
|
+
|
101
|
+
def test_genomic2cdna
|
102
|
+
assert_equal(601, @tr_fw.genomic2cdna(537052))
|
103
|
+
assert_equal(541, @tr_rev.genomic2cdna(467693))
|
104
|
+
end
|
105
|
+
|
106
|
+
def test_genomic2cds
|
107
|
+
assert_equal(401, @tr_fw.genomic2cds(537052))
|
108
|
+
assert_equal(304, @tr_rev.genomic2cds(467693))
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
class TestIntron < Test::Unit::TestCase
|
113
|
+
def setup
|
114
|
+
@transcript = Transcript.find_by_stable_id("ENST00000215574")
|
115
|
+
@introns = @transcript.introns
|
116
|
+
end
|
117
|
+
|
118
|
+
def test_get_introns
|
119
|
+
assert_equal(4, @introns.length)
|
120
|
+
end
|
121
|
+
|
122
|
+
def test_intron_slices
|
123
|
+
assert_equal('chromosome:GRCh37:19:532109:535836:1', @introns[0].slice.to_s)
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|