ruby-ensembl-api 0.9.6 → 1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/TUTORIAL.rdoc +1 -1
- data/bin/variation_effect_predictor +106 -0
- data/lib/ensembl.rb +2 -2
- data/lib/ensembl/core/activerecord.rb +119 -225
- data/lib/ensembl/core/collection.rb +14 -10
- data/lib/ensembl/core/project.rb +6 -8
- data/lib/ensembl/core/slice.rb +87 -123
- data/lib/ensembl/core/transcript.rb +49 -65
- data/lib/ensembl/core/transform.rb +6 -8
- data/lib/ensembl/db_connection.rb +56 -72
- data/lib/ensembl/variation/activerecord.rb +138 -8
- data/lib/ensembl/variation/variation.rb +284 -46
- data/samples/ensembl_genomes_example.rb +60 -0
- data/samples/examples_perl_tutorial.rb +125 -0
- data/samples/small_example_ruby_api.rb +34 -0
- data/samples/variation_example.rb +67 -0
- data/test/unit/{release_56 → release_60}/core/test_gene.rb +6 -6
- data/test/unit/release_60/core/test_project_human.rb +38 -0
- data/test/unit/{release_56 → release_60}/core/test_slice.rb +1 -8
- data/test/unit/release_60/core/test_transcript.rb +126 -0
- data/test/unit/{release_53 → release_60}/core/test_transform.rb +21 -21
- data/test/unit/release_60/variation/test_activerecord.rb +213 -0
- data/test/unit/release_60/variation/test_consequence.rb +158 -0
- data/test/unit/{release_56 → release_60}/variation/test_variation.rb +18 -17
- data/test/unit/test_connection.rb +2 -2
- data/test/unit/test_releases.rb +8 -8
- metadata +27 -43
- data/test/unit/data/seq_c6qbl.fa +0 -10
- data/test/unit/data/seq_cso19_coding.fa +0 -16
- data/test/unit/data/seq_cso19_transcript.fa +0 -28
- data/test/unit/data/seq_drd3_gene.fa +0 -838
- data/test/unit/data/seq_drd3_transcript.fa +0 -22
- data/test/unit/data/seq_drd4_transcript.fa +0 -24
- data/test/unit/data/seq_forward_composite.fa +0 -1669
- data/test/unit/data/seq_par_boundary.fa +0 -169
- data/test/unit/data/seq_rnd3_transcript.fa +0 -47
- data/test/unit/data/seq_ub2r1_coding.fa +0 -13
- data/test/unit/data/seq_ub2r1_gene.fa +0 -174
- data/test/unit/data/seq_ub2r1_transcript.fa +0 -26
- data/test/unit/data/seq_y.fa +0 -2
- data/test/unit/ensembl_genomes/test_collection.rb +0 -51
- data/test/unit/ensembl_genomes/test_gene.rb +0 -52
- data/test/unit/ensembl_genomes/test_slice.rb +0 -71
- data/test/unit/ensembl_genomes/test_variation.rb +0 -17
- data/test/unit/release_50/core/test_project.rb +0 -215
- data/test/unit/release_50/core/test_project_human.rb +0 -58
- data/test/unit/release_50/core/test_relationships.rb +0 -66
- data/test/unit/release_50/core/test_sequence.rb +0 -175
- data/test/unit/release_50/core/test_slice.rb +0 -121
- data/test/unit/release_50/core/test_transcript.rb +0 -108
- data/test/unit/release_50/core/test_transform.rb +0 -223
- data/test/unit/release_50/variation/test_activerecord.rb +0 -143
- data/test/unit/release_50/variation/test_variation.rb +0 -84
- data/test/unit/release_53/core/test_gene.rb +0 -66
- data/test/unit/release_53/core/test_project.rb +0 -96
- data/test/unit/release_53/core/test_project_human.rb +0 -65
- data/test/unit/release_53/core/test_slice.rb +0 -47
- data/test/unit/release_53/variation/test_activerecord.rb +0 -145
- data/test/unit/release_53/variation/test_variation.rb +0 -71
- data/test/unit/release_56/core/test_project.rb +0 -96
- data/test/unit/release_56/core/test_transform.rb +0 -63
- data/test/unit/release_56/variation/test_activerecord.rb +0 -142
@@ -0,0 +1,60 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
require File.dirname(__FILE__) + '/../lib/ensembl.rb' # load the library relative to this script instead of the current working directory
|
4
|
+
include Ensembl::Core
|
5
|
+
|
6
|
+
DBConnection.ensemblgenomes_connect('bacillus_cereus_ZK',7) # Connect to the Ensembl Genomes MySQL server and fetch the data for 'bacillus_cereus_ZK'
|
7
|
+
slice = Slice.fetch_by_region('chromosome',"Chromosome",4791870,4799024) # retrieve a slice for this species
|
8
|
+
|
9
|
+
puts "\nConnecting to 'bacillus_cereus_ZK' database..."
|
10
|
+
# show all the species inside the collection, as 'bacillus_cereus_ZK' genome is stored inside the bacillus_collection database
|
11
|
+
if Collection.check
|
12
|
+
puts "Is this a collection? #{Collection.check}"
|
13
|
+
puts "\nOther species present inside the collection:"
|
14
|
+
Collection.species.each do |s|
|
15
|
+
puts s
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
puts "\nSequence:"
|
20
|
+
# get the sequence
|
21
|
+
puts slice.seq
|
22
|
+
|
23
|
+
puts "\nGenes:"
|
24
|
+
# get all the genes overlapping the slice
|
25
|
+
genes = slice.genes
|
26
|
+
genes.each do |g|
|
27
|
+
print "#{g.stable_id} #{g.name}\n"
|
28
|
+
end
|
29
|
+
|
30
|
+
# CHANGE DATABASE
|
31
|
+
|
32
|
+
puts "\n########################\nConnecting to 'mycobacterium_collection' database..."
|
33
|
+
DBConnection.ensemblgenomes_connect('mycobacterium_collection',7) # connect directly to a collection database
|
34
|
+
slice = Slice.fetch_by_region('chromosome',"Chromosome",752908,759374,1,"Mycobacterium tuberculosis H37Rv") # manually set the species to fetch the slice from
|
35
|
+
|
36
|
+
# show all the species inside the collection
|
37
|
+
if Collection.check
|
38
|
+
puts "\nIs this a collection? #{Collection.check}"
|
39
|
+
puts "\nOther species present inside the collection:"
|
40
|
+
Collection.species.each do |s|
|
41
|
+
puts s
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
puts "\nSequence:"
|
46
|
+
# get the sequence
|
47
|
+
puts slice.seq
|
48
|
+
|
49
|
+
puts "\nGenes:"
|
50
|
+
# get all the genes overlapping the slice
|
51
|
+
genes = slice.genes
|
52
|
+
genes.each do |g|
|
53
|
+
print "#{g.stable_id} #{g.name}\n"
|
54
|
+
end
|
55
|
+
|
56
|
+
|
57
|
+
|
58
|
+
|
59
|
+
|
60
|
+
|
@@ -0,0 +1,125 @@
|
|
1
|
+
#!/usr/local/bin/ruby
|
2
|
+
|
3
|
+
require File.dirname(__FILE__) + '/../lib/ensembl.rb'
|
4
|
+
require 'yaml'
|
5
|
+
require 'progressbar'
|
6
|
+
|
7
|
+
include Ensembl::Core
|
8
|
+
|
9
|
+
## Connecting to the Database
|
10
|
+
DBConnection.connect('homo_sapiens',45)
|
11
|
+
|
12
|
+
## Object adaptors
|
13
|
+
# not necessary, ruby uses class methods instead
|
14
|
+
|
15
|
+
## Slices
|
16
|
+
puts "== Some slices: =="
|
17
|
+
puts Slice.fetch_by_region('chromosome','X').to_yaml
|
18
|
+
puts Slice.fetch_by_region('clone','AL359765.6').to_yaml
|
19
|
+
puts Slice.fetch_by_region('supercontig','NT_011333').to_yaml
|
20
|
+
puts Slice.fetch_by_region('chromosome', '20', 1000000, 2000000).to_yaml
|
21
|
+
puts Slice.fetch_by_gene_stable_id('ENSG00000099889', 5000).to_yaml
|
22
|
+
|
23
|
+
puts "== All chromosomes: =="
|
24
|
+
Slice.fetch_all('chromosome', 'NCBI36').each do |chr|
|
25
|
+
puts chr.display_name
|
26
|
+
end
|
27
|
+
|
28
|
+
puts "== Number of clone slices: " + Slice.fetch_all('clone').length.to_s
|
29
|
+
|
30
|
+
puts "== Subslices of chromosome 19 (length = 10000000; overlap = 250): =="
|
31
|
+
Slice.fetch_by_region('chromosome','19').split(10000000, 250).each do |sub_slice|
|
32
|
+
puts sub_slice.display_name
|
33
|
+
end
|
34
|
+
|
35
|
+
puts "== Sequence of a very small slice: Chr19:112200..112250 =="
|
36
|
+
slice = Slice.fetch_by_region('chromosome','19',112200,112250)
|
37
|
+
puts slice.seq
|
38
|
+
|
39
|
+
puts "== Query a slice about itself =="
|
40
|
+
puts slice.to_yaml
|
41
|
+
|
42
|
+
puts "== Get genes for a slice and print exons and introns=="
|
43
|
+
slice = Slice.fetch_by_region('chromosome','19',112200,500000)
|
44
|
+
slice.genes.each do |gene|
|
45
|
+
puts gene.stable_id
|
46
|
+
puts 'Exons for first transcript:'
|
47
|
+
puts gene.transcripts[0].exons.sort_by{|e| e.start}.collect{|e| e.start.to_s + '->' + e.stop.to_s}.join("\t")
|
48
|
+
puts 'Introns for first transcript:'
|
49
|
+
puts gene.transcripts[0].introns.sort_by{|i| i.start}.collect{|i| i.start.to_s + '->' + i.stop.to_s}.join("\t")
|
50
|
+
puts
|
51
|
+
end
|
52
|
+
|
53
|
+
puts "== Get DNA alignment features for 20:80000..88000 =="
|
54
|
+
slice = Slice.fetch_by_region('chromosome','20',80000,88000)
|
55
|
+
slice.dna_align_features[0..2].each do |daf|
|
56
|
+
puts daf.to_yaml
|
57
|
+
end
|
58
|
+
|
59
|
+
puts "== Get sequence for transcript ENST00000383673 =="
|
60
|
+
transcript = Transcript.find_by_stable_id('ENST00000383673')
|
61
|
+
puts transcript.seq
|
62
|
+
|
63
|
+
puts "== Get synonyms for marker D9S1038E =="
|
64
|
+
marker = Ensembl::Core::Marker.find_by_name('D9S1038E')
|
65
|
+
marker.marker_synonyms[0..5].each do |ms|
|
66
|
+
puts ms.to_yaml
|
67
|
+
end
|
68
|
+
|
69
|
+
puts "== Get 5 features for this marker =="
|
70
|
+
marker = Ensembl::Core::Marker.find_by_name('D9S1038E')
|
71
|
+
marker.marker_features[0..5].each do |mf|
|
72
|
+
puts 'name: ' + marker.name
|
73
|
+
puts 'seq_region name: ' + mf.seq_region.name
|
74
|
+
puts 'start: ' + mf.seq_region_start.to_s
|
75
|
+
puts 'stop: ' + mf.seq_region_end.to_s
|
76
|
+
end
|
77
|
+
|
78
|
+
puts "== Get 5 features for chromosome 22 =="
|
79
|
+
slice = Ensembl::Core::Slice.fetch_by_region('chromosome', '22')
|
80
|
+
slice.marker_features.slice(0,5).each do |mf|
|
81
|
+
puts mf.marker.name + "\t" + mf.slice.display_name
|
82
|
+
end
|
83
|
+
|
84
|
+
puts "== Transcript: from cDNA to genomic positions =="
|
85
|
+
transcript = Ensembl::Core::Transcript.find(276333)
|
86
|
+
puts "Transcript is ENST00000215574"
|
87
|
+
puts "Genomic position 488053 is cDNA position: " + transcript.genomic2cdna(488053).to_s
|
88
|
+
puts "cDNA position 601 is genomic position: " + transcript.cdna2genomic(601).to_s
|
89
|
+
puts "Genomic position 488053 is CDS position: " + transcript.genomic2cds(488053).to_s
|
90
|
+
puts "CDS position 401 is genomic position: " + transcript.cds2genomic(401).to_s
|
91
|
+
|
92
|
+
puts "== Transcript: get pieces of DNA for a transcript =="
|
93
|
+
transcript = Ensembl::Core::Transcript.find_by_stable_id('ENST00000380593')
|
94
|
+
puts transcript.stable_id
|
95
|
+
puts "5'UTR: " + transcript.five_prime_utr_seq
|
96
|
+
puts "3'UTR: " + transcript.three_prime_utr_seq
|
97
|
+
puts "CDS: " + transcript.cds_seq
|
98
|
+
puts "protein: " + transcript.protein_seq
|
99
|
+
|
100
|
+
#### And now we'll do some stuff with cows.
|
101
|
+
CoreDBConnection.connection.disconnect!
|
102
|
+
CoreDBConnection.connect('bos_taurus')
|
103
|
+
|
104
|
+
puts "== Projecting a slice from component to assembly: =="
|
105
|
+
puts "== scaffold Chr4.003.105:42..2007 to chromosome level =="
|
106
|
+
source_slice = Slice.fetch_by_region('scaffold','Chr4.003.105', 42, 2007)
|
107
|
+
target_slices = source_slice.project('chromosome')
|
108
|
+
puts target_slices.collect{|s| s.display_name}.join("\n")
|
109
|
+
|
110
|
+
puts "== Projecting a slice from assembly to components: =="
|
111
|
+
puts "== chromosome slice chr4:329500..380000 to contig level =="
|
112
|
+
source_slice = Slice.fetch_by_region('chromosome', '4', 329500, 380000)
|
113
|
+
target_slices = source_slice.project('contig')
|
114
|
+
puts target_slices.collect{|s| s.display_name}.join("\n")
|
115
|
+
|
116
|
+
puts "== Transforming a gene from chromosome level to scaffold level =="
|
117
|
+
gene = Gene.find(2408)
|
118
|
+
cloned_gene = gene.transform('scaffold')
|
119
|
+
puts gene.slice.display_name
|
120
|
+
puts cloned_gene.slice.display_name
|
121
|
+
|
122
|
+
puts "== Relationships for Gene class =="
|
123
|
+
puts 'belongs to: ' + Gene.reflect_on_all_associations(:belongs_to).collect{|a| a.name.to_s}.join(',')
|
124
|
+
puts 'has many: ' + Gene.reflect_on_all_associations(:has_many).collect{|a| a.name.to_s}.join(',')
|
125
|
+
|
@@ -0,0 +1,34 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
require File.dirname(__FILE__) + '/../lib/ensembl.rb' # load the library relative to this script instead of the current working directory
|
3
|
+
|
4
|
+
include Ensembl::Core
|
5
|
+
|
6
|
+
DBConnection.connect('homo_sapiens')
|
7
|
+
|
8
|
+
puts "== Get a slice =="
|
9
|
+
slice = Slice.fetch_by_region('chromosome','4',10000,99999,-1)
|
10
|
+
puts slice.display_name
|
11
|
+
|
12
|
+
puts "== Print all gene for that slice (regardless of what coord_system genes are annotated on) =="
|
13
|
+
slice.genes.each do |gene|
|
14
|
+
puts gene.stable_id + "\t" + gene.status + "\t" + gene.slice.display_name
|
15
|
+
end
|
16
|
+
|
17
|
+
puts "== Get a transcript and print its 5'UTR, CDS and protein sequence =="
|
18
|
+
transcript = Transcript.find_by_stable_id('ENST00000380593')
|
19
|
+
puts "5'UTR: " + transcript.five_prime_utr_seq
|
20
|
+
puts "CDS: " + transcript.cds_seq
|
21
|
+
puts "peptide: " + transcript.protein_seq
|
22
|
+
|
23
|
+
DBConnection.connection.disconnect!
|
24
|
+
DBConnection.connect('bos_taurus',45)
|
25
|
+
|
26
|
+
puts "== Transforming a cow gene from chromosome level to scaffold level =="
|
27
|
+
gene = Gene.find(2408)
|
28
|
+
cloned_gene = gene.transform('scaffold')
|
29
|
+
puts "Original: " + gene.slice.display_name
|
30
|
+
puts "Now: " + cloned_gene.slice.display_name
|
31
|
+
|
32
|
+
puts "== What things are related to a 'gene' object? =="
|
33
|
+
puts 'Genes belong to: ' + Gene.reflect_on_all_associations(:belongs_to).collect{|a| a.name.to_s}.join(',')
|
34
|
+
puts 'Genes have many: ' + Gene.reflect_on_all_associations(:has_many).collect{|a| a.name.to_s}.join(',')
|
@@ -0,0 +1,67 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
# Based on Perl API tutorial
|
4
|
+
# http://www.ensembl.org/info/using/api/variation/variation_tutorial.html
|
5
|
+
|
6
|
+
|
7
|
+
require File.dirname(__FILE__) + '/../lib/ensembl.rb'
|
8
|
+
include Ensembl::Core
|
9
|
+
include Ensembl::Variation
|
10
|
+
|
11
|
+
Ensembl::Variation::DBConnection.connect('homo_sapiens',60)
|
12
|
+
# The connection with the Core database can be omitted. It is created automatically
|
13
|
+
# when needed, using Variation DB connection parameters. The database name is derived
|
14
|
+
# from Variation DB name. If you are using non-conventional DB names (e.g. for a local copy of the Ensembl database)
|
15
|
+
# an exception will be raised. Otherwise, if a Core DB connection is already
|
16
|
+
# present, that connection will be used by default, instead of creating a new one.
|
17
|
+
|
18
|
+
id = ['rs73514758','rs77811736']
|
19
|
+
|
20
|
+
id.each do |i|
|
21
|
+
v = Variation.find_by_name(i)
|
22
|
+
v.variation_features.each do |vf|
|
23
|
+
|
24
|
+
up_seq,down_seq = vf.flanking_seq # retrieve upstream and downstream flanking sequences
|
25
|
+
|
26
|
+
seq_region_name = vf.fetch_region.seq_region.name # fetch the genomic region of the Variation and get the region name.
|
27
|
+
# Automatically sets the connection with Core DB, if needed.
|
28
|
+
|
29
|
+
puts "\n== VARIATION FEATURE =="
|
30
|
+
print "NAME: #{vf.variation_name}\n ALLELE: #{vf.allele_string}\n UPSTREAM SEQ: #{up_seq.seq} \n DOWNSTREAM SEQ: #{down_seq.seq}\n SEQ REGION NAME :#{seq_region_name}\n"
|
31
|
+
vf.transcript_variations.each do |tv|
|
32
|
+
t = tv.transcript # retrieve Ensembl::Core::Transcript from Core DB. Automatically sets the connection, if necessary.
|
33
|
+
puts "== TRANSCRIPT VARIATION =="
|
34
|
+
print "ALLELE PEPTIDE STRING: #{tv.peptide_allele_string} TRANSCRIPT STABLE ID: #{t.stable_id} GENE STABLE ID: #{t.gene.stable_id}\n"
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
# Returns all Variations present on a genomic region
|
40
|
+
|
41
|
+
puts "\n== SEARCHING FOR VARIATIONS ON CHR:1:50000:51000 =="
|
42
|
+
|
43
|
+
# Even in this case, Variation DB connection can be set automatically by specific Slice methods
|
44
|
+
|
45
|
+
s = Slice.fetch_by_region('chromosome',1,50000,51000)
|
46
|
+
variation_features = s.get_variation_features # automatically sets the connection with Variation DB, if needed.
|
47
|
+
variation_features.each do |vf|
|
48
|
+
print "NAME: #{vf.variation_name} ALLELE: #{vf.allele_string} SEQ REGION NAME: #{vf.seq_region.name}\n"
|
49
|
+
end
|
50
|
+
|
51
|
+
puts "\n== GENOTYPED VARIATIONS =="
|
52
|
+
|
53
|
+
genotyped_variation_features = s.get_genotyped_variation_features # automatically sets the connection with Variation DB, if needed.
|
54
|
+
genotyped_variation_features.each do |gvf|
|
55
|
+
print "NAME: #{gvf.variation_name} ALLELE: #{gvf.allele_string} SEQ REGION NAME: #{gvf.seq_region.name}\n"
|
56
|
+
end
|
57
|
+
|
58
|
+
puts "\n== STRUCTURAL VARIATIONS ON CHR:11:60125:320837 =="
|
59
|
+
slice = Ensembl::Core::Slice.fetch_by_region('chromosome',11,60125,320837)
|
60
|
+
structural_variations = slice.get_structural_variations # query the chromosome 11 slice fetched above, not the earlier chr 1 slice 's'
|
61
|
+
structural_variations.each do |sv|
|
62
|
+
print "NAME: #{sv.variation_name} SEQ REGION NAME: #{sv.seq_region.name}\n"
|
63
|
+
end
|
64
|
+
|
65
|
+
puts "\n== What things are related to a 'variation' object? =="
|
66
|
+
puts 'Variation belong to: ' + Variation.reflect_on_all_associations(:belongs_to).collect{|a| a.name.to_s}.join(',')
|
67
|
+
puts 'Variation have many: ' + Variation.reflect_on_all_associations(:has_many).collect{|a| a.name.to_s}.join(',')
|
@@ -1,5 +1,5 @@
|
|
1
1
|
#
|
2
|
-
# = test/unit/
|
2
|
+
# = test/unit/release_60/core/test_gene.rb - Unit test for Ensembl::Core
|
3
3
|
#
|
4
4
|
# Copyright:: Copyright (C) 2009 Francesco Strozzi <francesco.strozzi@gmail.com>
|
5
5
|
#
|
@@ -19,7 +19,7 @@ include Ensembl::Core
|
|
19
19
|
class TestGene < Test::Unit::TestCase
|
20
20
|
|
21
21
|
def setup
|
22
|
-
DBConnection.connect('homo_sapiens',
|
22
|
+
DBConnection.connect('homo_sapiens', 60)
|
23
23
|
end
|
24
24
|
|
25
25
|
def teardown
|
@@ -31,10 +31,10 @@ class TestGene < Test::Unit::TestCase
|
|
31
31
|
assert_equal("ENSG00000006451",g.stable_id)
|
32
32
|
assert_equal("7",g.seq_region.name)
|
33
33
|
assert_equal(39663082,g.start)
|
34
|
-
assert_equal(39747723
|
34
|
+
assert_equal(39747723,g.stop)
|
35
35
|
assert_equal(1,g.strand)
|
36
36
|
assert_equal(84642,g.seq.length)
|
37
|
-
assert_equal("
|
37
|
+
assert_equal("v-ral simian leukemia viral oncogene homolog A (ras related) [Source:HGNC Symbol;Acc:9839]",g.description)
|
38
38
|
assert_equal("RALA",g.name)
|
39
39
|
end
|
40
40
|
|
@@ -42,9 +42,9 @@ class TestGene < Test::Unit::TestCase
|
|
42
42
|
g = Gene.find_by_stable_id("ENSG00000006451")
|
43
43
|
t = g.transcripts
|
44
44
|
assert_equal(5,t.size)
|
45
|
-
assert_equal("
|
45
|
+
assert_equal("ENST00000005257",t[0].stable_id)
|
46
46
|
t = t[0]
|
47
|
-
assert_equal(
|
47
|
+
assert_equal(2880,t.seq.length)
|
48
48
|
end
|
49
49
|
|
50
50
|
def test_exons
|
@@ -0,0 +1,38 @@
|
|
1
|
+
#
|
2
|
+
# = test/unit/release_60/core/test_project_human.rb - Unit test for Ensembl::Core
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2009
|
5
|
+
# Jan Aerts <http://jandot.myopenid.com>
|
6
|
+
# Francesco Strozzi <francesco.strozzi@gmail.com>
|
7
|
+
# License:: Ruby's
|
8
|
+
#
|
9
|
+
# $Id:
|
10
|
+
require 'pathname'
|
11
|
+
libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'lib')).cleanpath.to_s
|
12
|
+
$:.unshift(libpath) unless $:.include?(libpath)
|
13
|
+
require 'test/unit'
|
14
|
+
require 'lib/ensembl'
|
15
|
+
|
16
|
+
include Ensembl::Core
|
17
|
+
|
18
|
+
class AssemblyExceptions < Test::Unit::TestCase
|
19
|
+
|
20
|
+
def setup
|
21
|
+
DBConnection.connect('homo_sapiens', 60)
|
22
|
+
end
|
23
|
+
|
24
|
+
def teardown
|
25
|
+
DBConnection.remove_connection
|
26
|
+
end
|
27
|
+
|
28
|
+
def test_chr_x
|
29
|
+
source_slice = Slice.fetch_by_region('chromosome','X', 2709497, 2709520)
|
30
|
+
assert_equal('ctgaagaattgtgtttcttcccta', source_slice.seq)
|
31
|
+
end
|
32
|
+
|
33
|
+
def test_slice_overlapping_PAR_and_allosome
|
34
|
+
source_slice = Slice.fetch_by_region('chromosome','Y',2709500,2709540)
|
35
|
+
assert_equal('AGAAACTGAAAATGCTAAGAAATTCAGTTCCAGGATATGAA', source_slice.seq.upcase)
|
36
|
+
end
|
37
|
+
|
38
|
+
end
|
@@ -19,7 +19,7 @@ include Ensembl::Core
|
|
19
19
|
class TestSlice < Test::Unit::TestCase
|
20
20
|
|
21
21
|
def setup
|
22
|
-
DBConnection.connect('homo_sapiens',
|
22
|
+
DBConnection.connect('homo_sapiens', 60)
|
23
23
|
end
|
24
24
|
|
25
25
|
def teardown
|
@@ -44,11 +44,4 @@ class TestSlice < Test::Unit::TestCase
|
|
44
44
|
assert_equal(31786816,s_rev.stop)
|
45
45
|
end
|
46
46
|
|
47
|
-
def test_slice_genes
|
48
|
-
slice = Slice.fetch_by_region("chromosome","13",31773073,31909413,1)
|
49
|
-
genes = slice.genes
|
50
|
-
assert_equal("ENSG00000229427",genes[1].stable_id)
|
51
|
-
assert_equal("ENSG00000187676",genes[0].stable_id)
|
52
|
-
end
|
53
|
-
|
54
47
|
end
|
@@ -0,0 +1,126 @@
|
|
1
|
+
#
|
2
|
+
# = test/unit/test_transcript.rb - Unit test for Ensembl::Core
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2007
|
5
|
+
# Jan Aerts <http://jandot.myopenid.com>
|
6
|
+
# License:: Ruby's
|
7
|
+
#
|
8
|
+
# $Id:
|
9
|
+
require 'pathname'
|
10
|
+
libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'lib')).cleanpath.to_s
|
11
|
+
$:.unshift(libpath) unless $:.include?(libpath)
|
12
|
+
|
13
|
+
require 'test/unit'
|
14
|
+
|
15
|
+
require 'lib/ensembl'
|
16
|
+
|
17
|
+
include Ensembl::Core
|
18
|
+
|
19
|
+
DBConnection.connect('homo_sapiens', 60)
|
20
|
+
|
21
|
+
class CodingPositions < Test::Unit::TestCase
|
22
|
+
def setup
|
23
|
+
# Transcript tr_fw is ENST00000215574
|
24
|
+
@tr_fw = Transcript.find_by_stable_id("ENST00000215574")
|
25
|
+
# Transcript tr_rev is ENST00000358041
|
26
|
+
@tr_rev = Transcript.find_by_stable_id("ENST00000358041")
|
27
|
+
end
|
28
|
+
|
29
|
+
def test_transcript_coords
|
30
|
+
assert_equal(531733, @tr_fw.seq_region_start)
|
31
|
+
assert_equal(542084, @tr_fw.seq_region_end)
|
32
|
+
assert_equal(644371, @tr_rev.seq_region_start)
|
33
|
+
assert_equal(647309, @tr_rev.seq_region_end)
|
34
|
+
end
|
35
|
+
|
36
|
+
def test_coding_regions_genomic_coords_of_fw
|
37
|
+
assert_equal(531932, @tr_fw.coding_region_genomic_start)
|
38
|
+
assert_equal(541552, @tr_fw.coding_region_genomic_end)
|
39
|
+
end
|
40
|
+
|
41
|
+
def test_coding_regions_genomic_coords_of_rev
|
42
|
+
assert_equal(646652, @tr_rev.coding_region_genomic_start)
|
43
|
+
assert_equal(647047, @tr_rev.coding_region_genomic_end)
|
44
|
+
end
|
45
|
+
|
46
|
+
def test_coding_regions_cdna_coords_of_fw
|
47
|
+
assert_equal(200, @tr_fw.coding_region_cdna_start)
|
48
|
+
assert_equal(910, @tr_fw.coding_region_cdna_end)
|
49
|
+
end
|
50
|
+
|
51
|
+
def test_coding_regions_cdna_coords_of_rev
|
52
|
+
assert_equal(263, @tr_rev.coding_region_cdna_start)
|
53
|
+
assert_equal(658, @tr_rev.coding_region_cdna_end)
|
54
|
+
end
|
55
|
+
|
56
|
+
end
|
57
|
+
|
58
|
+
class GenomicVsCDna < Test::Unit::TestCase
|
59
|
+
#From BioMart. Columns:
|
60
|
+
# Ensembl_Transcript_ID
|
61
|
+
# Chromosome
|
62
|
+
# Strand
|
63
|
+
# Ensembl_Exon_ID
|
64
|
+
# Exon_Chr_Start
|
65
|
+
# Exon_Chr_End
|
66
|
+
# Exon_Rank_in_Transcript
|
67
|
+
#ENST00000215574 19 1 ENSE00000655676 531733 532108 1
|
68
|
+
#ENST00000215574 19 1 ENSE00000655677 535837 535923 2
|
69
|
+
#ENST00000215574 19 1 ENSE00000655678 536243 536340 3
|
70
|
+
#ENST00000215574 19 1 ENSE00000655679 537013 537147 4
|
71
|
+
#ENST00000215574 19 1 ENSE00000655680 541339 542084 5
|
72
|
+
#
|
73
|
+
#ENST00000315489 19 -1 ENSE00001215510 474621 474983 1
|
74
|
+
#ENST00000315489 19 -1 ENSE00001215495 472394 472501 2
|
75
|
+
#ENST00000315489 19 -1 ENSE00001215487 467649 467762 3
|
76
|
+
#ENST00000315489 19 -1 ENSE00001215506 463344 464364 4
|
77
|
+
def setup
|
78
|
+
# Transcript tr_fw is ENST00000215574
|
79
|
+
@tr_fw = Transcript.find_by_stable_id("ENST00000215574")
|
80
|
+
# Transcript tr_rev is ENST00000315489
|
81
|
+
@tr_rev = Transcript.find_by_stable_id("ENST00000315489")
|
82
|
+
end
|
83
|
+
|
84
|
+
def test_identify_exon
|
85
|
+
assert_equal("ENSE00000655679", @tr_fw.exon_for_cdna_position(601).stable_id)
|
86
|
+
assert_equal("ENSE00000655679", @tr_fw.exon_for_genomic_position(537052).stable_id)
|
87
|
+
assert_equal("ENSE00001215487", @tr_rev.exon_for_cdna_position(541).stable_id)
|
88
|
+
assert_equal("ENSE00001215487", @tr_rev.exon_for_genomic_position(467693).stable_id)
|
89
|
+
end
|
90
|
+
|
91
|
+
def test_cdna2genomic
|
92
|
+
assert_equal(537052, @tr_fw.cdna2genomic(601))
|
93
|
+
assert_equal(467693, @tr_rev.cdna2genomic(541))
|
94
|
+
end
|
95
|
+
|
96
|
+
def test_cds2genomic
|
97
|
+
assert_equal(537052, @tr_fw.cds2genomic(401))
|
98
|
+
assert_equal(467693, @tr_rev.cds2genomic(304))
|
99
|
+
end
|
100
|
+
|
101
|
+
def test_genomic2cdna
|
102
|
+
assert_equal(601, @tr_fw.genomic2cdna(537052))
|
103
|
+
assert_equal(541, @tr_rev.genomic2cdna(467693))
|
104
|
+
end
|
105
|
+
|
106
|
+
def test_genomic2cds
|
107
|
+
assert_equal(401, @tr_fw.genomic2cds(537052))
|
108
|
+
assert_equal(304, @tr_rev.genomic2cds(467693))
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
class TestIntron < Test::Unit::TestCase
|
113
|
+
def setup
|
114
|
+
@transcript = Transcript.find_by_stable_id("ENST00000215574")
|
115
|
+
@introns = @transcript.introns
|
116
|
+
end
|
117
|
+
|
118
|
+
def test_get_introns
|
119
|
+
assert_equal(4, @introns.length)
|
120
|
+
end
|
121
|
+
|
122
|
+
def test_intron_slices
|
123
|
+
assert_equal('chromosome:GRCh37:19:532109:535836:1', @introns[0].slice.to_s)
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|