rbbt-sources 0.2.2 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/rbbt/sources/COSTART.rb +2 -3
- data/lib/rbbt/sources/CTCAE.rb +1 -1
- data/lib/rbbt/sources/biomart.rb +32 -32
- data/lib/rbbt/sources/entrez.rb +14 -10
- data/lib/rbbt/sources/go.rb +9 -8
- data/lib/rbbt/sources/organism.rb +36 -10
- data/lib/rbbt/sources/organism/sequence.rb +337 -0
- data/lib/rbbt/sources/polysearch.rb +5 -5
- data/share/install/Organism/Hsa/Rakefile +7 -68
- data/share/install/Organism/Sce/Rakefile +4 -70
- data/share/install/Organism/organism_helpers.rb +305 -0
- data/share/install/lib/helpers.rb +5 -5
- data/test/rbbt/sources/test_biomart.rb +7 -6
- data/test/rbbt/sources/test_entrez.rb +3 -3
- data/test/rbbt/sources/test_organism.rb +32 -3
- data/test/rbbt/sources/test_pubmed.rb +1 -1
- metadata +7 -6
- data/lib/rbbt/sources/Reactome.rb +0 -16
@@ -7,10 +7,10 @@ def tsv_file(url, native, extra, options = {})
|
|
7
7
|
|
8
8
|
case
|
9
9
|
when Array === native
|
10
|
-
options = Misc.add_defaults options, :
|
10
|
+
options = Misc.add_defaults options, :key => native.last
|
11
11
|
key_field = native.first
|
12
12
|
when (String === native or Integer === native)
|
13
|
-
options = Misc.add_defaults options, :
|
13
|
+
options = Misc.add_defaults options, :key => native
|
14
14
|
key_field = nil
|
15
15
|
else
|
16
16
|
key_field = nil
|
@@ -18,10 +18,10 @@ def tsv_file(url, native, extra, options = {})
|
|
18
18
|
|
19
19
|
case
|
20
20
|
when (Array === extra and Array === extra.first)
|
21
|
-
options = Misc.add_defaults options, :
|
21
|
+
options = Misc.add_defaults options, :fields => extra.collect{|e| e.last}
|
22
22
|
fields = extra.collect{|e| e.first}
|
23
23
|
when (Array === extra and not Array === extra.first)
|
24
|
-
options = Misc.add_defaults options, :
|
24
|
+
options = Misc.add_defaults options, :fields => extra
|
25
25
|
fields = (1..extra.length).to_a.collect{|i| "Field#{i}"}
|
26
26
|
else
|
27
27
|
fields = nil
|
@@ -29,7 +29,7 @@ def tsv_file(url, native, extra, options = {})
|
|
29
29
|
|
30
30
|
tsv = TSV.new(Open.open(url), options)
|
31
31
|
tsv.key_field ||= key_field
|
32
|
-
tsv.fields
|
32
|
+
tsv.fields ||= fields
|
33
33
|
tsv
|
34
34
|
end
|
35
35
|
|
@@ -4,17 +4,18 @@ require 'test/unit'
|
|
4
4
|
|
5
5
|
class TestBioMart < Test::Unit::TestCase
|
6
6
|
|
7
|
-
def
|
7
|
+
def test_get
|
8
8
|
assert_raise BioMart::QueryError do
|
9
9
|
BioMart.get('scerevisiae_gene_ensembl','entrezgene', ['protein_id'],['with_unknownattr'])
|
10
10
|
end
|
11
11
|
|
12
|
-
data = BioMart.get('scerevisiae_gene_ensembl','entrezgene', ['protein_id'],[], nil, :nocache => false, :wget_options => {
|
13
|
-
|
12
|
+
data = BioMart.get('scerevisiae_gene_ensembl','entrezgene', ['protein_id'],[], nil, :nocache => false, :merge => true, :wget_options => {:quiet => false})
|
13
|
+
tsv = TSV.new data, :double, :merge => true
|
14
|
+
assert(tsv['852236'][0].include? 'CAA84864')
|
14
15
|
|
15
16
|
data = BioMart.get('scerevisiae_gene_ensembl','entrezgene', ['external_gene_id'],[], data, :nocache => false, :wget_options => { :quiet => false} )
|
16
|
-
|
17
|
-
assert(
|
17
|
+
tsv = TSV.new data, :double, :merge => true
|
18
|
+
assert(tsv['852236'][1].include? 'YBL044W')
|
18
19
|
end
|
19
20
|
|
20
21
|
def test_query
|
@@ -23,7 +24,7 @@ class TestBioMart < Test::Unit::TestCase
|
|
23
24
|
assert(data['852236']['external_gene_id'].include? 'YBL044W')
|
24
25
|
end
|
25
26
|
|
26
|
-
def
|
27
|
+
def test_tsv
|
27
28
|
data = BioMart.tsv('scerevisiae_gene_ensembl',['Entrez Gene', 'entrezgene'], [['Protein ID', 'protein_id'],['RefSeq Peptide','refseq_peptide']], [], nil, :nocache => false, :wget_options => { :quiet => false})
|
28
29
|
|
29
30
|
assert(data['852236']['Protein ID'].include? 'CAA84864')
|
@@ -3,12 +3,12 @@ require 'rbbt/sources/entrez'
|
|
3
3
|
require 'test/unit'
|
4
4
|
|
5
5
|
class TestEntrez < Test::Unit::TestCase
|
6
|
-
$yeast_tax =
|
6
|
+
$yeast_tax = 559292
|
7
7
|
|
8
8
|
def test_entrez2native
|
9
9
|
tax = $yeast_tax
|
10
10
|
fix = proc{|line| line.sub(/SGD:S0/,'S0') }
|
11
|
-
select = proc{|line| line.match(
|
11
|
+
select = proc{|line| line.match(/SGD:S0/)}
|
12
12
|
lexicon = Entrez.entrez2native(tax, :fix => fix, :select => select)
|
13
13
|
|
14
14
|
assert(lexicon['855611'].include? 'S000005056')
|
@@ -40,7 +40,7 @@ class TestEntrez < Test::Unit::TestCase
|
|
40
40
|
|
41
41
|
def test_similarity
|
42
42
|
assert(Entrez.gene_text_similarity(9129, "PRP3 pre-mRNA processing factor 3 homolog (S. cerevisiae)") > 0)
|
43
|
-
assert_equal(0, Entrez.gene_text_similarity("NON
|
43
|
+
assert_equal(0, Entrez.gene_text_similarity("NON EXISTENT GENEID", "PRP3 pre-mRNA processing factor 3 homolog (S. cerevisiae)"))
|
44
44
|
end
|
45
45
|
|
46
46
|
end
|
@@ -1,12 +1,13 @@
|
|
1
1
|
require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
|
2
2
|
require 'rbbt/sources/organism'
|
3
|
+
require 'rbbt/sources/organism/sequence'
|
3
4
|
require 'test/unit'
|
4
5
|
|
5
6
|
class TestEntrez < Test::Unit::TestCase
|
6
7
|
def test_identifiers
|
7
|
-
assert
|
8
|
-
assert
|
9
|
-
assert
|
8
|
+
assert Organism.identifiers('Sce').tsv['S000006120']["Ensembl Gene ID"].include?('YPL199C')
|
9
|
+
assert Organism::Sce.identifiers.tsv['S000006120']["Ensembl Gene ID"].include?('YPL199C')
|
10
|
+
assert Organism.identifiers('Hsa').tsv(:key => "Entrez Gene ID")['1020']["Associated Gene Name"].include?('CDK5')
|
10
11
|
end
|
11
12
|
|
12
13
|
def test_lexicon
|
@@ -24,6 +25,34 @@ class TestEntrez < Test::Unit::TestCase
|
|
24
25
|
assert Organism.organisms.include? "Hsa"
|
25
26
|
assert_equal "Hsa", Organism.organism("Homo sapiens")
|
26
27
|
end
|
28
|
+
|
29
|
+
def test_attach_translations
|
30
|
+
tsv = TSV.new({"1020" => []}, :list)
|
31
|
+
tsv.key_field = "Entrez Gene ID"
|
32
|
+
tsv.fields = []
|
33
|
+
tsv.namespace = "Hsa"
|
34
|
+
|
35
|
+
Organism::Hsa.attach_translations tsv, "Associated Gene Name"
|
36
|
+
Organism::Hsa.attach_translations tsv, "Ensembl Gene ID"
|
37
|
+
|
38
|
+
assert_equal "CDK5", tsv["1020"]["Associated Gene Name"]
|
39
|
+
end
|
40
|
+
|
41
|
+
def test_genes_at_chromosome
|
42
|
+
pos = [12, 117799500]
|
43
|
+
assert_equal "ENSG00000089250", Organism::Hsa.genes_at_chromosome_positions(pos.first, pos.last)
|
44
|
+
end
|
45
|
+
|
46
|
+
def test_genes_at_chromosome_array
|
47
|
+
pos = [12, [117799500, 106903900]]
|
48
|
+
assert_equal ["ENSG00000089250", "ENSG00000013503"], Organism::Hsa.genes_at_chromosome_positions(pos.first, pos.last)
|
49
|
+
end
|
50
|
+
|
51
|
+
def test_genes_at_genomic_positions
|
52
|
+
pos = [[12, 117799500], [12, 106903900], [1, 115259500]]
|
53
|
+
assert_equal ["ENSG00000089250", "ENSG00000013503", "ENSG00000213281"], Organism::Hsa.genes_at_genomic_positions(pos)
|
54
|
+
end
|
55
|
+
|
27
56
|
end
|
28
57
|
|
29
58
|
|
@@ -21,7 +21,7 @@ class TestPubMed < Test::Unit::TestCase
|
|
21
21
|
assert(PubMed.get_article(pmids)[pmid].title == "Discovering semantic features in the literature: a foundation for building functional associations.")
|
22
22
|
end
|
23
23
|
|
24
|
-
def
|
24
|
+
def test_full_text
|
25
25
|
pmid = '16438716'
|
26
26
|
assert(PubMed.get_article(pmid).full_text =~ /Discovering/)
|
27
27
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-sources
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 17
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
-
|
9
|
-
-
|
10
|
-
version: 0.
|
8
|
+
- 3
|
9
|
+
- 1
|
10
|
+
version: 0.3.1
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Miguel Vazquez
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-
|
18
|
+
date: 2011-03-19 00:00:00 +01:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -85,17 +85,18 @@ extra_rdoc_files: []
|
|
85
85
|
files:
|
86
86
|
- lib/rbbt/sources/COSTART.rb
|
87
87
|
- lib/rbbt/sources/CTCAE.rb
|
88
|
-
- lib/rbbt/sources/Reactome.rb
|
89
88
|
- lib/rbbt/sources/bibtex.rb
|
90
89
|
- lib/rbbt/sources/biomart.rb
|
91
90
|
- lib/rbbt/sources/entrez.rb
|
92
91
|
- lib/rbbt/sources/go.rb
|
93
92
|
- lib/rbbt/sources/gscholar.rb
|
94
93
|
- lib/rbbt/sources/organism.rb
|
94
|
+
- lib/rbbt/sources/organism/sequence.rb
|
95
95
|
- lib/rbbt/sources/polysearch.rb
|
96
96
|
- lib/rbbt/sources/pubmed.rb
|
97
97
|
- share/install/Organism/Hsa/Rakefile
|
98
98
|
- share/install/Organism/Sce/Rakefile
|
99
|
+
- share/install/Organism/organism_helpers.rb
|
99
100
|
- share/install/lib/helpers.rb
|
100
101
|
- test/rbbt/sources/test_biomart.rb
|
101
102
|
- test/rbbt/sources/test_entrez.rb
|
@@ -1,16 +0,0 @@
|
|
1
|
-
require 'rbbt-util'
|
2
|
-
|
3
|
-
module Reactome
|
4
|
-
|
5
|
-
Rbbt.claim "Reactome",
|
6
|
-
Proc.new do
|
7
|
-
headers = ["Uniprot ID#1", "Ensembl Gene ID#2","Entrez Gene ID#1", "Uniprot ID#2", "Ensembl Gene ID#2", "Entrez Gene ID#2" , "Type", "Reaction", "PMID"]
|
8
|
-
|
9
|
-
tsv = TSV.new(Open.open("http://www.reactome.org/download/current/homo_sapiens.interactions.txt.gz"), :fix => Proc.new {|l| l.gsub(/[\w ]+:/, "")})
|
10
|
-
tsv.key_field = headers.shift
|
11
|
-
tsv.fields = headers
|
12
|
-
|
13
|
-
tsv.to_s
|
14
|
-
end, 'Reactome'
|
15
|
-
]
|
16
|
-
end
|