rbbt-sources 0.2.2 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/rbbt/sources/COSTART.rb +2 -3
- data/lib/rbbt/sources/CTCAE.rb +1 -1
- data/lib/rbbt/sources/biomart.rb +32 -32
- data/lib/rbbt/sources/entrez.rb +14 -10
- data/lib/rbbt/sources/go.rb +9 -8
- data/lib/rbbt/sources/organism.rb +36 -10
- data/lib/rbbt/sources/organism/sequence.rb +337 -0
- data/lib/rbbt/sources/polysearch.rb +5 -5
- data/share/install/Organism/Hsa/Rakefile +7 -68
- data/share/install/Organism/Sce/Rakefile +4 -70
- data/share/install/Organism/organism_helpers.rb +305 -0
- data/share/install/lib/helpers.rb +5 -5
- data/test/rbbt/sources/test_biomart.rb +7 -6
- data/test/rbbt/sources/test_entrez.rb +3 -3
- data/test/rbbt/sources/test_organism.rb +32 -3
- data/test/rbbt/sources/test_pubmed.rb +1 -1
- metadata +7 -6
- data/lib/rbbt/sources/Reactome.rb +0 -16
@@ -7,10 +7,10 @@ def tsv_file(url, native, extra, options = {})
|
|
7
7
|
|
8
8
|
case
|
9
9
|
when Array === native
|
10
|
-
options = Misc.add_defaults options, :
|
10
|
+
options = Misc.add_defaults options, :key => native.last
|
11
11
|
key_field = native.first
|
12
12
|
when (String === native or Integer === native)
|
13
|
-
options = Misc.add_defaults options, :
|
13
|
+
options = Misc.add_defaults options, :key => native
|
14
14
|
key_field = nil
|
15
15
|
else
|
16
16
|
key_field = nil
|
@@ -18,10 +18,10 @@ def tsv_file(url, native, extra, options = {})
|
|
18
18
|
|
19
19
|
case
|
20
20
|
when (Array === extra and Array === extra.first)
|
21
|
-
options = Misc.add_defaults options, :
|
21
|
+
options = Misc.add_defaults options, :fields => extra.collect{|e| e.last}
|
22
22
|
fields = extra.collect{|e| e.first}
|
23
23
|
when (Array === extra and not Array === extra.first)
|
24
|
-
options = Misc.add_defaults options, :
|
24
|
+
options = Misc.add_defaults options, :fields => extra
|
25
25
|
fields = (1..extra.length).to_a.collect{|i| "Field#{i}"}
|
26
26
|
else
|
27
27
|
fields = nil
|
@@ -29,7 +29,7 @@ def tsv_file(url, native, extra, options = {})
|
|
29
29
|
|
30
30
|
tsv = TSV.new(Open.open(url), options)
|
31
31
|
tsv.key_field ||= key_field
|
32
|
-
tsv.fields
|
32
|
+
tsv.fields ||= fields
|
33
33
|
tsv
|
34
34
|
end
|
35
35
|
|
@@ -4,17 +4,18 @@ require 'test/unit'
|
|
4
4
|
|
5
5
|
class TestBioMart < Test::Unit::TestCase
|
6
6
|
|
7
|
-
def
|
7
|
+
def test_get
|
8
8
|
assert_raise BioMart::QueryError do
|
9
9
|
BioMart.get('scerevisiae_gene_ensembl','entrezgene', ['protein_id'],['with_unknownattr'])
|
10
10
|
end
|
11
11
|
|
12
|
-
data = BioMart.get('scerevisiae_gene_ensembl','entrezgene', ['protein_id'],[], nil, :nocache => false, :wget_options => {
|
13
|
-
|
12
|
+
data = BioMart.get('scerevisiae_gene_ensembl','entrezgene', ['protein_id'],[], nil, :nocache => false, :merge => true, :wget_options => {:quiet => false})
|
13
|
+
tsv = TSV.new data, :double, :merge => true
|
14
|
+
assert(tsv['852236'][0].include? 'CAA84864')
|
14
15
|
|
15
16
|
data = BioMart.get('scerevisiae_gene_ensembl','entrezgene', ['external_gene_id'],[], data, :nocache => false, :wget_options => { :quiet => false} )
|
16
|
-
|
17
|
-
assert(
|
17
|
+
tsv = TSV.new data, :double, :merge => true
|
18
|
+
assert(tsv['852236'][1].include? 'YBL044W')
|
18
19
|
end
|
19
20
|
|
20
21
|
def test_query
|
@@ -23,7 +24,7 @@ class TestBioMart < Test::Unit::TestCase
|
|
23
24
|
assert(data['852236']['external_gene_id'].include? 'YBL044W')
|
24
25
|
end
|
25
26
|
|
26
|
-
def
|
27
|
+
def test_tsv
|
27
28
|
data = BioMart.tsv('scerevisiae_gene_ensembl',['Entrez Gene', 'entrezgene'], [['Protein ID', 'protein_id'],['RefSeq Peptide','refseq_peptide']], [], nil, :nocache => false, :wget_options => { :quiet => false})
|
28
29
|
|
29
30
|
assert(data['852236']['Protein ID'].include? 'CAA84864')
|
@@ -3,12 +3,12 @@ require 'rbbt/sources/entrez'
|
|
3
3
|
require 'test/unit'
|
4
4
|
|
5
5
|
class TestEntrez < Test::Unit::TestCase
|
6
|
-
$yeast_tax =
|
6
|
+
$yeast_tax = 559292
|
7
7
|
|
8
8
|
def test_entrez2native
|
9
9
|
tax = $yeast_tax
|
10
10
|
fix = proc{|line| line.sub(/SGD:S0/,'S0') }
|
11
|
-
select = proc{|line| line.match(
|
11
|
+
select = proc{|line| line.match(/SGD:S0/)}
|
12
12
|
lexicon = Entrez.entrez2native(tax, :fix => fix, :select => select)
|
13
13
|
|
14
14
|
assert(lexicon['855611'].include? 'S000005056')
|
@@ -40,7 +40,7 @@ class TestEntrez < Test::Unit::TestCase
|
|
40
40
|
|
41
41
|
def test_similarity
|
42
42
|
assert(Entrez.gene_text_similarity(9129, "PRP3 pre-mRNA processing factor 3 homolog (S. cerevisiae)") > 0)
|
43
|
-
assert_equal(0, Entrez.gene_text_similarity("NON
|
43
|
+
assert_equal(0, Entrez.gene_text_similarity("NON EXISTENT GENEID", "PRP3 pre-mRNA processing factor 3 homolog (S. cerevisiae)"))
|
44
44
|
end
|
45
45
|
|
46
46
|
end
|
@@ -1,12 +1,13 @@
|
|
1
1
|
require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
|
2
2
|
require 'rbbt/sources/organism'
|
3
|
+
require 'rbbt/sources/organism/sequence'
|
3
4
|
require 'test/unit'
|
4
5
|
|
5
6
|
class TestEntrez < Test::Unit::TestCase
|
6
7
|
def test_identifiers
|
7
|
-
assert
|
8
|
-
assert
|
9
|
-
assert
|
8
|
+
assert Organism.identifiers('Sce').tsv['S000006120']["Ensembl Gene ID"].include?('YPL199C')
|
9
|
+
assert Organism::Sce.identifiers.tsv['S000006120']["Ensembl Gene ID"].include?('YPL199C')
|
10
|
+
assert Organism.identifiers('Hsa').tsv(:key => "Entrez Gene ID")['1020']["Associated Gene Name"].include?('CDK5')
|
10
11
|
end
|
11
12
|
|
12
13
|
def test_lexicon
|
@@ -24,6 +25,34 @@ class TestEntrez < Test::Unit::TestCase
|
|
24
25
|
assert Organism.organisms.include? "Hsa"
|
25
26
|
assert_equal "Hsa", Organism.organism("Homo sapiens")
|
26
27
|
end
|
28
|
+
|
29
|
+
def test_attach_translations
|
30
|
+
tsv = TSV.new({"1020" => []}, :list)
|
31
|
+
tsv.key_field = "Entrez Gene ID"
|
32
|
+
tsv.fields = []
|
33
|
+
tsv.namespace = "Hsa"
|
34
|
+
|
35
|
+
Organism::Hsa.attach_translations tsv, "Associated Gene Name"
|
36
|
+
Organism::Hsa.attach_translations tsv, "Ensembl Gene ID"
|
37
|
+
|
38
|
+
assert_equal "CDK5", tsv["1020"]["Associated Gene Name"]
|
39
|
+
end
|
40
|
+
|
41
|
+
def test_genes_at_chromosome
|
42
|
+
pos = [12, 117799500]
|
43
|
+
assert_equal "ENSG00000089250", Organism::Hsa.genes_at_chromosome_positions(pos.first, pos.last)
|
44
|
+
end
|
45
|
+
|
46
|
+
def test_genes_at_chromosome_array
|
47
|
+
pos = [12, [117799500, 106903900]]
|
48
|
+
assert_equal ["ENSG00000089250", "ENSG00000013503"], Organism::Hsa.genes_at_chromosome_positions(pos.first, pos.last)
|
49
|
+
end
|
50
|
+
|
51
|
+
def test_genes_at_genomic_positions
|
52
|
+
pos = [[12, 117799500], [12, 106903900], [1, 115259500]]
|
53
|
+
assert_equal ["ENSG00000089250", "ENSG00000013503", "ENSG00000213281"], Organism::Hsa.genes_at_genomic_positions(pos)
|
54
|
+
end
|
55
|
+
|
27
56
|
end
|
28
57
|
|
29
58
|
|
@@ -21,7 +21,7 @@ class TestPubMed < Test::Unit::TestCase
|
|
21
21
|
assert(PubMed.get_article(pmids)[pmid].title == "Discovering semantic features in the literature: a foundation for building functional associations.")
|
22
22
|
end
|
23
23
|
|
24
|
-
def
|
24
|
+
def test_full_text
|
25
25
|
pmid = '16438716'
|
26
26
|
assert(PubMed.get_article(pmid).full_text =~ /Discovering/)
|
27
27
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-sources
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 17
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
-
|
9
|
-
-
|
10
|
-
version: 0.
|
8
|
+
- 3
|
9
|
+
- 1
|
10
|
+
version: 0.3.1
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Miguel Vazquez
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-
|
18
|
+
date: 2011-03-19 00:00:00 +01:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -85,17 +85,18 @@ extra_rdoc_files: []
|
|
85
85
|
files:
|
86
86
|
- lib/rbbt/sources/COSTART.rb
|
87
87
|
- lib/rbbt/sources/CTCAE.rb
|
88
|
-
- lib/rbbt/sources/Reactome.rb
|
89
88
|
- lib/rbbt/sources/bibtex.rb
|
90
89
|
- lib/rbbt/sources/biomart.rb
|
91
90
|
- lib/rbbt/sources/entrez.rb
|
92
91
|
- lib/rbbt/sources/go.rb
|
93
92
|
- lib/rbbt/sources/gscholar.rb
|
94
93
|
- lib/rbbt/sources/organism.rb
|
94
|
+
- lib/rbbt/sources/organism/sequence.rb
|
95
95
|
- lib/rbbt/sources/polysearch.rb
|
96
96
|
- lib/rbbt/sources/pubmed.rb
|
97
97
|
- share/install/Organism/Hsa/Rakefile
|
98
98
|
- share/install/Organism/Sce/Rakefile
|
99
|
+
- share/install/Organism/organism_helpers.rb
|
99
100
|
- share/install/lib/helpers.rb
|
100
101
|
- test/rbbt/sources/test_biomart.rb
|
101
102
|
- test/rbbt/sources/test_entrez.rb
|
@@ -1,16 +0,0 @@
|
|
1
|
-
require 'rbbt-util'
|
2
|
-
|
3
|
-
module Reactome
|
4
|
-
|
5
|
-
Rbbt.claim "Reactome",
|
6
|
-
Proc.new do
|
7
|
-
headers = ["Uniprot ID#1", "Ensembl Gene ID#2","Entrez Gene ID#1", "Uniprot ID#2", "Ensembl Gene ID#2", "Entrez Gene ID#2" , "Type", "Reaction", "PMID"]
|
8
|
-
|
9
|
-
tsv = TSV.new(Open.open("http://www.reactome.org/download/current/homo_sapiens.interactions.txt.gz"), :fix => Proc.new {|l| l.gsub(/[\w ]+:/, "")})
|
10
|
-
tsv.key_field = headers.shift
|
11
|
-
tsv.fields = headers
|
12
|
-
|
13
|
-
tsv.to_s
|
14
|
-
end, 'Reactome'
|
15
|
-
]
|
16
|
-
end
|