rbbt-sources 0.4.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/etc/biomart/missing_in_archive +15 -0
- data/lib/rbbt/sources/COSMIC.rb +14 -0
- data/lib/rbbt/sources/COSTART.rb +1 -1
- data/lib/rbbt/sources/CTCAE.rb +1 -1
- data/lib/rbbt/sources/InterPro.rb +17 -0
- data/lib/rbbt/sources/NCI.rb +7 -0
- data/lib/rbbt/sources/biomart.rb +9 -9
- data/lib/rbbt/sources/entrez.rb +44 -17
- data/lib/rbbt/sources/go.rb +10 -7
- data/lib/rbbt/sources/jochem.rb +4 -0
- data/lib/rbbt/sources/organism.rb +24 -25
- data/lib/rbbt/sources/organism/sequence.rb +253 -19
- data/lib/rbbt/sources/polysearch.rb +5 -5
- data/lib/rbbt/sources/pubmed.rb +10 -5
- data/lib/rbbt/sources/wgEncodeBroadHmm.rb +37 -0
- data/share/install/InterPro/Rakefile +29 -0
- data/share/install/JoChem/Rakefile +67 -0
- data/share/install/NCI/Rakefile +79 -0
- data/share/install/Organism/Hsa/Rakefile +20 -1
- data/share/install/Organism/Rno/Rakefile +2 -0
- data/share/install/Organism/organism_helpers.rb +134 -77
- data/share/install/lib/helpers.rb +6 -5
- data/test/rbbt/sources/test_biomart.rb +8 -5
- data/test/rbbt/sources/test_organism.rb +23 -19
- metadata +39 -14
@@ -1,9 +1,9 @@
|
|
1
|
-
require 'rbbt
|
1
|
+
require 'rbbt'
|
2
2
|
require 'rbbt/sources/biomart'
|
3
3
|
require 'rbbt/sources/entrez'
|
4
4
|
|
5
5
|
def tsv_file(url, native, extra, options = {})
|
6
|
-
options = Misc.add_defaults options, :
|
6
|
+
options = Misc.add_defaults options, :persist => false, :keep_empty => true
|
7
7
|
|
8
8
|
case
|
9
9
|
when Array === native
|
@@ -27,7 +27,7 @@ def tsv_file(url, native, extra, options = {})
|
|
27
27
|
fields = nil
|
28
28
|
end
|
29
29
|
|
30
|
-
tsv = TSV.
|
30
|
+
tsv = TSV.open(Open.open(url), options)
|
31
31
|
tsv.key_field ||= key_field
|
32
32
|
tsv.fields ||= fields
|
33
33
|
tsv
|
@@ -35,13 +35,14 @@ end
|
|
35
35
|
|
36
36
|
def merge_entrez(data, taxs, native, fix = nil, select = nil)
|
37
37
|
entrez = Entrez.entrez2native(taxs, :fix => fix, :select => select)
|
38
|
+
entrez.key_field = "Entrez Gene ID"
|
38
39
|
entrez.fields = [native]
|
39
40
|
entrez
|
40
41
|
|
41
|
-
data.
|
42
|
+
data.attach entrez, "Entrez Gene ID"
|
42
43
|
end
|
43
44
|
|
44
45
|
def merge_biomart(lexicon, db, native, other, match = nil)
|
45
46
|
match ||= native.first
|
46
|
-
lexicon.
|
47
|
+
lexicon.attach BioMart.tsv(db, native, other)
|
47
48
|
end
|
@@ -1,4 +1,7 @@
|
|
1
|
-
require File.dirname(__FILE__) + '/../../test_helper'
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
|
2
|
+
|
3
|
+
require 'rbbt/sources/pubmed'
|
4
|
+
require 'test/unit'
|
2
5
|
require 'rbbt/sources/biomart'
|
3
6
|
require 'rbbt/util/tmpfile'
|
4
7
|
require 'test/unit'
|
@@ -11,11 +14,11 @@ class TestBioMart < Test::Unit::TestCase
|
|
11
14
|
end
|
12
15
|
|
13
16
|
data = BioMart.get('scerevisiae_gene_ensembl','entrezgene', ['protein_id'],[], nil, :nocache => false, :merge => true, :wget_options => {:quiet => false})
|
14
|
-
tsv = TSV.
|
17
|
+
tsv = TSV.open data, :double, :merge => true
|
15
18
|
assert(tsv['852236'][0].include? 'CAA84864')
|
16
19
|
|
17
20
|
data = BioMart.get('scerevisiae_gene_ensembl','entrezgene', ['external_gene_id'],[], data, :nocache => false, :wget_options => { :quiet => false} )
|
18
|
-
tsv = TSV.
|
21
|
+
tsv = TSV.open data, :double, :merge => true
|
19
22
|
assert(tsv['852236'][1].include? 'YBL044W')
|
20
23
|
end
|
21
24
|
|
@@ -25,7 +28,7 @@ class TestBioMart < Test::Unit::TestCase
|
|
25
28
|
|
26
29
|
TmpFile.with_file do |f|
|
27
30
|
filename = BioMart.query('scerevisiae_gene_ensembl','entrezgene', ['protein_id','refseq_peptide','external_gene_id','ensembl_gene_id'], [], nil, :nocache => false, :wget_options => { :quiet => false}, :filename => f)
|
28
|
-
data = TSV.
|
31
|
+
data = TSV.open Open.open(filename)
|
29
32
|
assert(data['852236']['external_gene_id'].include? 'YBL044W')
|
30
33
|
end
|
31
34
|
end
|
@@ -38,7 +41,7 @@ class TestBioMart < Test::Unit::TestCase
|
|
38
41
|
|
39
42
|
TmpFile.with_file do |f|
|
40
43
|
filename = BioMart.tsv('scerevisiae_gene_ensembl',['Entrez Gene', 'entrezgene'], [['Protein ID', 'protein_id'],['RefSeq Peptide','refseq_peptide']], [], nil, :nocache => false, :wget_options => { :quiet => false}, :filename => f)
|
41
|
-
data = TSV.
|
44
|
+
data = TSV.open Open.open(filename, :merge => true)
|
42
45
|
assert(data['852236']['Protein ID'].include? 'CAA84864')
|
43
46
|
assert_equal 'Entrez Gene', data.key_field
|
44
47
|
assert_equal ['Protein ID', 'RefSeq Peptide'], data.fields
|
@@ -1,24 +1,28 @@
|
|
1
1
|
require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
|
2
2
|
require 'rbbt/sources/organism'
|
3
|
-
require 'rbbt/sources/organism/sequence'
|
4
3
|
require 'test/unit'
|
5
4
|
|
6
5
|
class TestEntrez < Test::Unit::TestCase
|
6
|
+
def test_location
|
7
|
+
assert_equal "share/organisms/Sce/identifiers", Organism.identifiers('Sce')
|
8
|
+
end
|
9
|
+
|
10
|
+
|
7
11
|
def test_identifiers
|
8
|
-
assert Organism.identifiers('
|
9
|
-
assert Organism
|
10
|
-
assert Organism.identifiers
|
12
|
+
assert Organism.identifiers('Hsa').tsv(:key_field => "Entrez Gene ID", :persist => true)['1020']["Associated Gene Name"].include?('CDK5')
|
13
|
+
assert Organism.identifiers('Sce').tsv(:persist => true)['S000006120']["Ensembl Gene ID"].include?('YPL199C')
|
14
|
+
assert Organism::Sce.identifiers.tsv(:persist => true)['S000006120']["Ensembl Gene ID"].include?('YPL199C')
|
11
15
|
end
|
12
16
|
|
13
17
|
def test_lexicon
|
14
|
-
assert TSV.
|
18
|
+
assert TSV.open(Organism.lexicon('Sce'))['S000006120'].flatten.include?('YPL199C')
|
15
19
|
end
|
16
20
|
|
17
21
|
def test_guess_id
|
18
22
|
ensembl = %w(YOL044W YDR289C YAL034C YGR246C ARS519 tH(GUG)E2 YDR218C YLR002C YGL224C)
|
19
23
|
gene_name = %w(SNR64 MIP1 MRPS18 TFB2 JEN1 IVY1 TRS33 GAS3)
|
20
|
-
assert_equal "Ensembl Gene ID", Organism::Sce.guess_id(ensembl).first
|
21
24
|
assert_equal "Associated Gene Name", Organism::Sce.guess_id(gene_name).first
|
25
|
+
assert_equal "Ensembl Gene ID", Organism::Sce.guess_id(ensembl).first
|
22
26
|
end
|
23
27
|
|
24
28
|
def test_organisms
|
@@ -27,7 +31,7 @@ class TestEntrez < Test::Unit::TestCase
|
|
27
31
|
end
|
28
32
|
|
29
33
|
def test_attach_translations
|
30
|
-
tsv = TSV.
|
34
|
+
tsv = TSV.setup({"1020" => []}, :type => :list)
|
31
35
|
tsv.key_field = "Entrez Gene ID"
|
32
36
|
tsv.fields = []
|
33
37
|
tsv.namespace = "Hsa"
|
@@ -38,20 +42,20 @@ class TestEntrez < Test::Unit::TestCase
|
|
38
42
|
assert_equal "CDK5", tsv["1020"]["Associated Gene Name"]
|
39
43
|
end
|
40
44
|
|
41
|
-
def test_genes_at_chromosome
|
42
|
-
|
43
|
-
|
44
|
-
end
|
45
|
+
#def test_genes_at_chromosome
|
46
|
+
# pos = [12, 117799500]
|
47
|
+
# assert_equal "ENSG00000089250", Organism::Hsa.genes_at_chromosome_positions(pos.first, pos.last)
|
48
|
+
#end
|
45
49
|
|
46
|
-
def test_genes_at_chromosome_array
|
47
|
-
|
48
|
-
|
49
|
-
end
|
50
|
+
#def test_genes_at_chromosome_array
|
51
|
+
# pos = [12, [117799500, 106903900]]
|
52
|
+
# assert_equal ["ENSG00000089250", "ENSG00000013503"], Organism::Hsa.genes_at_chromosome_positions(pos.first, pos.last)
|
53
|
+
#end
|
50
54
|
|
51
|
-
def test_genes_at_genomic_positions
|
52
|
-
|
53
|
-
|
54
|
-
end
|
55
|
+
#def test_genes_at_genomic_positions
|
56
|
+
# pos = [[12, 117799500], [12, 106903900], [1, 115259500]]
|
57
|
+
# assert_equal ["ENSG00000089250", "ENSG00000013503", "ENSG00000213281"], Organism::Hsa.genes_at_genomic_positions(pos)
|
58
|
+
#end
|
55
59
|
|
56
60
|
end
|
57
61
|
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-sources
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
5
|
-
prerelease:
|
4
|
+
hash: 23
|
5
|
+
prerelease: false
|
6
6
|
segments:
|
7
|
+
- 1
|
7
8
|
- 0
|
8
|
-
- 4
|
9
9
|
- 0
|
10
|
-
version: 0.
|
10
|
+
version: 1.0.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Miguel Vazquez
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-
|
18
|
+
date: 2011-09-07 00:00:00 +02:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -26,10 +26,12 @@ dependencies:
|
|
26
26
|
requirements:
|
27
27
|
- - ">="
|
28
28
|
- !ruby/object:Gem::Version
|
29
|
-
hash:
|
29
|
+
hash: 63
|
30
30
|
segments:
|
31
|
+
- 4
|
31
32
|
- 0
|
32
|
-
|
33
|
+
- 0
|
34
|
+
version: 4.0.0
|
33
35
|
type: :runtime
|
34
36
|
version_requirements: *id001
|
35
37
|
- !ruby/object:Gem::Dependency
|
@@ -74,6 +76,20 @@ dependencies:
|
|
74
76
|
version: "0"
|
75
77
|
type: :runtime
|
76
78
|
version_requirements: *id004
|
79
|
+
- !ruby/object:Gem::Dependency
|
80
|
+
name: bio
|
81
|
+
prerelease: false
|
82
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
83
|
+
none: false
|
84
|
+
requirements:
|
85
|
+
- - ">="
|
86
|
+
- !ruby/object:Gem::Version
|
87
|
+
hash: 3
|
88
|
+
segments:
|
89
|
+
- 0
|
90
|
+
version: "0"
|
91
|
+
type: :runtime
|
92
|
+
version_requirements: *id005
|
77
93
|
description: Data sources like PubMed, Entrez Gene, or Gene Ontology
|
78
94
|
email: miguel.vazquez@fdi.ucm.es
|
79
95
|
executables: []
|
@@ -83,28 +99,37 @@ extensions: []
|
|
83
99
|
extra_rdoc_files: []
|
84
100
|
|
85
101
|
files:
|
102
|
+
- etc/biomart/missing_in_archive
|
103
|
+
- lib/rbbt/sources/COSMIC.rb
|
86
104
|
- lib/rbbt/sources/COSTART.rb
|
87
105
|
- lib/rbbt/sources/CTCAE.rb
|
106
|
+
- lib/rbbt/sources/InterPro.rb
|
107
|
+
- lib/rbbt/sources/NCI.rb
|
88
108
|
- lib/rbbt/sources/bibtex.rb
|
89
109
|
- lib/rbbt/sources/biomart.rb
|
90
110
|
- lib/rbbt/sources/entrez.rb
|
91
111
|
- lib/rbbt/sources/go.rb
|
92
112
|
- lib/rbbt/sources/gscholar.rb
|
113
|
+
- lib/rbbt/sources/jochem.rb
|
93
114
|
- lib/rbbt/sources/organism.rb
|
94
115
|
- lib/rbbt/sources/organism/sequence.rb
|
95
116
|
- lib/rbbt/sources/polysearch.rb
|
96
117
|
- lib/rbbt/sources/pubmed.rb
|
118
|
+
- lib/rbbt/sources/wgEncodeBroadHmm.rb
|
119
|
+
- share/install/InterPro/Rakefile
|
120
|
+
- share/install/JoChem/Rakefile
|
121
|
+
- share/install/NCI/Rakefile
|
97
122
|
- share/install/Organism/Hsa/Rakefile
|
98
123
|
- share/install/Organism/Rno/Rakefile
|
99
124
|
- share/install/Organism/Sce/Rakefile
|
100
125
|
- share/install/Organism/organism_helpers.rb
|
101
126
|
- share/install/lib/helpers.rb
|
102
|
-
- test/
|
127
|
+
- test/test_helper.rb
|
103
128
|
- test/rbbt/sources/test_entrez.rb
|
129
|
+
- test/rbbt/sources/test_pubmed.rb
|
104
130
|
- test/rbbt/sources/test_go.rb
|
131
|
+
- test/rbbt/sources/test_biomart.rb
|
105
132
|
- test/rbbt/sources/test_organism.rb
|
106
|
-
- test/rbbt/sources/test_pubmed.rb
|
107
|
-
- test/test_helper.rb
|
108
133
|
has_rdoc: true
|
109
134
|
homepage: http://github.com/mikisvaz/rbbt-sources
|
110
135
|
licenses: []
|
@@ -135,14 +160,14 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
135
160
|
requirements: []
|
136
161
|
|
137
162
|
rubyforge_project:
|
138
|
-
rubygems_version: 1.
|
163
|
+
rubygems_version: 1.3.7
|
139
164
|
signing_key:
|
140
165
|
specification_version: 3
|
141
166
|
summary: Data sources for the Ruby Bioinformatics Toolkit (rbbt)
|
142
167
|
test_files:
|
143
|
-
- test/
|
168
|
+
- test/test_helper.rb
|
144
169
|
- test/rbbt/sources/test_entrez.rb
|
170
|
+
- test/rbbt/sources/test_pubmed.rb
|
145
171
|
- test/rbbt/sources/test_go.rb
|
172
|
+
- test/rbbt/sources/test_biomart.rb
|
146
173
|
- test/rbbt/sources/test_organism.rb
|
147
|
-
- test/rbbt/sources/test_pubmed.rb
|
148
|
-
- test/test_helper.rb
|