rbbt-sources 0.4.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/etc/biomart/missing_in_archive +15 -0
- data/lib/rbbt/sources/COSMIC.rb +14 -0
- data/lib/rbbt/sources/COSTART.rb +1 -1
- data/lib/rbbt/sources/CTCAE.rb +1 -1
- data/lib/rbbt/sources/InterPro.rb +17 -0
- data/lib/rbbt/sources/NCI.rb +7 -0
- data/lib/rbbt/sources/biomart.rb +9 -9
- data/lib/rbbt/sources/entrez.rb +44 -17
- data/lib/rbbt/sources/go.rb +10 -7
- data/lib/rbbt/sources/jochem.rb +4 -0
- data/lib/rbbt/sources/organism.rb +24 -25
- data/lib/rbbt/sources/organism/sequence.rb +253 -19
- data/lib/rbbt/sources/polysearch.rb +5 -5
- data/lib/rbbt/sources/pubmed.rb +10 -5
- data/lib/rbbt/sources/wgEncodeBroadHmm.rb +37 -0
- data/share/install/InterPro/Rakefile +29 -0
- data/share/install/JoChem/Rakefile +67 -0
- data/share/install/NCI/Rakefile +79 -0
- data/share/install/Organism/Hsa/Rakefile +20 -1
- data/share/install/Organism/Rno/Rakefile +2 -0
- data/share/install/Organism/organism_helpers.rb +134 -77
- data/share/install/lib/helpers.rb +6 -5
- data/test/rbbt/sources/test_biomart.rb +8 -5
- data/test/rbbt/sources/test_organism.rb +23 -19
- metadata +39 -14
@@ -1,9 +1,9 @@
|
|
1
|
-
require 'rbbt
|
1
|
+
require 'rbbt'
|
2
2
|
require 'rbbt/sources/biomart'
|
3
3
|
require 'rbbt/sources/entrez'
|
4
4
|
|
5
5
|
def tsv_file(url, native, extra, options = {})
|
6
|
-
options = Misc.add_defaults options, :
|
6
|
+
options = Misc.add_defaults options, :persist => false, :keep_empty => true
|
7
7
|
|
8
8
|
case
|
9
9
|
when Array === native
|
@@ -27,7 +27,7 @@ def tsv_file(url, native, extra, options = {})
|
|
27
27
|
fields = nil
|
28
28
|
end
|
29
29
|
|
30
|
-
tsv = TSV.
|
30
|
+
tsv = TSV.open(Open.open(url), options)
|
31
31
|
tsv.key_field ||= key_field
|
32
32
|
tsv.fields ||= fields
|
33
33
|
tsv
|
@@ -35,13 +35,14 @@ end
|
|
35
35
|
|
36
36
|
def merge_entrez(data, taxs, native, fix = nil, select = nil)
|
37
37
|
entrez = Entrez.entrez2native(taxs, :fix => fix, :select => select)
|
38
|
+
entrez.key_field = "Entrez Gene ID"
|
38
39
|
entrez.fields = [native]
|
39
40
|
entrez
|
40
41
|
|
41
|
-
data.
|
42
|
+
data.attach entrez, "Entrez Gene ID"
|
42
43
|
end
|
43
44
|
|
44
45
|
def merge_biomart(lexicon, db, native, other, match = nil)
|
45
46
|
match ||= native.first
|
46
|
-
lexicon.
|
47
|
+
lexicon.attach BioMart.tsv(db, native, other)
|
47
48
|
end
|
@@ -1,4 +1,7 @@
|
|
1
|
-
require File.dirname(__FILE__) + '/../../test_helper'
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
|
2
|
+
|
3
|
+
require 'rbbt/sources/pubmed'
|
4
|
+
require 'test/unit'
|
2
5
|
require 'rbbt/sources/biomart'
|
3
6
|
require 'rbbt/util/tmpfile'
|
4
7
|
require 'test/unit'
|
@@ -11,11 +14,11 @@ class TestBioMart < Test::Unit::TestCase
|
|
11
14
|
end
|
12
15
|
|
13
16
|
data = BioMart.get('scerevisiae_gene_ensembl','entrezgene', ['protein_id'],[], nil, :nocache => false, :merge => true, :wget_options => {:quiet => false})
|
14
|
-
tsv = TSV.
|
17
|
+
tsv = TSV.open data, :double, :merge => true
|
15
18
|
assert(tsv['852236'][0].include? 'CAA84864')
|
16
19
|
|
17
20
|
data = BioMart.get('scerevisiae_gene_ensembl','entrezgene', ['external_gene_id'],[], data, :nocache => false, :wget_options => { :quiet => false} )
|
18
|
-
tsv = TSV.
|
21
|
+
tsv = TSV.open data, :double, :merge => true
|
19
22
|
assert(tsv['852236'][1].include? 'YBL044W')
|
20
23
|
end
|
21
24
|
|
@@ -25,7 +28,7 @@ class TestBioMart < Test::Unit::TestCase
|
|
25
28
|
|
26
29
|
TmpFile.with_file do |f|
|
27
30
|
filename = BioMart.query('scerevisiae_gene_ensembl','entrezgene', ['protein_id','refseq_peptide','external_gene_id','ensembl_gene_id'], [], nil, :nocache => false, :wget_options => { :quiet => false}, :filename => f)
|
28
|
-
data = TSV.
|
31
|
+
data = TSV.open Open.open(filename)
|
29
32
|
assert(data['852236']['external_gene_id'].include? 'YBL044W')
|
30
33
|
end
|
31
34
|
end
|
@@ -38,7 +41,7 @@ class TestBioMart < Test::Unit::TestCase
|
|
38
41
|
|
39
42
|
TmpFile.with_file do |f|
|
40
43
|
filename = BioMart.tsv('scerevisiae_gene_ensembl',['Entrez Gene', 'entrezgene'], [['Protein ID', 'protein_id'],['RefSeq Peptide','refseq_peptide']], [], nil, :nocache => false, :wget_options => { :quiet => false}, :filename => f)
|
41
|
-
data = TSV.
|
44
|
+
data = TSV.open Open.open(filename, :merge => true)
|
42
45
|
assert(data['852236']['Protein ID'].include? 'CAA84864')
|
43
46
|
assert_equal 'Entrez Gene', data.key_field
|
44
47
|
assert_equal ['Protein ID', 'RefSeq Peptide'], data.fields
|
@@ -1,24 +1,28 @@
|
|
1
1
|
require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
|
2
2
|
require 'rbbt/sources/organism'
|
3
|
-
require 'rbbt/sources/organism/sequence'
|
4
3
|
require 'test/unit'
|
5
4
|
|
6
5
|
class TestEntrez < Test::Unit::TestCase
|
6
|
+
def test_location
|
7
|
+
assert_equal "share/organisms/Sce/identifiers", Organism.identifiers('Sce')
|
8
|
+
end
|
9
|
+
|
10
|
+
|
7
11
|
def test_identifiers
|
8
|
-
assert Organism.identifiers('
|
9
|
-
assert Organism
|
10
|
-
assert Organism.identifiers
|
12
|
+
assert Organism.identifiers('Hsa').tsv(:key_field => "Entrez Gene ID", :persist => true)['1020']["Associated Gene Name"].include?('CDK5')
|
13
|
+
assert Organism.identifiers('Sce').tsv(:persist => true)['S000006120']["Ensembl Gene ID"].include?('YPL199C')
|
14
|
+
assert Organism::Sce.identifiers.tsv(:persist => true)['S000006120']["Ensembl Gene ID"].include?('YPL199C')
|
11
15
|
end
|
12
16
|
|
13
17
|
def test_lexicon
|
14
|
-
assert TSV.
|
18
|
+
assert TSV.open(Organism.lexicon('Sce'))['S000006120'].flatten.include?('YPL199C')
|
15
19
|
end
|
16
20
|
|
17
21
|
def test_guess_id
|
18
22
|
ensembl = %w(YOL044W YDR289C YAL034C YGR246C ARS519 tH(GUG)E2 YDR218C YLR002C YGL224C)
|
19
23
|
gene_name = %w(SNR64 MIP1 MRPS18 TFB2 JEN1 IVY1 TRS33 GAS3)
|
20
|
-
assert_equal "Ensembl Gene ID", Organism::Sce.guess_id(ensembl).first
|
21
24
|
assert_equal "Associated Gene Name", Organism::Sce.guess_id(gene_name).first
|
25
|
+
assert_equal "Ensembl Gene ID", Organism::Sce.guess_id(ensembl).first
|
22
26
|
end
|
23
27
|
|
24
28
|
def test_organisms
|
@@ -27,7 +31,7 @@ class TestEntrez < Test::Unit::TestCase
|
|
27
31
|
end
|
28
32
|
|
29
33
|
def test_attach_translations
|
30
|
-
tsv = TSV.
|
34
|
+
tsv = TSV.setup({"1020" => []}, :type => :list)
|
31
35
|
tsv.key_field = "Entrez Gene ID"
|
32
36
|
tsv.fields = []
|
33
37
|
tsv.namespace = "Hsa"
|
@@ -38,20 +42,20 @@ class TestEntrez < Test::Unit::TestCase
|
|
38
42
|
assert_equal "CDK5", tsv["1020"]["Associated Gene Name"]
|
39
43
|
end
|
40
44
|
|
41
|
-
def test_genes_at_chromosome
|
42
|
-
|
43
|
-
|
44
|
-
end
|
45
|
+
#def test_genes_at_chromosome
|
46
|
+
# pos = [12, 117799500]
|
47
|
+
# assert_equal "ENSG00000089250", Organism::Hsa.genes_at_chromosome_positions(pos.first, pos.last)
|
48
|
+
#end
|
45
49
|
|
46
|
-
def test_genes_at_chromosome_array
|
47
|
-
|
48
|
-
|
49
|
-
end
|
50
|
+
#def test_genes_at_chromosome_array
|
51
|
+
# pos = [12, [117799500, 106903900]]
|
52
|
+
# assert_equal ["ENSG00000089250", "ENSG00000013503"], Organism::Hsa.genes_at_chromosome_positions(pos.first, pos.last)
|
53
|
+
#end
|
50
54
|
|
51
|
-
def test_genes_at_genomic_positions
|
52
|
-
|
53
|
-
|
54
|
-
end
|
55
|
+
#def test_genes_at_genomic_positions
|
56
|
+
# pos = [[12, 117799500], [12, 106903900], [1, 115259500]]
|
57
|
+
# assert_equal ["ENSG00000089250", "ENSG00000013503", "ENSG00000213281"], Organism::Hsa.genes_at_genomic_positions(pos)
|
58
|
+
#end
|
55
59
|
|
56
60
|
end
|
57
61
|
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-sources
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
5
|
-
prerelease:
|
4
|
+
hash: 23
|
5
|
+
prerelease: false
|
6
6
|
segments:
|
7
|
+
- 1
|
7
8
|
- 0
|
8
|
-
- 4
|
9
9
|
- 0
|
10
|
-
version: 0.
|
10
|
+
version: 1.0.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Miguel Vazquez
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-
|
18
|
+
date: 2011-09-07 00:00:00 +02:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -26,10 +26,12 @@ dependencies:
|
|
26
26
|
requirements:
|
27
27
|
- - ">="
|
28
28
|
- !ruby/object:Gem::Version
|
29
|
-
hash:
|
29
|
+
hash: 63
|
30
30
|
segments:
|
31
|
+
- 4
|
31
32
|
- 0
|
32
|
-
|
33
|
+
- 0
|
34
|
+
version: 4.0.0
|
33
35
|
type: :runtime
|
34
36
|
version_requirements: *id001
|
35
37
|
- !ruby/object:Gem::Dependency
|
@@ -74,6 +76,20 @@ dependencies:
|
|
74
76
|
version: "0"
|
75
77
|
type: :runtime
|
76
78
|
version_requirements: *id004
|
79
|
+
- !ruby/object:Gem::Dependency
|
80
|
+
name: bio
|
81
|
+
prerelease: false
|
82
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
83
|
+
none: false
|
84
|
+
requirements:
|
85
|
+
- - ">="
|
86
|
+
- !ruby/object:Gem::Version
|
87
|
+
hash: 3
|
88
|
+
segments:
|
89
|
+
- 0
|
90
|
+
version: "0"
|
91
|
+
type: :runtime
|
92
|
+
version_requirements: *id005
|
77
93
|
description: Data sources like PubMed, Entrez Gene, or Gene Ontology
|
78
94
|
email: miguel.vazquez@fdi.ucm.es
|
79
95
|
executables: []
|
@@ -83,28 +99,37 @@ extensions: []
|
|
83
99
|
extra_rdoc_files: []
|
84
100
|
|
85
101
|
files:
|
102
|
+
- etc/biomart/missing_in_archive
|
103
|
+
- lib/rbbt/sources/COSMIC.rb
|
86
104
|
- lib/rbbt/sources/COSTART.rb
|
87
105
|
- lib/rbbt/sources/CTCAE.rb
|
106
|
+
- lib/rbbt/sources/InterPro.rb
|
107
|
+
- lib/rbbt/sources/NCI.rb
|
88
108
|
- lib/rbbt/sources/bibtex.rb
|
89
109
|
- lib/rbbt/sources/biomart.rb
|
90
110
|
- lib/rbbt/sources/entrez.rb
|
91
111
|
- lib/rbbt/sources/go.rb
|
92
112
|
- lib/rbbt/sources/gscholar.rb
|
113
|
+
- lib/rbbt/sources/jochem.rb
|
93
114
|
- lib/rbbt/sources/organism.rb
|
94
115
|
- lib/rbbt/sources/organism/sequence.rb
|
95
116
|
- lib/rbbt/sources/polysearch.rb
|
96
117
|
- lib/rbbt/sources/pubmed.rb
|
118
|
+
- lib/rbbt/sources/wgEncodeBroadHmm.rb
|
119
|
+
- share/install/InterPro/Rakefile
|
120
|
+
- share/install/JoChem/Rakefile
|
121
|
+
- share/install/NCI/Rakefile
|
97
122
|
- share/install/Organism/Hsa/Rakefile
|
98
123
|
- share/install/Organism/Rno/Rakefile
|
99
124
|
- share/install/Organism/Sce/Rakefile
|
100
125
|
- share/install/Organism/organism_helpers.rb
|
101
126
|
- share/install/lib/helpers.rb
|
102
|
-
- test/
|
127
|
+
- test/test_helper.rb
|
103
128
|
- test/rbbt/sources/test_entrez.rb
|
129
|
+
- test/rbbt/sources/test_pubmed.rb
|
104
130
|
- test/rbbt/sources/test_go.rb
|
131
|
+
- test/rbbt/sources/test_biomart.rb
|
105
132
|
- test/rbbt/sources/test_organism.rb
|
106
|
-
- test/rbbt/sources/test_pubmed.rb
|
107
|
-
- test/test_helper.rb
|
108
133
|
has_rdoc: true
|
109
134
|
homepage: http://github.com/mikisvaz/rbbt-sources
|
110
135
|
licenses: []
|
@@ -135,14 +160,14 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
135
160
|
requirements: []
|
136
161
|
|
137
162
|
rubyforge_project:
|
138
|
-
rubygems_version: 1.
|
163
|
+
rubygems_version: 1.3.7
|
139
164
|
signing_key:
|
140
165
|
specification_version: 3
|
141
166
|
summary: Data sources for the Ruby Bioinformatics Toolkit (rbbt)
|
142
167
|
test_files:
|
143
|
-
- test/
|
168
|
+
- test/test_helper.rb
|
144
169
|
- test/rbbt/sources/test_entrez.rb
|
170
|
+
- test/rbbt/sources/test_pubmed.rb
|
145
171
|
- test/rbbt/sources/test_go.rb
|
172
|
+
- test/rbbt/sources/test_biomart.rb
|
146
173
|
- test/rbbt/sources/test_organism.rb
|
147
|
-
- test/rbbt/sources/test_pubmed.rb
|
148
|
-
- test/test_helper.rb
|