rbbt-sources 1.2.0 → 2.0.0
This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
- data/etc/biomart/missing_in_archive +11 -0
- data/lib/rbbt/sources/COSMIC.rb +47 -4
- data/lib/rbbt/sources/HPRD.rb +23 -0
- data/lib/rbbt/sources/InterPro.rb +98 -8
- data/lib/rbbt/sources/NCI.rb +7 -5
- data/lib/rbbt/sources/PSI_MI.rb +41 -0
- data/lib/rbbt/sources/STITCH.rb +92 -0
- data/lib/rbbt/sources/barcode.rb +0 -3
- data/lib/rbbt/sources/biomart.rb +3 -3
- data/lib/rbbt/sources/dbSNP.rb +100 -0
- data/lib/rbbt/sources/ensembl_ftp.rb +79 -0
- data/lib/rbbt/sources/entrez.rb +2 -2
- data/lib/rbbt/sources/genomes1000.rb +45 -0
- data/lib/rbbt/sources/go.rb +16 -4
- data/lib/rbbt/sources/organism.rb +80 -12
- data/lib/rbbt/sources/pfam.rb +63 -3
- data/lib/rbbt/sources/pubmed.rb +10 -3
- data/lib/rbbt/sources/reactome.rb +82 -0
- data/lib/rbbt/sources/tfacts.rb +37 -36
- data/lib/rbbt/sources/uniprot.rb +25 -23
- data/share/Ensembl/release_dates +18 -0
- data/share/install/Genomes1000/Rakefile +15 -0
- data/share/install/JoChem/Rakefile +11 -3
- data/share/install/NCI/Rakefile +54 -16
- data/share/install/Organism/Hsa/Rakefile +3 -2
- data/share/install/Organism/Rno/Rakefile +1 -2
- data/share/install/Organism/Sce/Rakefile +43 -45
- data/share/install/Organism/organism_helpers.rb +360 -96
- data/share/install/STITCH/Rakefile +0 -0
- data/test/rbbt/sources/test_organism.rb +26 -7
- data/test/rbbt/sources/test_pubmed.rb +5 -0
- metadata +94 -97
- data/share/install/InterPro/Rakefile +0 -29
File without changes
|
@@ -1,17 +1,22 @@
|
|
1
1
|
require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
|
2
2
|
require 'rbbt/sources/organism'
|
3
3
|
require 'test/unit'
|
4
|
+
require 'rbbt/sources/ensembl_ftp'
|
5
|
+
|
6
|
+
class TestOrganism < Test::Unit::TestCase
|
7
|
+
|
8
|
+
def test_known_ids
|
9
|
+
assert Organism.known_ids("Hsa").include?("Associated Gene Name")
|
10
|
+
end
|
4
11
|
|
5
|
-
class TestEntrez < Test::Unit::TestCase
|
6
12
|
def test_location
|
7
13
|
assert_equal "share/organisms/Sce/identifiers", Organism.identifiers('Sce')
|
8
14
|
end
|
9
15
|
|
10
|
-
|
11
16
|
def test_identifiers
|
12
17
|
assert Organism.identifiers('Hsa').tsv(:key_field => "Entrez Gene ID", :persist => true)['1020']["Associated Gene Name"].include?('CDK5')
|
13
18
|
assert Organism.identifiers('Sce').tsv(:persist => true)['S000006120']["Ensembl Gene ID"].include?('YPL199C')
|
14
|
-
assert Organism
|
19
|
+
assert Organism.identifiers("Sce").tsv(:persist => true)['S000006120']["Ensembl Gene ID"].include?('YPL199C')
|
15
20
|
end
|
16
21
|
|
17
22
|
def test_lexicon
|
@@ -21,8 +26,8 @@ class TestEntrez < Test::Unit::TestCase
|
|
21
26
|
def test_guess_id
|
22
27
|
ensembl = %w(YOL044W YDR289C YAL034C YGR246C ARS519 tH(GUG)E2 YDR218C YLR002C YGL224C)
|
23
28
|
gene_name = %w(SNR64 MIP1 MRPS18 TFB2 JEN1 IVY1 TRS33 GAS3)
|
24
|
-
assert_equal "Associated Gene Name", Organism
|
25
|
-
assert_equal "Ensembl Gene ID", Organism
|
29
|
+
assert_equal "Associated Gene Name", Organism.guess_id("Sce", gene_name).first
|
30
|
+
assert_equal "Ensembl Gene ID", Organism.guess_id("Sce", ensembl).first
|
26
31
|
end
|
27
32
|
|
28
33
|
def test_organisms
|
@@ -36,12 +41,26 @@ class TestEntrez < Test::Unit::TestCase
|
|
36
41
|
tsv.fields = []
|
37
42
|
tsv.namespace = "Hsa"
|
38
43
|
|
39
|
-
Organism
|
40
|
-
Organism
|
44
|
+
Organism.attach_translations "Hsa", tsv, "Associated Gene Name"
|
45
|
+
Organism.attach_translations "Hsa", tsv, "Ensembl Gene ID"
|
41
46
|
|
42
47
|
assert_equal "CDK5", tsv["1020"]["Associated Gene Name"]
|
43
48
|
end
|
44
49
|
|
50
|
+
def test_entrez_taxids
|
51
|
+
assert_equal "Hsa", Organism.entrez_taxid_organism('9606')
|
52
|
+
end
|
53
|
+
|
54
|
+
def test_lift_over
|
55
|
+
mutation_19 = "19:21131664:T"
|
56
|
+
mutation_18 = "19:20923504:T"
|
57
|
+
source_build = "Hsa/jun2011"
|
58
|
+
target_build = "Hsa/may2009"
|
59
|
+
|
60
|
+
assert_equal mutation_18, Organism.liftOver([mutation_19], source_build, target_build).first
|
61
|
+
assert_equal mutation_19, Organism.liftOver([mutation_18], target_build, source_build).first
|
62
|
+
end
|
63
|
+
|
45
64
|
#def test_genes_at_chromosome
|
46
65
|
# pos = [12, 117799500]
|
47
66
|
# assert_equal "ENSG00000089250", Organism::Hsa.genes_at_chromosome_positions(pos.first, pos.last)
|
@@ -30,6 +30,11 @@ class TestPubMed < Test::Unit::TestCase
|
|
30
30
|
assert(PubMed.query('chagoyen[All Fields] AND ("loattrfull text"[sb] AND hasabstract[text])').include? '16438716')
|
31
31
|
end
|
32
32
|
|
33
|
+
def test_year
|
34
|
+
pmid = '16438716'
|
35
|
+
assert_equal "2006", PubMed.get_article(pmid).year
|
36
|
+
end
|
37
|
+
|
33
38
|
def test_bibentry
|
34
39
|
assert("vazquez2008sent", PubMed::Article.make_bibentry('vazquez', 2008, "SENT: Semantic features in text"))
|
35
40
|
assert("vazquez2008aes", PubMed::Article.make_bibentry('vazquez', 2008, "An Example System"))
|
metadata
CHANGED
@@ -1,115 +1,119 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-sources
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 2.0.0
|
5
5
|
prerelease:
|
6
|
-
segments:
|
7
|
-
- 1
|
8
|
-
- 2
|
9
|
-
- 0
|
10
|
-
version: 1.2.0
|
11
6
|
platform: ruby
|
12
|
-
authors:
|
7
|
+
authors:
|
13
8
|
- Miguel Vazquez
|
14
9
|
autorequire:
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
dependencies:
|
21
|
-
- !ruby/object:Gem::Dependency
|
12
|
+
date: 2012-12-21 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
22
15
|
name: rbbt-util
|
23
|
-
|
24
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
25
17
|
none: false
|
26
|
-
requirements:
|
27
|
-
- -
|
28
|
-
- !ruby/object:Gem::Version
|
29
|
-
hash: 63
|
30
|
-
segments:
|
31
|
-
- 4
|
32
|
-
- 0
|
33
|
-
- 0
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
34
21
|
version: 4.0.0
|
35
22
|
type: :runtime
|
36
|
-
version_requirements: *id001
|
37
|
-
- !ruby/object:Gem::Dependency
|
38
|
-
name: rbbt-text
|
39
23
|
prerelease: false
|
40
|
-
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 4.0.0
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: rbbt-text
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
41
33
|
none: false
|
42
|
-
requirements:
|
43
|
-
- -
|
44
|
-
- !ruby/object:Gem::Version
|
45
|
-
|
46
|
-
segments:
|
47
|
-
- 0
|
48
|
-
version: "0"
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
49
38
|
type: :runtime
|
50
|
-
version_requirements: *id002
|
51
|
-
- !ruby/object:Gem::Dependency
|
52
|
-
name: mechanize
|
53
39
|
prerelease: false
|
54
|
-
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
55
41
|
none: false
|
56
|
-
requirements:
|
57
|
-
- -
|
58
|
-
- !ruby/object:Gem::Version
|
59
|
-
|
60
|
-
|
61
|
-
- 0
|
62
|
-
version: "0"
|
63
|
-
type: :runtime
|
64
|
-
version_requirements: *id003
|
65
|
-
- !ruby/object:Gem::Dependency
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
66
47
|
name: libxml-ruby
|
67
|
-
|
68
|
-
requirement: &id004 !ruby/object:Gem::Requirement
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
69
49
|
none: false
|
70
|
-
requirements:
|
71
|
-
- -
|
72
|
-
- !ruby/object:Gem::Version
|
73
|
-
|
74
|
-
segments:
|
75
|
-
- 0
|
76
|
-
version: "0"
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
77
54
|
type: :runtime
|
78
|
-
|
79
|
-
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
- !ruby/object:Gem::Dependency
|
80
63
|
name: bio
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
type: :runtime
|
81
71
|
prerelease: false
|
82
|
-
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
83
73
|
none: false
|
84
|
-
requirements:
|
85
|
-
- -
|
86
|
-
- !ruby/object:Gem::Version
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
- !ruby/object:Gem::Dependency
|
79
|
+
name: mechanize
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ! '>='
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '0'
|
91
86
|
type: :runtime
|
92
|
-
|
87
|
+
prerelease: false
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ! '>='
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
93
94
|
description: Data sources like PubMed, Entrez Gene, or Gene Ontology
|
94
95
|
email: miguel.vazquez@fdi.ucm.es
|
95
96
|
executables: []
|
96
|
-
|
97
97
|
extensions: []
|
98
|
-
|
99
98
|
extra_rdoc_files: []
|
100
|
-
|
101
|
-
files:
|
99
|
+
files:
|
102
100
|
- etc/biomart/missing_in_archive
|
103
101
|
- lib/rbbt/sources/COSMIC.rb
|
104
102
|
- lib/rbbt/sources/COSTART.rb
|
105
103
|
- lib/rbbt/sources/CTCAE.rb
|
104
|
+
- lib/rbbt/sources/HPRD.rb
|
106
105
|
- lib/rbbt/sources/InterPro.rb
|
107
106
|
- lib/rbbt/sources/NCI.rb
|
107
|
+
- lib/rbbt/sources/PSI_MI.rb
|
108
|
+
- lib/rbbt/sources/STITCH.rb
|
108
109
|
- lib/rbbt/sources/barcode.rb
|
109
110
|
- lib/rbbt/sources/bibtex.rb
|
110
111
|
- lib/rbbt/sources/biomart.rb
|
111
112
|
- lib/rbbt/sources/cath.rb
|
113
|
+
- lib/rbbt/sources/dbSNP.rb
|
114
|
+
- lib/rbbt/sources/ensembl_ftp.rb
|
112
115
|
- lib/rbbt/sources/entrez.rb
|
116
|
+
- lib/rbbt/sources/genomes1000.rb
|
113
117
|
- lib/rbbt/sources/go.rb
|
114
118
|
- lib/rbbt/sources/gscholar.rb
|
115
119
|
- lib/rbbt/sources/jochem.rb
|
@@ -117,10 +121,12 @@ files:
|
|
117
121
|
- lib/rbbt/sources/pfam.rb
|
118
122
|
- lib/rbbt/sources/polysearch.rb
|
119
123
|
- lib/rbbt/sources/pubmed.rb
|
124
|
+
- lib/rbbt/sources/reactome.rb
|
120
125
|
- lib/rbbt/sources/tfacts.rb
|
121
126
|
- lib/rbbt/sources/uniprot.rb
|
122
127
|
- lib/rbbt/sources/wgEncodeBroadHmm.rb
|
123
|
-
- share/
|
128
|
+
- share/Ensembl/release_dates
|
129
|
+
- share/install/Genomes1000/Rakefile
|
124
130
|
- share/install/JoChem/Rakefile
|
125
131
|
- share/install/NCI/Rakefile
|
126
132
|
- share/install/Organism/Hsa/Rakefile
|
@@ -128,6 +134,7 @@ files:
|
|
128
134
|
- share/install/Organism/Rno/Rakefile
|
129
135
|
- share/install/Organism/Sce/Rakefile
|
130
136
|
- share/install/Organism/organism_helpers.rb
|
137
|
+
- share/install/STITCH/Rakefile
|
131
138
|
- share/install/lib/helpers.rb
|
132
139
|
- test/test_helper.rb
|
133
140
|
- test/rbbt/sources/test_entrez.rb
|
@@ -135,41 +142,31 @@ files:
|
|
135
142
|
- test/rbbt/sources/test_go.rb
|
136
143
|
- test/rbbt/sources/test_biomart.rb
|
137
144
|
- test/rbbt/sources/test_organism.rb
|
138
|
-
has_rdoc: true
|
139
145
|
homepage: http://github.com/mikisvaz/rbbt-sources
|
140
146
|
licenses: []
|
141
|
-
|
142
147
|
post_install_message:
|
143
148
|
rdoc_options: []
|
144
|
-
|
145
|
-
require_paths:
|
149
|
+
require_paths:
|
146
150
|
- lib
|
147
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
151
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
148
152
|
none: false
|
149
|
-
requirements:
|
150
|
-
- -
|
151
|
-
- !ruby/object:Gem::Version
|
152
|
-
|
153
|
-
|
154
|
-
- 0
|
155
|
-
version: "0"
|
156
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
153
|
+
requirements:
|
154
|
+
- - ! '>='
|
155
|
+
- !ruby/object:Gem::Version
|
156
|
+
version: '0'
|
157
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
157
158
|
none: false
|
158
|
-
requirements:
|
159
|
-
- -
|
160
|
-
- !ruby/object:Gem::Version
|
161
|
-
|
162
|
-
segments:
|
163
|
-
- 0
|
164
|
-
version: "0"
|
159
|
+
requirements:
|
160
|
+
- - ! '>='
|
161
|
+
- !ruby/object:Gem::Version
|
162
|
+
version: '0'
|
165
163
|
requirements: []
|
166
|
-
|
167
164
|
rubyforge_project:
|
168
|
-
rubygems_version: 1.
|
165
|
+
rubygems_version: 1.8.24
|
169
166
|
signing_key:
|
170
167
|
specification_version: 3
|
171
168
|
summary: Data sources for the Ruby Bioinformatics Toolkit (rbbt)
|
172
|
-
test_files:
|
169
|
+
test_files:
|
173
170
|
- test/test_helper.rb
|
174
171
|
- test/rbbt/sources/test_entrez.rb
|
175
172
|
- test/rbbt/sources/test_pubmed.rb
|
@@ -1,29 +0,0 @@
|
|
1
|
-
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__),'..', '..', '..', '..', 'lib'))
|
2
|
-
require 'rbbt/sources/biomart'
|
3
|
-
require 'rbbt/sources/entrez'
|
4
|
-
|
5
|
-
$interpro_db = 'entry'
|
6
|
-
|
7
|
-
$interpro_id = ['InterPro Entry Accession','entry_id']
|
8
|
-
|
9
|
-
$interpro_pos = [
|
10
|
-
["UniProt/SwissProt Accession", "protein_ac"],
|
11
|
-
["Match Start Position", "pos_from"],
|
12
|
-
["Match Stop Position ", "pos_to"]
|
13
|
-
]
|
14
|
-
|
15
|
-
file 'interpro_positions' do |t|
|
16
|
-
Open.write(t.name, InterPro.tsv($interpro_db, $interpro_id, $interpro_pos, [], nil, :type => :double, :nocache => true).to_s)
|
17
|
-
end
|
18
|
-
|
19
|
-
file 'interpro_names' do |t|
|
20
|
-
Open.write(t.name, "#: :type=:list\n#InterPro Entry Accession\tName\n" + Open.read("ftp://ftp.ebi.ac.uk/pub/databases/interpro/names.dat"))
|
21
|
-
end
|
22
|
-
|
23
|
-
|
24
|
-
file 'interpro_short_names' do |t|
|
25
|
-
Open.write(t.name, "#: :type=:list\n#InterPro Entry Accession\tShort Name\n" + Open.read("ftp://ftp.ebi.ac.uk/pub/databases/interpro/short_names.dat"))
|
26
|
-
end
|
27
|
-
|
28
|
-
|
29
|
-
|