rbbt-sources 1.2.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/etc/biomart/missing_in_archive +11 -0
- data/lib/rbbt/sources/COSMIC.rb +47 -4
- data/lib/rbbt/sources/HPRD.rb +23 -0
- data/lib/rbbt/sources/InterPro.rb +98 -8
- data/lib/rbbt/sources/NCI.rb +7 -5
- data/lib/rbbt/sources/PSI_MI.rb +41 -0
- data/lib/rbbt/sources/STITCH.rb +92 -0
- data/lib/rbbt/sources/barcode.rb +0 -3
- data/lib/rbbt/sources/biomart.rb +3 -3
- data/lib/rbbt/sources/dbSNP.rb +100 -0
- data/lib/rbbt/sources/ensembl_ftp.rb +79 -0
- data/lib/rbbt/sources/entrez.rb +2 -2
- data/lib/rbbt/sources/genomes1000.rb +45 -0
- data/lib/rbbt/sources/go.rb +16 -4
- data/lib/rbbt/sources/organism.rb +80 -12
- data/lib/rbbt/sources/pfam.rb +63 -3
- data/lib/rbbt/sources/pubmed.rb +10 -3
- data/lib/rbbt/sources/reactome.rb +82 -0
- data/lib/rbbt/sources/tfacts.rb +37 -36
- data/lib/rbbt/sources/uniprot.rb +25 -23
- data/share/Ensembl/release_dates +18 -0
- data/share/install/Genomes1000/Rakefile +15 -0
- data/share/install/JoChem/Rakefile +11 -3
- data/share/install/NCI/Rakefile +54 -16
- data/share/install/Organism/Hsa/Rakefile +3 -2
- data/share/install/Organism/Rno/Rakefile +1 -2
- data/share/install/Organism/Sce/Rakefile +43 -45
- data/share/install/Organism/organism_helpers.rb +360 -96
- data/share/install/STITCH/Rakefile +0 -0
- data/test/rbbt/sources/test_organism.rb +26 -7
- data/test/rbbt/sources/test_pubmed.rb +5 -0
- metadata +94 -97
- data/share/install/InterPro/Rakefile +0 -29
File without changes
|
@@ -1,17 +1,22 @@
|
|
1
1
|
require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
|
2
2
|
require 'rbbt/sources/organism'
|
3
3
|
require 'test/unit'
|
4
|
+
require 'rbbt/sources/ensembl_ftp'
|
5
|
+
|
6
|
+
class TestOrganism < Test::Unit::TestCase
|
7
|
+
|
8
|
+
def test_known_ids
|
9
|
+
assert Organism.known_ids("Hsa").include?("Associated Gene Name")
|
10
|
+
end
|
4
11
|
|
5
|
-
class TestEntrez < Test::Unit::TestCase
|
6
12
|
def test_location
|
7
13
|
assert_equal "share/organisms/Sce/identifiers", Organism.identifiers('Sce')
|
8
14
|
end
|
9
15
|
|
10
|
-
|
11
16
|
def test_identifiers
|
12
17
|
assert Organism.identifiers('Hsa').tsv(:key_field => "Entrez Gene ID", :persist => true)['1020']["Associated Gene Name"].include?('CDK5')
|
13
18
|
assert Organism.identifiers('Sce').tsv(:persist => true)['S000006120']["Ensembl Gene ID"].include?('YPL199C')
|
14
|
-
assert Organism
|
19
|
+
assert Organism.identifiers("Sce").tsv(:persist => true)['S000006120']["Ensembl Gene ID"].include?('YPL199C')
|
15
20
|
end
|
16
21
|
|
17
22
|
def test_lexicon
|
@@ -21,8 +26,8 @@ class TestEntrez < Test::Unit::TestCase
|
|
21
26
|
def test_guess_id
|
22
27
|
ensembl = %w(YOL044W YDR289C YAL034C YGR246C ARS519 tH(GUG)E2 YDR218C YLR002C YGL224C)
|
23
28
|
gene_name = %w(SNR64 MIP1 MRPS18 TFB2 JEN1 IVY1 TRS33 GAS3)
|
24
|
-
assert_equal "Associated Gene Name", Organism
|
25
|
-
assert_equal "Ensembl Gene ID", Organism
|
29
|
+
assert_equal "Associated Gene Name", Organism.guess_id("Sce", gene_name).first
|
30
|
+
assert_equal "Ensembl Gene ID", Organism.guess_id("Sce", ensembl).first
|
26
31
|
end
|
27
32
|
|
28
33
|
def test_organisms
|
@@ -36,12 +41,26 @@ class TestEntrez < Test::Unit::TestCase
|
|
36
41
|
tsv.fields = []
|
37
42
|
tsv.namespace = "Hsa"
|
38
43
|
|
39
|
-
Organism
|
40
|
-
Organism
|
44
|
+
Organism.attach_translations "Hsa", tsv, "Associated Gene Name"
|
45
|
+
Organism.attach_translations "Hsa", tsv, "Ensembl Gene ID"
|
41
46
|
|
42
47
|
assert_equal "CDK5", tsv["1020"]["Associated Gene Name"]
|
43
48
|
end
|
44
49
|
|
50
|
+
def test_entrez_taxids
|
51
|
+
assert_equal "Hsa", Organism.entrez_taxid_organism('9606')
|
52
|
+
end
|
53
|
+
|
54
|
+
def test_lift_over
|
55
|
+
mutation_19 = "19:21131664:T"
|
56
|
+
mutation_18 = "19:20923504:T"
|
57
|
+
source_build = "Hsa/jun2011"
|
58
|
+
target_build = "Hsa/may2009"
|
59
|
+
|
60
|
+
assert_equal mutation_18, Organism.liftOver([mutation_19], source_build, target_build).first
|
61
|
+
assert_equal mutation_19, Organism.liftOver([mutation_18], target_build, source_build).first
|
62
|
+
end
|
63
|
+
|
45
64
|
#def test_genes_at_chromosome
|
46
65
|
# pos = [12, 117799500]
|
47
66
|
# assert_equal "ENSG00000089250", Organism::Hsa.genes_at_chromosome_positions(pos.first, pos.last)
|
@@ -30,6 +30,11 @@ class TestPubMed < Test::Unit::TestCase
|
|
30
30
|
assert(PubMed.query('chagoyen[All Fields] AND ("loattrfull text"[sb] AND hasabstract[text])').include? '16438716')
|
31
31
|
end
|
32
32
|
|
33
|
+
def test_year
|
34
|
+
pmid = '16438716'
|
35
|
+
assert_equal "2006", PubMed.get_article(pmid).year
|
36
|
+
end
|
37
|
+
|
33
38
|
def test_bibentry
|
34
39
|
assert("vazquez2008sent", PubMed::Article.make_bibentry('vazquez', 2008, "SENT: Semantic features in text"))
|
35
40
|
assert("vazquez2008aes", PubMed::Article.make_bibentry('vazquez', 2008, "An Example System"))
|
metadata
CHANGED
@@ -1,115 +1,119 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-sources
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 2.0.0
|
5
5
|
prerelease:
|
6
|
-
segments:
|
7
|
-
- 1
|
8
|
-
- 2
|
9
|
-
- 0
|
10
|
-
version: 1.2.0
|
11
6
|
platform: ruby
|
12
|
-
authors:
|
7
|
+
authors:
|
13
8
|
- Miguel Vazquez
|
14
9
|
autorequire:
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
dependencies:
|
21
|
-
- !ruby/object:Gem::Dependency
|
12
|
+
date: 2012-12-21 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
22
15
|
name: rbbt-util
|
23
|
-
|
24
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
25
17
|
none: false
|
26
|
-
requirements:
|
27
|
-
- -
|
28
|
-
- !ruby/object:Gem::Version
|
29
|
-
hash: 63
|
30
|
-
segments:
|
31
|
-
- 4
|
32
|
-
- 0
|
33
|
-
- 0
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
34
21
|
version: 4.0.0
|
35
22
|
type: :runtime
|
36
|
-
version_requirements: *id001
|
37
|
-
- !ruby/object:Gem::Dependency
|
38
|
-
name: rbbt-text
|
39
23
|
prerelease: false
|
40
|
-
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 4.0.0
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: rbbt-text
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
41
33
|
none: false
|
42
|
-
requirements:
|
43
|
-
- -
|
44
|
-
- !ruby/object:Gem::Version
|
45
|
-
|
46
|
-
segments:
|
47
|
-
- 0
|
48
|
-
version: "0"
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
49
38
|
type: :runtime
|
50
|
-
version_requirements: *id002
|
51
|
-
- !ruby/object:Gem::Dependency
|
52
|
-
name: mechanize
|
53
39
|
prerelease: false
|
54
|
-
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
55
41
|
none: false
|
56
|
-
requirements:
|
57
|
-
- -
|
58
|
-
- !ruby/object:Gem::Version
|
59
|
-
|
60
|
-
|
61
|
-
- 0
|
62
|
-
version: "0"
|
63
|
-
type: :runtime
|
64
|
-
version_requirements: *id003
|
65
|
-
- !ruby/object:Gem::Dependency
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
66
47
|
name: libxml-ruby
|
67
|
-
|
68
|
-
requirement: &id004 !ruby/object:Gem::Requirement
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
69
49
|
none: false
|
70
|
-
requirements:
|
71
|
-
- -
|
72
|
-
- !ruby/object:Gem::Version
|
73
|
-
|
74
|
-
segments:
|
75
|
-
- 0
|
76
|
-
version: "0"
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
77
54
|
type: :runtime
|
78
|
-
|
79
|
-
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
- !ruby/object:Gem::Dependency
|
80
63
|
name: bio
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
type: :runtime
|
81
71
|
prerelease: false
|
82
|
-
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
83
73
|
none: false
|
84
|
-
requirements:
|
85
|
-
- -
|
86
|
-
- !ruby/object:Gem::Version
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
- !ruby/object:Gem::Dependency
|
79
|
+
name: mechanize
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ! '>='
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '0'
|
91
86
|
type: :runtime
|
92
|
-
|
87
|
+
prerelease: false
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ! '>='
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
93
94
|
description: Data sources like PubMed, Entrez Gene, or Gene Ontology
|
94
95
|
email: miguel.vazquez@fdi.ucm.es
|
95
96
|
executables: []
|
96
|
-
|
97
97
|
extensions: []
|
98
|
-
|
99
98
|
extra_rdoc_files: []
|
100
|
-
|
101
|
-
files:
|
99
|
+
files:
|
102
100
|
- etc/biomart/missing_in_archive
|
103
101
|
- lib/rbbt/sources/COSMIC.rb
|
104
102
|
- lib/rbbt/sources/COSTART.rb
|
105
103
|
- lib/rbbt/sources/CTCAE.rb
|
104
|
+
- lib/rbbt/sources/HPRD.rb
|
106
105
|
- lib/rbbt/sources/InterPro.rb
|
107
106
|
- lib/rbbt/sources/NCI.rb
|
107
|
+
- lib/rbbt/sources/PSI_MI.rb
|
108
|
+
- lib/rbbt/sources/STITCH.rb
|
108
109
|
- lib/rbbt/sources/barcode.rb
|
109
110
|
- lib/rbbt/sources/bibtex.rb
|
110
111
|
- lib/rbbt/sources/biomart.rb
|
111
112
|
- lib/rbbt/sources/cath.rb
|
113
|
+
- lib/rbbt/sources/dbSNP.rb
|
114
|
+
- lib/rbbt/sources/ensembl_ftp.rb
|
112
115
|
- lib/rbbt/sources/entrez.rb
|
116
|
+
- lib/rbbt/sources/genomes1000.rb
|
113
117
|
- lib/rbbt/sources/go.rb
|
114
118
|
- lib/rbbt/sources/gscholar.rb
|
115
119
|
- lib/rbbt/sources/jochem.rb
|
@@ -117,10 +121,12 @@ files:
|
|
117
121
|
- lib/rbbt/sources/pfam.rb
|
118
122
|
- lib/rbbt/sources/polysearch.rb
|
119
123
|
- lib/rbbt/sources/pubmed.rb
|
124
|
+
- lib/rbbt/sources/reactome.rb
|
120
125
|
- lib/rbbt/sources/tfacts.rb
|
121
126
|
- lib/rbbt/sources/uniprot.rb
|
122
127
|
- lib/rbbt/sources/wgEncodeBroadHmm.rb
|
123
|
-
- share/
|
128
|
+
- share/Ensembl/release_dates
|
129
|
+
- share/install/Genomes1000/Rakefile
|
124
130
|
- share/install/JoChem/Rakefile
|
125
131
|
- share/install/NCI/Rakefile
|
126
132
|
- share/install/Organism/Hsa/Rakefile
|
@@ -128,6 +134,7 @@ files:
|
|
128
134
|
- share/install/Organism/Rno/Rakefile
|
129
135
|
- share/install/Organism/Sce/Rakefile
|
130
136
|
- share/install/Organism/organism_helpers.rb
|
137
|
+
- share/install/STITCH/Rakefile
|
131
138
|
- share/install/lib/helpers.rb
|
132
139
|
- test/test_helper.rb
|
133
140
|
- test/rbbt/sources/test_entrez.rb
|
@@ -135,41 +142,31 @@ files:
|
|
135
142
|
- test/rbbt/sources/test_go.rb
|
136
143
|
- test/rbbt/sources/test_biomart.rb
|
137
144
|
- test/rbbt/sources/test_organism.rb
|
138
|
-
has_rdoc: true
|
139
145
|
homepage: http://github.com/mikisvaz/rbbt-sources
|
140
146
|
licenses: []
|
141
|
-
|
142
147
|
post_install_message:
|
143
148
|
rdoc_options: []
|
144
|
-
|
145
|
-
require_paths:
|
149
|
+
require_paths:
|
146
150
|
- lib
|
147
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
151
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
148
152
|
none: false
|
149
|
-
requirements:
|
150
|
-
- -
|
151
|
-
- !ruby/object:Gem::Version
|
152
|
-
|
153
|
-
|
154
|
-
- 0
|
155
|
-
version: "0"
|
156
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
153
|
+
requirements:
|
154
|
+
- - ! '>='
|
155
|
+
- !ruby/object:Gem::Version
|
156
|
+
version: '0'
|
157
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
157
158
|
none: false
|
158
|
-
requirements:
|
159
|
-
- -
|
160
|
-
- !ruby/object:Gem::Version
|
161
|
-
|
162
|
-
segments:
|
163
|
-
- 0
|
164
|
-
version: "0"
|
159
|
+
requirements:
|
160
|
+
- - ! '>='
|
161
|
+
- !ruby/object:Gem::Version
|
162
|
+
version: '0'
|
165
163
|
requirements: []
|
166
|
-
|
167
164
|
rubyforge_project:
|
168
|
-
rubygems_version: 1.
|
165
|
+
rubygems_version: 1.8.24
|
169
166
|
signing_key:
|
170
167
|
specification_version: 3
|
171
168
|
summary: Data sources for the Ruby Bioinformatics Toolkit (rbbt)
|
172
|
-
test_files:
|
169
|
+
test_files:
|
173
170
|
- test/test_helper.rb
|
174
171
|
- test/rbbt/sources/test_entrez.rb
|
175
172
|
- test/rbbt/sources/test_pubmed.rb
|
@@ -1,29 +0,0 @@
|
|
1
|
-
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__),'..', '..', '..', '..', 'lib'))
|
2
|
-
require 'rbbt/sources/biomart'
|
3
|
-
require 'rbbt/sources/entrez'
|
4
|
-
|
5
|
-
$interpro_db = 'entry'
|
6
|
-
|
7
|
-
$interpro_id = ['InterPro Entry Accession','entry_id']
|
8
|
-
|
9
|
-
$interpro_pos = [
|
10
|
-
["UniProt/SwissProt Accession", "protein_ac"],
|
11
|
-
["Match Start Position", "pos_from"],
|
12
|
-
["Match Stop Position ", "pos_to"]
|
13
|
-
]
|
14
|
-
|
15
|
-
file 'interpro_positions' do |t|
|
16
|
-
Open.write(t.name, InterPro.tsv($interpro_db, $interpro_id, $interpro_pos, [], nil, :type => :double, :nocache => true).to_s)
|
17
|
-
end
|
18
|
-
|
19
|
-
file 'interpro_names' do |t|
|
20
|
-
Open.write(t.name, "#: :type=:list\n#InterPro Entry Accession\tName\n" + Open.read("ftp://ftp.ebi.ac.uk/pub/databases/interpro/names.dat"))
|
21
|
-
end
|
22
|
-
|
23
|
-
|
24
|
-
file 'interpro_short_names' do |t|
|
25
|
-
Open.write(t.name, "#: :type=:list\n#InterPro Entry Accession\tShort Name\n" + Open.read("ftp://ftp.ebi.ac.uk/pub/databases/interpro/short_names.dat"))
|
26
|
-
end
|
27
|
-
|
28
|
-
|
29
|
-
|