goruby 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/lib/go.rb +61 -16
- data/test/test_goruby.rb +23 -14
- metadata +49 -21
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.2.0
|
data/lib/go.rb
CHANGED
@@ -15,11 +15,11 @@ module Bio
|
|
15
15
|
def go_offspring(go_id)
|
16
16
|
o = ontology_abbreviation(go_id)
|
17
17
|
case o
|
18
|
-
|
18
|
+
when 'MF'
|
19
19
|
return molecular_function_offspring(go_id)
|
20
|
-
|
20
|
+
when 'CC'
|
21
21
|
return cellular_component_offspring(go_id)
|
22
|
-
|
22
|
+
when 'BP'
|
23
23
|
return biological_process_offspring(go_id)
|
24
24
|
else
|
25
25
|
raise Exception, "Unknown ontology abbreviation found: #{o} for go id: #{go_id}"
|
@@ -32,7 +32,7 @@ module Bio
|
|
32
32
|
def cellular_component_offspring(go_term)
|
33
33
|
go_get(go_term, 'GOCCOFFSPRING')
|
34
34
|
end
|
35
|
-
|
35
|
+
|
36
36
|
# Return an array of GO identifiers that are the offspring (all the descendents)
|
37
37
|
# of the given GO term given that it is a molecular function
|
38
38
|
# GO term.
|
@@ -73,13 +73,17 @@ module Bio
|
|
73
73
|
# A performance note:
|
74
74
|
# According to some tests that I ran, finding GOID by searching GOTERM
|
75
75
|
# is much faster than by GOSYNONYM. A
|
76
|
-
|
76
|
+
|
77
77
|
begin
|
78
78
|
# Assume it is a primary ID, as it likely will be most of the time.
|
79
79
|
return @r.eval_R("GOID(get('#{go_id_or_synonym_id}', GOTERM))")
|
80
80
|
rescue RException
|
81
81
|
# if no primary is found, try finding it by synonym. raise RException if none is found
|
82
|
-
|
82
|
+
begin
|
83
|
+
return @r.eval_R("GOID(get('#{go_id_or_synonym_id}', GOSYNONYM))")
|
84
|
+
rescue RException => e
|
85
|
+
raise RException, "#{e.message}: GO Identifier '#{go_id_or_synonym_id}' does not appear to be a primary ID nor synonym. Is the GO.db database up to date?"
|
86
|
+
end
|
83
87
|
end
|
84
88
|
end
|
85
89
|
|
@@ -112,7 +116,7 @@ module Bio
|
|
112
116
|
|
113
117
|
return gos.flatten.uniq
|
114
118
|
end
|
115
|
-
|
119
|
+
|
116
120
|
# Does the subsumer subsume the subsumee? i.e. Does it include
|
117
121
|
# the subsumee as one of its children in the GO tree?
|
118
122
|
#
|
@@ -122,10 +126,10 @@ module Bio
|
|
122
126
|
# map the subsumee to its primary (non-synonym) id
|
123
127
|
primaree = self.primary_go_id(subsumee_go_id)
|
124
128
|
primarer = self.primary_go_id(subsumer_go_id)
|
125
|
-
|
129
|
+
|
126
130
|
# return if they are the same - the obvious case
|
127
131
|
return true if primaree == primarer
|
128
|
-
|
132
|
+
|
129
133
|
# return if subsumee is a descendent of subsumer
|
130
134
|
return go_offspring(primarer).include?(primaree)
|
131
135
|
end
|
@@ -135,18 +139,59 @@ module Bio
|
|
135
139
|
def subsume_tester(subsumer_go_id, check_for_synonym=true)
|
136
140
|
Go::SubsumeTester.new(self, subsumer_go_id, check_for_synonym)
|
137
141
|
end
|
138
|
-
|
142
|
+
|
139
143
|
# Return 'MF', 'CC' or 'BP' corresponding to the ontology of the given GO id
|
140
144
|
def ontology_abbreviation(go_id)
|
141
145
|
@r.eval_R("Ontology(get('#{go_id}', GOTERM))")
|
142
146
|
end
|
143
|
-
|
147
|
+
|
148
|
+
# Return an array of GO ids that correspond to the parent GO terms
|
149
|
+
# in the ontology. This isn't the most efficient this could be, because it
|
150
|
+
# probably gets the parents for a single id multiple times.
|
151
|
+
def ancestors_cc(primary_go_id)
|
152
|
+
go_get(primary_go_id, 'GOCCANCESTOR')
|
153
|
+
end
|
154
|
+
|
155
|
+
# Return an array of ancestors of the GO term or any
|
156
|
+
# of the GO terms' children, in no particular order. This is useful
|
157
|
+
# when wanting to know if a term has an annotation that is
|
158
|
+
# non-overlapping with a particular go term. For instance, 'membrane'
|
159
|
+
# is cordial with 'nucleus', since they are both ancestors of
|
160
|
+
# 'nuclear membrane'. However, 'mitochondrion' and 'nucleus' are
|
161
|
+
# not cordial, since they share no common offspring.
|
162
|
+
def cordial_cc(primary_go_id)
|
163
|
+
# cordial can be direct ancestors of a term - then the common term
|
164
|
+
# is this term itself
|
165
|
+
cordial_ids = ancestors_cc(primary_go_id)
|
166
|
+
|
167
|
+
# collect all ancestors of all offspring
|
168
|
+
offspring = cellular_component_offspring(primary_go_id)
|
169
|
+
offspring.each do |o|
|
170
|
+
cordial_ids.push ancestors_cc(o)
|
171
|
+
cordial_ids.push o
|
172
|
+
end
|
173
|
+
|
174
|
+
# remove the term itself and any children - they are not
|
175
|
+
# merely cordial
|
176
|
+
cordial_ids = cordial_ids.flatten.uniq.reject do |i|
|
177
|
+
offspring.include?(i) or primary_go_id==i
|
178
|
+
end
|
179
|
+
|
180
|
+
# return a uniq array of cordial terms
|
181
|
+
cordial_ids
|
182
|
+
end
|
183
|
+
|
184
|
+
# When repeatedly testing subsumption by a certain GO term,
|
185
|
+
# it is faster to instantiate a SubsumeTester and use
|
186
|
+
# Bio::GO::SubsumeTester#subsume?, rather than
|
187
|
+
# repeatedly calling Bio::GO#subsume? because SubsumeTester
|
188
|
+
# does caching.
|
144
189
|
class SubsumeTester
|
145
190
|
attr_reader :subsumer_offspring, :master_go_id
|
146
|
-
|
191
|
+
|
147
192
|
def initialize(go_object, subsumer_go_id, check_for_synonym=true)
|
148
193
|
@go = go_object
|
149
|
-
|
194
|
+
|
150
195
|
if check_for_synonym
|
151
196
|
@master_go_id = @go.primary_go_id(subsumer_go_id)
|
152
197
|
else
|
@@ -155,11 +200,11 @@ module Bio
|
|
155
200
|
@subsumer_offspring = @go.go_offspring(@master_go_id)
|
156
201
|
@subsumer_offspring_hash = [@subsumer_offspring].flatten.to_hash
|
157
202
|
end
|
158
|
-
|
203
|
+
|
159
204
|
def subsume?(subsumer_go_id, check_for_synonym=true)
|
160
205
|
primaree = check_for_synonym ?
|
161
|
-
|
162
|
-
|
206
|
+
@go.primary_go_id(subsumer_go_id) :
|
207
|
+
subsumer_go_id
|
163
208
|
return true if @master_go_id == primaree
|
164
209
|
@subsumer_offspring_hash.has_key?(primaree)
|
165
210
|
end
|
data/test/test_goruby.rb
CHANGED
@@ -15,12 +15,12 @@ class GoTest < Test::Unit::TestCase
|
|
15
15
|
assert_equal [], @go.cellular_component_offspring('GO:0031676')
|
16
16
|
|
17
17
|
# test multiple offspring
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
]
|
23
|
-
|
18
|
+
old_offspring = ["GO:0030077", "GO:0030078", "GO:0030079", "GO:0030080", "GO:0030081", "GO:0030082", "GO:0031633", "GO:0031676", "GO:0042717", "GO:0048493", "GO:0048494"]
|
19
|
+
offspring = @go.cellular_component_offspring('GO:0042716')
|
20
|
+
assert offspring.length >= old_offspring.length
|
21
|
+
assert offspring.include?(old_offspring[0])
|
22
|
+
assert offspring.include?(old_offspring[1])
|
23
|
+
assert offspring.include?(old_offspring[old_offspring.length-2])
|
24
24
|
|
25
25
|
# test not in CC
|
26
26
|
assert_raise RException do
|
@@ -39,7 +39,7 @@ class GoTest < Test::Unit::TestCase
|
|
39
39
|
def test_go_term
|
40
40
|
# test MF
|
41
41
|
assert_equal "G-protein coupled receptor activity", @go.term('GO:0004930')
|
42
|
-
|
42
|
+
|
43
43
|
# test CC
|
44
44
|
assert_equal 'endoplasmic reticulum', @go.term('GO:0005783')
|
45
45
|
end
|
@@ -60,13 +60,13 @@ class GoTest < Test::Unit::TestCase
|
|
60
60
|
def test_subsume
|
61
61
|
# test normal truth
|
62
62
|
assert @go.subsume?('GO:0003824', 'GO:0050333')
|
63
|
-
|
63
|
+
|
64
64
|
# test subsumee is synonym
|
65
65
|
assert @go.subsume?('GO:0003824', 'GO:0048253')
|
66
|
-
|
66
|
+
|
67
67
|
# test equal terms
|
68
68
|
assert @go.subsume?('GO:0009536','GO:0009536')
|
69
|
-
|
69
|
+
|
70
70
|
# test falsity - plastid part does not subsume plastid
|
71
71
|
assert_equal false, @go.subsume?('GO:0044435','GO:0009536')
|
72
72
|
end
|
@@ -83,26 +83,35 @@ class GoTest < Test::Unit::TestCase
|
|
83
83
|
tester = @go.subsume_tester('GO:0044435')
|
84
84
|
assert tester.subsume?('GO:0044435')
|
85
85
|
end
|
86
|
-
|
86
|
+
|
87
87
|
def test_subsume_tester_no_check_synonym
|
88
88
|
# test normal truth
|
89
89
|
tester = @go.subsume_tester('GO:0003824')
|
90
90
|
assert_equal true, tester.subsume?('GO:0050333', false)
|
91
91
|
assert_equal true, tester.subsume?('GO:0050333', true)
|
92
|
-
|
92
|
+
|
93
93
|
# test subsumee is synonym
|
94
94
|
tester = @go.subsume_tester('GO:0003824')
|
95
95
|
assert_equal false, tester.subsume?('GO:0048253', false)
|
96
96
|
assert_equal true, tester.subsume?('GO:0048253', true)
|
97
|
-
|
97
|
+
|
98
98
|
# test equal terms
|
99
99
|
tester = @go.subsume_tester('GO:0050333')
|
100
100
|
assert_equal true, tester.subsume?('GO:0050333', false)
|
101
101
|
assert_equal true, tester.subsume?('GO:0050333', true)
|
102
|
-
|
102
|
+
|
103
103
|
# test equal that is synonym
|
104
104
|
tester = @go.subsume_tester('GO:0050333')
|
105
105
|
assert_equal false, tester.subsume?('GO:0048253', false)
|
106
106
|
assert_equal true, tester.subsume?('GO:0048253', true)
|
107
107
|
end
|
108
|
+
|
109
|
+
def test_cordial_cc
|
110
|
+
nucleus_cordial = @go.cordial_cc('GO:0005634')
|
111
|
+
|
112
|
+
assert nucleus_cordial.include?('GO:0005575'), "include cellular component itself"
|
113
|
+
assert_equal false, nucleus_cordial.include?('GO:0005634'), 'don\'t include nucleus itself'
|
114
|
+
assert nucleus_cordial.include?('GO:0005623'), 'include a direct ancestor of nucleus'
|
115
|
+
assert nucleus_cordial.include?('GO:0005623'), 'include membrane, a \'indirect\' direct ancestor of nucleus (it is an ancestor of nuclear membrane)'
|
116
|
+
end
|
108
117
|
end
|
metadata
CHANGED
@@ -1,7 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: goruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
4
|
+
hash: 23
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 2
|
9
|
+
- 0
|
10
|
+
version: 0.2.0
|
5
11
|
platform: ruby
|
6
12
|
authors:
|
7
13
|
- Ben J Woodcroft
|
@@ -9,49 +15,65 @@ autorequire:
|
|
9
15
|
bindir: bin
|
10
16
|
cert_chain: []
|
11
17
|
|
12
|
-
date: 2010-
|
18
|
+
date: 2010-11-03 00:00:00 +11:00
|
13
19
|
default_executable:
|
14
20
|
dependencies:
|
15
21
|
- !ruby/object:Gem::Dependency
|
16
22
|
name: thoughtbot-shoulda
|
17
|
-
|
18
|
-
|
19
|
-
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
20
26
|
requirements:
|
21
27
|
- - ">="
|
22
28
|
- !ruby/object:Gem::Version
|
29
|
+
hash: 3
|
30
|
+
segments:
|
31
|
+
- 0
|
23
32
|
version: "0"
|
24
|
-
|
33
|
+
type: :development
|
34
|
+
version_requirements: *id001
|
25
35
|
- !ruby/object:Gem::Dependency
|
26
36
|
name: array_pair
|
27
|
-
|
28
|
-
|
29
|
-
|
37
|
+
prerelease: false
|
38
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
30
40
|
requirements:
|
31
41
|
- - ">="
|
32
42
|
- !ruby/object:Gem::Version
|
43
|
+
hash: 3
|
44
|
+
segments:
|
45
|
+
- 0
|
33
46
|
version: "0"
|
34
|
-
|
47
|
+
type: :runtime
|
48
|
+
version_requirements: *id002
|
35
49
|
- !ruby/object:Gem::Dependency
|
36
50
|
name: rsruby
|
37
|
-
|
38
|
-
|
39
|
-
|
51
|
+
prerelease: false
|
52
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
40
54
|
requirements:
|
41
55
|
- - ">="
|
42
56
|
- !ruby/object:Gem::Version
|
57
|
+
hash: 3
|
58
|
+
segments:
|
59
|
+
- 0
|
43
60
|
version: "0"
|
44
|
-
|
61
|
+
type: :runtime
|
62
|
+
version_requirements: *id003
|
45
63
|
- !ruby/object:Gem::Dependency
|
46
64
|
name: bio
|
47
|
-
|
48
|
-
|
49
|
-
|
65
|
+
prerelease: false
|
66
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
67
|
+
none: false
|
50
68
|
requirements:
|
51
69
|
- - ">="
|
52
70
|
- !ruby/object:Gem::Version
|
71
|
+
hash: 3
|
72
|
+
segments:
|
73
|
+
- 0
|
53
74
|
version: "0"
|
54
|
-
|
75
|
+
type: :runtime
|
76
|
+
version_requirements: *id004
|
55
77
|
description: GoRuby makes it easy to interact with the Gene Ontology by using the infrastructure setup in R. By connecting to R using RSRuby, the database and methods can be interrogated. Plus, keeping the R library up to date is much simpler than having to keep a GO implementation up to date.
|
56
78
|
email: donttrustben near gmail.com
|
57
79
|
executables: []
|
@@ -80,21 +102,27 @@ rdoc_options:
|
|
80
102
|
require_paths:
|
81
103
|
- lib
|
82
104
|
required_ruby_version: !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
83
106
|
requirements:
|
84
107
|
- - ">="
|
85
108
|
- !ruby/object:Gem::Version
|
109
|
+
hash: 3
|
110
|
+
segments:
|
111
|
+
- 0
|
86
112
|
version: "0"
|
87
|
-
version:
|
88
113
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
114
|
+
none: false
|
89
115
|
requirements:
|
90
116
|
- - ">="
|
91
117
|
- !ruby/object:Gem::Version
|
118
|
+
hash: 3
|
119
|
+
segments:
|
120
|
+
- 0
|
92
121
|
version: "0"
|
93
|
-
version:
|
94
122
|
requirements: []
|
95
123
|
|
96
124
|
rubyforge_project:
|
97
|
-
rubygems_version: 1.3.
|
125
|
+
rubygems_version: 1.3.7
|
98
126
|
signing_key:
|
99
127
|
specification_version: 3
|
100
128
|
summary: Gene Ontology (GO) interface for Ruby
|