goruby 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/lib/go.rb +61 -16
- data/test/test_goruby.rb +23 -14
- metadata +49 -21
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.2.0
|
data/lib/go.rb
CHANGED
@@ -15,11 +15,11 @@ module Bio
|
|
15
15
|
def go_offspring(go_id)
|
16
16
|
o = ontology_abbreviation(go_id)
|
17
17
|
case o
|
18
|
-
|
18
|
+
when 'MF'
|
19
19
|
return molecular_function_offspring(go_id)
|
20
|
-
|
20
|
+
when 'CC'
|
21
21
|
return cellular_component_offspring(go_id)
|
22
|
-
|
22
|
+
when 'BP'
|
23
23
|
return biological_process_offspring(go_id)
|
24
24
|
else
|
25
25
|
raise Exception, "Unknown ontology abbreviation found: #{o} for go id: #{go_id}"
|
@@ -32,7 +32,7 @@ module Bio
|
|
32
32
|
def cellular_component_offspring(go_term)
|
33
33
|
go_get(go_term, 'GOCCOFFSPRING')
|
34
34
|
end
|
35
|
-
|
35
|
+
|
36
36
|
# Return an array of GO identifiers that are the offspring (all the descendents)
|
37
37
|
# of the given GO term given that it is a molecular function
|
38
38
|
# GO term.
|
@@ -73,13 +73,17 @@ module Bio
|
|
73
73
|
# A performance note:
|
74
74
|
# According to some tests that I ran, finding GOID by searching GOTERM
|
75
75
|
# is much faster than by GOSYNONYM. A
|
76
|
-
|
76
|
+
|
77
77
|
begin
|
78
78
|
# Assume it is a primary ID, as it likely will be most of the time.
|
79
79
|
return @r.eval_R("GOID(get('#{go_id_or_synonym_id}', GOTERM))")
|
80
80
|
rescue RException
|
81
81
|
# if no primary is found, try finding it by synonym. Raise RException if none is found
|
82
|
-
|
82
|
+
begin
|
83
|
+
return @r.eval_R("GOID(get('#{go_id_or_synonym_id}', GOSYNONYM))")
|
84
|
+
rescue RException => e
|
85
|
+
raise RException, "#{e.message}: GO Identifier '#{go_id_or_synonym_id}' does not appear to be a primary ID nor synonym. Is the GO.db database up to date?"
|
86
|
+
end
|
83
87
|
end
|
84
88
|
end
|
85
89
|
|
@@ -112,7 +116,7 @@ module Bio
|
|
112
116
|
|
113
117
|
return gos.flatten.uniq
|
114
118
|
end
|
115
|
-
|
119
|
+
|
116
120
|
# Does the subsumer subsume the subsumee? i.e. Does it include
|
117
121
|
# the subsumee as one of its children in the GO tree?
|
118
122
|
#
|
@@ -122,10 +126,10 @@ module Bio
|
|
122
126
|
# map the subsumee to its non-synonymous (primary) id
|
123
127
|
primaree = self.primary_go_id(subsumee_go_id)
|
124
128
|
primarer = self.primary_go_id(subsumer_go_id)
|
125
|
-
|
129
|
+
|
126
130
|
# return if they are the same - the obvious case
|
127
131
|
return true if primaree == primarer
|
128
|
-
|
132
|
+
|
129
133
|
# return if subsumee is a descendant of subsumer
|
130
134
|
return go_offspring(primarer).include?(primaree)
|
131
135
|
end
|
@@ -135,18 +139,59 @@ module Bio
|
|
135
139
|
def subsume_tester(subsumer_go_id, check_for_synonym=true)
|
136
140
|
Go::SubsumeTester.new(self, subsumer_go_id, check_for_synonym)
|
137
141
|
end
|
138
|
-
|
142
|
+
|
139
143
|
# Return 'MF', 'CC' or 'BP' corresponding to the ontology of the given GO id
|
140
144
|
def ontology_abbreviation(go_id)
|
141
145
|
@r.eval_R("Ontology(get('#{go_id}', GOTERM))")
|
142
146
|
end
|
143
|
-
|
147
|
+
|
148
|
+
# Return an array of GO ids that correspond to the parent GO terms
|
149
|
+
# in the ontology. This isn't the most efficient this could be, because it
|
150
|
+
# probably gets the parents for a single id multiple times.
|
151
|
+
def ancestors_cc(primary_go_id)
|
152
|
+
go_get(primary_go_id, 'GOCCANCESTOR')
|
153
|
+
end
|
154
|
+
|
155
|
+
# Return an array of ancestors of the GO term or any
|
156
|
+
# of the GO terms' children, in no particular order. This is useful
|
157
|
+
# when wanting to know if a term has an annotation that is
|
158
|
+
# non-overlapping with a particular go term. For instance, 'membrane'
|
159
|
+
# is cordial with 'nucleus', since they are both ancestors of
|
160
|
+
# 'nuclear membrane'. However, 'mitochondrion' and 'nucleus' are
|
161
|
+
# not cordial, since they share no common offspring.
|
162
|
+
def cordial_cc(primary_go_id)
|
163
|
+
# cordial can be direct ancestors of a term - then the common term
|
164
|
+
# is this term itself
|
165
|
+
cordial_ids = ancestors_cc(primary_go_id)
|
166
|
+
|
167
|
+
# collect all ancestors of all offspring
|
168
|
+
offspring = cellular_component_offspring(primary_go_id)
|
169
|
+
offspring.each do |o|
|
170
|
+
cordial_ids.push ancestors_cc(o)
|
171
|
+
cordial_ids.push o
|
172
|
+
end
|
173
|
+
|
174
|
+
# remove the term itself and any children - they are not
|
175
|
+
# merely cordial
|
176
|
+
cordial_ids = cordial_ids.flatten.uniq.reject do |i|
|
177
|
+
offspring.include?(i) or primary_go_id==i
|
178
|
+
end
|
179
|
+
|
180
|
+
# return a uniq array of cordial terms
|
181
|
+
cordial_ids
|
182
|
+
end
|
183
|
+
|
184
|
+
# When repeatedly testing subsumption by a certain GO term,
|
185
|
+
# it is faster to instantiate a SubsumeTester and use
|
186
|
+
# Bio::GO::SubsumeTester#subsume?, rather than
|
187
|
+
# repeatedly calling Bio::GO#subsume? because SubsumeTester
|
188
|
+
# does caching.
|
144
189
|
class SubsumeTester
|
145
190
|
attr_reader :subsumer_offspring, :master_go_id
|
146
|
-
|
191
|
+
|
147
192
|
def initialize(go_object, subsumer_go_id, check_for_synonym=true)
|
148
193
|
@go = go_object
|
149
|
-
|
194
|
+
|
150
195
|
if check_for_synonym
|
151
196
|
@master_go_id = @go.primary_go_id(subsumer_go_id)
|
152
197
|
else
|
@@ -155,11 +200,11 @@ module Bio
|
|
155
200
|
@subsumer_offspring = @go.go_offspring(@master_go_id)
|
156
201
|
@subsumer_offspring_hash = [@subsumer_offspring].flatten.to_hash
|
157
202
|
end
|
158
|
-
|
203
|
+
|
159
204
|
def subsume?(subsumer_go_id, check_for_synonym=true)
|
160
205
|
primaree = check_for_synonym ?
|
161
|
-
|
162
|
-
|
206
|
+
@go.primary_go_id(subsumer_go_id) :
|
207
|
+
subsumer_go_id
|
163
208
|
return true if @master_go_id == primaree
|
164
209
|
@subsumer_offspring_hash.has_key?(primaree)
|
165
210
|
end
|
data/test/test_goruby.rb
CHANGED
@@ -15,12 +15,12 @@ class GoTest < Test::Unit::TestCase
|
|
15
15
|
assert_equal [], @go.cellular_component_offspring('GO:0031676')
|
16
16
|
|
17
17
|
# test multiple offspring
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
]
|
23
|
-
|
18
|
+
old_offspring = ["GO:0030077", "GO:0030078", "GO:0030079", "GO:0030080", "GO:0030081", "GO:0030082", "GO:0031633", "GO:0031676", "GO:0042717", "GO:0048493", "GO:0048494"]
|
19
|
+
offspring = @go.cellular_component_offspring('GO:0042716')
|
20
|
+
assert offspring.length >= old_offspring.length
|
21
|
+
assert offspring.include?(old_offspring[0])
|
22
|
+
assert offspring.include?(old_offspring[1])
|
23
|
+
assert offspring.include?(old_offspring[old_offspring.length-2])
|
24
24
|
|
25
25
|
# test not in CC
|
26
26
|
assert_raise RException do
|
@@ -39,7 +39,7 @@ class GoTest < Test::Unit::TestCase
|
|
39
39
|
def test_go_term
|
40
40
|
# test MF
|
41
41
|
assert_equal "G-protein coupled receptor activity", @go.term('GO:0004930')
|
42
|
-
|
42
|
+
|
43
43
|
# test CC
|
44
44
|
assert_equal 'endoplasmic reticulum', @go.term('GO:0005783')
|
45
45
|
end
|
@@ -60,13 +60,13 @@ class GoTest < Test::Unit::TestCase
|
|
60
60
|
def test_subsume
|
61
61
|
# test normal truth
|
62
62
|
assert @go.subsume?('GO:0003824', 'GO:0050333')
|
63
|
-
|
63
|
+
|
64
64
|
# test subsumee is synonym
|
65
65
|
assert @go.subsume?('GO:0003824', 'GO:0048253')
|
66
|
-
|
66
|
+
|
67
67
|
# test equal terms
|
68
68
|
assert @go.subsume?('GO:0009536','GO:0009536')
|
69
|
-
|
69
|
+
|
70
70
|
# test falsity - plastid part does not subsume plastid
|
71
71
|
assert_equal false, @go.subsume?('GO:0044435','GO:0009536')
|
72
72
|
end
|
@@ -83,26 +83,35 @@ class GoTest < Test::Unit::TestCase
|
|
83
83
|
tester = @go.subsume_tester('GO:0044435')
|
84
84
|
assert tester.subsume?('GO:0044435')
|
85
85
|
end
|
86
|
-
|
86
|
+
|
87
87
|
def test_subsume_tester_no_check_synonym
|
88
88
|
# test normal truth
|
89
89
|
tester = @go.subsume_tester('GO:0003824')
|
90
90
|
assert_equal true, tester.subsume?('GO:0050333', false)
|
91
91
|
assert_equal true, tester.subsume?('GO:0050333', true)
|
92
|
-
|
92
|
+
|
93
93
|
# test subsumee is synonym
|
94
94
|
tester = @go.subsume_tester('GO:0003824')
|
95
95
|
assert_equal false, tester.subsume?('GO:0048253', false)
|
96
96
|
assert_equal true, tester.subsume?('GO:0048253', true)
|
97
|
-
|
97
|
+
|
98
98
|
# test equal terms
|
99
99
|
tester = @go.subsume_tester('GO:0050333')
|
100
100
|
assert_equal true, tester.subsume?('GO:0050333', false)
|
101
101
|
assert_equal true, tester.subsume?('GO:0050333', true)
|
102
|
-
|
102
|
+
|
103
103
|
# test equal that is synonym
|
104
104
|
tester = @go.subsume_tester('GO:0050333')
|
105
105
|
assert_equal false, tester.subsume?('GO:0048253', false)
|
106
106
|
assert_equal true, tester.subsume?('GO:0048253', true)
|
107
107
|
end
|
108
|
+
|
109
|
+
def test_cordial_cc
|
110
|
+
nucleus_cordial = @go.cordial_cc('GO:0005634')
|
111
|
+
|
112
|
+
assert nucleus_cordial.include?('GO:0005575'), "include cellular component itself"
|
113
|
+
assert_equal false, nucleus_cordial.include?('GO:0005634'), 'don\'t include nucleus itself'
|
114
|
+
assert nucleus_cordial.include?('GO:0005623'), 'include a direct ancestor of nucleus'
|
115
|
+
assert nucleus_cordial.include?('GO:0005623'), 'include membrane, a \'indirect\' direct ancestor of nucleus (it is an ancestor of nuclear membrane)'
|
116
|
+
end
|
108
117
|
end
|
metadata
CHANGED
@@ -1,7 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: goruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
4
|
+
hash: 23
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 2
|
9
|
+
- 0
|
10
|
+
version: 0.2.0
|
5
11
|
platform: ruby
|
6
12
|
authors:
|
7
13
|
- Ben J Woodcroft
|
@@ -9,49 +15,65 @@ autorequire:
|
|
9
15
|
bindir: bin
|
10
16
|
cert_chain: []
|
11
17
|
|
12
|
-
date: 2010-
|
18
|
+
date: 2010-11-03 00:00:00 +11:00
|
13
19
|
default_executable:
|
14
20
|
dependencies:
|
15
21
|
- !ruby/object:Gem::Dependency
|
16
22
|
name: thoughtbot-shoulda
|
17
|
-
|
18
|
-
|
19
|
-
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
20
26
|
requirements:
|
21
27
|
- - ">="
|
22
28
|
- !ruby/object:Gem::Version
|
29
|
+
hash: 3
|
30
|
+
segments:
|
31
|
+
- 0
|
23
32
|
version: "0"
|
24
|
-
|
33
|
+
type: :development
|
34
|
+
version_requirements: *id001
|
25
35
|
- !ruby/object:Gem::Dependency
|
26
36
|
name: array_pair
|
27
|
-
|
28
|
-
|
29
|
-
|
37
|
+
prerelease: false
|
38
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
30
40
|
requirements:
|
31
41
|
- - ">="
|
32
42
|
- !ruby/object:Gem::Version
|
43
|
+
hash: 3
|
44
|
+
segments:
|
45
|
+
- 0
|
33
46
|
version: "0"
|
34
|
-
|
47
|
+
type: :runtime
|
48
|
+
version_requirements: *id002
|
35
49
|
- !ruby/object:Gem::Dependency
|
36
50
|
name: rsruby
|
37
|
-
|
38
|
-
|
39
|
-
|
51
|
+
prerelease: false
|
52
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
40
54
|
requirements:
|
41
55
|
- - ">="
|
42
56
|
- !ruby/object:Gem::Version
|
57
|
+
hash: 3
|
58
|
+
segments:
|
59
|
+
- 0
|
43
60
|
version: "0"
|
44
|
-
|
61
|
+
type: :runtime
|
62
|
+
version_requirements: *id003
|
45
63
|
- !ruby/object:Gem::Dependency
|
46
64
|
name: bio
|
47
|
-
|
48
|
-
|
49
|
-
|
65
|
+
prerelease: false
|
66
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
67
|
+
none: false
|
50
68
|
requirements:
|
51
69
|
- - ">="
|
52
70
|
- !ruby/object:Gem::Version
|
71
|
+
hash: 3
|
72
|
+
segments:
|
73
|
+
- 0
|
53
74
|
version: "0"
|
54
|
-
|
75
|
+
type: :runtime
|
76
|
+
version_requirements: *id004
|
55
77
|
description: GoRuby makes it easy to interact with the Gene Ontology by using the infrastructure setup in R. By connecting to R using RSRuby, the database and methods can be interrogated. Plus, keeping the R library up to date is much simpler than having to keep a GO implementation up to date.
|
56
78
|
email: donttrustben near gmail.com
|
57
79
|
executables: []
|
@@ -80,21 +102,27 @@ rdoc_options:
|
|
80
102
|
require_paths:
|
81
103
|
- lib
|
82
104
|
required_ruby_version: !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
83
106
|
requirements:
|
84
107
|
- - ">="
|
85
108
|
- !ruby/object:Gem::Version
|
109
|
+
hash: 3
|
110
|
+
segments:
|
111
|
+
- 0
|
86
112
|
version: "0"
|
87
|
-
version:
|
88
113
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
114
|
+
none: false
|
89
115
|
requirements:
|
90
116
|
- - ">="
|
91
117
|
- !ruby/object:Gem::Version
|
118
|
+
hash: 3
|
119
|
+
segments:
|
120
|
+
- 0
|
92
121
|
version: "0"
|
93
|
-
version:
|
94
122
|
requirements: []
|
95
123
|
|
96
124
|
rubyforge_project:
|
97
|
-
rubygems_version: 1.3.
|
125
|
+
rubygems_version: 1.3.7
|
98
126
|
signing_key:
|
99
127
|
specification_version: 3
|
100
128
|
summary: Gene Ontology (GO) interface for Ruby
|