oba-client 1.2.1 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.md +4 -0
- data/README.md +38 -0
- data/lib/oba_client.rb +129 -44
- data/test/test_oba_client.rb +21 -5
- metadata +4 -4
data/History.md
CHANGED
data/README.md
CHANGED
@@ -33,9 +33,47 @@ See [the Annotator documentation](http://www.bioontology.org/wiki/index.php/Anno
|
|
33
33
|
# Returns {:statistics => {information about the annotation},
|
34
34
|
# :annotations => [Array of annotations of text],
|
35
35
|
# :ontologies => [Array of ontologies used]}
|
36
|
+
# Like:
|
37
|
+
:statistics => {"MAPPING" => 1951, "MGREP" => 2319, "ISA_CLOSURE" => 30}
|
38
|
+
:annotations => [{
|
39
|
+
:score => 199,
|
40
|
+
:id => 203820,
|
41
|
+
:localConceptId => "42877/CARO:0000013",
|
42
|
+
:localOntologyId => 42877,
|
43
|
+
:isTopLevel => true,
|
44
|
+
:fullId => "http://purl.obolibrary.org/obo/FBbt_00007002",
|
45
|
+
:preferredName => "cell",
|
46
|
+
:synonyms => ["body cell"],
|
47
|
+
:definitions => ["a cell", "some other definition"],
|
48
|
+
|
49
|
+
:semanticTypes => [
|
50
|
+
{:id => 230820, :semanticType => "T043", :description => "desc"},
|
51
|
+
"etc..."
|
52
|
+
]
|
53
|
+
|
54
|
+
:context => {
|
55
|
+
:contextName => "MAPPING",
|
56
|
+
:isDirect => false,
|
57
|
+
:from => 10,
|
58
|
+
:to => 20,
|
59
|
+
:mappedConcept => {
|
60
|
+
"has" => "the same information as other annotations, minus score"
|
61
|
+
}
|
62
|
+
}
|
63
|
+
|
64
|
+
:mappingType => "Automatic"
|
65
|
+
}, "more annotations..."]
|
66
|
+
|
67
|
+
:ontologies => [{
|
68
|
+
:localOntologyId => 40404,
|
69
|
+
:name => "Ontology Name",
|
70
|
+
:virtualOntologyId => 1042
|
71
|
+
}, "more ontologies..."]
|
72
|
+
|
36
73
|
client2.execute("another text string, maybe longer this time.")
|
37
74
|
client2.execute("this is the second query for this client!")
|
38
75
|
|
76
|
+
|
39
77
|
# Or, parse some file you've already got lying about (pass as a string).
|
40
78
|
parsed = OBAClient::parse("<?xml version='1.0'>...</xml>")
|
41
79
|
|
data/lib/oba_client.rb
CHANGED
@@ -5,7 +5,7 @@ require "net/http"
|
|
5
5
|
require "uri"
|
6
6
|
|
7
7
|
class OBAClient
|
8
|
-
VERSION = "
|
8
|
+
VERSION = "2.0.0"
|
9
9
|
|
10
10
|
# A high HTTP read timeout, as the service sometimes takes a while to respond.
|
11
11
|
DEFAULT_TIMEOUT = 30
|
@@ -15,7 +15,7 @@ class OBAClient
|
|
15
15
|
|
16
16
|
# The header for every request. There's no need to specify this per-instance.
|
17
17
|
HEADER = {"Content-Type" => "application/x-www-form-urlencoded"}
|
18
|
-
|
18
|
+
|
19
19
|
# Parameters the annotator accepts. Any one not in this list (excluding
|
20
20
|
# textToAnnotate) is not valid.
|
21
21
|
ANNOTATOR_PARAMETERS = [
|
@@ -33,11 +33,11 @@ class OBAClient
|
|
33
33
|
:scored,
|
34
34
|
:semanticTypes,
|
35
35
|
:stopWords,
|
36
|
-
:wholeWordOnly,
|
36
|
+
:wholeWordOnly,
|
37
37
|
:withDefaultStopWords,
|
38
38
|
:withSynonyms,
|
39
39
|
]
|
40
|
-
|
40
|
+
|
41
41
|
STATISTICS_BEANS_XPATH = "/success/data/annotatorResultBean/statistics/statisticsBean"
|
42
42
|
ANNOTATION_BEANS_XPATH = "/success/data/annotatorResultBean/annotations/annotationBean"
|
43
43
|
ONTOLOGY_BEANS_XPATH = "/success/data/annotatorResultBean/ontologies/ontologyUsedBean"
|
@@ -48,13 +48,24 @@ class OBAClient
|
|
48
48
|
# * [String] uri: the URI of the annotator service (default: {DEFAULT_URI}).
|
49
49
|
# * [Fixnum] timeout: the length of the read timeout (default: {DEFAULT_TIMEOUT}).
|
50
50
|
# * [Boolean] parse_xml: whether to parse the received text (default: false).
|
51
|
+
# * [Array<String>] ontologies: a pseudo-parameter which will set both
|
52
|
+
# ontologiesToExpand and ontologiesToKeepInResult.
|
51
53
|
# @param [Hash<String, String>] options Parameters for the annotation.
|
52
54
|
def initialize(options = {})
|
53
55
|
@uri = URI.parse(options.delete(:uri) || DEFAULT_URI)
|
54
56
|
@timeout = options.delete(:timeout) || DEFAULT_TIMEOUT
|
55
57
|
@parse_xml = options.delete(:parse_xml)
|
56
|
-
|
57
|
-
|
58
|
+
|
59
|
+
if ontologies = options.delete(:ontologies)
|
60
|
+
[:ontologiesToExpand, :ontologiesToKeepInResult].each do |k|
|
61
|
+
if options.include?(k)
|
62
|
+
puts "WARNING: specified both :ontologies and #{k}, ignoring given value of #{k}."
|
63
|
+
end
|
64
|
+
options[k] = ontologies
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
@options = {}
|
58
69
|
options.each do |k, v|
|
59
70
|
if !ANNOTATOR_PARAMETERS.include?(k)
|
60
71
|
puts "WARNING: #{k} is not a valid annotator parameter."
|
@@ -65,7 +76,7 @@ class OBAClient
|
|
65
76
|
@options[k] = v
|
66
77
|
end
|
67
78
|
end
|
68
|
-
|
79
|
+
|
69
80
|
if !@options.include?(:email)
|
70
81
|
puts "TIP: as a courtesy, consider including your email in the request."
|
71
82
|
end
|
@@ -81,6 +92,7 @@ class OBAClient
|
|
81
92
|
request.body = {:textToAnnotate => text}.merge(@options).map do |k, v|
|
82
93
|
"#{CGI.escape(k.to_s)}=#{CGI.escape(v.to_s)}"
|
83
94
|
end.join("&")
|
95
|
+
puts request.body if $DEBUG
|
84
96
|
|
85
97
|
begin
|
86
98
|
response = Net::HTTP.new(@uri.host, @uri.port).start do |http|
|
@@ -92,7 +104,7 @@ class OBAClient
|
|
92
104
|
puts "Request for #{text[0..10]}... timed-out at #{@timeout} seconds."
|
93
105
|
end
|
94
106
|
end
|
95
|
-
|
107
|
+
|
96
108
|
# Convert a string true/false or 1/0 value to boolean.
|
97
109
|
# @param [String] value The value to convert.
|
98
110
|
# @return [true, false]
|
@@ -105,7 +117,108 @@ class OBAClient
|
|
105
117
|
end
|
106
118
|
end
|
107
119
|
|
108
|
-
#
|
120
|
+
# Attributes for mapping concepts. Annotation concepts add one more attribute.
|
121
|
+
# @see ANNOTATION_CONCEPT_ATTRIBUTES
|
122
|
+
CONCEPT_ATTRIBUTES = {
|
123
|
+
:id => lambda {|c| c.xpath("id").text.to_i},
|
124
|
+
:localConceptId => lambda {|c| c.xpath("localConceptId").text},
|
125
|
+
:localOntologyId => lambda {|c| c.xpath("localOntologyId").text.to_i},
|
126
|
+
:isTopLevel => lambda {|c| to_b(c.xpath("isTopLevel").text)},
|
127
|
+
:fullId => lambda {|c| c.xpath("fullId").text},
|
128
|
+
:preferredName => lambda {|c| c.xpath("preferredName").text},
|
129
|
+
|
130
|
+
:synonyms => lambda do |c|
|
131
|
+
c.xpath("synonyms/synonym").map do |s|
|
132
|
+
s.xpath("string").text
|
133
|
+
end
|
134
|
+
end,
|
135
|
+
|
136
|
+
:semanticTypes => lambda do |c|
|
137
|
+
c.xpath("semanticTypes/semanticTypeBean").map do |s|
|
138
|
+
{
|
139
|
+
:id => s.xpath("id").text.to_i,
|
140
|
+
:semanticType => s.xpath("semanticType").text,
|
141
|
+
:description => s.xpath("description").text
|
142
|
+
}
|
143
|
+
end
|
144
|
+
end
|
145
|
+
}
|
146
|
+
|
147
|
+
# Annotation concepts have the same attributes as mapping concepts, plus one.
|
148
|
+
ANNOTATION_CONCEPT_ATTRIBUTES = CONCEPT_ATTRIBUTES.merge(
|
149
|
+
:mappingType => lambda {|c| c.xpath("mappingType").text}
|
150
|
+
)
|
151
|
+
|
152
|
+
# Toplevel attributes for annotation contexts.
|
153
|
+
ANNOTATION_CONTEXT_ATTRIBUTES = {
|
154
|
+
:score => lambda {|c| c.xpath("score").text.to_i},
|
155
|
+
:concept => lambda {|c| parse_concept(c.xpath("concept").first)},
|
156
|
+
:context => lambda {|c| parse_context(c.xpath("context").first)}
|
157
|
+
}
|
158
|
+
|
159
|
+
# Toplevel attributes for all other contexts.
|
160
|
+
CONTEXT_ATTRIBUTES = {
|
161
|
+
:contextName => lambda {|c| c.xpath("contextName").text},
|
162
|
+
:isDirect => lambda {|c| to_b(c.xpath("isDirect").text)},
|
163
|
+
:from => lambda {|c| c.xpath("from").text.to_i},
|
164
|
+
:to => lambda {|c| c.xpath("to").text.to_i},
|
165
|
+
}
|
166
|
+
|
167
|
+
# Toplevel attributes for mapping contexts.
|
168
|
+
MAPPED_CONTEXT_ATTRIBUTES = CONTEXT_ATTRIBUTES.merge(
|
169
|
+
:mappedConcept => lambda {|c| parse_concept(c.xpath("mappedConcept").first)}
|
170
|
+
)
|
171
|
+
|
172
|
+
# Toplevel attributes for mgrep contexts.
|
173
|
+
MGREP_CONTEXT_ATTRIBUTES = CONTEXT_ATTRIBUTES.merge(
|
174
|
+
:name => lambda {|c| c.xpath("term/name").text},
|
175
|
+
:localConceptId => lambda {|c| c.xpath("term/localConceptId").text},
|
176
|
+
:isPreferred => lambda {|c| to_b(c.xpath("term/isPreferred").text)},
|
177
|
+
:dictionaryId => lambda {|c| c.xpath("term/dictionaryId").text}
|
178
|
+
)
|
179
|
+
|
180
|
+
CONCEPT_TYPES = {
|
181
|
+
"concept" => ANNOTATION_CONCEPT_ATTRIBUTES,
|
182
|
+
"mappedConcept" => CONCEPT_ATTRIBUTES
|
183
|
+
}
|
184
|
+
|
185
|
+
CONTEXT_CLASSES = {
|
186
|
+
"annotationContextBean" => ANNOTATION_CONTEXT_ATTRIBUTES,
|
187
|
+
"mgrepContextBean" => MGREP_CONTEXT_ATTRIBUTES,
|
188
|
+
"mappingContextBean" => MAPPED_CONTEXT_ATTRIBUTES,
|
189
|
+
}
|
190
|
+
|
191
|
+
##
|
192
|
+
# Parse a context - an annotation, or a mapping/mgrep context bean.
|
193
|
+
# @param [Nokogiri::XML::Node] context The root node of the context.
|
194
|
+
# @return [Hash<Symbol, Object>] The parsed context.
|
195
|
+
def self.parse_context(context)
|
196
|
+
# Annotations (annotationBeans) do not have a class, so we'll refer to them
|
197
|
+
# as annotationContextBeans
|
198
|
+
context_class = if context.attribute("class").nil?
|
199
|
+
"annotationContextBean"
|
200
|
+
else
|
201
|
+
context.attribute("class").value
|
202
|
+
end
|
203
|
+
|
204
|
+
Hash[CONTEXT_CLASSES[context_class].map do |k, v|
|
205
|
+
[k, v.call(context)]
|
206
|
+
end]
|
207
|
+
end
|
208
|
+
|
209
|
+
##
|
210
|
+
# Parse a concept - a toplevel annotation concept, or an annotation's
|
211
|
+
# mapping concept.
|
212
|
+
# @param [Nokogiri::XML::Node] concept The root node of the concept.
|
213
|
+
# @return [Hash<Symbol, Object>] The parsed concept.
|
214
|
+
def self.parse_concept(concept)
|
215
|
+
Hash[CONCEPT_TYPES[concept.name].map do |k, v|
|
216
|
+
[k, v.call(concept)]
|
217
|
+
end]
|
218
|
+
end
|
219
|
+
|
220
|
+
##
|
221
|
+
# Parse raw XML, returning a Hash with three elements: statistics,
|
109
222
|
# annotations, and ontologies. Respectively, these represent the annotation
|
110
223
|
# statistics (annotations by mapping type, etc., as a Hash), an Array of
|
111
224
|
# each annotation (as a Hash), and an Array of ontologies used (also as
|
@@ -114,46 +227,18 @@ class OBAClient
|
|
114
227
|
# @return [Hash<Symbol, Object>] A Hash representation of the XML, as
|
115
228
|
# described above.
|
116
229
|
def self.parse(xml)
|
117
|
-
statistics = {}
|
118
|
-
annotations = []
|
119
|
-
ontologies = []
|
120
230
|
doc = Nokogiri::XML.parse(xml)
|
121
231
|
|
122
|
-
doc.xpath(STATISTICS_BEANS_XPATH).
|
123
|
-
|
124
|
-
end
|
125
|
-
|
126
|
-
doc.xpath(ANNOTATION_BEANS_XPATH).each do |ann|
|
127
|
-
parsed = {
|
128
|
-
:score => ann.xpath("score").text.to_i,
|
129
|
-
:id => ann.xpath("concept/id").text.to_i,
|
130
|
-
:localConceptId => ann.xpath("concept/localConceptId").text,
|
131
|
-
:localOntologyId => ann.xpath("concept/localOntologyId").text.to_i,
|
132
|
-
:isTopLevel => to_b(ann.xpath("concept/isTopLevel").text),
|
133
|
-
:fullId => ann.xpath("concept/fullId").text,
|
134
|
-
:preferredName => ann.xpath("concept/preferredName").text,
|
135
|
-
:mappingType => ann.xpath("context/contextName").text,
|
136
|
-
:isDirect => to_b(ann.xpath("context/isDirect").text)
|
137
|
-
}
|
232
|
+
statistics = Hash[doc.xpath(STATISTICS_BEANS_XPATH).map do |sb|
|
233
|
+
[sb.xpath("mapping").text, sb.xpath("nbAnnotation").text.to_i]
|
234
|
+
end]
|
138
235
|
|
139
|
-
|
140
|
-
|
141
|
-
synonym.xpath("string").text
|
142
|
-
end
|
143
|
-
|
144
|
-
semanticTypeBeans = ann.xpath("concept/semanticTypes/semanticTypeBean")
|
145
|
-
parsed[:semanticTypes] = semanticTypeBeans.map do |semanticType|
|
146
|
-
{
|
147
|
-
:id => semanticType.xpath("id").text.to_i,
|
148
|
-
:semanticType => semanticType.xpath("semanticType").text,
|
149
|
-
:description => semanticType.xpath("description").text
|
150
|
-
}
|
151
|
-
end
|
152
|
-
annotations << parsed
|
236
|
+
annotations = doc.xpath(ANNOTATION_BEANS_XPATH).map do |annotation|
|
237
|
+
parse_context(annotation)
|
153
238
|
end
|
154
239
|
|
155
|
-
doc.xpath(ONTOLOGY_BEANS_XPATH).
|
156
|
-
|
240
|
+
ontologies = doc.xpath(ONTOLOGY_BEANS_XPATH).map do |ontology|
|
241
|
+
{
|
157
242
|
:localOntologyId => ontology.xpath("localOntologyId").text.to_i,
|
158
243
|
:virtualOntologyId => ontology.xpath("virtualOntologyId").text.to_i,
|
159
244
|
:name => ontology.xpath("name").text
|
data/test/test_oba_client.rb
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
require "test/unit"
|
2
2
|
require "oba_client"
|
3
|
+
require "pp"
|
3
4
|
|
4
5
|
TEST_TEXTS = [
|
5
|
-
"Mexico,, Disease Thing \o\r\m\n\t\v\l\rzebrafish !!! cancer of the thorax.",
|
6
|
-
%Q{LOROE aonuhaso unseu anoeuhs aeuhsaonuh asoneuhason uaosenuh aosenuhaose
|
7
|
-
aoneuhasonuhaoenuh anoeuhasn euhasoneu haosneuhaosenuhaoesunahoeusnaoeuteeano
|
8
|
-
aot tt t t t t t t tae \n!!@)$@(#)%@#!)@# asoeuaohsenutahoeusaheou
|
9
|
-
}
|
6
|
+
"Mexico,, Disease Thing \o\r\m\n\t\v\l\rzebrafish !!! cancer of the thorax. large intestine thorax",
|
7
|
+
# %Q{LOROE aonuhaso unseu anoeuhs aeuhsaonuh asoneuhason uaosenuh aosenuhaose
|
8
|
+
# aoneuhasonuhaoenuh anoeuhasn euhasoneu haosneuhaosenuhaoesunahoeusnaoeuteeano
|
9
|
+
# aot tt t t t t t t tae \n!!@)$@(#)%@#!)@# asoeuaohsenutahoeusaheou
|
10
|
+
# }
|
10
11
|
]
|
11
12
|
|
12
13
|
class TestOBAClient < Test::Unit::TestCase
|
@@ -70,7 +71,22 @@ class TestOBAClient < Test::Unit::TestCase
|
|
70
71
|
end
|
71
72
|
end
|
72
73
|
|
74
|
+
def test_ontologies_pseudo_parameter
|
75
|
+
ann = OBAClient.new(:ontologies => [42812], :parse_xml => true)
|
76
|
+
TEST_TEXTS.each do |text|
|
77
|
+
parsed = ann.execute(text)
|
78
|
+
assert parsed[:ontologies].all? {|o| o[:localOntologyId] == 42812}
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
73
82
|
def test_parse
|
74
83
|
parsed = OBAClient::parse("<?xml version='1.0'></xml>")
|
75
84
|
end
|
85
|
+
|
86
|
+
def test_with_print
|
87
|
+
ann = OBAClient.new(:ontologies => [42838, 35686], :parse_xml => false)
|
88
|
+
ann = OBAClient.new(:ontologies => [42838, 35686], :parse_xml => true)
|
89
|
+
end
|
90
|
+
|
91
|
+
|
76
92
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: oba-client
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 15
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
|
-
- 1
|
8
7
|
- 2
|
9
|
-
-
|
10
|
-
|
8
|
+
- 0
|
9
|
+
- 0
|
10
|
+
version: 2.0.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Rob Tirrell
|