oba-client 1.2.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 2.0.0 / 2010-07-08
2
+
3
+ * Parse XML entirely. See README for more information on the return value.
4
+
1
5
  ## 1.2.1 / 2010-07-08
2
6
 
3
7
  * Link correctly (problem with using markdown-ed URL in README.md).
data/README.md CHANGED
@@ -33,9 +33,47 @@ See [the Annotator documentation](http://www.bioontology.org/wiki/index.php/Anno
33
33
  # Returns {:statistics => {information about the annotation},
34
34
  # :annotations => [Array of annotations of text],
35
35
  # :ontologies => [Array of ontologies used]}
36
+ # Like:
37
+ :statistics => {"MAPPING" => 1951, "MGREP" => 2319, "ISA_CLOSURE" => 30}
38
+ :annotations => [{
39
+ :score => 199,
40
+ :id => 203820,
41
+ :localConceptId => "42877/CARO:0000013",
42
+ :localOntologyId => 42877,
43
+ :isTopLevel => true,
44
+ :fullId => "http://purl.obolibrary.org/obo/FBbt_00007002",
45
+ :preferredName => "cell",
46
+ :synonyms => ["body cell"],
47
+ :definitions => ["a cell", "some other definition"],
48
+
49
+ :semanticTypes => [
50
+ {:id => 230820, :semanticType => "T043", :description => "desc"},
51
+ "etc..."
52
+ ]
53
+
54
+ :context => {
55
+ :contextName => "MAPPING",
56
+ :isDirect => false,
57
+ :from => 10,
58
+ :to => 20,
59
+ :mappedConcept => {
60
+ "has" => "the same information as other annotations, minus score"
61
+ }
62
+ }
63
+
64
+ :mappingType => "Automatic"
65
+ }, "more annotations..."]
66
+
67
+ :ontologies => [{
68
+ :localOntologyId => 40404,
69
+ :name => "Ontology Name",
70
+ :virtualOntologyId => 1042
71
+ }, "more ontologies..."]
72
+
36
73
  client2.execute("another text string, maybe longer this time.")
37
74
  client2.execute("this is the second query for this client!")
38
75
 
76
+
39
77
  # Or, parse some file you've already got lying about (pass as a string).
40
78
  parsed = OBAClient::parse("<?xml version='1.0'>...</xml>")
41
79
 
data/lib/oba_client.rb CHANGED
@@ -5,7 +5,7 @@ require "net/http"
5
5
  require "uri"
6
6
 
7
7
  class OBAClient
8
- VERSION = "1.2.1"
8
+ VERSION = "2.0.0"
9
9
 
10
10
  # A high HTTP read timeout, as the service sometimes takes a while to respond.
11
11
  DEFAULT_TIMEOUT = 30
@@ -15,7 +15,7 @@ class OBAClient
15
15
 
16
16
  # The header for every request. There's no need to specify this per-instance.
17
17
  HEADER = {"Content-Type" => "application/x-www-form-urlencoded"}
18
-
18
+
19
19
  # Parameters the annotator accepts. Any one not in this list (excluding
20
20
  # textToAnnotate) is not valid.
21
21
  ANNOTATOR_PARAMETERS = [
@@ -33,11 +33,11 @@ class OBAClient
33
33
  :scored,
34
34
  :semanticTypes,
35
35
  :stopWords,
36
- :wholeWordOnly,
36
+ :wholeWordOnly,
37
37
  :withDefaultStopWords,
38
38
  :withSynonyms,
39
39
  ]
40
-
40
+
41
41
  STATISTICS_BEANS_XPATH = "/success/data/annotatorResultBean/statistics/statisticsBean"
42
42
  ANNOTATION_BEANS_XPATH = "/success/data/annotatorResultBean/annotations/annotationBean"
43
43
  ONTOLOGY_BEANS_XPATH = "/success/data/annotatorResultBean/ontologies/ontologyUsedBean"
@@ -48,13 +48,24 @@ class OBAClient
48
48
  # * [String] uri: the URI of the annotator service (default: {DEFAULT_URI}).
49
49
  # * [Fixnum] timeout: the length of the read timeout (default: {DEFAULT_TIMEOUT}).
50
50
  # * [Boolean] parse_xml: whether to parse the received text (default: false).
51
+ # * [Array<String>] ontologies: a pseudo-parameter which will set both
52
+ # ontologiesToExpand and ontologiesToKeepInResult.
51
53
  # @param [Hash<String, String>] options Parameters for the annotation.
52
54
  def initialize(options = {})
53
55
  @uri = URI.parse(options.delete(:uri) || DEFAULT_URI)
54
56
  @timeout = options.delete(:timeout) || DEFAULT_TIMEOUT
55
57
  @parse_xml = options.delete(:parse_xml)
56
-
57
- @options = {}
58
+
59
+ if ontologies = options.delete(:ontologies)
60
+ [:ontologiesToExpand, :ontologiesToKeepInResult].each do |k|
61
+ if options.include?(k)
62
+ puts "WARNING: specified both :ontologies and #{k}, ignoring given value of #{k}."
63
+ end
64
+ options[k] = ontologies
65
+ end
66
+ end
67
+
68
+ @options = {}
58
69
  options.each do |k, v|
59
70
  if !ANNOTATOR_PARAMETERS.include?(k)
60
71
  puts "WARNING: #{k} is not a valid annotator parameter."
@@ -65,7 +76,7 @@ class OBAClient
65
76
  @options[k] = v
66
77
  end
67
78
  end
68
-
79
+
69
80
  if !@options.include?(:email)
70
81
  puts "TIP: as a courtesy, consider including your email in the request."
71
82
  end
@@ -81,6 +92,7 @@ class OBAClient
81
92
  request.body = {:textToAnnotate => text}.merge(@options).map do |k, v|
82
93
  "#{CGI.escape(k.to_s)}=#{CGI.escape(v.to_s)}"
83
94
  end.join("&")
95
+ puts request.body if $DEBUG
84
96
 
85
97
  begin
86
98
  response = Net::HTTP.new(@uri.host, @uri.port).start do |http|
@@ -92,7 +104,7 @@ class OBAClient
92
104
  puts "Request for #{text[0..10]}... timed-out at #{@timeout} seconds."
93
105
  end
94
106
  end
95
-
107
+
96
108
  # Convert a string true/false or 1/0 value to boolean.
97
109
  # @param [String] value The value to convert.
98
110
  # @return [true, false]
@@ -105,7 +117,108 @@ class OBAClient
105
117
  end
106
118
  end
107
119
 
108
- # Parse the raw XML, returning a Hash with three elements: statistics,
120
+ # Attributes for mapping concepts. Annotation concepts add one more;
121
+ # @see ANNOTATION_CONCEPT_ATTRIBUTES.
122
+ CONCEPT_ATTRIBUTES = {
123
+ :id => lambda {|c| c.xpath("id").text.to_i},
124
+ :localConceptId => lambda {|c| c.xpath("localConceptId").text},
125
+ :localOntologyId => lambda {|c| c.xpath("localOntologyId").text.to_i},
126
+ :isTopLevel => lambda {|c| to_b(c.xpath("isTopLevel").text)},
127
+ :fullId => lambda {|c| c.xpath("fullId").text},
128
+ :preferredName => lambda {|c| c.xpath("preferredName").text},
129
+
130
+ :synonyms => lambda do |c|
131
+ c.xpath("synonyms/synonym").map do |s|
132
+ s.xpath("string").text
133
+ end
134
+ end,
135
+
136
+ :semanticTypes => lambda do |c|
137
+ c.xpath("semanticTypes/semanticTypeBean").map do |s|
138
+ {
139
+ :id => s.xpath("id").text.to_i,
140
+ :semanticType => s.xpath("semanticType").text,
141
+ :description => s.xpath("description").text
142
+ }
143
+ end
144
+ end
145
+ }
146
+
147
+ # Annotation concepts have the same attributes as mapping concepts, plus one.
148
+ ANNOTATION_CONCEPT_ATTRIBUTES = CONCEPT_ATTRIBUTES.merge(
149
+ :mappingType => lambda {|c| c.xpath("mappingType").text}
150
+ )
151
+
152
+ # Toplevel attributes for annotation contexts.
153
+ ANNOTATION_CONTEXT_ATTRIBUTES = {
154
+ :score => lambda {|c| c.xpath("score").text.to_i},
155
+ :concept => lambda {|c| parse_concept(c.xpath("concept").first)},
156
+ :context => lambda {|c| parse_context(c.xpath("context").first)}
157
+ }
158
+
159
+ # Toplevel attributes for all other contexts.
160
+ CONTEXT_ATTRIBUTES = {
161
+ :contextName => lambda {|c| c.xpath("contextName").text},
162
+ :isDirect => lambda {|c| to_b(c.xpath("isDirect").text)},
163
+ :from => lambda {|c| c.xpath("from").text.to_i},
164
+ :to => lambda {|c| c.xpath("to").text.to_i},
165
+ }
166
+
167
+ # Toplevel attributes for mapping contexts.
168
+ MAPPED_CONTEXT_ATTRIBUTES = CONTEXT_ATTRIBUTES.merge(
169
+ :mappedConcept => lambda {|c| parse_concept(c.xpath("mappedConcept").first)}
170
+ )
171
+
172
+ # Toplevel attributes for mgrep contexts.
173
+ MGREP_CONTEXT_ATTRIBUTES = CONTEXT_ATTRIBUTES.merge(
174
+ :name => lambda {|c| c.xpath("term/name").text},
175
+ :localConceptId => lambda {|c| c.xpath("term/localConceptId").text},
176
+ :isPreferred => lambda {|c| to_b(c.xpath("term/isPreferred").text)},
177
+ :dictionaryId => lambda {|c| c.xpath("term/dictionaryId").text}
178
+ )
179
+
180
+ CONCEPT_TYPES = {
181
+ "concept" => ANNOTATION_CONCEPT_ATTRIBUTES,
182
+ "mappedConcept" => CONCEPT_ATTRIBUTES
183
+ }
184
+
185
+ CONTEXT_CLASSES = {
186
+ "annotationContextBean" => ANNOTATION_CONTEXT_ATTRIBUTES,
187
+ "mgrepContextBean" => MGREP_CONTEXT_ATTRIBUTES,
188
+ "mappingContextBean" => MAPPED_CONTEXT_ATTRIBUTES,
189
+ }
190
+
191
+ ##
192
+ # Parse a context - an annotation, or a mapping/mgrep context bean.
193
+ # @param [Nokogiri::XML::Node] context The root node of the context.
194
+ # @return [Hash<Symbol, Object>] The parsed context.
195
+ def self.parse_context(context)
196
+ # Annotations (annotationBeans) do not have a class, so we'll refer to them
197
+ # as annotationContextBeans
198
+ context_class = if context.attribute("class").nil?
199
+ "annotationContextBean"
200
+ else
201
+ context.attribute("class").value
202
+ end
203
+
204
+ Hash[CONTEXT_CLASSES[context_class].map do |k, v|
205
+ [k, v.call(context)]
206
+ end]
207
+ end
208
+
209
+ ##
210
+ # Parse a concept - a toplevel annotation concept, or an annotation's
211
+ # mapping concept.
212
+ # @param [Nokogiri::XML::Node] concept The root node of the concept.
213
+ # @return [Hash<Symbol, Object>] The parsed concept.
214
+ def self.parse_concept(concept)
215
+ Hash[CONCEPT_TYPES[concept.name].map do |k, v|
216
+ [k, v.call(concept)]
217
+ end]
218
+ end
219
+
220
+ ##
221
+ # Parse raw XML, returning a Hash with three elements: statistics,
109
222
  # annotations, and ontologies. Respectively, these represent the annotation
110
223
  # statistics (annotations by mapping type, etc., as a Hash), an Array of
111
224
  # each annotation (as a Hash), and an Array of ontologies used (also as
@@ -114,46 +227,18 @@ class OBAClient
114
227
  # @return [Hash<Symbol, Object>] A Hash representation of the XML, as
115
228
  # described above.
116
229
  def self.parse(xml)
117
- statistics = {}
118
- annotations = []
119
- ontologies = []
120
230
  doc = Nokogiri::XML.parse(xml)
121
231
 
122
- doc.xpath(STATISTICS_BEANS_XPATH).each do |sb|
123
- statistics[sb.xpath("mapping").text] = sb.xpath("nbAnnotation").text.to_i
124
- end
125
-
126
- doc.xpath(ANNOTATION_BEANS_XPATH).each do |ann|
127
- parsed = {
128
- :score => ann.xpath("score").text.to_i,
129
- :id => ann.xpath("concept/id").text.to_i,
130
- :localConceptId => ann.xpath("concept/localConceptId").text,
131
- :localOntologyId => ann.xpath("concept/localOntologyId").text.to_i,
132
- :isTopLevel => to_b(ann.xpath("concept/isTopLevel").text),
133
- :fullId => ann.xpath("concept/fullId").text,
134
- :preferredName => ann.xpath("concept/preferredName").text,
135
- :mappingType => ann.xpath("context/contextName").text,
136
- :isDirect => to_b(ann.xpath("context/isDirect").text)
137
- }
232
+ statistics = Hash[doc.xpath(STATISTICS_BEANS_XPATH).map do |sb|
233
+ [sb.xpath("mapping").text, sb.xpath("nbAnnotation").text.to_i]
234
+ end]
138
235
 
139
- synonyms = ann.xpath("concept/synonyms/synonym")
140
- parsed[:synonyms] = synonyms.map do |synonym|
141
- synonym.xpath("string").text
142
- end
143
-
144
- semanticTypeBeans = ann.xpath("concept/semanticTypes/semanticTypeBean")
145
- parsed[:semanticTypes] = semanticTypeBeans.map do |semanticType|
146
- {
147
- :id => semanticType.xpath("id").text.to_i,
148
- :semanticType => semanticType.xpath("semanticType").text,
149
- :description => semanticType.xpath("description").text
150
- }
151
- end
152
- annotations << parsed
236
+ annotations = doc.xpath(ANNOTATION_BEANS_XPATH).map do |annotation|
237
+ parse_context(annotation)
153
238
  end
154
239
 
155
- doc.xpath(ONTOLOGY_BEANS_XPATH).each do |ontology|
156
- ontologies << {
240
+ ontologies = doc.xpath(ONTOLOGY_BEANS_XPATH).map do |ontology|
241
+ {
157
242
  :localOntologyId => ontology.xpath("localOntologyId").text.to_i,
158
243
  :virtualOntologyId => ontology.xpath("virtualOntologyId").text.to_i,
159
244
  :name => ontology.xpath("name").text
@@ -1,12 +1,13 @@
1
1
  require "test/unit"
2
2
  require "oba_client"
3
+ require "pp"
3
4
 
4
5
  TEST_TEXTS = [
5
- "Mexico,, Disease Thing \o\r\m\n\t\v\l\rzebrafish !!! cancer of the thorax.",
6
- %Q{LOROE aonuhaso unseu anoeuhs aeuhsaonuh asoneuhason uaosenuh aosenuhaose
7
- aoneuhasonuhaoenuh anoeuhasn euhasoneu haosneuhaosenuhaoesunahoeusnaoeuteeano
8
- aot tt t t t t t t tae \n!!@)$@(#)%@#!)@# asoeuaohsenutahoeusaheou
9
- }
6
+ "Mexico,, Disease Thing \o\r\m\n\t\v\l\rzebrafish !!! cancer of the thorax. large intestine thorax",
7
+ # %Q{LOROE aonuhaso unseu anoeuhs aeuhsaonuh asoneuhason uaosenuh aosenuhaose
8
+ # aoneuhasonuhaoenuh anoeuhasn euhasoneu haosneuhaosenuhaoesunahoeusnaoeuteeano
9
+ # aot tt t t t t t t tae \n!!@)$@(#)%@#!)@# asoeuaohsenutahoeusaheou
10
+ # }
10
11
  ]
11
12
 
12
13
  class TestOBAClient < Test::Unit::TestCase
@@ -70,7 +71,22 @@ class TestOBAClient < Test::Unit::TestCase
70
71
  end
71
72
  end
72
73
 
74
+ def test_ontologies_pseudo_parameter
75
+ ann = OBAClient.new(:ontologies => [42812], :parse_xml => true)
76
+ TEST_TEXTS.each do |text|
77
+ parsed = ann.execute(text)
78
+ assert parsed[:ontologies].all? {|o| o[:localOntologyId] == 42812}
79
+ end
80
+ end
81
+
73
82
  def test_parse
74
83
  parsed = OBAClient::parse("<?xml version='1.0'></xml>")
75
84
  end
85
+
86
+ def test_with_print
87
+ ann = OBAClient.new(:ontologies => [42838, 35686], :parse_xml => false)
88
+ ann = OBAClient.new(:ontologies => [42838, 35686], :parse_xml => true)
89
+ end
90
+
91
+
76
92
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: oba-client
3
3
  version: !ruby/object:Gem::Version
4
- hash: 29
4
+ hash: 15
5
5
  prerelease: false
6
6
  segments:
7
- - 1
8
7
  - 2
9
- - 1
10
- version: 1.2.1
8
+ - 0
9
+ - 0
10
+ version: 2.0.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Rob Tirrell