oba-client 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.autotest +23 -0
- data/History.txt +6 -0
- data/Manifest.txt +8 -0
- data/README.txt +57 -0
- data/Rakefile +9 -0
- data/bin/oba_client +3 -0
- data/lib/oba_client.rb +137 -0
- data/test/test_oba_client.rb +68 -0
- metadata +109 -0
data/.autotest
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
|
3
|
+
require 'autotest/restart'
|
4
|
+
|
5
|
+
# Autotest.add_hook :initialize do |at|
|
6
|
+
# at.extra_files << "../some/external/dependency.rb"
|
7
|
+
#
|
8
|
+
# at.libs << ":../some/external"
|
9
|
+
#
|
10
|
+
# at.add_exception 'vendor'
|
11
|
+
#
|
12
|
+
# at.add_mapping(/dependency.rb/) do |f, _|
|
13
|
+
# at.files_matching(/test_.*rb$/)
|
14
|
+
# end
|
15
|
+
#
|
16
|
+
# %w(TestA TestB).each do |klass|
|
17
|
+
# at.extra_class_map[klass] = "test/test_misc.rb"
|
18
|
+
# end
|
19
|
+
# end
|
20
|
+
|
21
|
+
# Autotest.add_hook :run_command do |at|
|
22
|
+
# system "rake build"
|
23
|
+
# end
|
data/History.txt
ADDED
data/Manifest.txt
ADDED
data/README.txt
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
= oba_client
|
2
|
+
|
3
|
+
* http://rubyforge.org/oba-client
|
4
|
+
|
5
|
+
== DESCRIPTION:
|
6
|
+
|
7
|
+
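A Ruby client for the Annotator web service at rest.bioontology.org. It posts text to the service and returns either the raw XML response or a Hash parsed from it.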
|
8
|
+
|
9
|
+
== FEATURES/PROBLEMS:
|
10
|
+
|
11
|
+
* FIX (list of features or problems)
|
12
|
+
|
13
|
+
== SYNOPSIS:
|
14
|
+
|
15
|
+
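  require "oba_client"

  client = OBAClient.new(:parse_xml => true, :email => "you@example.org")
  result = client.execute("cancer of the thorax")
  result[:annotations].each { |annotation| puts annotation[:preferredName] }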
|
16
|
+
|
17
|
+
== REQUIREMENTS:
|
18
|
+
|
19
|
+
* None
|
20
|
+
|
21
|
+
== INSTALL:
|
22
|
+
|
23
|
+
* sudo gem install oba-client
|
24
|
+
|
25
|
+
== DEVELOPERS:
|
26
|
+
|
27
|
+
After checking out the source, run:
|
28
|
+
|
29
|
+
$ rake newb
|
30
|
+
|
31
|
+
This task will install any missing dependencies, run the tests/specs,
|
32
|
+
and generate the RDoc.
|
33
|
+
|
34
|
+
== LICENSE:
|
35
|
+
|
36
|
+
(The MIT License)
|
37
|
+
|
38
|
+
Copyright (c) 2010 FIX
|
39
|
+
|
40
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
41
|
+
a copy of this software and associated documentation files (the
|
42
|
+
'Software'), to deal in the Software without restriction, including
|
43
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
44
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
45
|
+
permit persons to whom the Software is furnished to do so, subject to
|
46
|
+
the following conditions:
|
47
|
+
|
48
|
+
The above copyright notice and this permission notice shall be
|
49
|
+
included in all copies or substantial portions of the Software.
|
50
|
+
|
51
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
52
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
53
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
54
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
55
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
56
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
57
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
ADDED
data/bin/oba_client
ADDED
data/lib/oba_client.rb
ADDED
@@ -0,0 +1,137 @@
|
|
1
|
+
require "rubygems"
|
2
|
+
require "nokogiri"
|
3
|
+
require "cgi"
|
4
|
+
require "net/http"
|
5
|
+
require "uri"
|
6
|
+
|
7
|
+
# Client for the Annotator REST service. An instance stores the service URI,
# the read timeout and the annotator parameters, and can be reused to
# annotate any number of texts.
class OBAClient
  VERSION = "1.0.0"

  # A high HTTP read timeout, as the service sometimes takes a while to respond.
  DEFAULT_TIMEOUT = 30

  # The endpoint URI for the production version of the Annotator service.
  DEFAULT_URI = "http://rest.bioontology.org/obs/annotator"

  # The header for every request. There's no need to specify this per-instance.
  HEADER = {"Content-Type" => "application/x-www-form-urlencoded"}

  # Parameters the annotator accepts. Any one not in this list (excluding
  # textToAnnotate) is not valid.
  ANNOTATOR_PARAMETERS = %w{
    wholeWordOnly
    scored
    ontologiesToExpand
    ontologiesToKeepInResult
    semanticTypes
    withDefaultStopWords
    format
    levelMax
    mappingTypes
    email
  }

  # Configure a client. Client-level options are:
  # - [String] uri: the URI of the annotator service (default: {DEFAULT_URI}).
  # - [Fixnum] timeout: the length of the read timeout (default: {DEFAULT_TIMEOUT}).
  # - [Boolean] parse_xml: whether to parse the received text (default: false).
  # Every other entry is sent to the service as an annotator parameter; Array
  # values are joined with commas. Unknown parameter names only produce a
  # warning on standard output, they are still sent.
  # @param [Hash<Symbol or String, Object>] options Client options and
  #   annotator parameters. The Hash itself is not modified.
  def initialize(options = {})
    # Work on a copy so the caller's Hash keeps its :uri/:timeout/:parse_xml
    # entries.
    options = options.dup
    @uri = URI.parse(options.delete(:uri) || DEFAULT_URI)
    @timeout = options.delete(:timeout) || DEFAULT_TIMEOUT
    @parse_xml = options.delete(:parse_xml)

    @options = {}
    options.each do |k, v|
      # ANNOTATOR_PARAMETERS holds Strings while callers typically pass
      # Symbols, so compare by name.
      unless ANNOTATOR_PARAMETERS.include?(k.to_s)
        puts "WARNING: #{k} is not a valid annotator parameter."
      end
      @options[k] = v.is_a?(Array) ? v.join(",") : v
    end

    # Accept the email under either a Symbol or a String key.
    unless @options.keys.any? { |k| k.to_s == "email" }
      puts "TIP: as a courtesy, consider including your email in the request."
    end
  end

  # Perform the annotation.
  # @param [String] text The text to annotate.
  # @return [Hash<Symbol, Array>, String, nil] A Hash representing the parsed
  #   document, the raw XML if parsing is not requested, or nil if the
  #   request times out (a message is printed in that case).
  def execute(text)
    request = Net::HTTP::Post.new(@uri.path, HEADER)
    params = {:textToAnnotate => text}.merge(@options)
    # Form-encode the parameters by hand: key=value pairs joined with "&".
    request.body = params.map do |k, v|
      "#{CGI.escape(k.to_s)}=#{CGI.escape(v.to_s)}"
    end.join("&")

    begin
      response = Net::HTTP.new(@uri.host, @uri.port).start do |http|
        http.read_timeout = @timeout
        http.request(request)
      end
      @parse_xml ? self.class.parse(response.body) : response.body
    rescue Timeout::Error
      puts "Request for #{text[0..10]} timed-out at #{@timeout} seconds."
      nil
    end
  end

  # Parse the raw XML, returning a Hash with three elements: statistics,
  # annotations, and ontologies. Annotations is an Array with one Hash per
  # annotationBean element; ontologies is an Array with one Hash per
  # ontologyUsedBean element. Statistics are not extracted yet, so that entry
  # is always an empty Array.
  # @param [String] xml The XML we'll be parsing.
  # @return [Hash<Symbol, Array>] A Hash representation of the XML, as
  #   described above.
  def self.parse(xml)
    doc = Nokogiri::XML.parse(xml)

    annotations = doc.xpath("//annotationBean").map do |ann|
      semantic_types = ann.xpath("concept/semanticTypes/semanticTypeBean").map do |type|
        {
          :id => type.xpath("id").text.to_i,
          :semanticType => type.xpath("semanticType").text,
          :description => type.xpath("description").text
        }
      end

      {
        :score => ann.xpath("score").text.to_i,
        :id => ann.xpath("concept/id").text.to_i,
        # Stored as text like the other string fields (previously the raw
        # node set was stored here).
        :localConceptId => ann.xpath("concept/localConceptId").text,
        :localOntologyId => ann.xpath("concept/localOntologyId").text.to_i,
        # NOTE(review): to_i yields 0 for non-numeric text such as
        # "true"/"false" - confirm what the service actually sends.
        :isTopLevel => ann.xpath("concept/isTopLevel").text.to_i,
        :fullId => ann.xpath("concept/fullId").text,
        :preferredName => ann.xpath("concept/preferredName").text,
        :synonyms => ann.xpath("concept/synonyms/synonym").map do |synonym|
          synonym.xpath("string").text
        end,
        :semanticTypes => semantic_types
      }
    end

    ontologies = doc.xpath("//ontologyUsedBean").map do |ontology|
      {
        :localOntologyId => ontology.xpath("localOntologyId").text,
        :virtualOntologyId => ontology.xpath("virtualOntologyId").text,
        :name => ontology.xpath("name").text
      }
    end

    {
      :statistics => [],
      :annotations => annotations,
      :ontologies => ontologies
    }
  end
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
require "test/unit"
|
2
|
+
require "oba_client"
|
3
|
+
|
4
|
+
# Sample inputs passed to OBAClient#execute by the tests below: text with
# escape sequences, text containing an "<?xml" fragment, text with SQL-like
# words and punctuation, and a multi-line string of gibberish and symbols.
TEST_TEXTS = [
|
5
|
+
"hello I am a monkeyfish with a benign neoplastic....\t\n\\n",
|
6
|
+
"zebrafish echo delta tango TURN <?xml MY VOLUME UP cancer of the thorax.",
|
7
|
+
"zebrafish DROP TABLE !!! TURN MY VOLUME UP cancer of the thorax.",
|
8
|
+
%Q{LOROE aonuhaso unseu anoeuhs aeuhsaonuh asoneuhason uaosenuh aosenuhaose
|
9
|
+
aoneuhasonuhaoenuh anoeuhasn euhasoneu haosneuhaosenuhaoesunahoeusnaoeuteeano
|
10
|
+
aot tt t t t t t t tae \n!!@)$@(#)%@#!)@# asoeuaohsenutahoeusaheou
|
11
|
+
}
|
12
|
+
]
|
13
|
+
|
14
|
+
# Integration tests for OBAClient. Every test calls OBAClient#execute for
# each entry of TEST_TEXTS against the default service URI, so they issue
# real HTTP requests.
class TestOBAClient < Test::Unit::TestCase
  # A single client instance serves several requests in a row.
  def test_reuse_annotator_instance
    client = OBAClient.new
    TEST_TEXTS.each { |sample| assert_raw_xml(client.execute(sample)) }
  end

  # A fresh client without any parameters returns raw XML.
  def test_annotation_no_parameters
    TEST_TEXTS.each do |sample|
      assert_raw_xml(OBAClient.new.execute(sample))
    end
  end

  # With :parse_xml the result is the parsed Hash.
  def test_annotation_parse
    TEST_TEXTS.each do |sample|
      client = OBAClient.new :parse_xml => true
      assert_parsed(client.execute(sample))
    end
  end

  # Restricting the result to one ontology still yields the parsed Hash.
  def test_annotation_keep_one_ontology
    TEST_TEXTS.each do |sample|
      client = OBAClient.new(
        :ontologiesToKeepInResult => [42812],
        :parse_xml => true
      )
      assert_parsed(client.execute(sample))
    end
  end

  # Unknown parameters do not prevent a parsed result from being returned.
  def test_annotation_invalid_parameters
    TEST_TEXTS.each do |sample|
      client = OBAClient.new(
        :ontologiesToKeepInResult => [42812],
        :parse_xml => true,
        :blah_blah => true,
        :hoho => ["merry", "christmas"]
      )
      assert_parsed(client.execute(sample))
    end
  end

  private

  # The raw response must start with an XML declaration.
  def assert_raw_xml(body)
    assert body[0..4] == "<?xml"
  end

  # Each of the three sections of a parsed response must be an Array.
  def assert_parsed(result)
    [:statistics, :annotations, :ontologies].each do |section|
      assert result[section].is_a?(Array)
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,109 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: oba-client
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 23
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 1
|
8
|
+
- 0
|
9
|
+
- 0
|
10
|
+
version: 1.0.0
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Rob Tirrell
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2010-07-08 00:00:00 -07:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: rubyforge
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 7
|
30
|
+
segments:
|
31
|
+
- 2
|
32
|
+
- 0
|
33
|
+
- 4
|
34
|
+
version: 2.0.4
|
35
|
+
type: :development
|
36
|
+
version_requirements: *id001
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
name: hoe
|
39
|
+
prerelease: false
|
40
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ">="
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
hash: 21
|
46
|
+
segments:
|
47
|
+
- 2
|
48
|
+
- 6
|
49
|
+
- 1
|
50
|
+
version: 2.6.1
|
51
|
+
type: :development
|
52
|
+
version_requirements: *id002
|
53
|
+
description: FIX (describe your package)
|
54
|
+
email:
|
55
|
+
- rpt@stanford.edu
|
56
|
+
executables:
|
57
|
+
- oba_client
|
58
|
+
extensions: []
|
59
|
+
|
60
|
+
extra_rdoc_files:
|
61
|
+
- History.txt
|
62
|
+
- Manifest.txt
|
63
|
+
- README.txt
|
64
|
+
files:
|
65
|
+
- .autotest
|
66
|
+
- History.txt
|
67
|
+
- Manifest.txt
|
68
|
+
- README.txt
|
69
|
+
- Rakefile
|
70
|
+
- bin/oba_client
|
71
|
+
- lib/oba_client.rb
|
72
|
+
- test/test_oba_client.rb
|
73
|
+
has_rdoc: true
|
74
|
+
homepage: http://rubyforge.org/oba-client
|
75
|
+
licenses: []
|
76
|
+
|
77
|
+
post_install_message:
|
78
|
+
rdoc_options:
|
79
|
+
- --main
|
80
|
+
- README.txt
|
81
|
+
require_paths:
|
82
|
+
- lib
|
83
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
84
|
+
none: false
|
85
|
+
requirements:
|
86
|
+
- - ">="
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
hash: 3
|
89
|
+
segments:
|
90
|
+
- 0
|
91
|
+
version: "0"
|
92
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
93
|
+
none: false
|
94
|
+
requirements:
|
95
|
+
- - ">="
|
96
|
+
- !ruby/object:Gem::Version
|
97
|
+
hash: 3
|
98
|
+
segments:
|
99
|
+
- 0
|
100
|
+
version: "0"
|
101
|
+
requirements: []
|
102
|
+
|
103
|
+
rubyforge_project: oba-client
|
104
|
+
rubygems_version: 1.3.7
|
105
|
+
signing_key:
|
106
|
+
specification_version: 3
|
107
|
+
summary: FIX (describe your package)
|
108
|
+
test_files:
|
109
|
+
- test/test_oba_client.rb
|