ncbo_resource_index 1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,17 @@
1
module NCBO
  class ResourceIndex

    # Result container for one resource: the parser fills in +resource+,
    # +annotations+ (a list of Annotation objects) and, when the response
    # carries counts, +total_annotation_count+.
    class Annotations
      attr_accessor :resource
      attr_accessor :annotations
      attr_accessor :total_annotation_count
      attr_accessor :offset
      attr_accessor :limit
    end

    # A single annotation: a score plus the concept, context and element
    # it was parsed from.
    class Annotation
      attr_accessor :score
      attr_accessor :concept
      attr_accessor :context
      attr_accessor :element
    end

    # Result container for the ranked-elements call: the ranked concepts and
    # the per-resource element lists.
    class RankedElements
      attr_accessor :concepts
      attr_accessor :resources
    end

  end
end
@@ -0,0 +1,229 @@
1
+ module NCBO
2
+ module Parser
3
+
4
# Helpers shared by the response parsers.
class BaseParser
  # Parses an XML document with LibXML using the NOBLANKS parser option.
  #
  # xml - a String holding the document; any other object is handed to
  #       XML::Parser.io (presumably an IO-like stream - confirm with callers).
  #
  # Returns the result of the LibXML parser's #parse.
  def parse_xml(xml)
    parser_options = {:options => LibXML::XML::Parser::Options::NOBLANKS}
    parser = if xml.kind_of?(String)
      XML::Parser.string(xml, parser_options)
    else
      XML::Parser.io(xml, parser_options)
    end
    parser.parse
  end

  # Converts +str+ to an Integer when it is a plain decimal number and
  # returns it unchanged otherwise.
  #
  # Strings are always read in base 10. Without an explicit base, Integer()
  # guesses the radix from the prefix, so "010" became 8, "0x1A" became 26
  # and "09" stayed a String while "07" turned into 7.
  #
  # str - the value to convert (normally the text content of an XML node).
  #
  # Returns an Integer, or +str+ itself when it cannot be converted.
  def safe_to_i(str)
    str.kind_of?(String) ? Integer(str, 10) : Integer(str)
  rescue ArgumentError, TypeError, RangeError
    str
  end
end
18
+
19
+ class ResourceIndex < BaseParser
20
# Wraps a raw Resource Index response.
#
# results - the XML response; it is passed unchanged to parse_xml and the
#           parsed document is kept in @results.
#
# @root starts out as the annotator result path and is the prefix that
# parse_ontologies prepends to its lookups.
def initialize(results)
  @results = parse_xml(results)
  @root = "/success/data/annotatorResultBean"
end
24
+
25
# Class-level shortcut: parses +ontologies+ (a raw XML response) and returns
# the list produced by the instance method of the same name.
def self.parse_included_ontologies(ontologies)
  parser = new(ontologies)
  parser.parse_included_ontologies
end

# Re-points @root at "/success/data/set" and returns one Hash per "ontology"
# node found below it (see parse_ontologies for the Hash contents).
def parse_included_ontologies
  @root = "/success/data/set"
  parse_ontologies("ontology")
end
33
+
34
# Class-level shortcut: parses +resources+ (a raw XML response) and returns
# the list produced by the instance method of the same name.
def self.parse_resources(resources)
  parser = new(resources)
  parser.parse_resources
end

# Builds one Hash per "/success/data/set/resource" node.
#
# Each child node that has content but no nested elements becomes a
# symbol-keyed entry (values go through safe_to_i); :contexts holds the
# context names taken from the node's "resourceStructure" child.
#
# Returns an Array of Hashes.
def parse_resources
  @results.find("/success/data/set/resource").map do |resource_node|
    details = {}
    resource_node.children.each do |child|
      first = child.first
      # Skip empty nodes and nodes that wrap further elements.
      next if first.nil? || first.children?
      details[child.name.to_sym] = safe_to_i(child.content)
    end
    details[:contexts] = parse_resource_structure(resource_node.find_first("resourceStructure"))
    details
  end
end
48
+
49
# Class-level shortcut: parses +results+ (a raw XML response) and returns
# whatever the instance method of the same name returns.
def self.parse_results(results, options = {})
  new(results).parse_results(options)
end

# Turns every node under "/success/data/list" into an Annotations object.
#
# options - Hash of optional overrides:
#           :resource            - resource id to assign to every result
#                                  instead of each node's own "resourceId".
#           :annotation_location - XPath (relative to a result node) of the
#                                  annotations to read when the result was
#                                  produced without context; defaults to
#                                  "annotations/*".
#
# Returns a single Annotations object when the response holds exactly one
# result, otherwise an Array of them (empty when there are none).
def parse_results(options = {})
  forced_resource = options[:resource]
  # Fall back here rather than passing nil on, which would override the
  # default of parse_annotations and make it search for a nil path.
  annotation_location = options[:annotation_location] || "annotations/*"
  enabled = lambda { |node| !node.nil? && node.content.eql?("true") }

  results = []
  @results.find("/success/data/list/*").each do |result|
    resource_annotations = NCBO::ResourceIndex::Annotations.new

    # Every result keeps its own resource id unless the caller forced one.
    # (Assigning back to a shared local made all results after the first
    # inherit the first result's id.)
    resource_node = result.find_first("resourceId")
    resource_annotations.resource = forced_resource || (resource_node && resource_node.content)

    # Check to see if parameters are enabled that will change how we process the output
    with_context = enabled.call(result.find_first("withContext"))
    counts = enabled.call(result.find_first("counts"))

    # Update total count (if available)
    if counts
      count_node = result.find_first("resultStatistics/statistics/annotationCount")
      resource_annotations.total_annotation_count = count_node.content.to_i unless count_node.nil?
    end

    resource_annotations.annotations = parse_annotations(result, with_context, annotation_location)
    results << resource_annotations
  end

  results.length == 1 ? results[0] : results
end
75
+
76
# Class-level shortcut: parses +results+ (a raw XML response) and returns
# the RankedElements built by the instance method of the same name.
def self.parse_ranked_element_results(results)
  parser = new(results)
  parser.parse_ranked_element_results
end

# Builds a RankedElements object from the response map.
#
# concepts  - one parsed concept per node under the map entry named
#             'concepts'.
# resources - one Hash per "resourceElements" node under the map entry named
#             'elements', with :resourceId, :offset, :limit, :totalResults
#             and :elements (the parsed "elementResult" nodes).
def parse_ranked_element_results
  ranked = NCBO::ResourceIndex::RankedElements.new

  concept_nodes = @results.find("/success/data/map/entry[string='concepts']/list/*")
  ranked.concepts = concept_nodes.map { |node| parse_concept(node, ".") }

  resource_nodes = @results.find("/success/data/map/entry[string='elements']/list/resourceElements")
  resources = resource_nodes.map do |node|
    entry = {}
    entry[:resourceId] = node.find_first("resourceId").content
    [[:offset, "offset"], [:limit, "limit"], [:totalResults, "totalResults"]].each do |key, path|
      entry[key] = node.find_first(path).content.to_i
    end
    entry[:elements] = node.find("./elementResults/elementResult").map { |element| parse_element(element) }
    entry
  end
  ranked.resources = resources

  ranked
end
105
+
106
# Class-level shortcut: parses +results+ (a raw XML response) and returns
# the list produced by the instance method of the same name.
def self.parse_popular_concepts(results)
  parser = new(results)
  parser.parse_popular_concepts
end

# Builds one Hash per node under "/success/data/list" with the integer
# :counts and :score of the node and its parsed :concept.
#
# Returns an Array of Hashes.
def parse_popular_concepts
  @results.find("/success/data/list/*").map do |frequency_node|
    entry = {}
    entry[:counts] = frequency_node.find_first("counts").content.to_i
    entry[:score] = frequency_node.find_first("score").content.to_i
    entry[:concept] = parse_concept(frequency_node)
    entry
  end
end
121
+
122
# Class-level shortcut: parses +results+ (a raw XML response) and returns
# the Annotations built by the instance method of the same name.
def self.parse_element_annotations(results)
  parser = new(results)
  parser.parse_element_annotations
end

# Reads the "annotation" nodes below "/success/data/list" (context
# processing is switched off) into a new Annotations object. Only the
# annotations attribute is filled in.
def parse_element_annotations
  list_node = @results.find_first("/success/data/list")
  element_annotations = NCBO::ResourceIndex::Annotations.new
  element_annotations.annotations = parse_annotations(list_node, false, "annotation")
  element_annotations
end
134
+
135
+ private
136
+
137
# Collects the parsed annotations found below +result+.
#
# result              - node to search in.
# with_context        - when true the four per-type lists (mgrep, reported,
#                       isa, mapping - in that order) are read and
#                       +annotation_location+ is ignored.
# annotation_location - XPath of the annotation nodes used otherwise.
#
# Returns an Array with one parsed annotation per node found.
def parse_annotations(result, with_context = false, annotation_location = "annotations/*")
  locations = if with_context
    ["mgrepAnnotations/*", "reportedAnnotations/*", "isaAnnotations/*", "mappingAnnotations/*"]
  else
    [annotation_location]
  end

  annotations = []
  locations.each do |location|
    result.find(location).each { |node| annotations << parse_annotation(node) }
  end
  annotations
end
159
+
160
# Builds an Annotation from one annotation node: the float "score" plus the
# parsed concept, context and element.
#
# annotation - the annotation node.
# context    - accepted but not used by this method.
def parse_annotation(annotation, context = false)
  NCBO::ResourceIndex::Annotation.new.tap do |parsed|
    parsed.score = annotation.find_first("score").content.to_f
    parsed.concept = parse_concept(annotation)
    parsed.context = parse_context(annotation)
    parsed.element = parse_element(annotation)
  end
end
168
+
169
# Extracts the context names from a "resourceStructure" node; nothing else
# in that node is read (the rest is internal info).
#
# Returns an Array holding the content of the first child of every entry
# under "contexts".
def parse_resource_structure(resource_structure)
  contexts_node = resource_structure.find_first("contexts")
  names = []
  contexts_node.each do |entry|
    names << entry.first.content
  end
  names
end
175
+
176
# Builds one Hash per ontology node found at @root + "/" + ontology_location.
# Every child of a node becomes a symbol-keyed entry whose value went
# through safe_to_i.
#
# Returns an Array of Hashes.
def parse_ontologies(ontology_location = "ontologies/ontologyUsedBean")
  xpath = @root + "/#{ontology_location}"
  @results.find(xpath).map do |ontology_node|
    fields = {}
    ontology_node.children.each do |child|
      fields[child.name.to_sym] = safe_to_i(child.content)
    end
    fields
  end
end
185
+
186
# Builds a concept Hash from the node at +concept_location+ below
# +annotation+.
#
# Children that have content but no nested elements become symbol-keyed
# entries (values via safe_to_i); :synonyms lists the "synonyms/string"
# values and :semantic_types the parsed "localSemanticTypeIds".
def parse_concept(annotation, concept_location = "concept")
  concept = {}
  annotation.find("#{concept_location}/*").each do |child|
    first = child.first
    # Skip empty nodes and nodes that wrap further elements.
    next if first.nil? || first.children?
    concept[child.name.to_sym] = safe_to_i(child.content)
  end

  synonym_nodes = annotation.find("#{concept_location}/synonyms/string")
  concept[:synonyms] = synonym_nodes.map { |synonym| safe_to_i(synonym.content) }

  type_ids_node = annotation.find_first("#{concept_location}/localSemanticTypeIds")
  concept[:semantic_types] = parse_semantic_types(type_ids_node)
  concept
end
194
+
195
# Flattens a semantic-type list into a plain Array.
#
# semantic_types_xml - node whose entries each expose #children, or nil.
#
# Returns the content of every child of every entry (via safe_to_i), in
# document order; an empty Array when the node is nil.
def parse_semantic_types(semantic_types_xml)
  collected = []
  unless semantic_types_xml.nil?
    semantic_types_xml.each do |bean|
      bean.children.each { |child| collected << safe_to_i(child.content) }
    end
  end
  collected
end
204
+
205
# Builds a Hash from the "context" node of +annotation+.
#
# Children that have content but no nested elements become symbol-keyed
# entries (values via safe_to_i). When a "context" node exists, its "class"
# attribute is stored under :contextType.
def parse_context(annotation)
  context = {}
  annotation.find("context/*").each do |child|
    first = child.first
    # Skip empty nodes and nodes that wrap further elements.
    next if first.nil? || first.children?
    context[child.name.to_sym] = safe_to_i(child.content)
  end

  context_node = annotation.find_first("context")
  context[:contextType] = context_node.attributes["class"] unless context_node.nil?
  context
end
211
+
212
# Builds a Hash describing the "element" node of +annotation+.
#
# :localElementId - content of "element/localElementId" (only when present)
# :text           - first-child content => second-child content for every
#                   entry under elementStructure/contexts
# :weights        - Array of {:name, :weight} (weight as Float) for every
#                   entry under elementStructure/weights
# :ontoIds        - first-child content => second-child content as Integer
#                   for every entry under elementStructure/ontoIds
def parse_element(annotation)
  element = {}

  id_node = annotation.find_first("element/localElementId")
  element[:localElementId] = id_node.content unless id_node.nil?

  # element text
  element[:text] = {}
  annotation.find("element/elementStructure/contexts/*").each do |context|
    pair = context.children
    element[:text][pair[0].content] = pair[1].content
  end

  # element weights
  element[:weights] = []
  annotation.find("element/elementStructure/weights/*").each do |weight|
    pair = weight.children
    element[:weights] << {:name => pair[0].content, :weight => pair[1].content.to_f}
  end

  # which element portions are associated with an ontology
  element[:ontoIds] = {}
  annotation.find("element/elementStructure/ontoIds/*").each do |ont_id|
    pair = ont_id.children
    element[:ontoIds][pair[0].content] = pair[1].content.to_i
  end

  element
end
226
+ end
227
+
228
+ end
229
+ end
@@ -0,0 +1,276 @@
1
+ require 'net/http'
2
+ require 'xml'
3
+ require 'uri'
4
+ require 'open-uri'
5
+ require 'cgi'
6
+ require 'ncbo_resource_index/parser'
7
+ require 'ncbo_resource_index/data'
8
+
9
+
10
+ module NCBO
11
+ class ResourceIndex
12
+
13
# Creates a client with the default options below, overridden by +args+.
#
# args - Hash of option overrides; :apikey has no default and must be given.
#
# Raises ArgumentError when no :apikey ends up in the options.
def initialize(args = {})
  defaults = {
    # Shared with Annotator
    :resource_index_location => "http://rest.bioontology.org/resource_index/",
    :filterNumber => true,
    :isStopWordsCaseSensitive => false,
    :isVirtualOntologyId => true,
    :levelMax => 0,
    :longestOnly => false,
    :ontologiesToExpand => [],
    :ontologiesToKeepInResult => [],
    :mappingTypes => [],
    :minTermSize => 3,
    :scored => true,
    :semanticTypes => [],
    :stopWords => [],
    :wholeWordOnly => true,
    :withDefaultStopWords => true,
    :withSynonyms => true,

    # RI-specific
    :conceptids => [],
    :mode => :union,
    :elementid => [],
    :resourceids => [],
    :elementDetails => false,
    :withContext => true,
    :offset => 0,
    :limit => 10,
    :format => :xml,
    :counts => false,
    :request_timeout => 300
  }
  @options = defaults.merge(args)

  @ontologies = nil
  # An explicit nil from the caller is turned back into an empty list.
  @options[:resourceids] ||= []

  # Normalizes list options given as strings and capitalizes mappingTypes.
  fix_params

  if @options[:apikey].nil?
    raise ArgumentError, ":apikey is required, you can obtain one at http://bioportal.bioontology.org/accounts/new"
  end
end
57
+
58
# Class-level shortcut: builds a client from +options+ and runs
# #find_by_concept with +concepts+.
def self.find_by_concept(concepts, options = {})
  client = new(options)
  client.find_by_concept(concepts)
end

# Posts the current options to the Resource Index and parses the response.
#
# concepts - concept ids to search for; when nil or empty the :conceptids
#            already stored in the options are used.
# options  - extra options merged into the client's options.
#
# Returns the result of Parser::ResourceIndex.parse_results.
# Raises ArgumentError when no concept ids are available.
def find_by_concept(concepts = [], options = {})
  concepts_given = !(concepts.nil? || concepts.empty?)
  @options[:conceptids] = concepts if concepts_given
  @options.merge!(options) unless options.empty?
  fix_params

  conceptids = @options[:conceptids]
  if conceptids.nil? || conceptids.empty?
    raise ArgumentError, ":conceptids must be included"
  end

  Parser::ResourceIndex.parse_results(resource_index_post)
end
72
+
73
# Class-level shortcut: builds a client from +options+ and runs
# #find_by_element with +element+ and +resource+.
def self.find_by_element(element, resource, options = {})
  client = new(options)
  client.find_by_element(element, resource)
end

# Posts the current options to the Resource Index and parses the response.
#
# element  - stored as :elementid unless nil or empty.
# resource - stored (wrapped in an Array) as :resourceids unless nil or empty.
# options  - extra options merged into the client's options.
#
# Returns the result of Parser::ResourceIndex.parse_results.
# Raises ArgumentError when :elementid or :resourceids end up missing.
def find_by_element(element, resource, options = {})
  element_given = !(element.nil? || element.empty?)
  resource_given = !(resource.nil? || resource.empty?)
  @options[:elementid] = element if element_given
  @options[:resourceids] = [resource] if resource_given
  @options.merge!(options) unless options.empty?
  fix_params

  elementid = @options[:elementid]
  raise ArgumentError, ":elementid must be included" if elementid.nil? || elementid.empty?
  resourceids = @options[:resourceids]
  raise ArgumentError, ":resourceids must be included" if resourceids.nil? || resourceids.empty?

  response_xml = resource_index_post
  Parser::ResourceIndex.parse_results(response_xml)
end
86
+
87
# Class-level shortcut: builds a client from +options+ and runs
# #element_annotations. (+resource+ is now passed through; it used to be
# dropped, which made this method always raise ArgumentError.)
def self.element_annotations(element, concepts, resource, options = {})
  new(options).element_annotations(element, concepts, resource)
end

# Fetches the annotations of one element for one or more concepts.
#
# One request is made per concept to a URL of the form
#   <location>details/<elementDetails>[/virtual]/concept/<ontology>/resource/<RESOURCE>/<offset>/<limit>?conceptid=..&elementid=..&apikey=..
#
# element  - id of the element.
# concepts - Array of "ontology_id/concept_id" strings; when nil or empty
#            the :conceptids already stored in the options are used.
# resource - resource id (upper-cased before use).
#
# Returns an Annotations object holding the annotations of all concepts,
# with +resource+ set to the upper-cased resource id.
# Raises ArgumentError when the element, the resource or the concept ids are
# missing, when :resourceids is not an Array, or when a concept id has no
# "ontology_id/" prefix.
def element_annotations(element, concepts, resource)
  @options[:conceptids] = concepts unless concepts.nil? || concepts.empty?
  raise ArgumentError, ":conceptids must be included" if @options[:conceptids].nil? || @options[:conceptids].empty?
  raise ArgumentError, ":resourceids must be an array" unless @options[:resourceids].kind_of? Array
  raise ArgumentError, ":elementid must be included" if element.nil? || element.empty?
  raise ArgumentError, "resource must be included" if resource.nil? || resource.empty?
  resource = resource.upcase
  virtual = @options[:isVirtualOntologyId] ? "/virtual" : ""

  # Iterate over the stored ids so that ids supplied through the options
  # work when +concepts+ itself is nil.
  concept_annotations = Array(@options[:conceptids]).map do |concept|
    # Split on the first slash only: the concept id may contain slashes.
    ontology_id, concept_id = concept.split("/", 2)
    if concept_id.nil?
      raise ArgumentError, "concept ids must look like ontology_id/concept_id, got #{concept.inspect}"
    end

    url = [@options[:resource_index_location],
           "details/#{@options[:elementDetails]}",
           virtual,
           "/concept/#{ontology_id}",
           "/resource/#{resource}",
           "/#{@options[:offset]}",
           "/#{@options[:limit]}",
           "?conceptid=#{CGI.escape(concept_id)}",
           "&elementid=#{CGI.escape(element)}",
           "&apikey=#{CGI.escape(@options[:apikey].to_s)}"].join

    # URI#read comes from open-uri; unlike Kernel#open it also works on
    # Ruby 3.0 and later, where Kernel#open no longer accepts URLs.
    Parser::ResourceIndex.parse_element_annotations(URI.parse(url).read)
  end

  if concept_annotations.length == 1
    primary_annotations = concept_annotations.first
  else
    # Merge the result sets
    primary_annotations = Annotations.new
    primary_annotations.annotations = []
    concept_annotations.each do |result|
      primary_annotations.annotations.concat result.annotations
    end
  end
  primary_annotations.resource = resource
  primary_annotations
end
145
+
146
# Class-level shortcut: builds a client from +options+ and runs
# #ranked_elements with +concepts+.
def self.ranked_elements(concepts, options = {})
  new(options).ranked_elements(concepts)
end

# Fetches the elements ranked by the given concepts.
#
# concepts - concept ids; when nil or empty the :conceptids already stored
#            in the options are used.
# options  - extra options merged into the client's options.
#
# Returns the result of Parser::ResourceIndex.parse_ranked_element_results.
# Raises ArgumentError when no concept ids are available or :resourceids is
# not an Array.
def ranked_elements(concepts = [], options = {})
  @options[:conceptids] = concepts unless concepts.nil? || concepts.empty?
  @options[:resourceids] ||= []
  @options.merge!(options) unless options.empty?
  fix_params

  raise ArgumentError, ":conceptids must be included" if @options[:conceptids].nil? || @options[:conceptids].empty?
  raise ArgumentError, ":resourceids must be an array" unless @options[:resourceids].kind_of? Array

  # Comma-separated, URL-encoded list. Interpolating the Array directly
  # would emit its inspect form (["a", "b"]) instead of a list.
  escaped_list = lambda { |values| Array(values).map { |value| CGI.escape(value.to_s) }.join(",") }

  url = [@options[:resource_index_location],
         "elements-ranked-by-concepts/#{@options[:resourceids].join(",")}",
         "?offset=#{@options[:offset]}",
         "&limit=#{@options[:limit]}",
         "&conceptids=#{escaped_list.call(@options[:conceptids])}",
         "&ontologiesToKeepInResult=#{escaped_list.call(@options[:ontologiesToKeepInResult])}",
         "&isVirtualOntologyId=#{@options[:isVirtualOntologyId]}",
         "&apikey=#{CGI.escape(@options[:apikey].to_s)}"].join

  # URI#read comes from open-uri; unlike Kernel#open it also works on
  # Ruby 3.0 and later, where Kernel#open no longer accepts URLs.
  result_xml = URI.parse(url).read
  Parser::ResourceIndex.parse_ranked_element_results(result_xml)
end
177
+
178
# Class-level shortcut: builds a client from +options+ and runs
# #popular_concepts with +resources+.
def self.popular_concepts(resources = nil, options = {})
  new(options).popular_concepts(resources)
end

# Fetches the most-used concepts of one or more resources, one request per
# resource.
#
# resources - a resource id or an Array of them; when nil or empty (and no
#             :resourceids come in through +options+) every resource
#             returned by #resources is queried.
# options   - extra options merged into the client's options.
#
# Returns a Hash mapping each resource id to the result of
# Parser::ResourceIndex.parse_popular_concepts.
def popular_concepts(resources = nil, options = {})
  @options[:resourceids] = resources
  @options[:resourceids] = [resources] unless resources.nil? || resources.empty? || resources.kind_of?(Array)
  @options.merge!(options) unless options.empty?
  fix_params

  if @options[:resourceids].nil? || @options[:resourceids].empty?
    @options[:resourceids] = self.resources.collect {|resource| resource[:resourceId]}
  end

  concepts_by_resource = {}
  # Array() also covers a single id that arrived through +options+.
  Array(@options[:resourceids]).each do |resource|
    url = "#{@options[:resource_index_location]}most-used-concepts/#{resource}" \
          "?apikey=#{CGI.escape(@options[:apikey].to_s)}&offset=#{@options[:offset]}&limit=#{@options[:limit]}"
    # URI#read comes from open-uri; unlike Kernel#open it also works on
    # Ruby 3.0 and later, where Kernel#open no longer accepts URLs.
    popular_concepts_xml = URI.parse(url).read
    concepts_by_resource[resource] = Parser::ResourceIndex.parse_popular_concepts(popular_concepts_xml)
  end
  concepts_by_resource
end
199
+
200
# Class-level shortcut: builds a client from +options+ and runs #ontologies.
def self.ontologies(options)
  new(options).ontologies
end

# Returns the ontologies known to the Resource Index.
#
# The list is fetched on the first call and kept in @ontologies; later
# calls return the stored list without another request.
#
# options - extra options merged into the client's options.
#
# Returns the result of Parser::ResourceIndex.parse_included_ontologies.
def ontologies(options = {})
  @options.merge!(options) unless options.empty?

  @ontologies ||= begin
    url = "#{@options[:resource_index_location]}ontologies?apikey=#{CGI.escape(@options[:apikey].to_s)}"
    # URI#read comes from open-uri; unlike Kernel#open it also works on
    # Ruby 3.0 and later, where Kernel#open no longer accepts URLs.
    Parser::ResourceIndex.parse_included_ontologies(URI.parse(url).read)
  end
end
214
+
215
# Class-level shortcut: builds a client from +options+ and runs #resources.
def self.resources(options)
  new(options).resources
end

# Returns the resources known to the Resource Index.
#
# The list is fetched on the first call and kept in @resources; later calls
# return the stored list without another request.
#
# options - extra options merged into the client's options.
#
# Returns the result of Parser::ResourceIndex.parse_resources.
def resources(options = {})
  @options.merge!(options) unless options.empty?

  @resources ||= begin
    url = "#{@options[:resource_index_location]}resources?apikey=#{CGI.escape(@options[:apikey].to_s)}"
    # URI#read comes from open-uri; unlike Kernel#open it also works on
    # Ruby 3.0 and later, where Kernel#open no longer accepts URLs.
    Parser::ResourceIndex.parse_resources(URI.parse(url).read)
  end
end
229
+
230
# Class-level shortcut: builds a client from +options+ and runs
# #resources_hash.
def self.resources_hash(options)
  client = new(options)
  client.resources_hash
end

# Returns the resources from #resources indexed by id.
#
# options - extra options merged into the client's options.
#
# Returns a Hash whose keys are the down-cased :resourceId of each resource
# as a Symbol and whose values are the resource Hashes themselves.
def resources_hash(options = {})
  @options.merge!(options) unless options.empty?
  resources.inject({}) do |by_id, res|
    by_id[res[:resourceId].downcase.to_sym] = res
    by_id
  end
end
241
+
242
# Returns the client's option Hash (the stored object itself, not a copy).
def options
  @options
end
245
+
246
+ private
247
+
248
# Normalizes the list-valued options in @options.
#
# :mappingTypes, :ontologiesToExpand, :ontologiesToKeepInResult,
# :semanticTypes and :stopWords may be given as comma-separated Strings and
# are split into Arrays. Every entry of :mappingTypes is then capitalized.
#
# The capitalized list is a new Array: an Array passed in by the caller is
# left untouched (it used to be rewritten in place, which also failed on
# frozen Arrays).
def fix_params
  [:mappingTypes, :ontologiesToExpand, :ontologiesToKeepInResult, :semanticTypes, :stopWords].each do |key|
    @options[key] = @options[key].split(",") if @options[key].kind_of?(String)
  end

  unless @options[:mappingTypes].nil?
    @options[:mappingTypes] = @options[:mappingTypes].map { |type| type.to_s.capitalize }
  end
end
256
+
257
# POSTs every entry of @options to :resource_index_location as a
# form-encoded body and returns the response body.
#
# Empty Arrays are left out. Non-empty Arrays are sent as a comma-separated
# list; each item, like every scalar value, is converted to a String and
# URL-encoded first, so values containing "&", "=" or spaces no longer
# corrupt the body and non-String items (e.g. Integer ids) no longer raise.
#
# The read timeout is taken from :request_timeout.
#
# Returns the response body as a String.
def resource_index_post
  url = URI.parse(@options[:resource_index_location])

  request_body = []
  @options.each do |key, value|
    next if value.kind_of?(Array) && value.empty?
    encoded = if value.kind_of?(Array)
      value.collect { |item| CGI.escape(item.to_s) }.join(",")
    else
      CGI.escape(value.to_s)
    end
    request_body << "#{key}=#{encoded}"
  end

  req = Net::HTTP::Post.new(url.path)
  # State the body format explicitly rather than relying on Net::HTTP's default.
  req.content_type = "application/x-www-form-urlencoded"
  req.body = request_body.join("&")

  http = Net::HTTP.new(url.host, url.port)
  http.read_timeout = @options[:request_timeout]
  response = http.start { |connection| connection.request(req) }
  response.body
end
275
+ end
276
+ end
metadata ADDED
@@ -0,0 +1,72 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ncbo_resource_index
3
+ version: !ruby/object:Gem::Version
4
+ version: '1.0'
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Paul R Alexander
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-01-25 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: libxml-ruby
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 2.2.0
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: 2.2.0
30
+ description: The NCBO Resource Index Gem is a Ruby client for NCBO's Resource Index
31
+ Web service. The NCBO Resource Index is a system for ontology based annotation and
32
+ indexing of biomedical data; the key functionality of this system is to enable users
33
+ to locate biomedical data resources related to particular concepts. A set of annotations
34
+ is generated automatically and presented through integration with BioPortal, enabling
35
+ researchers to search for biomedical resources associated (annotated) with specific
36
+ ontology terms. This service uses a concept recognizer (developed by the National
37
+ Center for Integrative Biomedical Informatics, University of Michigan) to produce
38
+ a set of annotations and expand them using ontology is_a relations.
39
+ email: support@bioontology.org
40
+ executables: []
41
+ extensions: []
42
+ extra_rdoc_files: []
43
+ files:
44
+ - lib/ncbo_resource_index/data.rb
45
+ - lib/ncbo_resource_index/parser.rb
46
+ - lib/ncbo_resource_index.rb
47
+ homepage: http://github.com/ncbo/ncbo_resource_index
48
+ licenses: []
49
+ post_install_message:
50
+ rdoc_options: []
51
+ require_paths:
52
+ - lib
53
+ required_ruby_version: !ruby/object:Gem::Requirement
54
+ none: false
55
+ requirements:
56
+ - - ! '>='
57
+ - !ruby/object:Gem::Version
58
+ version: '0'
59
+ required_rubygems_version: !ruby/object:Gem::Requirement
60
+ none: false
61
+ requirements:
62
+ - - ! '>='
63
+ - !ruby/object:Gem::Version
64
+ version: '0'
65
+ requirements: []
66
+ rubyforge_project:
67
+ rubygems_version: 1.8.23
68
+ signing_key:
69
+ specification_version: 3
70
+ summary: The NCBO Resource Index Gem is a Ruby client for NCBO's Resource Index Web
71
+ service
72
+ test_files: []