opentox-ruby-api-wrapper 1.4.0 → 1.5.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/owl.rb CHANGED
@@ -1,45 +1,198 @@
1
+ class Redland::Literal
2
+
3
+ def self.create(value, datatype=nil)
4
+ if datatype
5
+ if datatype.is_a?(Redland::Uri)
6
+ Redland::Literal.new(value.to_s,nil,datatype)
7
+ else
8
+ Redland::Literal.new(value.to_s,nil,Redland::Uri.new(datatype.to_s))
9
+ end
10
+ else
11
+ Redland::Literal.new(value.to_s,nil,Redland::Literal.parse_datatype_uri(value))
12
+ end
13
+ end
14
+
15
+ # the literal node of the ruby swig api provides the 'value' of a literal but not the 'datatype'
16
+ # found solution in mailing list
17
+ def datatype
18
+ uri = Redland.librdf_node_get_literal_value_datatype_uri(self.node)
19
+ return Redland.librdf_uri_to_string(uri) if uri
20
+ end
21
+
22
+ # gets value of literal, value class is set according to literal datatype
23
+ def get_value
24
+ Redland::Literal.parse_value( self.value, self.datatype )
25
+ end
26
+
27
+ private
28
+ @@type_string = XML["string"].uri
29
+ @@type_uri = XML["anyURI"].uri
30
+ @@type_float = XML["float"].uri
31
+ @@type_double = XML["double"].uri
32
+ @@type_date = XML["date"].uri
33
+ @@type_boolean = XML["boolean"].uri
34
+ @@type_datetime = XML["dateTime"].uri
35
+
36
+ # parses value according to datatype uri
37
+ def self.parse_value(string_value, datatype_uri)
38
+ if (datatype_uri==nil || datatype_uri.size==0)
39
+ LOGGER.warn("empty datatype for literal with value: "+string_value)
40
+ return string_value
41
+ end
42
+ case datatype_uri
43
+ when @@type_string.to_s
44
+ return string_value
45
+ when @@type_uri.to_s
46
+ return string_value #PENDING uri as string?
47
+ when @@type_float.to_s
48
+ return string_value.to_f
49
+ when @@type_double.to_s
50
+ return string_value.to_f
51
+ when @@type_boolean.to_s
52
+ return string_value.upcase=="TRUE"
53
+ when @@type_date.to_s
54
+ return string_value #PENDING date as string?
55
+ when @@type_datetime.to_s
56
+ return string_value #PENDING date as string?
57
+ else
58
+ raise "unknown literal datatype: '"+datatype_uri.to_s+"', value is "+string_value
59
+ end
60
+ end
61
+
62
+ # parse datatype uri according to value class
63
+ def self.parse_datatype_uri(value)
64
+ if value==nil
65
+ raise "illegal datatype: value is nil"
66
+ elsif value.is_a?(String)
67
+ # PENDING: uri check too slow?
68
+ if OpenTox::Utils.is_uri?(value)
69
+ return @@type_uri
70
+ else
71
+ return @@type_string
72
+ end
73
+ elsif value.is_a?(Float)
74
+ return @@type_float
75
+ elsif value.is_a?(TrueClass) or value.is_a?(FalseClass)
76
+ return @@type_boolean
77
+ else
78
+ raise "illegal datatype: "+value.class.to_s+" "+value.to_s
79
+ end
80
+ end
81
+ end
82
+
1
83
  module OpenTox
2
84
 
3
85
  class Owl
86
+
87
+ # ot_class is the class of the object, e.g. "Model","Dataset", ...
88
+ # root_node is the root-object node in the rdf
89
+ # uri is the uri of the object
90
+ attr_accessor :ot_class, :root_node, :uri, :model
4
91
 
5
- attr_reader :uri, :ot_class
6
-
7
- def initialize(ot_class,uri)
92
+ def initialize
8
93
  @model = Redland::Model.new Redland::MemoryStore.new
9
- @parser = Redland::Parser.new
10
- @ot_class = ot_class
11
- @uri = Redland::Uri.new(uri.chomp)
12
- @model.add @uri, RDF['type'], OT[@ot_class]
13
- @model.add @uri, DC['identifier'], @uri
14
94
  end
15
95
 
16
- def method_missing(name, *args)
17
- methods = ['title', 'source', 'identifier', 'algorithm', 'independentVariables', 'dependentVariables', 'predictedVariables', 'date','trainingDataset' ]
18
- if methods.include? name.to_s.sub(/=/,'')
19
- if /=/ =~ name.to_s # setter
20
- name = name.to_s.sub(/=/,'')
21
- begin # delete existing entry
22
- t = @model.object(@uri, DC[name])
23
- @model.delete @uri, DC[name], t
24
- rescue
25
- end
26
- @model.add @uri, DC[name], args.first
27
- else # getter
28
- @model.object(@uri, DC['title']).to_s
29
- end
30
- else
31
- raise "Method '#{name.to_s}' not found."
32
- end
96
+ def self.create( ot_class, uri )
97
+
98
+ owl = OpenTox::Owl.new
99
+ owl.ot_class = ot_class
100
+ owl.root_node = Redland::Resource.new(uri.to_s.strip)
101
+ owl.set("type",owl.node(owl.ot_class))
102
+ owl
103
+ end
104
+
105
+ # loads owl from data
106
+ def self.from_data(data, base_uri, ot_class)
107
+
108
+ owl = OpenTox::Owl.new
109
+ parser = Redland::Parser.new
110
+
111
+ begin
112
+ parser.parse_string_into_model(owl.model, data, base_uri)
113
+
114
+ # now loading root_node and uri
115
+ owl.model.find(nil, owl.node("type"), owl.node(ot_class)) do |s,p,o|
116
+ #LOGGER.debug "about statements "+s.to_s+" . "+p.to_s+" -> "+o.to_s
117
+ is_root = true
118
+ owl.model.find(nil, nil, s) do |ss,pp,oo|
119
+ is_root = false
120
+ break
121
+ end
122
+ if is_root
123
+ raise "cannot derieve root object from rdf, more than one object specified" if owl.uri
124
+ raise "illegal root node type, no uri specified\n"+data.to_s if s.blank?
125
+ owl.uri = s.uri.to_s
126
+ owl.root_node = s
127
+ end
128
+ end
129
+
130
+ # handle error if no root node was found
131
+ unless owl.root_node
132
+ types = []
133
+ owl.model.find(nil, owl.node("type"), nil){ |s,p,o| types << o.to_s }
134
+ raise "root node for class '"+ot_class+"' not found (available type nodes: "+types.inspect+")"
135
+ end
136
+ raise "no uri in rdf: '"+owl.uri+"'" unless owl.uri and Utils.is_uri?(owl.uri)
137
+ owl.ot_class = ot_class
138
+ owl
139
+ rescue => e
140
+ RestClientWrapper.raise_uri_error(e.message, base_uri)
141
+ end
142
+ end
143
+
144
+ def self.from_uri(uri, ot_class)
145
+ return from_data(RestClientWrapper.get(uri,:accept => "application/rdf+xml").to_s, uri, ot_class)
33
146
  end
34
147
 
148
+ def rdf
149
+ @model.to_string
150
+ end
151
+
152
+ def get(name)
153
+ raise "uri is no prop, use owl.uri instead" if name=="uri"
154
+ property_node = node(name.to_s)
155
+ return get_value( @model.object(@root_node, property_node) )
156
+ end
157
+
158
+ private
159
+ def get_value( node )
160
+ return nil unless node
161
+ if node.is_a?(Redland::Literal)
162
+ return node.get_value
163
+ elsif node.blank?
164
+ return nil
165
+ else
166
+ return node.uri.to_s
167
+ end
168
+ end
169
+
170
+ public
171
+ def set(name, value, datatype=nil)
172
+ raise "uri is no prop, cannot set uri" if name=="uri"
173
+ property_node = node(name.to_s)
174
+ begin # delete existing entry
175
+ t = @model.object(@root_node, property_node)
176
+ @model.delete @root_node, property_node, t
177
+ rescue
178
+ end
179
+ if value.is_a?(Redland::Node)
180
+ raise "not nil datatype not allowed when setting redland node as value" if datatype
181
+ @model.add @root_node, property_node, value
182
+ else # if value is no node, a literal is created
183
+ @model.add @root_node, property_node, Redland::Literal.create(value.to_s, datatype)
184
+ end
185
+ end
186
+
35
187
  def parameters=(params)
36
188
  params.each do |name, settings|
37
189
  parameter = @model.create_resource
38
- @model.add parameter, RDF['type'], OT['Parameter']
39
- @model.add parameter, DC['title'], name
40
- @model.add parameter, OT['paramScope'], settings[:scope]
41
- @model.add parameter, OT['paramValue'], settings[:value]
42
- end
190
+ @model.add parameter, node('type'), node('Parameter')
191
+ @model.add parameter, node('title'), name
192
+ @model.add parameter, node('paramScope'), settings[:scope]
193
+ @model.add parameter, node('paramValue'), settings[:value]
194
+ @model.add @root_node, node('parameters'), parameter
195
+ end
43
196
  end
44
197
 
45
198
  def add_data_entries(compound_uri,features)
@@ -47,8 +200,7 @@ module OpenTox
47
200
  compound = @model.subject(DC["identifier"], compound_uri)
48
201
  if compound.nil?
49
202
  compound = @model.create_resource(compound_uri)
50
- @model.add compound, RDF['type'], OT["Compound"]
51
- @model.add compound, DC["identifier"], compound_uri
203
+ @model.add compound, node('type'), node("Compound")
52
204
  end
53
205
  features.each do |f|
54
206
  f.each do |feature_uri,value|
@@ -57,144 +209,229 @@ module OpenTox
57
209
  if value.class.to_s == 'Hash'
58
210
  # create tuple
59
211
  tuple = @model.create_resource
60
- @model.add tuple, RDF['type'], OT["Tuple"]
61
- @model.add tuple, OT['feature'], feature
212
+ @model.add tuple, node('type'), node("Tuple")
213
+ @model.add tuple, node('feature'), feature
62
214
  value.each do |uri,v|
63
215
  f = find_or_create_feature uri
64
216
  complex_value = @model.create_resource
65
- @model.add tuple, OT['complexValue'], complex_value
66
- @model.add complex_value, RDF['type'], OT["FeatureValue"]
67
- @model.add complex_value, OT['feature'], f
68
- @model.add complex_value, OT['value'], v.to_s
217
+ @model.add tuple, node('complexValue'), complex_value
218
+ @model.add complex_value, node('type'), node("FeatureValue")
219
+ @model.add complex_value, node('feature'), f
220
+ @model.add complex_value, node('value'), Redland::Literal.create(v)
69
221
  end
70
222
  # add data entry
71
- data_entry = @model.subject OT['compound'], compound
223
+ data_entry = @model.subject node('compound'), compound
72
224
  if data_entry.nil?
73
225
  data_entry = @model.create_resource
74
- @model.add @uri, OT['dataEntry'], data_entry
75
- @model.add data_entry, RDF['type'], OT["DataEntry"]
76
- @model.add data_entry, OT['compound'], compound
226
+ @model.add @root_node, node('dataEntry'), data_entry
227
+ @model.add data_entry, node('type'), node("DataEntry")
228
+ @model.add data_entry, node('compound'), compound
77
229
  end
78
- @model.add data_entry, OT['values'], tuple
230
+ @model.add data_entry, node('values'), tuple
79
231
  else
80
- data_entry = @model.subject OT['compound'], compound
232
+ data_entry = @model.subject node('compound'), compound
81
233
  if data_entry.nil?
82
234
  data_entry = @model.create_resource
83
- @model.add @uri, OT['dataEntry'], data_entry
84
- @model.add data_entry, RDF['type'], OT["DataEntry"]
85
- @model.add data_entry, OT['compound'], compound
235
+ @model.add @root_node, node('dataEntry'), data_entry
236
+ @model.add data_entry,node('type'), node("DataEntry")
237
+ @model.add data_entry, node('compound'), compound
86
238
  end
87
239
  values = @model.create_resource
88
- @model.add data_entry, OT['values'], values
89
- @model.add values, RDF['type'], OT['FeatureValue']
90
- @model.add values, OT['feature'], feature
91
- @model.add values, OT['value'], value.to_s
240
+ @model.add data_entry, node('values'), values
241
+ @model.add values, node('type'), node('FeatureValue')
242
+ @model.add values, node('feature'), feature
243
+ @model.add values, node('value'), Redland::Literal.create(value)
92
244
  end
93
245
  end
94
246
  end
95
- end
96
-
97
- def find_or_create_feature(feature_uri)
98
- feature = @model.subject(DC["identifier"], feature_uri)
99
- if feature.nil?
100
- feature = @model.create_resource(feature_uri)
101
- @model.add feature, RDF['type'], OT["Feature"]
102
- @model.add feature, DC["identifier"], feature_uri
103
- @model.add feature, DC["title"], File.basename(feature_uri).split(/#/)[1]
104
- @model.add feature, DC['source'], feature_uri
105
- end
106
- feature
107
- end
108
-
109
- def rdf
110
- @model.to_string
111
- end
112
-
113
- =begin
114
-
115
- def to_ntriples
116
- @serializer.model_to_string(Redland::Uri.new(@uri), @model)
117
- end
118
-
119
- def uri=(uri)
120
- @uri = uri.chomp
121
- # rewrite uri
122
- @model.subjects(RDF['type'],OT[@ot_class]).each do |me|
123
- @model.delete(me,RDF['type'],OT[@ot_class])
124
- @model.add(uri,RDF['type'],OT[@ot_class])
125
- id = @model.object(me, DC['identifier'])
126
- @model.delete me, DC['identifier'], id
127
- # find/replace metadata
128
- @model.find(me, nil, nil) do |s,p,o|
129
- @model.delete s,p,o
130
- @model.add uri,p,o
131
- end
132
- @model.add uri, DC['identifier'], @uri
133
- end
134
- end
135
-
136
- def read(uri)
137
- @parser.parse_into_model(@model,uri)
138
- @uri = uri
139
- end
140
-
141
- def identifier
142
- me = @model.subject(RDF['type'],OT[@ot_class])
143
- @model.object(me, DC['identifier']).to_s unless me.nil?
144
- end
145
-
146
- def title=(title)
147
- me = @model.subject(RDF['type'],OT[@ot_class])
148
- begin
149
- t = @model.object(me, DC['title'])
150
- @model.delete me, DC['title'], t
151
- rescue
152
- end
153
- @model.add me, DC['title'], title
154
- end
247
+ end
248
+
249
+ private
250
+ def find_feature(feature_uri)
251
+ # PENDING: more efficiently get feature node?
252
+ @model.subjects(RDF['type'], OT['Feature']).each do |feature|
253
+ return feature if feature_uri==get_value(feature)
254
+ end
255
+ return nil
256
+ end
155
257
 
156
- def source=(source)
157
- me = @model.subject(RDF['type'],OT[@ot_class])
158
- begin
159
- t = @model.object(me, DC['source'])
160
- @model.delete me, DC['source'], t
161
- rescue
162
- end
163
- @model.add me, DC['source'], source
258
+ public
259
+ def find_or_create_feature(feature_uri)
260
+ feature = find_feature(feature_uri)
261
+ unless feature
262
+ feature = @model.create_resource(feature_uri)
263
+ @model.add feature, node('type'), node("Feature")
264
+ @model.add feature, node("title"), File.basename(feature_uri).split(/#/)[1]
265
+ @model.add feature, node('creator'), feature_uri
164
266
  end
267
+ feature
268
+ end
269
+
270
+ # feature values are not loaded for performance reasons
271
+ # loading compounds and features into arrays that are given as params
272
+ def load_dataset( compounds, features )
273
+
274
+ @model.subjects(node('type'), node('Compound')).each do |compound|
275
+ compounds << get_value(compound)
276
+ end
277
+ @model.subjects(node('type'), node('Feature')).each do |feature|
278
+ features << get_value(feature)
279
+ end
280
+ LOGGER.debug "loaded "+compounds.size.to_s+" compounds and "+features.size.to_s+" features"
281
+ end
165
282
 
166
- def title
167
- # I have no idea, why 2 subjects are returned
168
- # iterating over all subjects leads to memory allocation problems
169
- # SPARQL queries also do not work
170
- #me = @model.subjects(RDF['type'],OT[@ot_class])[1]
171
- me = @model.subject(RDF['type'],OT[@ot_class])
172
- @model.object(me, DC['title']).to_s
173
- end
283
+ # loading feature values for the specified feature
284
+ # if feature is nil, all feature values are loaded
285
+ #
286
+ # general remark on the rdf loading (found out with some testing):
287
+ # the search methods (subjects/find) are fast, the time-consuming part is creating resources,
288
+ # which cannot be avoided in general (implemented some performance tweaks with uri storing when loading all features)
289
+ def load_dataset_feature_values( compounds, data, feature_uri=nil )
290
+
291
+ LOGGER.debug("load feature values"+ ( (feature_uri!=nil)?(" for feature: "+feature_uri):"") )
174
292
 
175
- def source
176
- me = @model.subject(RDF['type'],OT[@ot_class])
177
- @model.object(me, DC['source']).to_s unless me.nil?
178
- end
179
- def create_owl_statement(name,value)
180
- r = @model.create_resource
181
- dc_class = DC[name.gsub(/^[a-z]/) { |a| a.upcase }] # capitalize only the first letter
182
- #puts "DC:" + name.gsub(/^[a-z]/) { |a| a.upcase }
183
- @model.add dc_class, RDF['type'], OWL["Class"]
184
- @model.add r, RDF['type'], dc_class
185
- @model.add r, DC[name], value
186
- end
293
+ # values are stored in the data-hash, hash has a key for each compound
294
+ compounds.each{|c| data[c] = [] unless data[c]}
295
+
296
+ load_all_features = feature_uri==nil
297
+ feature_node = nil
298
+
299
+ # create feature node for feature uri if specified
300
+ unless load_all_features
301
+ feature_node = find_feature(feature_uri)
302
+ raise "feature node not found" unless feature_node
303
+ end
304
+
305
+ count = 0
306
+
307
+ # performance tweak: store uris to save some resource init time
308
+ compound_uri_store = {}
309
+ feature_uri_store = {}
310
+
311
+ # search for all feature_value_node with property 'ot_feature'
312
+ # feature_node is either nil, i.e. a wildcard or specified
313
+ @model.find(nil, node('feature'), feature_node) do |feature_value_node,p,o|
314
+
315
+ # get compound_uri by "backtracking" to values node (property is 'values'), then get compound_node via 'compound'
316
+ value_nodes = @model.subjects(node('values'),feature_value_node)
317
+ raise "more than one value node "+value_nodes.size.to_s unless value_nodes.size==1
318
+ value_node = value_nodes[0]
319
+ compound_node = @model.object(value_node, node('compound'))
320
+ compound_uri = compound_uri_store[compound_node.to_s]
321
+ unless compound_uri
322
+ compound_uri = get_value(compound_node)
323
+ compound_uri_store[compound_node.to_s] = compound_uri
324
+ end
325
+
326
+ if load_all_features
327
+ # when loading all features, feature_uri is not specified; derive it from the feature node
328
+ feature_uri = feature_uri_store[o.to_s]
329
+ unless feature_uri
330
+ feature_uri = get_value(o)
331
+ feature_uri_store[o.to_s] = feature_uri
332
+ end
333
+ end
334
+
335
+ value_node_type = @model.object(feature_value_node, node('type'))
336
+ if (value_node_type == node('FeatureValue'))
337
+ value_literal = @model.object( feature_value_node, node('value'))
338
+ raise "feature value no literal" unless value_literal.is_a?(Redland::Literal)
339
+ data[compound_uri] << {feature_uri => value_literal.get_value }
340
+ else
341
+ raise "feature value type not yet implemented "+value_node_type.to_s
342
+ end
343
+ count += 1
344
+ LOGGER.debug "loaded "+count.to_s+" feature values" if (count%500 == 0)
345
+ end
346
+
347
+ LOGGER.debug "loaded "+count.to_s+" feature values"
348
+ end
349
+
350
+ @@property_nodes = { "type" => RDF["type"],
351
+ "about" => RDF["about"],
352
+ "title" => DC["title"],
353
+ "creator" => DC["creator"],
354
+ #"identifier" => DC["identifier"], identifier is deprecated
355
+ "date" => DC["date"],
356
+ "format" => DC["format"]}
357
+
358
+ # this method has two purposes:
359
+ # * distinguishing ot-properties from dc- and rdf- properties
360
+ # * caching nodes, as creating nodes is costly
361
+ def node(name)
362
+ raise "dc[identifier] deprecated, use owl.uri" if name=="identifier"
363
+ n = @@property_nodes[name]
364
+ unless n
365
+ n = OT[name]
366
+ @@property_nodes[name] = n
367
+ end
368
+ return n
369
+ end
187
370
 
188
- def method_missing(name, *args)
189
- # create magic setter methods
190
- if /=/ =~ name.to_s
191
- create_owl_statement name.to_s.sub(/=/,''), args.first
192
- else
193
- raise "No method #{name}"
194
- end
195
- end
371
+ =begin
372
+ def data
373
+ LOGGER.debug("getting data from model")
374
+
375
+ data = {}
376
+ @model.subjects(RDF['type'], OT['DataEntry']).each do |data_entry|
377
+ compound_node = @model.object(data_entry, OT['compound'])
378
+ compound_uri = @model.object(compound_node, DC['identifier']).to_s
379
+ @model.find(data_entry, OT['values'], nil) do |s,p,values|
380
+ feature_node = @model.object values, OT['feature']
381
+ feature_uri = @model.object(feature_node, DC['identifier']).to_s.sub(/\^\^.*$/,'') # remove XML datatype
382
+ type = @model.object(values, RDF['type'])
383
+ if type == OT['FeatureValue']
384
+ value = @model.object(values, OT['value']).to_s
385
+ case value.to_s
386
+ when TRUE_REGEXP # defined in environment.rb
387
+ value = true
388
+ when FALSE_REGEXP # defined in environment.rb
389
+ value = false
390
+ when /.*\^\^<.*XMLSchema#.*>/
391
+ #HACK for reading ambit datasets
392
+ case value.to_s
393
+ when /XMLSchema#string/
394
+ value = value.to_s[0..(value.to_s.index("^^")-1)]
395
+ when /XMLSchema#double/
396
+ value = value.to_s[0..(value.to_s.index("^^")-1)].to_f
397
+ else
398
+ LOGGER.warn " ILLEGAL TYPE "+compound_uri + " has value '" + value.to_s + "' for feature " + feature_uri
399
+ value = nil
400
+ end
401
+ else
402
+ LOGGER.warn compound_uri + " has value '" + value.to_s + "' for feature " + feature_uri
403
+ value = nil
404
+ end
405
+ LOGGER.debug "converting owl to yaml, #compounds: "+(data.keys.size+1).to_s if (data.keys.size+1)%10==0 && !data.has_key?(compound_uri)
406
+
407
+ return data if (data.keys.size)>9 && !data.has_key?(compound_uri)
408
+
409
+ #puts "c "+compound_uri.to_s
410
+ #puts "f "+feature_uri.to_s
411
+ #puts "v "+value.to_s
412
+ #puts ""
413
+ data[compound_uri] = [] unless data[compound_uri]
414
+ data[compound_uri] << {feature_uri => value} unless value.nil?
415
+ elsif type == OT['Tuple']
416
+ entry = {}
417
+ data[compound_uri] = [] unless data[compound_uri]
418
+ #data[compound_uri][feature_uri] = [] unless data[compound_uri][feature_uri]
419
+ @model.find(values, OT['complexValue'],nil) do |s,p,complex_value|
420
+ name_node = @model.object complex_value, OT['feature']
421
+ name = @model.object(name_node, DC['title']).to_s
422
+ value = @model.object(complex_value, OT['value']).to_s
423
+ v = value.sub(/\^\^.*$/,'') # remove XML datatype
424
+ v = v.to_f if v.match(/^[\.|\d]+$/) # guess numeric datatype
425
+ entry[name] = v
426
+ end
427
+ data[compound_uri] << {feature_uri => entry} unless entry.empty?
428
+ end
429
+ end
430
+ end
431
+ data
432
+ end
196
433
  =end
197
434
 
198
- end
199
-
435
+ end
200
436
  end
437
+