opentox-ruby-api-wrapper 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/owl.rb CHANGED
@@ -1,45 +1,198 @@
1
# Extensions to Redland::Literal: creation of typed literals and conversion
# between XML Schema datatype uris and ruby values.
class Redland::Literal

  # datatype uris of the supported XML Schema types
  # (XML is defined outside this file; presumably the XML Schema namespace - TODO confirm)
  @@type_string = XML["string"].uri
  @@type_uri = XML["anyURI"].uri
  @@type_float = XML["float"].uri
  @@type_double = XML["double"].uri
  @@type_date = XML["date"].uri
  @@type_boolean = XML["boolean"].uri
  @@type_datetime = XML["dateTime"].uri

  # Creates a typed literal.
  #
  # value    - value of the literal, stored as value.to_s
  # datatype - optional datatype: a Redland::Uri, or any object whose to_s is the
  #            datatype uri; if omitted, the datatype is derived from the class
  #            of value (see parse_datatype_uri)
  #
  # Raises RuntimeError if no datatype is given and the class of value is not supported.
  def self.create(value, datatype=nil)
    type_uri =
      if !datatype
        Redland::Literal.parse_datatype_uri(value)
      elsif datatype.is_a?(Redland::Uri)
        datatype
      else
        Redland::Uri.new(datatype.to_s)
      end
    Redland::Literal.new(value.to_s, nil, type_uri)
  end

  # the literal node of the ruby swig api provides the 'value' of a literal but not the 'datatype'
  # found solution in mailing list
  #
  # Returns the datatype uri as string, or nil if the literal has no datatype.
  def datatype
    uri = Redland.librdf_node_get_literal_value_datatype_uri(self.node)
    return Redland.librdf_uri_to_string(uri) if uri
  end

  # gets value of literal, value class is set according to literal datatype
  def get_value
    Redland::Literal.parse_value( self.value, self.datatype )
  end

  # The two class methods below are public: a bare 'private' does not apply to
  # 'def self.' methods, and they are called with an explicit receiver above.

  # parses value according to datatype uri
  #
  # string_value - lexical value of the literal
  # datatype_uri - datatype uri as string; nil or empty returns string_value unchanged (with a warning)
  #
  # Returns a String (string, anyURI, date, dateTime), a Float (float, double)
  # or true/false (boolean).
  # Raises RuntimeError for any other datatype.
  def self.parse_value(string_value, datatype_uri)
    if datatype_uri.nil? || datatype_uri.size == 0
      LOGGER.warn("empty datatype for literal with value: #{string_value}")
      return string_value
    end
    case datatype_uri
    when @@type_string.to_s
      string_value
    when @@type_uri.to_s
      string_value #PENDING uri as string?
    when @@type_float.to_s, @@type_double.to_s
      string_value.to_f
    when @@type_boolean.to_s
      # xsd:boolean allows the lexical forms true/false/1/0 and collapses
      # surrounding whitespace; upper case spellings are accepted as before
      %w[TRUE 1].include?(string_value.to_s.strip.upcase)
    when @@type_date.to_s, @@type_datetime.to_s
      string_value #PENDING date as string?
    else
      raise "unknown literal datatype: '#{datatype_uri}', value is #{string_value}"
    end
  end

  # parse datatype uri according to value class
  #
  # value - ruby value; String, Float, true and false are supported
  #
  # Returns the datatype uri for the value.
  # Raises RuntimeError for nil and for any other class.
  # NOTE(review): Integer values are rejected here as "illegal datatype" - confirm whether that is intended.
  def self.parse_datatype_uri(value)
    case value
    when nil
      raise "illegal datatype: value is nil"
    when String
      # PENDING: uri check too slow?
      OpenTox::Utils.is_uri?(value) ? @@type_uri : @@type_string
    when Float
      @@type_float
    when TrueClass, FalseClass
      @@type_boolean
    else
      raise "illegal datatype: #{value.class} #{value}"
    end
  end
end
82
+
1
83
  module OpenTox
2
84
 
3
85
  class Owl
86
+
87
+ # ot_class is the class of the object, e.g. "Model","Dataset", ...
88
+ # root_node is the root-object node in the rdf
89
+ # uri the uri of the object
90
+ attr_accessor :ot_class, :root_node, :uri, :model
4
91
 
5
- attr_reader :uri, :ot_class
6
-
7
- def initialize(ot_class,uri)
92
+ def initialize
8
93
  @model = Redland::Model.new Redland::MemoryStore.new
9
- @parser = Redland::Parser.new
10
- @ot_class = ot_class
11
- @uri = Redland::Uri.new(uri.chomp)
12
- @model.add @uri, RDF['type'], OT[@ot_class]
13
- @model.add @uri, DC['identifier'], @uri
14
94
  end
15
95
 
16
- def method_missing(name, *args)
17
- methods = ['title', 'source', 'identifier', 'algorithm', 'independentVariables', 'dependentVariables', 'predictedVariables', 'date','trainingDataset' ]
18
- if methods.include? name.to_s.sub(/=/,'')
19
- if /=/ =~ name.to_s # setter
20
- name = name.to_s.sub(/=/,'')
21
- begin # delete existing entry
22
- t = @model.object(@uri, DC[name])
23
- @model.delete @uri, DC[name], t
24
- rescue
25
- end
26
- @model.add @uri, DC[name], args.first
27
- else # getter
28
- @model.object(@uri, DC['title']).to_s
29
- end
30
- else
31
- raise "Method '#{name.to_s}' not found."
32
- end
96
# Creates a new owl for an object of the given class.
#
# ot_class - class of the object, e.g. "Model", "Dataset"
# uri      - uri of the object; converted with to_s and stripped
#
# Returns the owl with ot_class, uri and root_node set and the root node typed with ot_class.
def self.create( ot_class, uri )

  object_uri = uri.to_s.strip
  owl = OpenTox::Owl.new
  owl.ot_class = ot_class
  # make the uri available via owl.uri (from_data sets it as well)
  owl.uri = object_uri
  owl.root_node = Redland::Resource.new(object_uri)
  owl.set("type",owl.node(owl.ot_class))
  owl
end
104
+
105
# loads owl from data
#
# data     - rdf document as string, parsed into the model of the new owl
# base_uri - base uri handed to the parser; also passed on when reporting errors
# ot_class - class of the root object, e.g. "Model"
#
# The root node is the subject of type ot_class that is not the object of any statement.
# Any error raised while loading is passed (as message) to
# RestClientWrapper.raise_uri_error together with base_uri.
def self.from_data(data, base_uri, ot_class)

  owl = OpenTox::Owl.new
  parser = Redland::Parser.new

  begin
    parser.parse_string_into_model(owl.model, data, base_uri)

    # now loading root_node and uri
    owl.model.find(nil, owl.node("type"), owl.node(ot_class)) do |s,p,o|
      # a candidate is the root only if no statement points to it
      is_root = true
      owl.model.find(nil, nil, s) do |ss,pp,oo|
        is_root = false
        break
      end
      if is_root
        raise "cannot derieve root object from rdf, more than one object specified" if owl.uri
        raise "illegal root node type, no uri specified\n#{data}" if s.blank?
        owl.uri = s.uri.to_s
        owl.root_node = s
      end
    end

    # handle error if no root node was found
    unless owl.root_node
      types = []
      owl.model.find(nil, owl.node("type"), nil){ |s,p,o| types << o.to_s }
      # interpolation keeps the message intact for non-string ot_class values (e.g. symbols)
      raise "root node for class '#{ot_class}' not found (available type nodes: #{types.inspect})"
    end
    raise "no uri in rdf: '#{owl.uri}'" unless owl.uri && Utils.is_uri?(owl.uri)
    owl.ot_class = ot_class
    owl
  rescue => e
    RestClientWrapper.raise_uri_error(e.message, base_uri)
  end
end
143
+
144
# Fetches the rdf/xml representation of uri and loads the owl from it;
# uri also serves as base uri for from_data.
def self.from_uri(uri, ot_class)
  rdf_xml = RestClientWrapper.get(uri,:accept => "application/rdf+xml").to_s
  from_data(rdf_xml, uri, ot_class)
end
34
147
 
148
# Serializes the underlying rdf model; returns the result of @model.to_string.
def rdf
  serialized = @model.to_string
  serialized
end
151
+
152
# Reads a property of the root node.
#
# name - property name (String or Symbol), e.g. "title"
#
# Returns the property value as converted by get_value (nil if there is none).
# Raises RuntimeError for "uri", which is no property; use owl.uri instead.
def get(name)
  property = name.to_s # normalize first, so that :uri is rejected like "uri"
  raise "uri is no prop, use owl.uri instead" if property=="uri"
  property_node = node(property)
  return get_value( @model.object(@root_node, property_node) )
end
157
+
158
+ private
159
# Converts a node into a plain ruby value:
# * nil (or false)  -> nil
# * literal         -> the value provided by the literal's get_value
# * blank node      -> nil
# * any other node  -> its uri as string
def get_value( node )
  return nil unless node
  return node.get_value if node.is_a?(Redland::Literal)
  node.blank? ? nil : node.uri.to_s
end
169
+
170
+ public
171
# Sets a property of the root node, replacing an existing entry.
#
# name     - property name (String or Symbol), e.g. "title"
# value    - a Redland::Node, added as is, or any other object, added as literal of value.to_s
# datatype - optional datatype of the literal; must not be given together with a Redland::Node
#
# Raises RuntimeError for "uri" (no property) and for a node value combined with a datatype.
def set(name, value, datatype=nil)
  property = name.to_s # normalize first, so that :uri is rejected like "uri"
  raise "uri is no prop, cannot set uri" if property=="uri"
  property_node = node(property)
  begin # delete existing entry
    t = @model.object(@root_node, property_node)
    @model.delete @root_node, property_node, t
  rescue
    # best effort: errors while deleting are ignored on purpose
  end
  if value.is_a?(Redland::Node)
    raise "not nil datatype not allowed when setting redland node as value" if datatype
    @model.add @root_node, property_node, value
  else # if value is no node, a literal is created
    @model.add @root_node, property_node, Redland::Literal.create(value.to_s, datatype)
  end
end
186
+
35
187
  def parameters=(params)
36
188
  params.each do |name, settings|
37
189
  parameter = @model.create_resource
38
- @model.add parameter, RDF['type'], OT['Parameter']
39
- @model.add parameter, DC['title'], name
40
- @model.add parameter, OT['paramScope'], settings[:scope]
41
- @model.add parameter, OT['paramValue'], settings[:value]
42
- end
190
+ @model.add parameter, node('type'), node('Parameter')
191
+ @model.add parameter, node('title'), name
192
+ @model.add parameter, node('paramScope'), settings[:scope]
193
+ @model.add parameter, node('paramValue'), settings[:value]
194
+ @model.add @root_node, node('parameters'), parameter
195
+ end
43
196
  end
44
197
 
45
198
  def add_data_entries(compound_uri,features)
@@ -47,8 +200,7 @@ module OpenTox
47
200
  compound = @model.subject(DC["identifier"], compound_uri)
48
201
  if compound.nil?
49
202
  compound = @model.create_resource(compound_uri)
50
- @model.add compound, RDF['type'], OT["Compound"]
51
- @model.add compound, DC["identifier"], compound_uri
203
+ @model.add compound, node('type'), node("Compound")
52
204
  end
53
205
  features.each do |f|
54
206
  f.each do |feature_uri,value|
@@ -57,144 +209,229 @@ module OpenTox
57
209
  if value.class.to_s == 'Hash'
58
210
  # create tuple
59
211
  tuple = @model.create_resource
60
- @model.add tuple, RDF['type'], OT["Tuple"]
61
- @model.add tuple, OT['feature'], feature
212
+ @model.add tuple, node('type'), node("Tuple")
213
+ @model.add tuple, node('feature'), feature
62
214
  value.each do |uri,v|
63
215
  f = find_or_create_feature uri
64
216
  complex_value = @model.create_resource
65
- @model.add tuple, OT['complexValue'], complex_value
66
- @model.add complex_value, RDF['type'], OT["FeatureValue"]
67
- @model.add complex_value, OT['feature'], f
68
- @model.add complex_value, OT['value'], v.to_s
217
+ @model.add tuple, node('complexValue'), complex_value
218
+ @model.add complex_value, node('type'), node("FeatureValue")
219
+ @model.add complex_value, node('feature'), f
220
+ @model.add complex_value, node('value'), Redland::Literal.create(v)
69
221
  end
70
222
  # add data entry
71
- data_entry = @model.subject OT['compound'], compound
223
+ data_entry = @model.subject node('compound'), compound
72
224
  if data_entry.nil?
73
225
  data_entry = @model.create_resource
74
- @model.add @uri, OT['dataEntry'], data_entry
75
- @model.add data_entry, RDF['type'], OT["DataEntry"]
76
- @model.add data_entry, OT['compound'], compound
226
+ @model.add @root_node, node('dataEntry'), data_entry
227
+ @model.add data_entry, node('type'), node("DataEntry")
228
+ @model.add data_entry, node('compound'), compound
77
229
  end
78
- @model.add data_entry, OT['values'], tuple
230
+ @model.add data_entry, node('values'), tuple
79
231
  else
80
- data_entry = @model.subject OT['compound'], compound
232
+ data_entry = @model.subject node('compound'), compound
81
233
  if data_entry.nil?
82
234
  data_entry = @model.create_resource
83
- @model.add @uri, OT['dataEntry'], data_entry
84
- @model.add data_entry, RDF['type'], OT["DataEntry"]
85
- @model.add data_entry, OT['compound'], compound
235
+ @model.add @root_node, node('dataEntry'), data_entry
236
+ @model.add data_entry,node('type'), node("DataEntry")
237
+ @model.add data_entry, node('compound'), compound
86
238
  end
87
239
  values = @model.create_resource
88
- @model.add data_entry, OT['values'], values
89
- @model.add values, RDF['type'], OT['FeatureValue']
90
- @model.add values, OT['feature'], feature
91
- @model.add values, OT['value'], value.to_s
240
+ @model.add data_entry, node('values'), values
241
+ @model.add values, node('type'), node('FeatureValue')
242
+ @model.add values, node('feature'), feature
243
+ @model.add values, node('value'), Redland::Literal.create(value)
92
244
  end
93
245
  end
94
246
  end
95
- end
96
-
97
- def find_or_create_feature(feature_uri)
98
- feature = @model.subject(DC["identifier"], feature_uri)
99
- if feature.nil?
100
- feature = @model.create_resource(feature_uri)
101
- @model.add feature, RDF['type'], OT["Feature"]
102
- @model.add feature, DC["identifier"], feature_uri
103
- @model.add feature, DC["title"], File.basename(feature_uri).split(/#/)[1]
104
- @model.add feature, DC['source'], feature_uri
105
- end
106
- feature
107
- end
108
-
109
- def rdf
110
- @model.to_string
111
- end
112
-
113
- =begin
114
-
115
- def to_ntriples
116
- @serializer.model_to_string(Redland::Uri.new(@uri), @model)
117
- end
118
-
119
- def uri=(uri)
120
- @uri = uri.chomp
121
- # rewrite uri
122
- @model.subjects(RDF['type'],OT[@ot_class]).each do |me|
123
- @model.delete(me,RDF['type'],OT[@ot_class])
124
- @model.add(uri,RDF['type'],OT[@ot_class])
125
- id = @model.object(me, DC['identifier'])
126
- @model.delete me, DC['identifier'], id
127
- # find/replace metadata
128
- @model.find(me, nil, nil) do |s,p,o|
129
- @model.delete s,p,o
130
- @model.add uri,p,o
131
- end
132
- @model.add uri, DC['identifier'], @uri
133
- end
134
- end
135
-
136
- def read(uri)
137
- @parser.parse_into_model(@model,uri)
138
- @uri = uri
139
- end
140
-
141
- def identifier
142
- me = @model.subject(RDF['type'],OT[@ot_class])
143
- @model.object(me, DC['identifier']).to_s unless me.nil?
144
- end
145
-
146
- def title=(title)
147
- me = @model.subject(RDF['type'],OT[@ot_class])
148
- begin
149
- t = @model.object(me, DC['title'])
150
- @model.delete me, DC['title'], t
151
- rescue
152
- end
153
- @model.add me, DC['title'], title
154
- end
247
+ end
248
+
249
+ private
250
# Looks up the node of a feature.
#
# feature_uri - uri of the feature; compared with == against the value
#               (see get_value) of each node of type Feature
#
# Returns the first matching node, or nil if there is none.
def find_feature(feature_uri)
  # PENDING: more efficiently get feature node?
  # the type and class nodes come from node(), which caches them
  @model.subjects(node('type'), node('Feature')).find do |feature|
    feature_uri==get_value(feature)
  end
end
155
257
 
156
- def source=(source)
157
- me = @model.subject(RDF['type'],OT[@ot_class])
158
- begin
159
- t = @model.object(me, DC['source'])
160
- @model.delete me, DC['source'], t
161
- rescue
162
- end
163
- @model.add me, DC['source'], source
258
+ public
259
# Returns the node of the feature with the given uri; creates it if it does not exist yet.
#
# feature_uri - uri of the feature (String)
#
# A new feature node gets the type Feature, the feature_uri as creator and a title:
# the part after '#' of the last path segment, e.g. "activity" for
# ".../feature#activity". Without a '#' the last path segment itself is used,
# so that the title is never nil.
def find_or_create_feature(feature_uri)
  feature = find_feature(feature_uri)
  return feature if feature

  name_parts = File.basename(feature_uri).split(/#/)
  title = name_parts[1] || name_parts[0] || feature_uri

  feature = @model.create_resource(feature_uri)
  @model.add feature, node('type'), node("Feature")
  @model.add feature, node("title"), title
  @model.add feature, node('creator'), feature_uri
  feature
end
269
+
270
# feature values are not loaded for performance reasons
# loading compounds and features into arrays that are given as params
#
# compounds - receives (via <<) the value of each node of type Compound
# features  - receives (via <<) the value of each node of type Feature
def load_dataset( compounds, features )

  type_node = node('type')
  [ ['Compound', compounds], ['Feature', features] ].each do |class_name, target|
    @model.subjects(type_node, node(class_name)).each { |subject| target << get_value(subject) }
  end
  LOGGER.debug "loaded #{compounds.size} compounds and #{features.size} features"
end
165
282
 
166
- def title
167
- # I have no idea, why 2 subjects are returned
168
- # iterating over all subjects leads to memory allocation problems
169
- # SPARQL queries also do not work
170
- #me = @model.subjects(RDF['type'],OT[@ot_class])[1]
171
- me = @model.subject(RDF['type'],OT[@ot_class])
172
- @model.object(me, DC['title']).to_s
173
- end
283
+ # loading feature values for the specified feature
284
+ # if feature is nil, all feature values are loaded
285
+ #
286
+ # general remark on the rdf loading (found out with some testing):
287
+ # the search methods (subjects/find) are fast, the time-consuming part is creating resources,
288
+ # which cannot be avoided in general (implemented some performance tweaks with uri storing when loading all features)
289
+ def load_dataset_feature_values( compounds, data, feature_uri=nil )
290
+
291
+ LOGGER.debug("load feature values"+ ( (feature_uri!=nil)?(" for feature: "+feature_uri):"") )
174
292
 
175
- def source
176
- me = @model.subject(RDF['type'],OT[@ot_class])
177
- @model.object(me, DC['source']).to_s unless me.nil?
178
- end
179
- def create_owl_statement(name,value)
180
- r = @model.create_resource
181
- dc_class = DC[name.gsub(/^[a-z]/) { |a| a.upcase }] # capitalize only the first letter
182
- #puts "DC:" + name.gsub(/^[a-z]/) { |a| a.upcase }
183
- @model.add dc_class, RDF['type'], OWL["Class"]
184
- @model.add r, RDF['type'], dc_class
185
- @model.add r, DC[name], value
186
- end
293
+ # values are stored in the data-hash, hash has a key for each compound
294
+ compounds.each{|c| data[c] = [] unless data[c]}
295
+
296
+ load_all_features = feature_uri==nil
297
+ feature_node = nil
298
+
299
+ # create feature node for feature uri if specified
300
+ unless load_all_features
301
+ feature_node = find_feature(feature_uri)
302
+ raise "feature node not found" unless feature_node
303
+ end
304
+
305
+ count = 0
306
+
307
+ # performance tweak: store uris to save some resource init time
308
+ compound_uri_store = {}
309
+ feature_uri_store = {}
310
+
311
+ # search for all feature_value_node with property 'ot_feature'
312
+ # feature_node is either nil, i.e. a wildcard or specified
313
+ @model.find(nil, node('feature'), feature_node) do |feature_value_node,p,o|
314
+
315
+ # get compound_uri by "backtracking" to values node (property is 'values'), then get compound_node via 'compound'
316
+ value_nodes = @model.subjects(node('values'),feature_value_node)
317
+ raise "more than one value node "+value_nodes.size.to_s unless value_nodes.size==1
318
+ value_node = value_nodes[0]
319
+ compound_node = @model.object(value_node, node('compound'))
320
+ compound_uri = compound_uri_store[compound_node.to_s]
321
+ unless compound_uri
322
+ compound_uri = get_value(compound_node)
323
+ compound_uri_store[compound_node.to_s] = compound_uri
324
+ end
325
+
326
+ if load_all_features
327
+ # when loading all features, feature_uri is not specified; derive it from the feature node
328
+ feature_uri = feature_uri_store[o.to_s]
329
+ unless feature_uri
330
+ feature_uri = get_value(o)
331
+ feature_uri_store[o.to_s] = feature_uri
332
+ end
333
+ end
334
+
335
+ value_node_type = @model.object(feature_value_node, node('type'))
336
+ if (value_node_type == node('FeatureValue'))
337
+ value_literal = @model.object( feature_value_node, node('value'))
338
+ raise "feature value no literal" unless value_literal.is_a?(Redland::Literal)
339
+ data[compound_uri] << {feature_uri => value_literal.get_value }
340
+ else
341
+ raise "feature value type not yet implemented "+value_node_type.to_s
342
+ end
343
+ count += 1
344
+ LOGGER.debug "loaded "+count.to_s+" feature values" if (count%500 == 0)
345
+ end
346
+
347
+ LOGGER.debug "loaded "+count.to_s+" feature values"
348
+ end
349
+
350
+ @@property_nodes = { "type" => RDF["type"],
351
+ "about" => RDF["about"],
352
+ "title" => DC["title"],
353
+ "creator" => DC["creator"],
354
+ #"identifier" => DC["identifier"], identifier is deprecated
355
+ "date" => DC["date"],
356
+ "format" => DC["format"]}
357
+
358
# this method has two purposes:
# * distinguishing ot-properties from dc- and rdf- properties
# * caching nodes, as creating nodes is costly
#
# name - property or class name (String or Symbol), e.g. "type", "title", "Feature"
#
# Returns the node stored in @@property_nodes for the name; a name that is not
# stored yet is looked up in OT and stored.
# Raises RuntimeError for the deprecated name "identifier".
def node(name)
  # normalize, so that symbols hit the same guard and cache entries as strings
  key = name.to_s
  raise "dc[identifier] deprecated, use owl.uri" if key=="identifier"
  @@property_nodes[key] ||= OT[key]
end
187
370
 
188
- def method_missing(name, *args)
189
- # create magic setter methods
190
- if /=/ =~ name.to_s
191
- create_owl_statement name.to_s.sub(/=/,''), args.first
192
- else
193
- raise "No method #{name}"
194
- end
195
- end
371
+ =begin
372
+ def data
373
+ LOGGER.debug("getting data from model")
374
+
375
+ data = {}
376
+ @model.subjects(RDF['type'], OT['DataEntry']).each do |data_entry|
377
+ compound_node = @model.object(data_entry, OT['compound'])
378
+ compound_uri = @model.object(compound_node, DC['identifier']).to_s
379
+ @model.find(data_entry, OT['values'], nil) do |s,p,values|
380
+ feature_node = @model.object values, OT['feature']
381
+ feature_uri = @model.object(feature_node, DC['identifier']).to_s.sub(/\^\^.*$/,'') # remove XML datatype
382
+ type = @model.object(values, RDF['type'])
383
+ if type == OT['FeatureValue']
384
+ value = @model.object(values, OT['value']).to_s
385
+ case value.to_s
386
+ when TRUE_REGEXP # defined in environment.rb
387
+ value = true
388
+ when FALSE_REGEXP # defined in environment.rb
389
+ value = false
390
+ when /.*\^\^<.*XMLSchema#.*>/
391
+ #HACK for reading ambit datasets
392
+ case value.to_s
393
+ when /XMLSchema#string/
394
+ value = value.to_s[0..(value.to_s.index("^^")-1)]
395
+ when /XMLSchema#double/
396
+ value = value.to_s[0..(value.to_s.index("^^")-1)].to_f
397
+ else
398
+ LOGGER.warn " ILLEGAL TYPE "+compound_uri + " has value '" + value.to_s + "' for feature " + feature_uri
399
+ value = nil
400
+ end
401
+ else
402
+ LOGGER.warn compound_uri + " has value '" + value.to_s + "' for feature " + feature_uri
403
+ value = nil
404
+ end
405
+ LOGGER.debug "converting owl to yaml, #compounds: "+(data.keys.size+1).to_s if (data.keys.size+1)%10==0 && !data.has_key?(compound_uri)
406
+
407
+ return data if (data.keys.size)>9 && !data.has_key?(compound_uri)
408
+
409
+ #puts "c "+compound_uri.to_s
410
+ #puts "f "+feature_uri.to_s
411
+ #puts "v "+value.to_s
412
+ #puts ""
413
+ data[compound_uri] = [] unless data[compound_uri]
414
+ data[compound_uri] << {feature_uri => value} unless value.nil?
415
+ elsif type == OT['Tuple']
416
+ entry = {}
417
+ data[compound_uri] = [] unless data[compound_uri]
418
+ #data[compound_uri][feature_uri] = [] unless data[compound_uri][feature_uri]
419
+ @model.find(values, OT['complexValue'],nil) do |s,p,complex_value|
420
+ name_node = @model.object complex_value, OT['feature']
421
+ name = @model.object(name_node, DC['title']).to_s
422
+ value = @model.object(complex_value, OT['value']).to_s
423
+ v = value.sub(/\^\^.*$/,'') # remove XML datatype
424
+ v = v.to_f if v.match(/^[\.|\d]+$/) # guess numeric datatype
425
+ entry[name] = v
426
+ end
427
+ data[compound_uri] << {feature_uri => entry} unless entry.empty?
428
+ end
429
+ end
430
+ end
431
+ data
432
+ end
196
433
  =end
197
434
 
198
- end
199
-
435
+ end
200
436
  end
437
+