opentox-ruby-api-wrapper 1.6.0 → 1.6.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,14 @@
1
+ # class overwrites aka monkey patches
2
+ # hack: store sinatra in global var to make url_for and halt methods accessible
3
+ before{ $sinatra = self unless $sinatra }
4
+
5
+ class Sinatra::Base
6
+ # overwriting halt to log halts with an error status (>= 300)
7
+ def halt(*response)
8
+ LOGGER.error "halt "+response.first.to_s+" "+(response.size>1 ? response[1].to_s : "") if response and response.first and response.first >= 300
9
+ # orig sinatra code:
10
+ response = response.first if response.length == 1
11
+ throw :halt, response
12
+ end
13
+ end
14
+
data/lib/owl.rb CHANGED
@@ -1,22 +1,31 @@
1
+ # RDF namespaces
2
+ RDF = Redland::Namespace.new 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
3
+ OWL = Redland::Namespace.new 'http://www.w3.org/2002/07/owl#'
4
+ DC = Redland::Namespace.new 'http://purl.org/dc/elements/1.1/'
5
+ OT = Redland::Namespace.new 'http://www.opentox.org/api/1.1#'
6
+ #OT = Redland::Namespace.new 'http://ortona.informatik.uni-freiburg.de/opentox.owl#'
7
+ XML = Redland::Namespace.new 'http://www.w3.org/2001/XMLSchema#'
8
+
9
+ # overriding literal to give nice access to datatype
10
+ # and to access the stored value as correct ruby type
1
11
  class Redland::Literal
2
12
 
3
- def self.create(value, datatype=nil)
4
- if datatype
5
- if datatype.is_a?(Redland::Uri)
6
- Redland::Literal.new(value.to_s,nil,datatype)
7
- else
8
- Redland::Literal.new(value.to_s,nil,Redland::Uri.new(datatype.to_s))
9
- end
13
+ def self.create(value, type)
14
+ raise "literal datatype may not be nil" unless type
15
+ type = parse_datatype_uri(value) if OpenTox::Owl::PARSE_LITERAL_TYPE==type
16
+
17
+ if type.is_a?(Redland::Uri)
18
+ Redland::Literal.new(value.to_s,nil,type)
10
19
  else
11
- Redland::Literal.new(value.to_s,nil,Redland::Literal.parse_datatype_uri(value))
20
+ Redland::Literal.new(value.to_s,nil,Redland::Uri.new(type.to_s))
12
21
  end
13
22
  end
14
23
 
15
24
  # the literal node of the ruby swig api provides the 'value' of a literal but not the 'datatype'
16
25
  # found solution in mailing list
17
26
  def datatype
18
- uri = Redland.librdf_node_get_literal_value_datatype_uri(self.node)
19
- return Redland.librdf_uri_to_string(uri) if uri
27
+ uri = Redland.librdf_node_get_literal_value_datatype_uri(self.node)
28
+ return Redland.librdf_uri_to_string(uri) if uri
20
29
  end
21
30
 
22
31
  # gets value of literal, value class is set according to literal datatype
@@ -25,40 +34,33 @@ class Redland::Literal
25
34
  end
26
35
 
27
36
  private
28
- @@type_string = XML["string"].uri
29
- @@type_uri = XML["anyURI"].uri
30
- @@type_float = XML["float"].uri
31
- @@type_double = XML["double"].uri
32
- @@type_date = XML["date"].uri
33
- @@type_boolean = XML["boolean"].uri
34
- @@type_datetime = XML["dateTime"].uri
35
- @@type_integer = XML["integer"].uri
36
-
37
37
  # parses value according to datatype uri
38
38
  def self.parse_value(string_value, datatype_uri)
39
+
39
40
  if (datatype_uri==nil || datatype_uri.size==0)
40
- LOGGER.warn("empty datatype for literal with value: "+string_value)
41
+ LOGGER.warn("empty datatype for literal with value: '"+string_value+"'")
41
42
  return string_value
42
43
  end
43
44
  case datatype_uri
44
- when @@type_string.to_s
45
+ when OpenTox::Owl::LITERAL_DATATYPE_STRING.to_s
45
46
  return string_value
46
- when @@type_uri.to_s
47
+ when OpenTox::Owl::LITERAL_DATATYPE_URI.to_s
47
48
  return string_value #PENDING uri as string?
48
- when @@type_float.to_s
49
+ when OpenTox::Owl::LITERAL_DATATYPE_FLOAT.to_s
49
50
  return string_value.to_f
50
- when @@type_double.to_s
51
+ when OpenTox::Owl::LITERAL_DATATYPE_DOUBLE.to_s
51
52
  return string_value.to_f
52
- when @@type_boolean.to_s
53
+ when OpenTox::Owl::LITERAL_DATATYPE_BOOLEAN.to_s
53
54
  return string_value.upcase=="TRUE"
54
- when @@type_date.to_s
55
- return string_value #PENDING date as string?
56
- when @@type_datetime.to_s
57
- return string_value #PENDING date as string?
58
- when @@type_integer.to_s
55
+ when OpenTox::Owl::LITERAL_DATATYPE_DATE.to_s
56
+ return Time.parse(string_value)
57
+ when OpenTox::Owl::LITERAL_DATATYPE_DATETIME.to_s
58
+ return Time.parse(string_value)
59
+ when OpenTox::Owl::LITERAL_DATATYPE_INTEGER.to_s
59
60
  return string_value.to_i
60
61
  else
61
- raise "unknown literal datatype: '"+datatype_uri.to_s+"', value is "+string_value
62
+ raise "unknown literal datatype: '"+datatype_uri.to_s+"' (value is "+string_value+
63
+ "), please specify new OpenTox::Owl::LITERAL_DATATYPE"
62
64
  end
63
65
  end
64
66
 
@@ -69,20 +71,20 @@ class Redland::Literal
69
71
  elsif value.is_a?(String)
70
72
  # PENDING: uri check too slow?
71
73
  if OpenTox::Utils.is_uri?(value)
72
- return @@type_uri
74
+ return OpenTox::Owl::LITERAL_DATATYPE_URI
73
75
  else
74
- return @@type_string
76
+ return OpenTox::Owl::LITERAL_DATATYPE_STRING
75
77
  end
76
78
  elsif value.is_a?(Float)
77
- return @@type_float
79
+ return OpenTox::Owl::LITERAL_DATATYPE_FLOAT
78
80
  elsif value.is_a?(TrueClass) or value.is_a?(FalseClass)
79
- return @@type_boolean
81
+ return OpenTox::Owl::LITERAL_DATATYPE_BOOLEAN
80
82
  elsif value.is_a?(Integer)
81
- return @@type_integer
83
+ return OpenTox::Owl::LITERAL_DATATYPE_INTEGER
82
84
  elsif value.is_a?(DateTime)
83
- return @@type_datetime
85
+ return OpenTox::Owl::LITERAL_DATATYPE_DATETIME
84
86
  elsif value.is_a?(Time)
85
- return @@type_datetime
87
+ return OpenTox::Owl::LITERAL_DATATYPE_DATETIME
86
88
  else
87
89
  raise "illegal datatype: "+value.class.to_s+" "+value.to_s
88
90
  end
@@ -92,22 +94,118 @@ end
92
94
  module OpenTox
93
95
 
94
96
  class Owl
97
+
98
+ # to get correct owl-dl, properties and objects have to be typed
99
+ # i.e. the following triple is insufficient:
100
+ # ModelXY,ot:algorithm,AlgorithmXY
101
+ # further needed:
102
+ # ot:algorithm,rdf:type,owl:ObjectProperty
103
+ # AlgorithmXY,rdf:type,ot:Algorithm
104
+ # ot:Algorithm,rdf:type,owl:Class
105
+ #
106
+ # therefore OpentoxOwl needs info about the opentox-ontology
107
+ # the info is stored in OBJECT_PROPERTY_CLASS and LITERAL_TYPES
108
+
109
+ # contains all owl:ObjectProperty as keys, and the respective classes as value
110
+ # some object properties link to objects from different classes (e.g. "values" can be "Tuple", or "FeatureValue")
111
+ # in this case, use set_object_property() (instead of set()) and specify class manually
112
+ OBJECT_PROPERTY_CLASS = {}
113
+ [ "model" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Model"}
114
+ [ "algorithm" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Algorithm"}
115
+ [ "trainingDataset", "testTargetDataset", "predictionDataset",
116
+ "testDataset", "dataset" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Dataset"}
117
+ [ "feature", "dependentVariables", "independentVariables",
118
+ "predictedVariables", "predictionFeature" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Feature"}
119
+ [ "parameters" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Parameter"}
120
+ [ "compound" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Compound"}
121
+ [ "dataEntry" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "DataEntry"}
122
+ [ "complexValue" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "FeatureValue"}
123
+ [ "classificationStatistics" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "ClassificationStatistics"}
124
+ [ "classValueStatistics" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "ClassValueStatistics"}
125
+ [ "confusionMatrix" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "ConfusionMatrix"}
126
+ [ "confusionMatrixCell" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "ConfusionMatrixCell"}
127
+ [ "regressionStatistics" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "RegressionStatistics"}
128
+ [ "validation" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Validation"}
129
+ [ "crossvalidationInfo" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "CrossvalidationInfo"}
130
+ [ "crossvalidation" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Crossvalidation"}
131
+
132
+ # literals point to primitive values (not to other resources)
133
+ # the literal datatype is encoded via uri:
134
+ LITERAL_DATATYPE_STRING = XML["string"].uri
135
+ LITERAL_DATATYPE_URI = XML["anyURI"].uri
136
+ LITERAL_DATATYPE_FLOAT = XML["float"].uri
137
+ LITERAL_DATATYPE_DOUBLE = XML["double"].uri
138
+ LITERAL_DATATYPE_DATE = XML["date"].uri
139
+ LITERAL_DATATYPE_BOOLEAN = XML["boolean"].uri
140
+ LITERAL_DATATYPE_DATETIME = XML["dateTime"].uri
141
+ LITERAL_DATATYPE_INTEGER = XML["integer"].uri
142
+
143
+ # list all literals (to distinguish from objectProperties) as keys, datatype as values
144
+ # (do not add dc-identifier, deprecated, object are identified via name=uri)
145
+ LITERAL_TYPES = {}
146
+ [ "title", "creator", "format", "description", "hasStatus", "paramScope", "paramValue",
147
+ "classValue", "reportType", "confusionMatrixActual",
148
+ "confusionMatrixPredicted" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_STRING }
149
+ [ "date", "due_to_time" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_DATE }
150
+ [ "percentageCompleted", "truePositiveRate", "fMeasure", "falseNegativeRate",
151
+ "areaUnderRoc", "falsePositiveRate", "trueNegativeRate", "precision", "recall",
152
+ "percentCorrect", "percentIncorrect", "weightedAreaUnderRoc", "numCorrect",
153
+ "percentIncorrect", "percentUnpredicted", "realRuntime",
154
+ "percentWithoutClass", "rootMeanSquaredError", "meanAbsoluteError", "rSquare",
155
+ "targetVarianceActual", "targetVariancePredicted", "sumSquaredError",
156
+ "sampleCorrelationCoefficient" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_DOUBLE }
157
+ [ "numTrueNegatives", "numWithoutClass", "numFalseNegatives", "numTruePositives",
158
+ "numFalsePositives", "numIncorrect", "numInstances", "numUnpredicted",
159
+ "randomSeed", "numFolds", "confusionMatrixValue",
160
+ "crossvalidationFold" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_INTEGER }
161
+ [ "resultURI" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_URI }
162
+ [ "stratified" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_BOOLEAN }
163
+ # some literals can have different types, parse from ruby type
164
+ PARSE_LITERAL_TYPE = "PARSE_LITERAL_TYPE"
165
+ [ "value" ].each{ |l| LITERAL_TYPES[l] = PARSE_LITERAL_TYPE }
166
+
167
+ # constants for often used redland-resources
168
+ OWL_TYPE_LITERAL = OWL["AnnotationProperty"]
169
+ OWL_TYPE_CLASS = OWL["Class"]
170
+ OWL_TYPE_OBJECT_PROPERTY = OWL["ObjectProperty"]
171
+ RDF_TYPE = RDF['type']
172
+
173
+ # store redland:resources (=nodes) to:
174
+ # * separate namespaces (OT from RDF and DC)
175
+ # * save time, as generating resources is timeconsuming in redland
176
+ @@nodes = {}
177
+ [ "type", "about"].each{ |l| @@nodes[l] = RDF[l] }
178
+ [ "title", "creator", "date", "format" ].each{ |l| @@nodes[l] = DC[l] }
179
+
180
+ def node(property)
181
+ raise "can only create node for non-empty-string, but given "+property.class.to_s+" (value: "+
182
+ property.to_s+")" unless property.is_a?(String) and property.size>0
183
+ raise "dc[identifier] deprecated, use owl.uri" if property=="identifier"
184
+ @@nodes[property] = OT[property] unless @@nodes.has_key?(property)
185
+ return @@nodes[property]
186
+ end
95
187
 
96
- # ot_class is the class of the object, e.g. "Model","Dataset", ...
188
+ # ot_class is the class of the object as string, e.g. "Model","Dataset", ...
97
189
  # root_node is the root-object node in the rdf
98
190
  # uri the uri of the object
99
191
  attr_accessor :ot_class, :root_node, :uri, :model
100
192
 
193
+ private
101
194
  def initialize
102
195
  @model = Redland::Model.new Redland::MemoryStore.new
103
196
  end
104
197
 
198
+ # build new owl object
199
+ # ot_class is the class of this object, should be a string like "Model", "Task", ...
200
+ # uri is name and identifier of this object
201
+ public
105
202
  def self.create( ot_class, uri )
106
203
 
107
- owl = OpenTox::Owl.new
204
+ owl = OpenTox::Owl.new
108
205
  owl.ot_class = ot_class
109
206
  owl.root_node = Redland::Resource.new(uri.to_s.strip)
110
- owl.set("type",owl.node(owl.ot_class)) #,true))
207
+ owl.set("type",owl.ot_class)
208
+ owl.uri = uri
111
209
  owl
112
210
  end
113
211
 
@@ -121,7 +219,7 @@ module OpenTox
121
219
  parser.parse_string_into_model(owl.model, data, base_uri)
122
220
 
123
221
  # now loading root_node and uri
124
- owl.model.find(nil, owl.node("type"), owl.node(ot_class)) do |s,p,o|
222
+ owl.model.find(nil, RDF_TYPE, owl.node(ot_class)) do |s,p,o|
125
223
  #LOGGER.debug "about statements "+s.to_s+" . "+p.to_s+" -> "+o.to_s
126
224
  is_root = true
127
225
  owl.model.find(nil, nil, s) do |ss,pp,oo|
@@ -129,8 +227,10 @@ module OpenTox
129
227
  break
130
228
  end
131
229
  if is_root
230
+ # handle error if root is already set
132
231
  raise "cannot derieve root object from rdf, more than one object specified" if owl.uri
133
232
  raise "illegal root node type, no uri specified\n"+data.to_s if s.blank?
233
+ #store root node and uri
134
234
  owl.uri = s.uri.to_s
135
235
  owl.root_node = s
136
236
  end
@@ -139,7 +239,7 @@ module OpenTox
139
239
  # handle error if no root node was found
140
240
  unless owl.root_node
141
241
  types = []
142
- owl.model.find(nil, owl.node("type"), nil){ |s,p,o| types << o.to_s }
242
+ owl.model.find(nil, RDF_TYPE, nil){ |s,p,o| types << o.to_s }
143
243
  raise "root node for class '"+ot_class+"' not found (available type nodes: "+types.inspect+")"
144
244
  end
145
245
  raise "no uri in rdf: '"+owl.uri+"'" unless owl.uri and Utils.is_uri?(owl.uri)
@@ -158,13 +258,33 @@ module OpenTox
158
258
  @model.to_string
159
259
  end
160
260
 
161
- def get(name)
162
- raise "uri is no prop, use owl.uri instead" if name=="uri"
163
- property_node = node(name.to_s)
164
- return get_value( @model.object(@root_node, property_node) )
261
+ # returns the first object for subject:root_node and property
262
+ # (sufficient for accessing simple, root-node properties)
263
+ def get( property )
264
+ raise "uri is no prop, use owl.uri instead" if property=="uri"
265
+ return get_value( @model.object( @root_node, node(property.to_s)) )
266
+ end
267
+
268
+ # returns an array of objects (not only the first one) that fit for the property
269
+ # accepts array of properties to access not-root-node values
270
+ # i.e. validation_owl.get_nested( [ "confusionMatrix", "confusionMatrixCell", "confusionMatrixValue" ] )
271
+ # returns an array of all confusionMatrixValues
272
+ def get_nested( property_array )
273
+ n = [ @root_node ]
274
+ property_array.each do |p|
275
+ new_nodes = []
276
+ n.each do |nn|
277
+ @model.find( nn, node(p), nil ) do |sub,pred,obj|
278
+ new_nodes << obj
279
+ end
280
+ end
281
+ n = new_nodes
282
+ end
283
+ return n.collect{|nn| get_value( nn )}
165
284
  end
166
285
 
167
286
  private
287
+ # returns node-value
168
288
  def get_value( node )
169
289
  return nil unless node
170
290
  if node.is_a?(Redland::Literal)
@@ -177,285 +297,251 @@ module OpenTox
177
297
  end
178
298
 
179
299
  public
180
- def set(name, value, datatype=nil)
300
+ # sets values of current_node (by default root_node)
301
+ #
302
+ # note: this does not delete existing triples
303
+ # * there can be several triples for the same subject and predicate
304
+ # ( e.g. after set("description","bla1") and set("description","bla2")
305
+ # both descriptions are in the model,
306
+ # but the get("description") will give you only one object (by chance)
307
+ # * this does not matter in practice (only dataset uses this -> load_dataset-methods)
308
+ # * identical values appear only once in rdf
309
+ def set(predicate, object, current_node=@root_node )
181
310
 
182
- raise "uri is no prop, cannot set uri" if name=="uri"
183
- property_node = node(name.to_s) #, true)
184
- begin # delete existing entry
185
- t = @model.object(@root_node, property_node)
186
- @model.delete @root_node, property_node, t
187
- rescue
311
+ pred = predicate.to_s
312
+ raise "uri is no prop, cannot set uri" if pred=="uri"
313
+ raise "dc[identifier] deprecated, use owl.uri" if pred=="identifier"
314
+ if (object.is_a?(Redland::Node) and object.blank?) or nil==object or object.to_s.size==0
315
+ # set only not-nil values
316
+ LOGGER.warn "skipping (not setting) empty value in rdf for property: '"+pred+"'"
317
+ return
188
318
  end
189
- if value.is_a?(Redland::Node)
190
- raise "not nil datatype not allowed when setting redland node as value" if datatype
191
- @model.add @root_node, property_node, value
192
- else # if value is no node, a literal is created
193
- @model.add @root_node, property_node, Redland::Literal.create(value.to_s, datatype)
319
+
320
+ if pred=="type"
321
+ # predicate is type, set class of current node
322
+ set_type(object, current_node)
323
+ elsif LITERAL_TYPES.has_key?(pred)
324
+ # predicate is literal
325
+ set_literal(pred,object,LITERAL_TYPES[pred],current_node)
326
+ elsif OBJECT_PROPERTY_CLASS.has_key?(pred)
327
+ # predicate is objectProperty, object is another resource
328
+ set_object_property(pred,object,OBJECT_PROPERTY_CLASS[pred],current_node)
329
+ else
330
+ raise "unkonwn rdf-property, please add: '"+pred+"' to OpenTox::OWL.OBJECT_PROPERTY_CLASS or OpenTox::OWL.LITERAL_TYPES"
194
331
  end
195
332
  end
196
-
197
- def parameters=(params)
198
- params.each do |name, settings|
199
- parameter = @model.create_resource
200
- @model.add parameter, node('type'), node('Parameter')
201
- @model.add parameter, node('title'), name
202
- @model.add parameter, node('paramScope'), settings[:scope]
203
- @model.add parameter, node('paramValue'), settings[:value]
204
- @model.add @root_node, node('parameters'), parameter
205
- end
206
- end
207
-
208
- def add_data_entries(compound_uri,features)
209
- # add compound
210
- compound = @model.subject(DC["identifier"], compound_uri)
211
- if compound.nil?
212
- compound = @model.create_resource(compound_uri)
213
- @model.add compound, node('type'), node("Compound")
214
- end
215
- features.each do |f|
216
- f.each do |feature_uri,value|
217
- # add feature
218
- feature = find_or_create_feature feature_uri
219
- if value.class.to_s == 'Hash'
220
- # create tuple
221
- tuple = @model.create_resource
222
- @model.add tuple, node('type'), node("Tuple")
223
- @model.add tuple, node('feature'), feature
224
- value.each do |uri,v|
225
- f = find_or_create_feature uri
226
- complex_value = @model.create_resource
227
- @model.add tuple, node('complexValue'), complex_value
228
- @model.add complex_value, node('type'), node("FeatureValue")
229
- @model.add complex_value, node('feature'), f
230
- @model.add complex_value, node('value'), Redland::Literal.create(v)
231
- end
232
- # add data entry
233
- data_entry = @model.subject node('compound'), compound
234
- if data_entry.nil?
235
- data_entry = @model.create_resource
236
- @model.add @root_node, node('dataEntry'), data_entry
237
- @model.add data_entry, node('type'), node("DataEntry")
238
- @model.add data_entry, node('compound'), compound
239
- end
240
- @model.add data_entry, node('values'), tuple
241
- else
242
- data_entry = @model.subject node('compound'), compound
243
- if data_entry.nil?
244
- data_entry = @model.create_resource
245
- @model.add @root_node, node('dataEntry'), data_entry
246
- @model.add data_entry,node('type'), node("DataEntry")
247
- @model.add data_entry, node('compound'), compound
248
- end
249
- values = @model.create_resource
250
- @model.add data_entry, node('values'), values
251
- @model.add values, node('type'), node('FeatureValue')
252
- @model.add values, node('feature'), feature
253
- @model.add values, node('value'), Redland::Literal.create(value)
254
- end
255
- end
256
- end
257
- end
258
-
259
- private
260
- def find_feature(feature_uri)
261
- # PENDING: more efficiently get feature node?
262
- @model.subjects(RDF['type'], OT['Feature']).each do |feature|
263
- return feature if feature_uri==get_value(feature)
333
+
334
+ # example-triples for setting rdf-type to model:
335
+ # model_xy,rdf:type,ot:Model
336
+ # ot:Model,rdf:type,owl:Class
337
+ def set_type(ot_class, current_node=@root_node)
338
+ @model.add current_node, RDF_TYPE, node(ot_class)
339
+ @model.add node(ot_class), RDF_TYPE, OWL_TYPE_CLASS
264
340
  end
265
- return nil
266
- end
267
-
268
- public
269
- def find_or_create_feature(feature_uri)
270
- feature = find_feature(feature_uri)
271
- unless feature
272
- feature = @model.create_resource(feature_uri)
273
- @model.add feature, node('type'), node("Feature")
274
- @model.add feature, node("title"), File.basename(feature_uri).split(/#/)[1]
275
- @model.add feature, node('creator'), feature_uri
276
- end
277
- feature
278
- end
279
-
280
- # feature values are not loaded for performance reasons
281
- # loading compounds and features into arrays that are given as params
282
- def load_dataset( compounds, features )
283
341
 
284
- @model.subjects(node('type'), node('Compound')).each do |compound|
285
- compounds << get_value(compound)
286
- end
287
- @model.subjects(node('type'), node('Feature')).each do |feature|
288
- features << get_value(feature)
342
+ # example-triples for setting description of a model:
343
+ # model_xy,ot:description,bla..bla^^xml:string
344
+ # ot:description,rdf:type,owl:AnnotationProperty
345
+ def set_literal(literal_name, literal_value, literal_datatype, current_node=@root_node)
346
+ @model.add current_node, node(literal_name), Redland::Literal.create(literal_value, literal_datatype)
347
+ @model.add node(literal_name), RDF_TYPE, OWL_TYPE_LITERAL
289
348
  end
290
- LOGGER.debug "loaded "+compounds.size.to_s+" compounds and "+features.size.to_s+" features"
291
- end
292
-
293
- # loading feature values for the specified feature
294
- # if feature is nil, all feature values are loaded
295
- #
296
- # general remark on the rdf loading (found out with some testing):
297
- # the search methods (subjects/find) are fast, the time consuming parts is creating resources,
298
- # which cannot be avoided in general (implemented some performance tweaks with uri storing when loading all features)
299
- def load_dataset_feature_values( compounds, data, feature_uri=nil )
300
349
 
301
- LOGGER.debug("load feature values"+ ( (feature_uri!=nil)?(" for feature: "+feature_uri):"") )
350
+ # example-triples for setting algorithm property of a model:
351
+ # model_xy,ot:algorithm,algorithm_xy
352
+ # ot:algorithm,rdf:type,owl:ObjectProperty
353
+ # algorithm_xy,rdf:type,ot:Algorithm
354
+ # ot:Algorithm,rdf:type,owl:Class
355
+ def set_object_property(property, object, object_class, current_node=@root_node)
356
+ object_node = Redland::Resource.new(object)
357
+ @model.add current_node, node(property), object_node
358
+ @model.add node(property), RDF_TYPE, OWL_TYPE_OBJECT_PROPERTY
359
+ @model.add object_node, RDF_TYPE, node(object_class)
360
+ @model.add node(object_class), RDF_TYPE, OWL_TYPE_CLASS
361
+ end
302
362
 
303
- # values are stored in the data-hash, hash has a key for each compound
304
- compounds.each{|c| data[c] = [] unless data[c]}
305
-
306
- load_all_features = feature_uri==nil
307
- feature_node = nil
308
-
309
- # create feature node for feature uri if specified
310
- unless load_all_features
311
- feature_node = find_feature(feature_uri)
312
- raise "feature node not found" unless feature_node
363
+ # this is a recursive method to set nested data via hashes (not only simple properties)
364
+ # example (for a dataset)
365
+ # { :description => "bla",
366
+ # :dataEntry => { :compound => "compound_uri",
367
+ # :values => [ { :class => "FeatureValue",
368
+ # :feature => "feat1",
369
+ # :value => 42 },
370
+ # { :class => "FeatureValue",
371
+ # :feature => "feat2",
372
+ # :value => 123 } ] } }
373
+ def set_data(hash, current_node=@root_node)
374
+
375
+ hash.each do |k,v|
376
+ if v.is_a?(Hash)
377
+ # value is again a hash
378
+ prop = k.to_s
379
+
380
+ # :class is a special key to specify the class value, if not defined in OBJECT_PROPERTY_CLASS
381
+ object_class = v.has_key?(:class) ? v.delete(:class) : OBJECT_PROPERTY_CLASS[prop]
382
+ raise "hash key must be a object-property, please add '"+prop.to_s+
383
+ "' to OpenTox::OWL.OBJECT_PROPERTY_CLASS or specify :class value" unless object_class
384
+
385
+ # the new node is a class node, to specify the uri of the resource use key :uri
386
+ if v[:uri]
387
+ # identifier is either a specified uri
388
+ class_node = Redland::Resource.new(v.delete(:uri))
389
+ else
390
+ # or a new uri, make up internal uri with increment
391
+ class_node = new_class_node(object_class,current_node)
392
+ end
393
+ set_object_property(prop,class_node,object_class,current_node)
394
+ # recursively call set_data method with new node
395
+ set_data(v,class_node)
396
+ elsif v.is_a?(Array)
397
+ # value is an array, each array element is added with current key as predicate
398
+ v.each do |value|
399
+ set_data( { k => value }, current_node )
400
+ end
401
+ else
402
+ # neither hash nor array, call simple set-method
403
+ set( k, v, current_node )
404
+ end
405
+ end
313
406
  end
314
407
 
315
- count = 0
316
-
317
- # preformance tweak: store uirs to save some resource init time
318
- compound_uri_store = {}
319
- feature_uri_store = {}
320
-
321
- # search for all feature_value_node with property 'ot_feature'
322
- # feature_node is either nil, i.e. a wildcard or specified
323
- @model.find(nil, node('feature'), feature_node) do |feature_value_node,p,o|
324
-
325
- # get compound_uri by "backtracking" to values node (property is 'values'), then get compound_node via 'compound'
326
- value_nodes = @model.subjects(node('values'),feature_value_node)
327
- raise "more than one value node "+value_nodes.size.to_s unless value_nodes.size==1
328
- value_node = value_nodes[0]
329
- compound_node = @model.object(value_node, node('compound'))
330
- compound_uri = compound_uri_store[compound_node.to_s]
331
- unless compound_uri
332
- compound_uri = get_value(compound_node)
333
- compound_uri_store[compound_node.to_s] = compound_uri
408
+ # create a new (internal class) node with unique, uri-like name
409
+ def new_class_node(name, current_node=@root_node)
410
+ # to avoid anonymous nodes, make up uris for sub-objects
411
+ # use counter to make sure each uri is unique
412
+ # for example we will get ../confusion_matrix_cell/1, ../confusion_matrix_cell/2, ...
413
+ count = 1
414
+ while (true)
415
+ res = Redland::Resource.new( File.join(current_node.uri.to_s,name.to_s,count.to_s) )
416
+ match = false
417
+ @model.find(nil, nil, res) do |s,p,o|
418
+ match = true
419
+ break
420
+ end
421
+ if match
422
+ count += 1
423
+ else
424
+ break
425
+ end
334
426
  end
427
+ return res
428
+ end
429
+
430
+ # for "backwards-compatibility"
431
+ # better use directly:
432
+ # set_data( { "parameters" => [ { "title" => <t>, "paramScope" => <s>, "paramValue" => <v> } ] } )
433
+ def parameters=(params)
335
434
 
336
- if load_all_features
337
- # if load all features, feautre_uri is not specified, derieve from feature_node
338
- feature_uri = feature_uri_store[o.to_s]
339
- unless feature_uri
340
- feature_uri = get_value(o)
341
- feature_uri_store[o.to_s] = feature_uri
435
+ converted_params = []
436
+ params.each do |name, settings|
437
+ converted_params << { :title => name, :paramScope => settings[:scope], :paramValue => settings[:value] }
438
+ end
439
+ set_data( :parameters => converted_params )
440
+ end
441
+
442
+ # PENDING move to dataset.rb
443
+ # this is for dataset.to_owl
444
+ # adds feature value for a single compound
445
+ def add_data_entries(compound_uri,features)
446
+
447
+ data_entry = { :compound => compound_uri }
448
+ if features
449
+ feature_values = []
450
+ features.each do |f|
451
+ f.each do |feature_uri,value|
452
+ if value.is_a?(Hash)
453
+ complex_values = []
454
+ value.each do |uri,v|
455
+ complex_values << { :feature => uri, :value => v }
456
+ end
457
+ feature_values << { :class => "Tuple", :feature => feature_uri, :complexValue => complex_values }
458
+ else
459
+ feature_values << { :class => "FeatureValue", :feature => feature_uri, :value => value }
460
+ end
461
+ end
342
462
  end
463
+ data_entry[:values] = feature_values
464
+ end
465
+ set_data( :dataEntry => data_entry )
466
+ end
467
+
468
+ # PENDING move to dataset.rb
469
+ # feature values are not loaded for performance reasons
470
+ # loading compounds and features into arrays that are given as params
471
+ def load_dataset( compounds, features )
472
+
473
+ @model.subjects(RDF_TYPE, node('Compound')).each do |compound|
474
+ compounds << get_value(compound)
343
475
  end
344
476
 
345
- value_node_type = @model.object(feature_value_node, node('type'))
346
- if (value_node_type == node('FeatureValue'))
347
- value_literal = @model.object( feature_value_node, node('value'))
348
- raise "feature value no literal" unless value_literal.is_a?(Redland::Literal)
349
- data[compound_uri] << {feature_uri => value_literal.get_value }
350
- else
351
- raise "feature value type not yet implemented "+value_node_type.to_s
477
+ @model.subjects(RDF_TYPE, node('Feature')).each do |feature|
478
+ feature_value_found=false
479
+ @model.find(nil, node("feature"), feature) do |potential_feature_value,p,o|
480
+ @model.find(nil, node("values"), potential_feature_value) do |s,p,o|
481
+ feature_value_found=true
482
+ break
483
+ end
484
+ break if feature_value_found
485
+ end
486
+ features << get_value(feature) if feature_value_found
352
487
  end
353
- count += 1
354
- LOGGER.debug "loaded "+count.to_s+" feature values" if (count%500 == 0)
488
+ LOGGER.debug "loaded "+compounds.size.to_s+" compounds and "+features.size.to_s+" features from dataset "+uri.to_s
355
489
  end
356
-
357
- LOGGER.debug "loaded "+count.to_s+" feature values"
358
- end
359
-
360
- @@property_nodes = { "type" => RDF["type"],
361
- "about" => RDF["about"],
362
- "title" => DC["title"],
363
- "creator" => DC["creator"],
364
- #"identifier" => DC["identifier"], identifier is deprecated
365
- "date" => DC["date"],
366
- "format" => DC["format"]}
367
490
 
368
- # @object_prop = OWL["ObjectProperty"]
369
- # @@type = { "Validation" => OWL["Class"],
370
- # "Model" => OWL["Class"],
371
- # "title" => OWL["AnnotationProperty"],
372
- # "creator" => OWL["AnnotationProperty"],
373
- # "date" => OWL["AnnotationProperty"],
374
- # "format" => OWL["AnnotationProperty"],
375
- # "predictedVariables" => @object_prop}
491
+ # PENDING move to dataset.rb
492
+ # loading feature values for the specified feature
493
+ # if feature is nil, all feature values are loaded
494
+ #
495
+ # general remark on the rdf loading (found out with some testing):
496
+ # the search methods (subjects/find) are fast, the time consuming parts is creating resources,
497
+ # which cannot be avoided in general
498
+ def load_dataset_feature_values( compounds, data, feature_uris )
499
+
500
+ raise "no feature-uri array" unless feature_uris.is_a?(Array)
376
501
 
377
- # this method has two purposes:
378
- # * distinguishing ot-properties from dc- and rdf- properties
379
- # * caching nodes, as creating nodes is costly
380
- def node(name) #, write_type_to_model=false)
381
- raise "dc[identifier] deprecated, use owl.uri" if name=="identifier"
382
- n = @@property_nodes[name]
383
- unless n
384
- n = OT[name]
385
- @@property_nodes[name] = n
386
- end
387
-
388
- # if write_type_to_model and name!="type"
389
- # raise "no type defined for '"+name+"'" unless @@type[name]
390
- # @model.add n,RDF['type'],@@type[name]
391
- # end
392
- return n
393
- end
502
+ # values are stored in the data-hash, hash has a key for each compound
503
+ compounds.each{|c| data[c] = [] unless data[c]}
504
+
505
+ count = 0
394
506
 
395
- =begin
396
- def data
397
- LOGGER.debug("getting data from model")
507
+ feature_uris.each do |feature_uri|
508
+ LOGGER.debug("load feature values for feature: "+feature_uri )
509
+ feature_node = Redland::Resource.new(feature_uri)
510
+
511
+ # search for all feature_value_nodes with property 'ot_feature' and the feature we are looking for
512
+ @model.find(nil, node('feature'), feature_node) do |feature_value_node,p,o|
398
513
 
399
- data = {}
400
- @model.subjects(RDF['type'], OT['DataEntry']).each do |data_entry|
401
- compound_node = @model.object(data_entry, OT['compound'])
402
- compound_uri = @model.object(compound_node, DC['identifier']).to_s
403
- @model.find(data_entry, OT['values'], nil) do |s,p,values|
404
- feature_node = @model.object values, OT['feature']
405
- feature_uri = @model.object(feature_node, DC['identifier']).to_s.sub(/\^\^.*$/,'') # remove XML datatype
406
- type = @model.object(values, RDF['type'])
407
- if type == OT['FeatureValue']
408
- value = @model.object(values, OT['value']).to_s
409
- case value.to_s
410
- when TRUE_REGEXP # defined in environment.rb
411
- value = true
412
- when FALSE_REGEXP # defined in environment.rb
413
- value = false
414
- when /.*\^\^<.*XMLSchema#.*>/
415
- #HACK for reading ambit datasets
416
- case value.to_s
417
- when /XMLSchema#string/
418
- value = value.to_s[0..(value.to_s.index("^^")-1)]
419
- when /XMLSchema#double/
420
- value = value.to_s[0..(value.to_s.index("^^")-1)].to_f
421
- else
422
- LOGGER.warn " ILLEGAL TYPE "+compound_uri + " has value '" + value.to_s + "' for feature " + feature_uri
423
- value = nil
424
- end
425
- else
426
- LOGGER.warn compound_uri + " has value '" + value.to_s + "' for feature " + feature_uri
427
- value = nil
428
- end
429
- LOGGER.debug "converting owl to yaml, #compounds: "+(data.keys.size+1).to_s if (data.keys.size+1)%10==0 && !data.has_key?(compound_uri)
514
+ # get compound_uri by "backtracking" to values node (property is 'values'), then get compound_node via 'compound'
515
+ value_nodes = @model.subjects(node('values'),feature_value_node)
516
+ if value_nodes.size>0
517
+ raise "more than one value node "+value_nodes.size.to_s if value_nodes.size>1
518
+ value_node = value_nodes[0]
430
519
 
431
- return data if (data.keys.size)>9 && !data.has_key?(compound_uri)
520
+ compound_uri = get_value( @model.object(value_node, node('compound')) )
432
521
 
433
- #puts "c "+compound_uri.to_s
434
- #puts "f "+feature_uri.to_s
435
- #puts "v "+value.to_s
436
- #puts ""
437
- data[compound_uri] = [] unless data[compound_uri]
438
- data[compound_uri] << {feature_uri => value} unless value.nil?
439
- elsif type == OT['Tuple']
440
- entry = {}
441
- data[compound_uri] = [] unless data[compound_uri]
442
- #data[compound_uri][feature_uri] = [] unless data[compound_uri][feature_uri]
443
- @model.find(values, OT['complexValue'],nil) do |s,p,complex_value|
444
- name_node = @model.object complex_value, OT['feature']
445
- name = @model.object(name_node, DC['title']).to_s
446
- value = @model.object(complex_value, OT['value']).to_s
447
- v = value.sub(/\^\^.*$/,'') # remove XML datatype
448
- v = v.to_f if v.match(/^[\.|\d]+$/) # guess numeric datatype
449
- entry[name] = v
522
+ value_node_type = @model.object(feature_value_node, RDF_TYPE)
523
+ if (value_node_type == node('FeatureValue'))
524
+ value_literal = @model.object( feature_value_node, node('value'))
525
+ raise "plain feature value no literal: "+value_literal.to_s unless value_literal.is_a?(Redland::Literal)
526
+ data[compound_uri] << {feature_uri => value_literal.get_value }
527
+ elsif (value_node_type == node('Tuple'))
528
+ complex_values = {}
529
+ @model.find(feature_value_node,node('complexValue'),nil) do |p,s,complex_value|
530
+ complex_value_type = @model.object(complex_value, RDF_TYPE)
531
+ raise "complex feature value no feature value: "+complex_value.to_s unless complex_value_type==node('FeatureValue')
532
+ complex_feature_uri = get_value(@model.object( complex_value, node('feature')))
533
+ complex_value = @model.object( complex_value, node('value'))
534
+ raise "complex value no literal: "+complex_value.to_s unless complex_value.is_a?(Redland::Literal)
535
+ complex_values[ complex_feature_uri ] = complex_value.get_value
536
+ end
537
+ data[compound_uri] << { feature_uri => complex_values } if complex_values.size>0
450
538
  end
451
- data[compound_uri] << {feature_uri => entry} unless entry.empty?
539
+ count += 1
540
+ LOGGER.debug "loading feature values ("+count.to_s+")" if (count%1000 == 0)
452
541
  end
453
542
  end
543
+ LOGGER.debug "loaded "+count.to_s+" feature values for feature "+feature_node.to_s
454
544
  end
455
- data
456
545
  end
457
- =end
458
-
459
546
  end
460
547
  end
461
-