opentox-ruby-api-wrapper 1.6.0 → 1.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +2 -1
- data/VERSION +1 -1
- data/lib/algorithm.rb +3 -5
- data/lib/compound.rb +21 -3
- data/lib/dataset.rb +71 -61
- data/lib/environment.rb +4 -72
- data/lib/model.rb +5 -0
- data/lib/opentox-ruby-api-wrapper.rb +1 -1
- data/lib/ot-logger.rb +48 -0
- data/lib/overwrite.rb +14 -0
- data/lib/owl.rb +384 -298
- data/lib/task.rb +6 -6
- data/lib/templates/config.yaml +42 -40
- data/lib/utils.rb +7 -0
- metadata +35 -20
- data/lib/tasks/opentox.rb +0 -107
data/lib/overwrite.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# class overwrites aka monkey patches
|
2
|
+
# hack: store sinatra in global var to make url_for and halt methods accessible
|
3
|
+
before{ $sinatra = self unless $sinatra }
|
4
|
+
|
5
|
+
class Sinatra::Base
|
6
|
+
# overwriting halt to log halts with status code >= 300
|
7
|
+
def halt(*response)
|
8
|
+
LOGGER.error "halt "+response.first.to_s+" "+(response.size>1 ? response[1].to_s : "") if response and response.first and response.first >= 300
|
9
|
+
# orig sinatra code:
|
10
|
+
response = response.first if response.length == 1
|
11
|
+
throw :halt, response
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
data/lib/owl.rb
CHANGED
@@ -1,22 +1,31 @@
|
|
1
|
+
# RDF namespaces
|
2
|
+
RDF = Redland::Namespace.new 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
|
3
|
+
OWL = Redland::Namespace.new 'http://www.w3.org/2002/07/owl#'
|
4
|
+
DC = Redland::Namespace.new 'http://purl.org/dc/elements/1.1/'
|
5
|
+
OT = Redland::Namespace.new 'http://www.opentox.org/api/1.1#'
|
6
|
+
#OT = Redland::Namespace.new 'http://ortona.informatik.uni-freiburg.de/opentox.owl#'
|
7
|
+
XML = Redland::Namespace.new 'http://www.w3.org/2001/XMLSchema#'
|
8
|
+
|
9
|
+
# overriding literal to give nice access to datatype
|
10
|
+
# and to access the stored value as correct ruby type
|
1
11
|
class Redland::Literal
|
2
12
|
|
3
|
-
def self.create(value,
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
end
|
13
|
+
def self.create(value, type)
|
14
|
+
raise "literal datatype may not be nil" unless type
|
15
|
+
type = parse_datatype_uri(value) if OpenTox::Owl::PARSE_LITERAL_TYPE==type
|
16
|
+
|
17
|
+
if type.is_a?(Redland::Uri)
|
18
|
+
Redland::Literal.new(value.to_s,nil,type)
|
10
19
|
else
|
11
|
-
Redland::Literal.new(value.to_s,nil,Redland::
|
20
|
+
Redland::Literal.new(value.to_s,nil,Redland::Uri.new(type.to_s))
|
12
21
|
end
|
13
22
|
end
|
14
23
|
|
15
24
|
# the literal node of the ruby swig api provides the 'value' of a literal but not the 'datatype'
|
16
25
|
# found solution in mailing list
|
17
26
|
def datatype
|
18
|
-
|
19
|
-
|
27
|
+
uri = Redland.librdf_node_get_literal_value_datatype_uri(self.node)
|
28
|
+
return Redland.librdf_uri_to_string(uri) if uri
|
20
29
|
end
|
21
30
|
|
22
31
|
# gets value of literal, value class is set according to literal datatype
|
@@ -25,40 +34,33 @@ class Redland::Literal
|
|
25
34
|
end
|
26
35
|
|
27
36
|
private
|
28
|
-
@@type_string = XML["string"].uri
|
29
|
-
@@type_uri = XML["anyURI"].uri
|
30
|
-
@@type_float = XML["float"].uri
|
31
|
-
@@type_double = XML["double"].uri
|
32
|
-
@@type_date = XML["date"].uri
|
33
|
-
@@type_boolean = XML["boolean"].uri
|
34
|
-
@@type_datetime = XML["dateTime"].uri
|
35
|
-
@@type_integer = XML["integer"].uri
|
36
|
-
|
37
37
|
# parses value according to datatype uri
|
38
38
|
def self.parse_value(string_value, datatype_uri)
|
39
|
+
|
39
40
|
if (datatype_uri==nil || datatype_uri.size==0)
|
40
|
-
LOGGER.warn("empty datatype for literal with value: "+string_value)
|
41
|
+
LOGGER.warn("empty datatype for literal with value: '"+string_value+"'")
|
41
42
|
return string_value
|
42
43
|
end
|
43
44
|
case datatype_uri
|
44
|
-
when
|
45
|
+
when OpenTox::Owl::LITERAL_DATATYPE_STRING.to_s
|
45
46
|
return string_value
|
46
|
-
when
|
47
|
+
when OpenTox::Owl::LITERAL_DATATYPE_URI.to_s
|
47
48
|
return string_value #PENDING uri as string?
|
48
|
-
when
|
49
|
+
when OpenTox::Owl::LITERAL_DATATYPE_FLOAT.to_s
|
49
50
|
return string_value.to_f
|
50
|
-
when
|
51
|
+
when OpenTox::Owl::LITERAL_DATATYPE_DOUBLE.to_s
|
51
52
|
return string_value.to_f
|
52
|
-
when
|
53
|
+
when OpenTox::Owl::LITERAL_DATATYPE_BOOLEAN.to_s
|
53
54
|
return string_value.upcase=="TRUE"
|
54
|
-
when
|
55
|
-
return string_value
|
56
|
-
when
|
57
|
-
return string_value
|
58
|
-
when
|
55
|
+
when OpenTox::Owl::LITERAL_DATATYPE_DATE.to_s
|
56
|
+
return Time.parse(string_value)
|
57
|
+
when OpenTox::Owl::LITERAL_DATATYPE_DATETIME.to_s
|
58
|
+
return Time.parse(string_value)
|
59
|
+
when OpenTox::Owl::LITERAL_DATATYPE_INTEGER.to_s
|
59
60
|
return string_value.to_i
|
60
61
|
else
|
61
|
-
raise "unknown literal datatype: '"+datatype_uri.to_s+"'
|
62
|
+
raise "unknown literal datatype: '"+datatype_uri.to_s+"' (value is "+string_value+
|
63
|
+
"), please specify new OpenTox::Owl::LITERAL_DATATYPE"
|
62
64
|
end
|
63
65
|
end
|
64
66
|
|
@@ -69,20 +71,20 @@ class Redland::Literal
|
|
69
71
|
elsif value.is_a?(String)
|
70
72
|
# PENDING: uri check too slow?
|
71
73
|
if OpenTox::Utils.is_uri?(value)
|
72
|
-
return
|
74
|
+
return OpenTox::Owl::LITERAL_DATATYPE_URI
|
73
75
|
else
|
74
|
-
return
|
76
|
+
return OpenTox::Owl::LITERAL_DATATYPE_STRING
|
75
77
|
end
|
76
78
|
elsif value.is_a?(Float)
|
77
|
-
return
|
79
|
+
return OpenTox::Owl::LITERAL_DATATYPE_FLOAT
|
78
80
|
elsif value.is_a?(TrueClass) or value.is_a?(FalseClass)
|
79
|
-
return
|
81
|
+
return OpenTox::Owl::LITERAL_DATATYPE_BOOLEAN
|
80
82
|
elsif value.is_a?(Integer)
|
81
|
-
return
|
83
|
+
return OpenTox::Owl::LITERAL_DATATYPE_INTEGER
|
82
84
|
elsif value.is_a?(DateTime)
|
83
|
-
return
|
85
|
+
return OpenTox::Owl::LITERAL_DATATYPE_DATETIME
|
84
86
|
elsif value.is_a?(Time)
|
85
|
-
return
|
87
|
+
return OpenTox::Owl::LITERAL_DATATYPE_DATETIME
|
86
88
|
else
|
87
89
|
raise "illegal datatype: "+value.class.to_s+" "+value.to_s
|
88
90
|
end
|
@@ -92,22 +94,118 @@ end
|
|
92
94
|
module OpenTox
|
93
95
|
|
94
96
|
class Owl
|
97
|
+
|
98
|
+
# to get correct owl-dl, properties and objects have to be typed
|
99
|
+
# i.e. the following triple is insufficient:
|
100
|
+
# ModelXY,ot:algorithm,AlgorithmXY
|
101
|
+
# further needed:
|
102
|
+
# ot:algorithm,rdf:type,owl:ObjectProperty
|
103
|
+
# AlgorithmXY,rdf:type,ot:Algorithm
|
104
|
+
# ot:Algorithm,rdf:type,owl:Class
|
105
|
+
#
|
106
|
+
# therefore OpentoxOwl needs info about the opentox-ontology
|
107
|
+
# the info is stored in OBJECT_PROPERTY_CLASS and LITERAL_TYPES
|
108
|
+
|
109
|
+
# contains all owl:ObjectProperty as keys, and the respective classes as value
|
110
|
+
# some object properties link to objects from different classes (e.g. "values" can be "Tuple" or "FeatureValue")
|
111
|
+
# in this case, use set_object_property() (instead of set()) and specify class manually
|
112
|
+
OBJECT_PROPERTY_CLASS = {}
|
113
|
+
[ "model" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Model"}
|
114
|
+
[ "algorithm" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Algorithm"}
|
115
|
+
[ "trainingDataset", "testTargetDataset", "predictionDataset",
|
116
|
+
"testDataset", "dataset" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Dataset"}
|
117
|
+
[ "feature", "dependentVariables", "independentVariables",
|
118
|
+
"predictedVariables", "predictionFeature" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Feature"}
|
119
|
+
[ "parameters" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Parameter"}
|
120
|
+
[ "compound" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Compound"}
|
121
|
+
[ "dataEntry" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "DataEntry"}
|
122
|
+
[ "complexValue" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "FeatureValue"}
|
123
|
+
[ "classificationStatistics" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "ClassificationStatistics"}
|
124
|
+
[ "classValueStatistics" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "ClassValueStatistics"}
|
125
|
+
[ "confusionMatrix" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "ConfusionMatrix"}
|
126
|
+
[ "confusionMatrixCell" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "ConfusionMatrixCell"}
|
127
|
+
[ "regressionStatistics" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "RegressionStatistics"}
|
128
|
+
[ "validation" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Validation"}
|
129
|
+
[ "crossvalidationInfo" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "CrossvalidationInfo"}
|
130
|
+
[ "crossvalidation" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Crossvalidation"}
|
131
|
+
|
132
|
+
# literals point to primitive values (not to other resources)
|
133
|
+
# the literal datatype is encoded via uri:
|
134
|
+
LITERAL_DATATYPE_STRING = XML["string"].uri
|
135
|
+
LITERAL_DATATYPE_URI = XML["anyURI"].uri
|
136
|
+
LITERAL_DATATYPE_FLOAT = XML["float"].uri
|
137
|
+
LITERAL_DATATYPE_DOUBLE = XML["double"].uri
|
138
|
+
LITERAL_DATATYPE_DATE = XML["date"].uri
|
139
|
+
LITERAL_DATATYPE_BOOLEAN = XML["boolean"].uri
|
140
|
+
LITERAL_DATATYPE_DATETIME = XML["dateTime"].uri
|
141
|
+
LITERAL_DATATYPE_INTEGER = XML["integer"].uri
|
142
|
+
|
143
|
+
# list all literals (to distinguish from objectProperties) as keys, datatype as values
|
144
|
+
# (do not add dc-identifier, deprecated, object are identified via name=uri)
|
145
|
+
LITERAL_TYPES = {}
|
146
|
+
[ "title", "creator", "format", "description", "hasStatus", "paramScope", "paramValue",
|
147
|
+
"classValue", "reportType", "confusionMatrixActual",
|
148
|
+
"confusionMatrixPredicted" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_STRING }
|
149
|
+
[ "date", "due_to_time" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_DATE }
|
150
|
+
[ "percentageCompleted", "truePositiveRate", "fMeasure", "falseNegativeRate",
|
151
|
+
"areaUnderRoc", "falsePositiveRate", "trueNegativeRate", "precision", "recall",
|
152
|
+
"percentCorrect", "percentIncorrect", "weightedAreaUnderRoc", "numCorrect",
|
153
|
+
"percentIncorrect", "percentUnpredicted", "realRuntime",
|
154
|
+
"percentWithoutClass", "rootMeanSquaredError", "meanAbsoluteError", "rSquare",
|
155
|
+
"targetVarianceActual", "targetVariancePredicted", "sumSquaredError",
|
156
|
+
"sampleCorrelationCoefficient" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_DOUBLE }
|
157
|
+
[ "numTrueNegatives", "numWithoutClass", "numFalseNegatives", "numTruePositives",
|
158
|
+
"numFalsePositives", "numIncorrect", "numInstances", "numUnpredicted",
|
159
|
+
"randomSeed", "numFolds", "confusionMatrixValue",
|
160
|
+
"crossvalidationFold" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_INTEGER }
|
161
|
+
[ "resultURI" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_URI }
|
162
|
+
[ "stratified" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_BOOLEAN }
|
163
|
+
# some literals can have different types, parse from ruby type
|
164
|
+
PARSE_LITERAL_TYPE = "PARSE_LITERAL_TYPE"
|
165
|
+
[ "value" ].each{ |l| LITERAL_TYPES[l] = PARSE_LITERAL_TYPE }
|
166
|
+
|
167
|
+
# constants for often used redland-resources
|
168
|
+
OWL_TYPE_LITERAL = OWL["AnnotationProperty"]
|
169
|
+
OWL_TYPE_CLASS = OWL["Class"]
|
170
|
+
OWL_TYPE_OBJECT_PROPERTY = OWL["ObjectProperty"]
|
171
|
+
RDF_TYPE = RDF['type']
|
172
|
+
|
173
|
+
# store redland:resources (=nodes) to:
|
174
|
+
# * separate namespaces (OT from RDF and DC)
|
175
|
+
# * save time, as generating resources is timeconsuming in redland
|
176
|
+
@@nodes = {}
|
177
|
+
[ "type", "about"].each{ |l| @@nodes[l] = RDF[l] }
|
178
|
+
[ "title", "creator", "date", "format" ].each{ |l| @@nodes[l] = DC[l] }
|
179
|
+
|
180
|
+
def node(property)
|
181
|
+
raise "can only create node for non-empty-string, but given "+property.class.to_s+" (value: "+
|
182
|
+
property.to_s+")" unless property.is_a?(String) and property.size>0
|
183
|
+
raise "dc[identifier] deprecated, use owl.uri" if property=="identifier"
|
184
|
+
@@nodes[property] = OT[property] unless @@nodes.has_key?(property)
|
185
|
+
return @@nodes[property]
|
186
|
+
end
|
95
187
|
|
96
|
-
# ot_class is the class of the object, e.g. "Model","Dataset", ...
|
188
|
+
# ot_class is the class of the object as string, e.g. "Model","Dataset", ...
|
97
189
|
# root_node is the root-object node in the rdf
|
98
190
|
# uri the uri of the object
|
99
191
|
attr_accessor :ot_class, :root_node, :uri, :model
|
100
192
|
|
193
|
+
private
|
101
194
|
def initialize
|
102
195
|
@model = Redland::Model.new Redland::MemoryStore.new
|
103
196
|
end
|
104
197
|
|
198
|
+
# build new owl object
|
199
|
+
# ot_class is the class of this object, should be a string like "Model", "Task", ...
|
200
|
+
# uri is name and identifier of this object
|
201
|
+
public
|
105
202
|
def self.create( ot_class, uri )
|
106
203
|
|
107
|
-
|
204
|
+
owl = OpenTox::Owl.new
|
108
205
|
owl.ot_class = ot_class
|
109
206
|
owl.root_node = Redland::Resource.new(uri.to_s.strip)
|
110
|
-
|
207
|
+
owl.set("type",owl.ot_class)
|
208
|
+
owl.uri = uri
|
111
209
|
owl
|
112
210
|
end
|
113
211
|
|
@@ -121,7 +219,7 @@ module OpenTox
|
|
121
219
|
parser.parse_string_into_model(owl.model, data, base_uri)
|
122
220
|
|
123
221
|
# now loading root_node and uri
|
124
|
-
owl.model.find(nil,
|
222
|
+
owl.model.find(nil, RDF_TYPE, owl.node(ot_class)) do |s,p,o|
|
125
223
|
#LOGGER.debug "about statements "+s.to_s+" . "+p.to_s+" -> "+o.to_s
|
126
224
|
is_root = true
|
127
225
|
owl.model.find(nil, nil, s) do |ss,pp,oo|
|
@@ -129,8 +227,10 @@ module OpenTox
|
|
129
227
|
break
|
130
228
|
end
|
131
229
|
if is_root
|
230
|
+
# handle error if root is already set
|
132
231
|
raise "cannot derieve root object from rdf, more than one object specified" if owl.uri
|
133
232
|
raise "illegal root node type, no uri specified\n"+data.to_s if s.blank?
|
233
|
+
# store root node and uri
|
134
234
|
owl.uri = s.uri.to_s
|
135
235
|
owl.root_node = s
|
136
236
|
end
|
@@ -139,7 +239,7 @@ module OpenTox
|
|
139
239
|
# handle error if no root node was found
|
140
240
|
unless owl.root_node
|
141
241
|
types = []
|
142
|
-
owl.model.find(nil,
|
242
|
+
owl.model.find(nil, RDF_TYPE, nil){ |s,p,o| types << o.to_s }
|
143
243
|
raise "root node for class '"+ot_class+"' not found (available type nodes: "+types.inspect+")"
|
144
244
|
end
|
145
245
|
raise "no uri in rdf: '"+owl.uri+"'" unless owl.uri and Utils.is_uri?(owl.uri)
|
@@ -158,13 +258,33 @@ module OpenTox
|
|
158
258
|
@model.to_string
|
159
259
|
end
|
160
260
|
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
261
|
+
# returns the first object for subject:root_node and property
|
262
|
+
# (sufficient for accessing simple, root-node properties)
|
263
|
+
def get( property )
|
264
|
+
raise "uri is no prop, use owl.uri instead" if property=="uri"
|
265
|
+
return get_value( @model.object( @root_node, node(property.to_s)) )
|
266
|
+
end
|
267
|
+
|
268
|
+
# returns an array of objects (not only the first one) that fit for the property
|
269
|
+
# accepts array of properties to access non-root-node values
|
270
|
+
# e.g. validation_owl.get_nested( [ "confusionMatrix", "confusionMatrixCell", "confusionMatrixValue" ] )
|
271
|
+
# returns an array of all confusionMatrixValues
|
272
|
+
def get_nested( property_array )
|
273
|
+
n = [ @root_node ]
|
274
|
+
property_array.each do |p|
|
275
|
+
new_nodes = []
|
276
|
+
n.each do |nn|
|
277
|
+
@model.find( nn, node(p), nil ) do |sub,pred,obj|
|
278
|
+
new_nodes << obj
|
279
|
+
end
|
280
|
+
end
|
281
|
+
n = new_nodes
|
282
|
+
end
|
283
|
+
return n.collect{|nn| get_value( nn )}
|
165
284
|
end
|
166
285
|
|
167
286
|
private
|
287
|
+
# returns node-value
|
168
288
|
def get_value( node )
|
169
289
|
return nil unless node
|
170
290
|
if node.is_a?(Redland::Literal)
|
@@ -177,285 +297,251 @@ module OpenTox
|
|
177
297
|
end
|
178
298
|
|
179
299
|
public
|
180
|
-
|
300
|
+
# sets values of current_node (by default root_node)
|
301
|
+
#
|
302
|
+
# note: this does not delete existing triples
|
303
|
+
# * there can be several triples for the same subject and predicate
|
304
|
+
# ( e.g. after set("description","bla1") and set("description","bla2")
|
305
|
+
# both descriptions are in the model,
|
306
|
+
# but the get("description") will give you only one object (by chance)
|
307
|
+
# * this does not matter in practice (only dataset uses this -> load_dataset-methods)
|
308
|
+
# * identical values appear only once in rdf
|
309
|
+
def set(predicate, object, current_node=@root_node )
|
181
310
|
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
311
|
+
pred = predicate.to_s
|
312
|
+
raise "uri is no prop, cannot set uri" if pred=="uri"
|
313
|
+
raise "dc[identifier] deprecated, use owl.uri" if pred=="identifier"
|
314
|
+
if (object.is_a?(Redland::Node) and object.blank?) or nil==object or object.to_s.size==0
|
315
|
+
# set only not-nil values
|
316
|
+
LOGGER.warn "skipping (not setting) empty value in rdf for property: '"+pred+"'"
|
317
|
+
return
|
188
318
|
end
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
319
|
+
|
320
|
+
if pred=="type"
|
321
|
+
# predicate is type, set class of current node
|
322
|
+
set_type(object, current_node)
|
323
|
+
elsif LITERAL_TYPES.has_key?(pred)
|
324
|
+
# predicate is literal
|
325
|
+
set_literal(pred,object,LITERAL_TYPES[pred],current_node)
|
326
|
+
elsif OBJECT_PROPERTY_CLASS.has_key?(pred)
|
327
|
+
# predicate is an objectProperty, object is another resource
|
328
|
+
set_object_property(pred,object,OBJECT_PROPERTY_CLASS[pred],current_node)
|
329
|
+
else
|
330
|
+
raise "unkonwn rdf-property, please add: '"+pred+"' to OpenTox::OWL.OBJECT_PROPERTY_CLASS or OpenTox::OWL.LITERAL_TYPES"
|
194
331
|
end
|
195
332
|
end
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
@model.add parameter, node('paramValue'), settings[:value]
|
204
|
-
@model.add @root_node, node('parameters'), parameter
|
205
|
-
end
|
206
|
-
end
|
207
|
-
|
208
|
-
def add_data_entries(compound_uri,features)
|
209
|
-
# add compound
|
210
|
-
compound = @model.subject(DC["identifier"], compound_uri)
|
211
|
-
if compound.nil?
|
212
|
-
compound = @model.create_resource(compound_uri)
|
213
|
-
@model.add compound, node('type'), node("Compound")
|
214
|
-
end
|
215
|
-
features.each do |f|
|
216
|
-
f.each do |feature_uri,value|
|
217
|
-
# add feature
|
218
|
-
feature = find_or_create_feature feature_uri
|
219
|
-
if value.class.to_s == 'Hash'
|
220
|
-
# create tuple
|
221
|
-
tuple = @model.create_resource
|
222
|
-
@model.add tuple, node('type'), node("Tuple")
|
223
|
-
@model.add tuple, node('feature'), feature
|
224
|
-
value.each do |uri,v|
|
225
|
-
f = find_or_create_feature uri
|
226
|
-
complex_value = @model.create_resource
|
227
|
-
@model.add tuple, node('complexValue'), complex_value
|
228
|
-
@model.add complex_value, node('type'), node("FeatureValue")
|
229
|
-
@model.add complex_value, node('feature'), f
|
230
|
-
@model.add complex_value, node('value'), Redland::Literal.create(v)
|
231
|
-
end
|
232
|
-
# add data entry
|
233
|
-
data_entry = @model.subject node('compound'), compound
|
234
|
-
if data_entry.nil?
|
235
|
-
data_entry = @model.create_resource
|
236
|
-
@model.add @root_node, node('dataEntry'), data_entry
|
237
|
-
@model.add data_entry, node('type'), node("DataEntry")
|
238
|
-
@model.add data_entry, node('compound'), compound
|
239
|
-
end
|
240
|
-
@model.add data_entry, node('values'), tuple
|
241
|
-
else
|
242
|
-
data_entry = @model.subject node('compound'), compound
|
243
|
-
if data_entry.nil?
|
244
|
-
data_entry = @model.create_resource
|
245
|
-
@model.add @root_node, node('dataEntry'), data_entry
|
246
|
-
@model.add data_entry,node('type'), node("DataEntry")
|
247
|
-
@model.add data_entry, node('compound'), compound
|
248
|
-
end
|
249
|
-
values = @model.create_resource
|
250
|
-
@model.add data_entry, node('values'), values
|
251
|
-
@model.add values, node('type'), node('FeatureValue')
|
252
|
-
@model.add values, node('feature'), feature
|
253
|
-
@model.add values, node('value'), Redland::Literal.create(value)
|
254
|
-
end
|
255
|
-
end
|
256
|
-
end
|
257
|
-
end
|
258
|
-
|
259
|
-
private
|
260
|
-
def find_feature(feature_uri)
|
261
|
-
# PENDING: more efficiently get feature node?
|
262
|
-
@model.subjects(RDF['type'], OT['Feature']).each do |feature|
|
263
|
-
return feature if feature_uri==get_value(feature)
|
333
|
+
|
334
|
+
# example-triples for setting rdf-type to model:
|
335
|
+
# model_xy,rdf:type,ot:Model
|
336
|
+
# ot:Model,rdf:type,owl:Class
|
337
|
+
def set_type(ot_class, current_node=@root_node)
|
338
|
+
@model.add current_node, RDF_TYPE, node(ot_class)
|
339
|
+
@model.add node(ot_class), RDF_TYPE, OWL_TYPE_CLASS
|
264
340
|
end
|
265
|
-
return nil
|
266
|
-
end
|
267
|
-
|
268
|
-
public
|
269
|
-
def find_or_create_feature(feature_uri)
|
270
|
-
feature = find_feature(feature_uri)
|
271
|
-
unless feature
|
272
|
-
feature = @model.create_resource(feature_uri)
|
273
|
-
@model.add feature, node('type'), node("Feature")
|
274
|
-
@model.add feature, node("title"), File.basename(feature_uri).split(/#/)[1]
|
275
|
-
@model.add feature, node('creator'), feature_uri
|
276
|
-
end
|
277
|
-
feature
|
278
|
-
end
|
279
|
-
|
280
|
-
# feature values are not loaded for performance reasons
|
281
|
-
# loading compounds and features into arrays that are given as params
|
282
|
-
def load_dataset( compounds, features )
|
283
341
|
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
342
|
+
# example-triples for setting description of a model:
|
343
|
+
# model_xy,ot:description,bla..bla^^xml:string
|
344
|
+
# ot:description,rdf:type,owl:AnnotationProperty
|
345
|
+
def set_literal(literal_name, literal_value, literal_datatype, current_node=@root_node)
|
346
|
+
@model.add current_node, node(literal_name), Redland::Literal.create(literal_value, literal_datatype)
|
347
|
+
@model.add node(literal_name), RDF_TYPE, OWL_TYPE_LITERAL
|
289
348
|
end
|
290
|
-
LOGGER.debug "loaded "+compounds.size.to_s+" compounds and "+features.size.to_s+" features"
|
291
|
-
end
|
292
|
-
|
293
|
-
# loading feature values for the specified feature
|
294
|
-
# if feature is nil, all feature values are loaded
|
295
|
-
#
|
296
|
-
# general remark on the rdf loading (found out with some testing):
|
297
|
-
# the search methods (subjects/find) are fast, the time consuming parts is creating resources,
|
298
|
-
# which cannot be avoided in general (implemented some performance tweaks with uri storing when loading all features)
|
299
|
-
def load_dataset_feature_values( compounds, data, feature_uri=nil )
|
300
349
|
|
301
|
-
|
350
|
+
# example-triples for setting algorithm property of a model:
|
351
|
+
# model_xy,ot:algorithm,algorithm_xy
|
352
|
+
# ot:algorithm,rdf:type,owl:ObjectProperty
|
353
|
+
# algorithm_xy,rdf:type,ot:Algorithm
|
354
|
+
# ot:Algorithm,rdf:type,owl:Class
|
355
|
+
def set_object_property(property, object, object_class, current_node=@root_node)
|
356
|
+
object_node = Redland::Resource.new(object)
|
357
|
+
@model.add current_node, node(property), object_node
|
358
|
+
@model.add node(property), RDF_TYPE, OWL_TYPE_OBJECT_PROPERTY
|
359
|
+
@model.add object_node, RDF_TYPE, node(object_class)
|
360
|
+
@model.add node(object_class), RDF_TYPE, OWL_TYPE_CLASS
|
361
|
+
end
|
302
362
|
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
#
|
310
|
-
|
311
|
-
|
312
|
-
|
363
|
+
# this is a recursive method to set nested data via hashes (not only simple properties)
|
364
|
+
# example (for a dataset)
|
365
|
+
# { :description => "bla",
|
366
|
+
# :dataEntry => { :compound => "compound_uri",
|
367
|
+
# :values => [ { :class => "FeatureValue",
|
368
|
+
# :feature => "feat1",
|
369
|
+
# :value => 42 },
|
370
|
+
# { :class => "FeatureValue",
|
371
|
+
# :feature => "feat2",
|
372
|
+
# :value => 123 } ] } }
|
373
|
+
def set_data(hash, current_node=@root_node)
|
374
|
+
|
375
|
+
hash.each do |k,v|
|
376
|
+
if v.is_a?(Hash)
|
377
|
+
# value is again a hash
|
378
|
+
prop = k.to_s
|
379
|
+
|
380
|
+
# :class is a special key to specify the class value, if not defined in OBJECT_PROPERTY_CLASS
|
381
|
+
object_class = v.has_key?(:class) ? v.delete(:class) : OBJECT_PROPERTY_CLASS[prop]
|
382
|
+
raise "hash key must be a object-property, please add '"+prop.to_s+
|
383
|
+
"' to OpenTox::OWL.OBJECT_PROPERTY_CLASS or specify :class value" unless object_class
|
384
|
+
|
385
|
+
# the new node is a class node, to specify the uri of the resource use key :uri
|
386
|
+
if v[:uri]
|
387
|
+
# identifier is either a specified uri
|
388
|
+
class_node = Redland::Resource.new(v.delete(:uri))
|
389
|
+
else
|
390
|
+
# or a new uri, make up internal uri with increment
|
391
|
+
class_node = new_class_node(object_class,current_node)
|
392
|
+
end
|
393
|
+
set_object_property(prop,class_node,object_class,current_node)
|
394
|
+
# recursively call set_data method with new node
|
395
|
+
set_data(v,class_node)
|
396
|
+
elsif v.is_a?(Array)
|
397
|
+
# value is an array, each array element is added with current key as predicate
|
398
|
+
v.each do |value|
|
399
|
+
set_data( { k => value }, current_node )
|
400
|
+
end
|
401
|
+
else
|
402
|
+
# neither hash nor array, call simple set-method
|
403
|
+
set( k, v, current_node )
|
404
|
+
end
|
405
|
+
end
|
313
406
|
end
|
314
407
|
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
compound_uri_store[compound_node.to_s] = compound_uri
|
408
|
+
# create a new (internal class) node with unique, uri-like name
|
409
|
+
def new_class_node(name, current_node=@root_node)
|
410
|
+
# to avoid anonymous nodes, make up uris for sub-objects
|
411
|
+
# use counter to make sure each uri is unique
|
412
|
+
# for example we will get ../confusion_matrix_cell/1, ../confusion_matrix_cell/2, ...
|
413
|
+
count = 1
|
414
|
+
while (true)
|
415
|
+
res = Redland::Resource.new( File.join(current_node.uri.to_s,name.to_s,count.to_s) )
|
416
|
+
match = false
|
417
|
+
@model.find(nil, nil, res) do |s,p,o|
|
418
|
+
match = true
|
419
|
+
break
|
420
|
+
end
|
421
|
+
if match
|
422
|
+
count += 1
|
423
|
+
else
|
424
|
+
break
|
425
|
+
end
|
334
426
|
end
|
427
|
+
return res
|
428
|
+
end
|
429
|
+
|
430
|
+
# for "backwards-compatibility"
|
431
|
+
# better use directly:
|
432
|
+
# set_data( { "parameters" => [ { "title" => <t>, "paramScope" => <s>, "paramValue" => <v> } ] } )
|
433
|
+
def parameters=(params)
|
335
434
|
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
435
|
+
converted_params = []
|
436
|
+
params.each do |name, settings|
|
437
|
+
converted_params << { :title => name, :paramScope => settings[:scope], :paramValue => settings[:value] }
|
438
|
+
end
|
439
|
+
set_data( :parameters => converted_params )
|
440
|
+
end
|
441
|
+
|
442
|
+
# PENDING move to dataset.rb
|
443
|
+
# this is for dataset.to_owl
|
444
|
+
# adds feature values for a single compound
|
445
|
+
def add_data_entries(compound_uri,features)
|
446
|
+
|
447
|
+
data_entry = { :compound => compound_uri }
|
448
|
+
if features
|
449
|
+
feature_values = []
|
450
|
+
features.each do |f|
|
451
|
+
f.each do |feature_uri,value|
|
452
|
+
if value.is_a?(Hash)
|
453
|
+
complex_values = []
|
454
|
+
value.each do |uri,v|
|
455
|
+
complex_values << { :feature => uri, :value => v }
|
456
|
+
end
|
457
|
+
feature_values << { :class => "Tuple", :feature => feature_uri, :complexValue => complex_values }
|
458
|
+
else
|
459
|
+
feature_values << { :class => "FeatureValue", :feature => feature_uri, :value => value }
|
460
|
+
end
|
461
|
+
end
|
342
462
|
end
|
463
|
+
data_entry[:values] = feature_values
|
464
|
+
end
|
465
|
+
set_data( :dataEntry => data_entry )
|
466
|
+
end
|
467
|
+
|
468
|
+
# PENDING move to dataset.rb
|
469
|
+
# feature values are not loaded for performance reasons
|
470
|
+
# loading compounds and features into arrays that are given as params
|
471
|
+
def load_dataset( compounds, features )
|
472
|
+
|
473
|
+
@model.subjects(RDF_TYPE, node('Compound')).each do |compound|
|
474
|
+
compounds << get_value(compound)
|
343
475
|
end
|
344
476
|
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
477
|
+
@model.subjects(RDF_TYPE, node('Feature')).each do |feature|
|
478
|
+
feature_value_found=false
|
479
|
+
@model.find(nil, node("feature"), feature) do |potential_feature_value,p,o|
|
480
|
+
@model.find(nil, node("values"), potential_feature_value) do |s,p,o|
|
481
|
+
feature_value_found=true
|
482
|
+
break
|
483
|
+
end
|
484
|
+
break if feature_value_found
|
485
|
+
end
|
486
|
+
features << get_value(feature) if feature_value_found
|
352
487
|
end
|
353
|
-
|
354
|
-
LOGGER.debug "loaded "+count.to_s+" feature values" if (count%500 == 0)
|
488
|
+
LOGGER.debug "loaded "+compounds.size.to_s+" compounds and "+features.size.to_s+" features from dataset "+uri.to_s
|
355
489
|
end
|
356
|
-
|
357
|
-
LOGGER.debug "loaded "+count.to_s+" feature values"
|
358
|
-
end
|
359
|
-
|
360
|
-
@@property_nodes = { "type" => RDF["type"],
|
361
|
-
"about" => RDF["about"],
|
362
|
-
"title" => DC["title"],
|
363
|
-
"creator" => DC["creator"],
|
364
|
-
#"identifier" => DC["identifier"], identifier is deprecated
|
365
|
-
"date" => DC["date"],
|
366
|
-
"format" => DC["format"]}
|
367
490
|
|
368
|
-
#
|
369
|
-
#
|
370
|
-
#
|
371
|
-
#
|
372
|
-
#
|
373
|
-
#
|
374
|
-
#
|
375
|
-
|
491
|
+
# PENDING move to dataset.rb
|
492
|
+
# loading feature values for the specified feature
|
493
|
+
# feature_uris has to be an array of feature uris, values are loaded for each of these features
|
494
|
+
#
|
495
|
+
# general remark on the rdf loading (found out with some testing):
|
496
|
+
# the search methods (subjects/find) are fast, the time-consuming part is creating resources,
|
497
|
+
# which cannot be avoided in general
|
498
|
+
def load_dataset_feature_values( compounds, data, feature_uris )
|
499
|
+
|
500
|
+
raise "no feature-uri array" unless feature_uris.is_a?(Array)
|
376
501
|
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
raise "dc[identifier] deprecated, use owl.uri" if name=="identifier"
|
382
|
-
n = @@property_nodes[name]
|
383
|
-
unless n
|
384
|
-
n = OT[name]
|
385
|
-
@@property_nodes[name] = n
|
386
|
-
end
|
387
|
-
|
388
|
-
# if write_type_to_model and name!="type"
|
389
|
-
# raise "no type defined for '"+name+"'" unless @@type[name]
|
390
|
-
# @model.add n,RDF['type'],@@type[name]
|
391
|
-
# end
|
392
|
-
return n
|
393
|
-
end
|
502
|
+
# values are stored in the data-hash, hash has a key for each compound
|
503
|
+
compounds.each{|c| data[c] = [] unless data[c]}
|
504
|
+
|
505
|
+
count = 0
|
394
506
|
|
395
|
-
|
396
|
-
|
397
|
-
|
507
|
+
feature_uris.each do |feature_uri|
|
508
|
+
LOGGER.debug("load feature values for feature: "+feature_uri )
|
509
|
+
feature_node = Redland::Resource.new(feature_uri)
|
510
|
+
|
511
|
+
# search for all feature_value_node with property 'ot_feature' and the feature we are looking for
|
512
|
+
@model.find(nil, node('feature'), feature_node) do |feature_value_node,p,o|
|
398
513
|
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
feature_node = @model.object values, OT['feature']
|
405
|
-
feature_uri = @model.object(feature_node, DC['identifier']).to_s.sub(/\^\^.*$/,'') # remove XML datatype
|
406
|
-
type = @model.object(values, RDF['type'])
|
407
|
-
if type == OT['FeatureValue']
|
408
|
-
value = @model.object(values, OT['value']).to_s
|
409
|
-
case value.to_s
|
410
|
-
when TRUE_REGEXP # defined in environment.rb
|
411
|
-
value = true
|
412
|
-
when FALSE_REGEXP # defined in environment.rb
|
413
|
-
value = false
|
414
|
-
when /.*\^\^<.*XMLSchema#.*>/
|
415
|
-
#HACK for reading ambit datasets
|
416
|
-
case value.to_s
|
417
|
-
when /XMLSchema#string/
|
418
|
-
value = value.to_s[0..(value.to_s.index("^^")-1)]
|
419
|
-
when /XMLSchema#double/
|
420
|
-
value = value.to_s[0..(value.to_s.index("^^")-1)].to_f
|
421
|
-
else
|
422
|
-
LOGGER.warn " ILLEGAL TYPE "+compound_uri + " has value '" + value.to_s + "' for feature " + feature_uri
|
423
|
-
value = nil
|
424
|
-
end
|
425
|
-
else
|
426
|
-
LOGGER.warn compound_uri + " has value '" + value.to_s + "' for feature " + feature_uri
|
427
|
-
value = nil
|
428
|
-
end
|
429
|
-
LOGGER.debug "converting owl to yaml, #compounds: "+(data.keys.size+1).to_s if (data.keys.size+1)%10==0 && !data.has_key?(compound_uri)
|
514
|
+
# get compound_uri by "backtracking" to values node (property is 'values'), then get compound_node via 'compound'
|
515
|
+
value_nodes = @model.subjects(node('values'),feature_value_node)
|
516
|
+
if value_nodes.size>0
|
517
|
+
raise "more than one value node "+value_nodes.size.to_s if value_nodes.size>1
|
518
|
+
value_node = value_nodes[0]
|
430
519
|
|
431
|
-
|
520
|
+
compound_uri = get_value( @model.object(value_node, node('compound')) )
|
432
521
|
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
entry[name] = v
|
522
|
+
value_node_type = @model.object(feature_value_node, RDF_TYPE)
|
523
|
+
if (value_node_type == node('FeatureValue'))
|
524
|
+
value_literal = @model.object( feature_value_node, node('value'))
|
525
|
+
raise "plain feature value no literal: "+value_literal.to_s unless value_literal.is_a?(Redland::Literal)
|
526
|
+
data[compound_uri] << {feature_uri => value_literal.get_value }
|
527
|
+
elsif (value_node_type == node('Tuple'))
|
528
|
+
complex_values = {}
|
529
|
+
@model.find(feature_value_node,node('complexValue'),nil) do |p,s,complex_value|
|
530
|
+
complex_value_type = @model.object(complex_value, RDF_TYPE)
|
531
|
+
raise "complex feature value no feature value: "+complex_value.to_s unless complex_value_type==node('FeatureValue')
|
532
|
+
complex_feature_uri = get_value(@model.object( complex_value, node('feature')))
|
533
|
+
complex_value = @model.object( complex_value, node('value'))
|
534
|
+
raise "complex value no literal: "+complex_value.to_s unless complex_value.is_a?(Redland::Literal)
|
535
|
+
complex_values[ complex_feature_uri ] = complex_value.get_value
|
536
|
+
end
|
537
|
+
data[compound_uri] << { feature_uri => complex_values } if complex_values.size>0
|
450
538
|
end
|
451
|
-
|
539
|
+
count += 1
|
540
|
+
LOGGER.debug "loading feature values ("+count.to_s+")" if (count%1000 == 0)
|
452
541
|
end
|
453
542
|
end
|
543
|
+
LOGGER.debug "loaded "+count.to_s+" feature values for feature "+feature_node.to_s
|
454
544
|
end
|
455
|
-
data
|
456
545
|
end
|
457
|
-
=end
|
458
|
-
|
459
546
|
end
|
460
547
|
end
|
461
|
-
|