opentox-ruby-api-wrapper 1.6.0 → 1.6.1
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +2 -1
- data/VERSION +1 -1
- data/lib/algorithm.rb +3 -5
- data/lib/compound.rb +21 -3
- data/lib/dataset.rb +71 -61
- data/lib/environment.rb +4 -72
- data/lib/model.rb +5 -0
- data/lib/opentox-ruby-api-wrapper.rb +1 -1
- data/lib/ot-logger.rb +48 -0
- data/lib/overwrite.rb +14 -0
- data/lib/owl.rb +384 -298
- data/lib/task.rb +6 -6
- data/lib/templates/config.yaml +42 -40
- data/lib/utils.rb +7 -0
- metadata +35 -20
- data/lib/tasks/opentox.rb +0 -107
data/lib/overwrite.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# class overwrites aka monkey patches
|
2
|
+
# hack: store sinatra in global var to make url_for and halt methods accessible
|
3
|
+
before{ $sinatra = self unless $sinatra }
|
4
|
+
|
5
|
+
class Sinatra::Base
|
6
|
+
# overwriting halt to log halts (!= 202)
|
7
|
+
def halt(*response)
|
8
|
+
LOGGER.error "halt "+response.first.to_s+" "+(response.size>1 ? response[1].to_s : "") if response and response.first and response.first >= 300
|
9
|
+
# orig sinatra code:
|
10
|
+
response = response.first if response.length == 1
|
11
|
+
throw :halt, response
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
data/lib/owl.rb
CHANGED
@@ -1,22 +1,31 @@
|
|
1
|
+
# RDF namespaces
|
2
|
+
RDF = Redland::Namespace.new 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
|
3
|
+
OWL = Redland::Namespace.new 'http://www.w3.org/2002/07/owl#'
|
4
|
+
DC = Redland::Namespace.new 'http://purl.org/dc/elements/1.1/'
|
5
|
+
OT = Redland::Namespace.new 'http://www.opentox.org/api/1.1#'
|
6
|
+
#OT = Redland::Namespace.new 'http://ortona.informatik.uni-freiburg.de/opentox.owl#'
|
7
|
+
XML = Redland::Namespace.new 'http://www.w3.org/2001/XMLSchema#'
|
8
|
+
|
9
|
+
# overriding literal to give nice access to datatype
|
10
|
+
# and to access the stored value as correct ruby type
|
1
11
|
class Redland::Literal
|
2
12
|
|
3
|
-
def self.create(value,
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
end
|
13
|
+
def self.create(value, type)
|
14
|
+
raise "literal datatype may not be nil" unless type
|
15
|
+
type = parse_datatype_uri(value) if OpenTox::Owl::PARSE_LITERAL_TYPE==type
|
16
|
+
|
17
|
+
if type.is_a?(Redland::Uri)
|
18
|
+
Redland::Literal.new(value.to_s,nil,type)
|
10
19
|
else
|
11
|
-
Redland::Literal.new(value.to_s,nil,Redland::
|
20
|
+
Redland::Literal.new(value.to_s,nil,Redland::Uri.new(type.to_s))
|
12
21
|
end
|
13
22
|
end
|
14
23
|
|
15
24
|
# the literal node of the ruby swig api provides the 'value' of a literal but not the 'datatype'
|
16
25
|
# found solution in mailing list
|
17
26
|
def datatype
|
18
|
-
|
19
|
-
|
27
|
+
uri = Redland.librdf_node_get_literal_value_datatype_uri(self.node)
|
28
|
+
return Redland.librdf_uri_to_string(uri) if uri
|
20
29
|
end
|
21
30
|
|
22
31
|
# gets value of literal, value class is set according to literal datatype
|
@@ -25,40 +34,33 @@ class Redland::Literal
|
|
25
34
|
end
|
26
35
|
|
27
36
|
private
|
28
|
-
@@type_string = XML["string"].uri
|
29
|
-
@@type_uri = XML["anyURI"].uri
|
30
|
-
@@type_float = XML["float"].uri
|
31
|
-
@@type_double = XML["double"].uri
|
32
|
-
@@type_date = XML["date"].uri
|
33
|
-
@@type_boolean = XML["boolean"].uri
|
34
|
-
@@type_datetime = XML["dateTime"].uri
|
35
|
-
@@type_integer = XML["integer"].uri
|
36
|
-
|
37
37
|
# parses value according to datatype uri
|
38
38
|
def self.parse_value(string_value, datatype_uri)
|
39
|
+
|
39
40
|
if (datatype_uri==nil || datatype_uri.size==0)
|
40
|
-
LOGGER.warn("empty datatype for literal with value: "+string_value)
|
41
|
+
LOGGER.warn("empty datatype for literal with value: '"+string_value+"'")
|
41
42
|
return string_value
|
42
43
|
end
|
43
44
|
case datatype_uri
|
44
|
-
when
|
45
|
+
when OpenTox::Owl::LITERAL_DATATYPE_STRING.to_s
|
45
46
|
return string_value
|
46
|
-
when
|
47
|
+
when OpenTox::Owl::LITERAL_DATATYPE_URI.to_s
|
47
48
|
return string_value #PENDING uri as string?
|
48
|
-
when
|
49
|
+
when OpenTox::Owl::LITERAL_DATATYPE_FLOAT.to_s
|
49
50
|
return string_value.to_f
|
50
|
-
when
|
51
|
+
when OpenTox::Owl::LITERAL_DATATYPE_DOUBLE.to_s
|
51
52
|
return string_value.to_f
|
52
|
-
when
|
53
|
+
when OpenTox::Owl::LITERAL_DATATYPE_BOOLEAN.to_s
|
53
54
|
return string_value.upcase=="TRUE"
|
54
|
-
when
|
55
|
-
return string_value
|
56
|
-
when
|
57
|
-
return string_value
|
58
|
-
when
|
55
|
+
when OpenTox::Owl::LITERAL_DATATYPE_DATE.to_s
|
56
|
+
return Time.parse(string_value)
|
57
|
+
when OpenTox::Owl::LITERAL_DATATYPE_DATETIME.to_s
|
58
|
+
return Time.parse(string_value)
|
59
|
+
when OpenTox::Owl::LITERAL_DATATYPE_INTEGER.to_s
|
59
60
|
return string_value.to_i
|
60
61
|
else
|
61
|
-
raise "unknown literal datatype: '"+datatype_uri.to_s+"'
|
62
|
+
raise "unknown literal datatype: '"+datatype_uri.to_s+"' (value is "+string_value+
|
63
|
+
"), please specify new OpenTox::Owl::LITERAL_DATATYPE"
|
62
64
|
end
|
63
65
|
end
|
64
66
|
|
@@ -69,20 +71,20 @@ class Redland::Literal
|
|
69
71
|
elsif value.is_a?(String)
|
70
72
|
# PENDING: uri check too slow?
|
71
73
|
if OpenTox::Utils.is_uri?(value)
|
72
|
-
return
|
74
|
+
return OpenTox::Owl::LITERAL_DATATYPE_URI
|
73
75
|
else
|
74
|
-
return
|
76
|
+
return OpenTox::Owl::LITERAL_DATATYPE_STRING
|
75
77
|
end
|
76
78
|
elsif value.is_a?(Float)
|
77
|
-
return
|
79
|
+
return OpenTox::Owl::LITERAL_DATATYPE_FLOAT
|
78
80
|
elsif value.is_a?(TrueClass) or value.is_a?(FalseClass)
|
79
|
-
return
|
81
|
+
return OpenTox::Owl::LITERAL_DATATYPE_BOOLEAN
|
80
82
|
elsif value.is_a?(Integer)
|
81
|
-
return
|
83
|
+
return OpenTox::Owl::LITERAL_DATATYPE_INTEGER
|
82
84
|
elsif value.is_a?(DateTime)
|
83
|
-
return
|
85
|
+
return OpenTox::Owl::LITERAL_DATATYPE_DATETIME
|
84
86
|
elsif value.is_a?(Time)
|
85
|
-
return
|
87
|
+
return OpenTox::Owl::LITERAL_DATATYPE_DATETIME
|
86
88
|
else
|
87
89
|
raise "illegal datatype: "+value.class.to_s+" "+value.to_s
|
88
90
|
end
|
@@ -92,22 +94,118 @@ end
|
|
92
94
|
module OpenTox
|
93
95
|
|
94
96
|
class Owl
|
97
|
+
|
98
|
+
# to get correct owl-dl, properties and objects have to be typed
|
99
|
+
# i.e. the following triple is insufficient:
|
100
|
+
# ModelXY,ot:algorithm,AlgorithmXY
|
101
|
+
# further needed:
|
102
|
+
# ot:algorithm,rdf:type,owl:ObjectProperty
|
103
|
+
# AlgorithmXY,rdf:type,ot:Algorithm
|
104
|
+
# ot:Algorithm,rdf:type,owl:Class
|
105
|
+
#
|
106
|
+
# therefore OpentoxOwl needs info about the opentox-ontology
|
107
|
+
# the info is stored in OBJECT_PROPERTY_CLASS and LITERAL_TYPES
|
108
|
+
|
109
|
+
# contains all owl:ObjectProperty as keys, and the respective classes as value
|
110
|
+
# some object properties link to objects from different classes (e.g. "values" can be "Tuple" or "FeatureValue")
|
111
|
+
# in this case, use set_object_property() (instead of set()) and specify class manually
|
112
|
+
OBJECT_PROPERTY_CLASS = {}
|
113
|
+
[ "model" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Model"}
|
114
|
+
[ "algorithm" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Algorithm"}
|
115
|
+
[ "trainingDataset", "testTargetDataset", "predictionDataset",
|
116
|
+
"testDataset", "dataset" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Dataset"}
|
117
|
+
[ "feature", "dependentVariables", "independentVariables",
|
118
|
+
"predictedVariables", "predictionFeature" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Feature"}
|
119
|
+
[ "parameters" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Parameter"}
|
120
|
+
[ "compound" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Compound"}
|
121
|
+
[ "dataEntry" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "DataEntry"}
|
122
|
+
[ "complexValue" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "FeatureValue"}
|
123
|
+
[ "classificationStatistics" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "ClassificationStatistics"}
|
124
|
+
[ "classValueStatistics" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "ClassValueStatistics"}
|
125
|
+
[ "confusionMatrix" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "ConfusionMatrix"}
|
126
|
+
[ "confusionMatrixCell" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "ConfusionMatrixCell"}
|
127
|
+
[ "regressionStatistics" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "RegressionStatistics"}
|
128
|
+
[ "validation" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Validation"}
|
129
|
+
[ "crossvalidationInfo" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "CrossvalidationInfo"}
|
130
|
+
[ "crossvalidation" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Crossvalidation"}
|
131
|
+
|
132
|
+
# literals point to primitive values (not to other resources)
|
133
|
+
# the literal datatype is encoded via uri:
|
134
|
+
LITERAL_DATATYPE_STRING = XML["string"].uri
|
135
|
+
LITERAL_DATATYPE_URI = XML["anyURI"].uri
|
136
|
+
LITERAL_DATATYPE_FLOAT = XML["float"].uri
|
137
|
+
LITERAL_DATATYPE_DOUBLE = XML["double"].uri
|
138
|
+
LITERAL_DATATYPE_DATE = XML["date"].uri
|
139
|
+
LITERAL_DATATYPE_BOOLEAN = XML["boolean"].uri
|
140
|
+
LITERAL_DATATYPE_DATETIME = XML["dateTime"].uri
|
141
|
+
LITERAL_DATATYPE_INTEGER = XML["integer"].uri
|
142
|
+
|
143
|
+
# list all literals (to distinguish from objectProperties) as keys, datatype as values
|
144
|
+
# (do not add dc-identifier, deprecated, objects are identified via name=uri)
|
145
|
+
LITERAL_TYPES = {}
|
146
|
+
[ "title", "creator", "format", "description", "hasStatus", "paramScope", "paramValue",
|
147
|
+
"classValue", "reportType", "confusionMatrixActual",
|
148
|
+
"confusionMatrixPredicted" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_STRING }
|
149
|
+
[ "date", "due_to_time" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_DATE }
|
150
|
+
[ "percentageCompleted", "truePositiveRate", "fMeasure", "falseNegativeRate",
|
151
|
+
"areaUnderRoc", "falsePositiveRate", "trueNegativeRate", "precision", "recall",
|
152
|
+
"percentCorrect", "percentIncorrect", "weightedAreaUnderRoc", "numCorrect",
|
153
|
+
"percentIncorrect", "percentUnpredicted", "realRuntime",
|
154
|
+
"percentWithoutClass", "rootMeanSquaredError", "meanAbsoluteError", "rSquare",
|
155
|
+
"targetVarianceActual", "targetVariancePredicted", "sumSquaredError",
|
156
|
+
"sampleCorrelationCoefficient" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_DOUBLE }
|
157
|
+
[ "numTrueNegatives", "numWithoutClass", "numFalseNegatives", "numTruePositives",
|
158
|
+
"numFalsePositives", "numIncorrect", "numInstances", "numUnpredicted",
|
159
|
+
"randomSeed", "numFolds", "confusionMatrixValue",
|
160
|
+
"crossvalidationFold" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_INTEGER }
|
161
|
+
[ "resultURI" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_URI }
|
162
|
+
[ "stratified" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_BOOLEAN }
|
163
|
+
# some literals can have different types, parse from ruby type
|
164
|
+
PARSE_LITERAL_TYPE = "PARSE_LITERAL_TYPE"
|
165
|
+
[ "value" ].each{ |l| LITERAL_TYPES[l] = PARSE_LITERAL_TYPE }
|
166
|
+
|
167
|
+
# constants for often used redland-resources
|
168
|
+
OWL_TYPE_LITERAL = OWL["AnnotationProperty"]
|
169
|
+
OWL_TYPE_CLASS = OWL["Class"]
|
170
|
+
OWL_TYPE_OBJECT_PROPERTY = OWL["ObjectProperty"]
|
171
|
+
RDF_TYPE = RDF['type']
|
172
|
+
|
173
|
+
# store redland:resources (=nodes) to:
|
174
|
+
# * separate namespaces (OT from RDF and DC)
|
175
|
+
# * save time, as generating resources is time-consuming in redland
|
176
|
+
@@nodes = {}
|
177
|
+
[ "type", "about"].each{ |l| @@nodes[l] = RDF[l] }
|
178
|
+
[ "title", "creator", "date", "format" ].each{ |l| @@nodes[l] = DC[l] }
|
179
|
+
|
180
|
+
def node(property)
|
181
|
+
raise "can only create node for non-empty-string, but given "+property.class.to_s+" (value: "+
|
182
|
+
property.to_s+")" unless property.is_a?(String) and property.size>0
|
183
|
+
raise "dc[identifier] deprecated, use owl.uri" if property=="identifier"
|
184
|
+
@@nodes[property] = OT[property] unless @@nodes.has_key?(property)
|
185
|
+
return @@nodes[property]
|
186
|
+
end
|
95
187
|
|
96
|
-
# ot_class is the class of the object, e.g. "Model","Dataset", ...
|
188
|
+
# ot_class is the class of the object as string, e.g. "Model","Dataset", ...
|
97
189
|
# root_node is the root-object node in the rdf
|
98
190
|
# uri the uri of the object
|
99
191
|
attr_accessor :ot_class, :root_node, :uri, :model
|
100
192
|
|
193
|
+
private
|
101
194
|
def initialize
|
102
195
|
@model = Redland::Model.new Redland::MemoryStore.new
|
103
196
|
end
|
104
197
|
|
198
|
+
# build new owl object
|
199
|
+
# ot_class is the class of this object, should be a string like "Model", "Task", ...
|
200
|
+
# uri is name and identifier of this object
|
201
|
+
public
|
105
202
|
def self.create( ot_class, uri )
|
106
203
|
|
107
|
-
|
204
|
+
owl = OpenTox::Owl.new
|
108
205
|
owl.ot_class = ot_class
|
109
206
|
owl.root_node = Redland::Resource.new(uri.to_s.strip)
|
110
|
-
|
207
|
+
owl.set("type",owl.ot_class)
|
208
|
+
owl.uri = uri
|
111
209
|
owl
|
112
210
|
end
|
113
211
|
|
@@ -121,7 +219,7 @@ module OpenTox
|
|
121
219
|
parser.parse_string_into_model(owl.model, data, base_uri)
|
122
220
|
|
123
221
|
# now loading root_node and uri
|
124
|
-
owl.model.find(nil,
|
222
|
+
owl.model.find(nil, RDF_TYPE, owl.node(ot_class)) do |s,p,o|
|
125
223
|
#LOGGER.debug "about statements "+s.to_s+" . "+p.to_s+" -> "+o.to_s
|
126
224
|
is_root = true
|
127
225
|
owl.model.find(nil, nil, s) do |ss,pp,oo|
|
@@ -129,8 +227,10 @@ module OpenTox
|
|
129
227
|
break
|
130
228
|
end
|
131
229
|
if is_root
|
230
|
+
# handle error if root is already set
|
132
231
|
raise "cannot derieve root object from rdf, more than one object specified" if owl.uri
|
133
232
|
raise "illegal root node type, no uri specified\n"+data.to_s if s.blank?
|
233
|
+
# store root node and uri
|
134
234
|
owl.uri = s.uri.to_s
|
135
235
|
owl.root_node = s
|
136
236
|
end
|
@@ -139,7 +239,7 @@ module OpenTox
|
|
139
239
|
# handle error if no root node was found
|
140
240
|
unless owl.root_node
|
141
241
|
types = []
|
142
|
-
owl.model.find(nil,
|
242
|
+
owl.model.find(nil, RDF_TYPE, nil){ |s,p,o| types << o.to_s }
|
143
243
|
raise "root node for class '"+ot_class+"' not found (available type nodes: "+types.inspect+")"
|
144
244
|
end
|
145
245
|
raise "no uri in rdf: '"+owl.uri+"'" unless owl.uri and Utils.is_uri?(owl.uri)
|
@@ -158,13 +258,33 @@ module OpenTox
|
|
158
258
|
@model.to_string
|
159
259
|
end
|
160
260
|
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
261
|
+
# returns the first object for subject:root_node and property
|
262
|
+
# (sufficient for accessing simple, root-node properties)
|
263
|
+
def get( property )
|
264
|
+
raise "uri is no prop, use owl.uri instead" if property=="uri"
|
265
|
+
return get_value( @model.object( @root_node, node(property.to_s)) )
|
266
|
+
end
|
267
|
+
|
268
|
+
# returns an array of objects (not only the first one) that fit for the property
|
269
|
+
# accepts array of properties to access non-root-node values
|
270
|
+
# e.g. validation_owl.get_nested( [ "confusionMatrix", "confusionMatrixCell", "confusionMatrixValue" ] )
|
271
|
+
# returns an array of all confusionMatrixValues
|
272
|
+
def get_nested( property_array )
|
273
|
+
n = [ @root_node ]
|
274
|
+
property_array.each do |p|
|
275
|
+
new_nodes = []
|
276
|
+
n.each do |nn|
|
277
|
+
@model.find( nn, node(p), nil ) do |sub,pred,obj|
|
278
|
+
new_nodes << obj
|
279
|
+
end
|
280
|
+
end
|
281
|
+
n = new_nodes
|
282
|
+
end
|
283
|
+
return n.collect{|nn| get_value( nn )}
|
165
284
|
end
|
166
285
|
|
167
286
|
private
|
287
|
+
# returns node-value
|
168
288
|
def get_value( node )
|
169
289
|
return nil unless node
|
170
290
|
if node.is_a?(Redland::Literal)
|
@@ -177,285 +297,251 @@ module OpenTox
|
|
177
297
|
end
|
178
298
|
|
179
299
|
public
|
180
|
-
|
300
|
+
# sets values of current_node (by default root_node)
|
301
|
+
#
|
302
|
+
# note: this does not delete existing triples
|
303
|
+
# * there can be several triples for the same subject and predicate
|
304
|
+
# ( e.g. after set("description","bla1") and set("description","bla2")
|
305
|
+
# both descriptions are in the model,
|
306
|
+
# but the get("description") will give you only one object (by chance)
|
307
|
+
# * this does not matter in practice (only dataset uses this -> load_dataset-methods)
|
308
|
+
# * identical values appear only once in rdf
|
309
|
+
def set(predicate, object, current_node=@root_node )
|
181
310
|
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
311
|
+
pred = predicate.to_s
|
312
|
+
raise "uri is no prop, cannot set uri" if pred=="uri"
|
313
|
+
raise "dc[identifier] deprecated, use owl.uri" if pred=="identifier"
|
314
|
+
if (object.is_a?(Redland::Node) and object.blank?) or nil==object or object.to_s.size==0
|
315
|
+
# set only not-nil values
|
316
|
+
LOGGER.warn "skipping (not setting) empty value in rdf for property: '"+pred+"'"
|
317
|
+
return
|
188
318
|
end
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
319
|
+
|
320
|
+
if pred=="type"
|
321
|
+
# predicate is type, set class of current node
|
322
|
+
set_type(object, current_node)
|
323
|
+
elsif LITERAL_TYPES.has_key?(pred)
|
324
|
+
# predicate is literal
|
325
|
+
set_literal(pred,object,LITERAL_TYPES[pred],current_node)
|
326
|
+
elsif OBJECT_PROPERTY_CLASS.has_key?(pred)
|
327
|
+
# predicate is objectProperty, object is another resource
|
328
|
+
set_object_property(pred,object,OBJECT_PROPERTY_CLASS[pred],current_node)
|
329
|
+
else
|
330
|
+
raise "unkonwn rdf-property, please add: '"+pred+"' to OpenTox::OWL.OBJECT_PROPERTY_CLASS or OpenTox::OWL.LITERAL_TYPES"
|
194
331
|
end
|
195
332
|
end
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
@model.add parameter, node('paramValue'), settings[:value]
|
204
|
-
@model.add @root_node, node('parameters'), parameter
|
205
|
-
end
|
206
|
-
end
|
207
|
-
|
208
|
-
def add_data_entries(compound_uri,features)
|
209
|
-
# add compound
|
210
|
-
compound = @model.subject(DC["identifier"], compound_uri)
|
211
|
-
if compound.nil?
|
212
|
-
compound = @model.create_resource(compound_uri)
|
213
|
-
@model.add compound, node('type'), node("Compound")
|
214
|
-
end
|
215
|
-
features.each do |f|
|
216
|
-
f.each do |feature_uri,value|
|
217
|
-
# add feature
|
218
|
-
feature = find_or_create_feature feature_uri
|
219
|
-
if value.class.to_s == 'Hash'
|
220
|
-
# create tuple
|
221
|
-
tuple = @model.create_resource
|
222
|
-
@model.add tuple, node('type'), node("Tuple")
|
223
|
-
@model.add tuple, node('feature'), feature
|
224
|
-
value.each do |uri,v|
|
225
|
-
f = find_or_create_feature uri
|
226
|
-
complex_value = @model.create_resource
|
227
|
-
@model.add tuple, node('complexValue'), complex_value
|
228
|
-
@model.add complex_value, node('type'), node("FeatureValue")
|
229
|
-
@model.add complex_value, node('feature'), f
|
230
|
-
@model.add complex_value, node('value'), Redland::Literal.create(v)
|
231
|
-
end
|
232
|
-
# add data entry
|
233
|
-
data_entry = @model.subject node('compound'), compound
|
234
|
-
if data_entry.nil?
|
235
|
-
data_entry = @model.create_resource
|
236
|
-
@model.add @root_node, node('dataEntry'), data_entry
|
237
|
-
@model.add data_entry, node('type'), node("DataEntry")
|
238
|
-
@model.add data_entry, node('compound'), compound
|
239
|
-
end
|
240
|
-
@model.add data_entry, node('values'), tuple
|
241
|
-
else
|
242
|
-
data_entry = @model.subject node('compound'), compound
|
243
|
-
if data_entry.nil?
|
244
|
-
data_entry = @model.create_resource
|
245
|
-
@model.add @root_node, node('dataEntry'), data_entry
|
246
|
-
@model.add data_entry,node('type'), node("DataEntry")
|
247
|
-
@model.add data_entry, node('compound'), compound
|
248
|
-
end
|
249
|
-
values = @model.create_resource
|
250
|
-
@model.add data_entry, node('values'), values
|
251
|
-
@model.add values, node('type'), node('FeatureValue')
|
252
|
-
@model.add values, node('feature'), feature
|
253
|
-
@model.add values, node('value'), Redland::Literal.create(value)
|
254
|
-
end
|
255
|
-
end
|
256
|
-
end
|
257
|
-
end
|
258
|
-
|
259
|
-
private
|
260
|
-
def find_feature(feature_uri)
|
261
|
-
# PENDING: more efficiently get feature node?
|
262
|
-
@model.subjects(RDF['type'], OT['Feature']).each do |feature|
|
263
|
-
return feature if feature_uri==get_value(feature)
|
333
|
+
|
334
|
+
# example-triples for setting rdf-type to model:
|
335
|
+
# model_xy,rdf:type,ot:Model
|
336
|
+
# ot:Model,rdf:type,owl:Class
|
337
|
+
def set_type(ot_class, current_node=@root_node)
|
338
|
+
@model.add current_node, RDF_TYPE, node(ot_class)
|
339
|
+
@model.add node(ot_class), RDF_TYPE, OWL_TYPE_CLASS
|
264
340
|
end
|
265
|
-
return nil
|
266
|
-
end
|
267
|
-
|
268
|
-
public
|
269
|
-
def find_or_create_feature(feature_uri)
|
270
|
-
feature = find_feature(feature_uri)
|
271
|
-
unless feature
|
272
|
-
feature = @model.create_resource(feature_uri)
|
273
|
-
@model.add feature, node('type'), node("Feature")
|
274
|
-
@model.add feature, node("title"), File.basename(feature_uri).split(/#/)[1]
|
275
|
-
@model.add feature, node('creator'), feature_uri
|
276
|
-
end
|
277
|
-
feature
|
278
|
-
end
|
279
|
-
|
280
|
-
# feature values are not loaded for performance reasons
|
281
|
-
# loading compounds and features into arrays that are given as params
|
282
|
-
def load_dataset( compounds, features )
|
283
341
|
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
342
|
+
# example-triples for setting description of a model:
|
343
|
+
# model_xy,ot:description,bla..bla^^xml:string
|
344
|
+
# ot:description,rdf:type,owl:AnnotationProperty
|
345
|
+
def set_literal(literal_name, literal_value, literal_datatype, current_node=@root_node)
|
346
|
+
@model.add current_node, node(literal_name), Redland::Literal.create(literal_value, literal_datatype)
|
347
|
+
@model.add node(literal_name), RDF_TYPE, OWL_TYPE_LITERAL
|
289
348
|
end
|
290
|
-
LOGGER.debug "loaded "+compounds.size.to_s+" compounds and "+features.size.to_s+" features"
|
291
|
-
end
|
292
|
-
|
293
|
-
# loading feature values for the specified feature
|
294
|
-
# if feature is nil, all feature values are loaded
|
295
|
-
#
|
296
|
-
# general remark on the rdf loading (found out with some testing):
|
297
|
-
# the search methods (subjects/find) are fast, the time consuming parts is creating resources,
|
298
|
-
# which cannot be avoided in general (implemented some performance tweaks with uri storing when loading all features)
|
299
|
-
def load_dataset_feature_values( compounds, data, feature_uri=nil )
|
300
349
|
|
301
|
-
|
350
|
+
# example-triples for setting algorithm property of a model:
|
351
|
+
# model_xy,ot:algorithm,algorithm_xy
|
352
|
+
# ot:algorithm,rdf:type,owl:ObjectProperty
|
353
|
+
# algorithm_xy,rdf:type,ot:Algorithm
|
354
|
+
# ot:Algorithm,rdf:type,owl:Class
|
355
|
+
def set_object_property(property, object, object_class, current_node=@root_node)
|
356
|
+
object_node = Redland::Resource.new(object)
|
357
|
+
@model.add current_node, node(property), object_node
|
358
|
+
@model.add node(property), RDF_TYPE, OWL_TYPE_OBJECT_PROPERTY
|
359
|
+
@model.add object_node, RDF_TYPE, node(object_class)
|
360
|
+
@model.add node(object_class), RDF_TYPE, OWL_TYPE_CLASS
|
361
|
+
end
|
302
362
|
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
#
|
310
|
-
|
311
|
-
|
312
|
-
|
363
|
+
# this is a recursive method to set nested data via hashes (not only simple properties)
|
364
|
+
# example (for a dataset)
|
365
|
+
# { :description => "bla",
|
366
|
+
# :dataEntry => { :compound => "compound_uri",
|
367
|
+
# :values => [ { :class => "FeatureValue",
|
368
|
+
# :feature => "feat1",
|
369
|
+
# :value => 42 },
|
370
|
+
# { :class => "FeatureValue",
|
371
|
+
# :feature => "feat2",
|
372
|
+
# :value => 123 } ] } }
|
373
|
+
def set_data(hash, current_node=@root_node)
|
374
|
+
|
375
|
+
hash.each do |k,v|
|
376
|
+
if v.is_a?(Hash)
|
377
|
+
# value is again a hash
|
378
|
+
prop = k.to_s
|
379
|
+
|
380
|
+
# :class is a special key to specify the class value, if not defined in OBJECT_PROPERTY_CLASS
|
381
|
+
object_class = v.has_key?(:class) ? v.delete(:class) : OBJECT_PROPERTY_CLASS[prop]
|
382
|
+
raise "hash key must be a object-property, please add '"+prop.to_s+
|
383
|
+
"' to OpenTox::OWL.OBJECT_PROPERTY_CLASS or specify :class value" unless object_class
|
384
|
+
|
385
|
+
# the new node is a class node, to specify the uri of the resource use key :uri
|
386
|
+
if v[:uri]
|
387
|
+
# identifier is either a specified uri
|
388
|
+
class_node = Redland::Resource.new(v.delete(:uri))
|
389
|
+
else
|
390
|
+
# or a new uri, make up internal uri with increment
|
391
|
+
class_node = new_class_node(object_class,current_node)
|
392
|
+
end
|
393
|
+
set_object_property(prop,class_node,object_class,current_node)
|
394
|
+
# recursively call set_data method with new node
|
395
|
+
set_data(v,class_node)
|
396
|
+
elsif v.is_a?(Array)
|
397
|
+
# value is an array, each array element is added with current key as predicate
|
398
|
+
v.each do |value|
|
399
|
+
set_data( { k => value }, current_node )
|
400
|
+
end
|
401
|
+
else
|
402
|
+
# neither hash nor array, call simple set-method
|
403
|
+
set( k, v, current_node )
|
404
|
+
end
|
405
|
+
end
|
313
406
|
end
|
314
407
|
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
compound_uri_store[compound_node.to_s] = compound_uri
|
408
|
+
# create a new (internal class) node with unique, uri-like name
|
409
|
+
def new_class_node(name, current_node=@root_node)
|
410
|
+
# to avoid anonymous nodes, make up uris for sub-objects
|
411
|
+
# use counter to make sure each uri is unique
|
412
|
+
# for example we will get ../confusion_matrix_cell/1, ../confusion_matrix_cell/2, ...
|
413
|
+
count = 1
|
414
|
+
while (true)
|
415
|
+
res = Redland::Resource.new( File.join(current_node.uri.to_s,name.to_s,count.to_s) )
|
416
|
+
match = false
|
417
|
+
@model.find(nil, nil, res) do |s,p,o|
|
418
|
+
match = true
|
419
|
+
break
|
420
|
+
end
|
421
|
+
if match
|
422
|
+
count += 1
|
423
|
+
else
|
424
|
+
break
|
425
|
+
end
|
334
426
|
end
|
427
|
+
return res
|
428
|
+
end
|
429
|
+
|
430
|
+
# for "backwards-compatibility"
|
431
|
+
# better use directly:
|
432
|
+
# set_data( { "parameters" => [ { "title" => <t>, "paramScope" => <s>, "paramValue" => <v> } ] } )
|
433
|
+
def parameters=(params)
|
335
434
|
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
435
|
+
converted_params = []
|
436
|
+
params.each do |name, settings|
|
437
|
+
converted_params << { :title => name, :paramScope => settings[:scope], :paramValue => settings[:value] }
|
438
|
+
end
|
439
|
+
set_data( :parameters => converted_params )
|
440
|
+
end
|
441
|
+
|
442
|
+
# PENDING move to dataset.rb
|
443
|
+
# this is for dataset.to_owl
|
444
|
+
# adds feature values for a single compound
|
445
|
+
def add_data_entries(compound_uri,features)
|
446
|
+
|
447
|
+
data_entry = { :compound => compound_uri }
|
448
|
+
if features
|
449
|
+
feature_values = []
|
450
|
+
features.each do |f|
|
451
|
+
f.each do |feature_uri,value|
|
452
|
+
if value.is_a?(Hash)
|
453
|
+
complex_values = []
|
454
|
+
value.each do |uri,v|
|
455
|
+
complex_values << { :feature => uri, :value => v }
|
456
|
+
end
|
457
|
+
feature_values << { :class => "Tuple", :feature => feature_uri, :complexValue => complex_values }
|
458
|
+
else
|
459
|
+
feature_values << { :class => "FeatureValue", :feature => feature_uri, :value => value }
|
460
|
+
end
|
461
|
+
end
|
342
462
|
end
|
463
|
+
data_entry[:values] = feature_values
|
464
|
+
end
|
465
|
+
set_data( :dataEntry => data_entry )
|
466
|
+
end
|
467
|
+
|
468
|
+
# PENDING move to dataset.rb
|
469
|
+
# feature values are not loaded for performance reasons
|
470
|
+
# loading compounds and features into arrays that are given as params
|
471
|
+
def load_dataset( compounds, features )
|
472
|
+
|
473
|
+
@model.subjects(RDF_TYPE, node('Compound')).each do |compound|
|
474
|
+
compounds << get_value(compound)
|
343
475
|
end
|
344
476
|
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
477
|
+
@model.subjects(RDF_TYPE, node('Feature')).each do |feature|
|
478
|
+
feature_value_found=false
|
479
|
+
@model.find(nil, node("feature"), feature) do |potential_feature_value,p,o|
|
480
|
+
@model.find(nil, node("values"), potential_feature_value) do |s,p,o|
|
481
|
+
feature_value_found=true
|
482
|
+
break
|
483
|
+
end
|
484
|
+
break if feature_value_found
|
485
|
+
end
|
486
|
+
features << get_value(feature) if feature_value_found
|
352
487
|
end
|
353
|
-
|
354
|
-
LOGGER.debug "loaded "+count.to_s+" feature values" if (count%500 == 0)
|
488
|
+
LOGGER.debug "loaded "+compounds.size.to_s+" compounds and "+features.size.to_s+" features from dataset "+uri.to_s
|
355
489
|
end
|
356
|
-
|
357
|
-
LOGGER.debug "loaded "+count.to_s+" feature values"
|
358
|
-
end
|
359
|
-
|
360
|
-
@@property_nodes = { "type" => RDF["type"],
|
361
|
-
"about" => RDF["about"],
|
362
|
-
"title" => DC["title"],
|
363
|
-
"creator" => DC["creator"],
|
364
|
-
#"identifier" => DC["identifier"], identifier is deprecated
|
365
|
-
"date" => DC["date"],
|
366
|
-
"format" => DC["format"]}
|
367
490
|
|
368
|
-
#
|
369
|
-
#
|
370
|
-
#
|
371
|
-
#
|
372
|
-
#
|
373
|
-
#
|
374
|
-
#
|
375
|
-
|
491
|
+
# PENDING move to dataset.rb
|
492
|
+
# loading feature values for the specified feature
|
493
|
+
# if feature is nil, all feature values are loaded
|
494
|
+
#
|
495
|
+
# general remark on the rdf loading (found out with some testing):
|
496
|
+
# the search methods (subjects/find) are fast; the time-consuming part is creating resources,
|
497
|
+
# which cannot be avoided in general
|
498
|
+
def load_dataset_feature_values( compounds, data, feature_uris )
|
499
|
+
|
500
|
+
raise "no feature-uri array" unless feature_uris.is_a?(Array)
|
376
501
|
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
raise "dc[identifier] deprecated, use owl.uri" if name=="identifier"
|
382
|
-
n = @@property_nodes[name]
|
383
|
-
unless n
|
384
|
-
n = OT[name]
|
385
|
-
@@property_nodes[name] = n
|
386
|
-
end
|
387
|
-
|
388
|
-
# if write_type_to_model and name!="type"
|
389
|
-
# raise "no type defined for '"+name+"'" unless @@type[name]
|
390
|
-
# @model.add n,RDF['type'],@@type[name]
|
391
|
-
# end
|
392
|
-
return n
|
393
|
-
end
|
502
|
+
# values are stored in the data-hash, hash has a key for each compound
|
503
|
+
compounds.each{|c| data[c] = [] unless data[c]}
|
504
|
+
|
505
|
+
count = 0
|
394
506
|
|
395
|
-
|
396
|
-
|
397
|
-
|
507
|
+
feature_uris.each do |feature_uri|
|
508
|
+
LOGGER.debug("load feature values for feature: "+feature_uri )
|
509
|
+
feature_node = Redland::Resource.new(feature_uri)
|
510
|
+
|
511
|
+
# search for all feature_value_node with property 'ot_feature' and the feature we are looking for
|
512
|
+
@model.find(nil, node('feature'), feature_node) do |feature_value_node,p,o|
|
398
513
|
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
feature_node = @model.object values, OT['feature']
|
405
|
-
feature_uri = @model.object(feature_node, DC['identifier']).to_s.sub(/\^\^.*$/,'') # remove XML datatype
|
406
|
-
type = @model.object(values, RDF['type'])
|
407
|
-
if type == OT['FeatureValue']
|
408
|
-
value = @model.object(values, OT['value']).to_s
|
409
|
-
case value.to_s
|
410
|
-
when TRUE_REGEXP # defined in environment.rb
|
411
|
-
value = true
|
412
|
-
when FALSE_REGEXP # defined in environment.rb
|
413
|
-
value = false
|
414
|
-
when /.*\^\^<.*XMLSchema#.*>/
|
415
|
-
#HACK for reading ambit datasets
|
416
|
-
case value.to_s
|
417
|
-
when /XMLSchema#string/
|
418
|
-
value = value.to_s[0..(value.to_s.index("^^")-1)]
|
419
|
-
when /XMLSchema#double/
|
420
|
-
value = value.to_s[0..(value.to_s.index("^^")-1)].to_f
|
421
|
-
else
|
422
|
-
LOGGER.warn " ILLEGAL TYPE "+compound_uri + " has value '" + value.to_s + "' for feature " + feature_uri
|
423
|
-
value = nil
|
424
|
-
end
|
425
|
-
else
|
426
|
-
LOGGER.warn compound_uri + " has value '" + value.to_s + "' for feature " + feature_uri
|
427
|
-
value = nil
|
428
|
-
end
|
429
|
-
LOGGER.debug "converting owl to yaml, #compounds: "+(data.keys.size+1).to_s if (data.keys.size+1)%10==0 && !data.has_key?(compound_uri)
|
514
|
+
# get compound_uri by "backtracking" to values node (property is 'values'), then get compound_node via 'compound'
|
515
|
+
value_nodes = @model.subjects(node('values'),feature_value_node)
|
516
|
+
if value_nodes.size>0
|
517
|
+
raise "more than one value node "+value_nodes.size.to_s if value_nodes.size>1
|
518
|
+
value_node = value_nodes[0]
|
430
519
|
|
431
|
-
|
520
|
+
compound_uri = get_value( @model.object(value_node, node('compound')) )
|
432
521
|
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
entry[name] = v
|
522
|
+
value_node_type = @model.object(feature_value_node, RDF_TYPE)
|
523
|
+
if (value_node_type == node('FeatureValue'))
|
524
|
+
value_literal = @model.object( feature_value_node, node('value'))
|
525
|
+
raise "plain feature value no literal: "+value_literal.to_s unless value_literal.is_a?(Redland::Literal)
|
526
|
+
data[compound_uri] << {feature_uri => value_literal.get_value }
|
527
|
+
elsif (value_node_type == node('Tuple'))
|
528
|
+
complex_values = {}
|
529
|
+
@model.find(feature_value_node,node('complexValue'),nil) do |p,s,complex_value|
|
530
|
+
complex_value_type = @model.object(complex_value, RDF_TYPE)
|
531
|
+
raise "complex feature value no feature value: "+complex_value.to_s unless complex_value_type==node('FeatureValue')
|
532
|
+
complex_feature_uri = get_value(@model.object( complex_value, node('feature')))
|
533
|
+
complex_value = @model.object( complex_value, node('value'))
|
534
|
+
raise "complex value no literal: "+complex_value.to_s unless complex_value.is_a?(Redland::Literal)
|
535
|
+
complex_values[ complex_feature_uri ] = complex_value.get_value
|
536
|
+
end
|
537
|
+
data[compound_uri] << { feature_uri => complex_values } if complex_values.size>0
|
450
538
|
end
|
451
|
-
|
539
|
+
count += 1
|
540
|
+
LOGGER.debug "loading feature values ("+count.to_s+")" if (count%1000 == 0)
|
452
541
|
end
|
453
542
|
end
|
543
|
+
LOGGER.debug "loaded "+count.to_s+" feature values for feature "+feature_node.to_s
|
454
544
|
end
|
455
|
-
data
|
456
545
|
end
|
457
|
-
=end
|
458
|
-
|
459
546
|
end
|
460
547
|
end
|
461
|
-
|