opentox-ruby 0.0.1 → 0.0.2
- data/README.rdoc +4 -4
- data/Rakefile +35 -35
- data/VERSION +1 -1
- data/lib/algorithm.rb +220 -50
- data/lib/compound.rb +138 -73
- data/lib/dataset.rb +296 -192
- data/lib/environment.rb +44 -29
- data/lib/feature.rb +15 -0
- data/lib/model.rb +240 -112
- data/lib/opentox-ruby.rb +13 -0
- data/lib/opentox.rb +47 -0
- data/lib/overwrite.rb +72 -0
- data/lib/parser.rb +286 -0
- data/lib/rest_client_wrapper.rb +12 -12
- data/lib/serializer.rb +340 -0
- data/lib/task.rb +184 -101
- data/lib/validation.rb +58 -8
- metadata +41 -22
data/lib/parser.rb
ADDED
@@ -0,0 +1,286 @@
+require 'spreadsheet'
+require 'roo'
+
+class String
+
+  # Split RDF statement into triples
+  # @return [Array] Array with [subject,predicate,object]
+  def to_triple
+    self.chomp.split(' ',3).collect{|i| i.sub(/\s+.$/,'').gsub(/[<>"]/,'')}
+  end
+
+end
+
+module OpenTox
+
+  # Parser for various input formats
+  module Parser
+
+    # OWL-DL parser
+    module Owl
+
+      # Create a new OWL-DL parser
+      # @param uri URI of OpenTox object
+      # @return [OpenTox::Parser::Owl] OWL-DL parser
+      def initialize(uri)
+        @uri = uri
+        @metadata = {}
+      end
+
+      # Read metadata from opentox service
+      # @return [Hash] Object metadata
+      def load_metadata
+
+        if @dataset
+          uri = File.join(@uri,"metadata")
+        else
+          uri = @uri
+        end
+
+        statements = []
+        parameter_ids = []
+        `rapper -i rdfxml -o ntriples #{uri} 2>/dev/null`.each_line do |line|
+          triple = line.to_triple
+          @metadata[triple[1]] = triple[2].split('^^').first if triple[0] == @uri and triple[1] != RDF['type']
+          statements << triple
+          parameter_ids << triple[2] if triple[1] == OT.parameters
+        end
+        unless parameter_ids.empty?
+          @metadata[OT.parameters] = []
+          parameter_ids.each do |p|
+            parameter = {}
+            statements.each{ |t| parameter[t[1]] = t[2] if t[0] == p and t[1] != RDF['type']}
+            @metadata[OT.parameters] << parameter
+          end
+        end
+        @metadata
+      end
+
+      # Generic parser for all OpenTox classes
+      class Generic
+        include Owl
+      end
+
+      # OWL-DL parser for datasets
+      class Dataset
+
+        include Owl
+
+        attr_writer :uri
+
+        # Create a new OWL-DL dataset parser
+        # @param uri Dataset URI
+        # @return [OpenTox::Parser::Owl::Dataset] OWL-DL parser
+        def initialize(uri)
+          super uri
+          @dataset = ::OpenTox::Dataset.new(@uri)
+        end
+
+        # Read data from dataset service. Files can be parsed by setting #uri to a filename (after initialization with a real URI)
+        # @example Read data from an external service
+        #   parser = OpenTox::Parser::Owl::Dataset.new "http://webservices.in-silico.ch/dataset/1"
+        #   dataset = parser.load_uri
+        # @example Create dataset from RDF/XML file
+        #   dataset = OpenTox::Dataset.create
+        #   parser = OpenTox::Parser::Owl::Dataset.new dataset.uri
+        #   parser.uri = "dataset.rdfxml" # insert your input file
+        #   dataset = parser.load_uri
+        #   dataset.save
+        # @return [Hash] Internal dataset representation
+        def load_uri
+          data = {}
+          feature_values = {}
+          feature = {}
+          other_statements = {}
+          `rapper -i rdfxml -o ntriples #{@uri} 2>/dev/null`.each_line do |line|
+            triple = line.chomp.split(' ',3)
+            triple = triple[0..2].collect{|i| i.sub(/\s+.$/,'').gsub(/[<>"]/,'')}
+            case triple[1]
+            when /#{OT.values}/i
+              data[triple[0]] = {:compound => "", :values => []} unless data[triple[0]]
+              data[triple[0]][:values] << triple[2]
+            when /#{OT.value}/i
+              feature_values[triple[0]] = triple[2]
+            when /#{OT.compound}/i
+              data[triple[0]] = {:compound => "", :values => []} unless data[triple[0]]
+              data[triple[0]][:compound] = triple[2]
+            when /#{OT.feature}/i
+              feature[triple[0]] = triple[2]
+            else
+            end
+          end
+          data.each do |id,entry|
+            entry[:values].each do |value_id|
+              value = feature_values[value_id].split(/\^\^/).first # remove XSD.type
+              @dataset.add entry[:compound],feature[value_id],value
+            end
+          end
+          load_features
+          @dataset.metadata = load_metadata
+          @dataset
+        end
+
+        # Read only features from a dataset service.
+        # @return [Hash] Internal features representation
+        def load_features
+          uri = File.join(@uri,"features")
+          statements = []
+          features = Set.new
+          `rapper -i rdfxml -o ntriples #{uri} 2>/dev/null`.each_line do |line|
+            triple = line.chomp.split('> ').collect{|i| i.sub(/\s+.$/,'').gsub(/[<>"]/,'')}[0..2]
+            statements << triple
+            features << triple[0] if triple[1] == RDF['type'] and triple[2] == OT.Feature
+          end
+          statements.each do |triple|
+            if features.include? triple[0]
+              @dataset.features[triple[0]] = {} unless @dataset.features[triple[0]]
+              @dataset.features[triple[0]][triple[1]] = triple[2].split('^^').first
+            end
+          end
+          @dataset.features
+        end
+
+      end
+
+    end
+
+    # Parser for getting spreadsheet data into a dataset
+    class Spreadsheets
+
+      attr_accessor :dataset
+
+      def initialize
+        @data = []
+        @features = []
+        @feature_types = {}
+
+        @format_errors = ""
+        @smiles_errors = []
+        @activity_errors = []
+        @duplicates = {}
+      end
+
+      # Load Spreadsheet book (created with roo gem http://roo.rubyforge.org/, excel format specification: http://toxcreate.org/help)
+      # @param [Excel] book Excel workbook object (created with roo gem)
+      # @return [OpenTox::Dataset] Dataset object with Excel data
+      def load_spreadsheet(book)
+        book.default_sheet = 0
+        add_features book.row(1)
+        2.upto(book.last_row) { |i| add_values book.row(i) }
+        warnings
+        @dataset
+      end
+
+      # Load CSV string (format specification: http://toxcreate.org/help)
+      # @param [String] csv CSV representation of the dataset
+      # @return [OpenTox::Dataset] Dataset object with CSV data
+      def load_csv(csv)
+        row = 0
+        input = csv.split("\n")
+        add_features split_row(input.shift)
+        input.each { |row| add_values split_row(row) }
+        warnings
+        @dataset
+      end
+
+      private
+
+      def warnings
+
+        info = ''
+        @feature_types.each do |feature,types|
+          if types.uniq.size > 1
+            type = OT.NumericFeature
+          else
+            type = types.first
+          end
+          @dataset.add_feature_metadata(feature,{OT.isA => type})
+          info += "\"#{@dataset.feature_name(feature)}\" detected as #{type.split('#').last}."
+
+          # TODO: rewrite feature values
+          # TODO if value.to_f == 0 @activity_errors << "#{smiles} Zero values not allowed for regression datasets - entry ignored."
+        end
+
+        @dataset.metadata[OT.Info] = info
+
+        warnings = ''
+        warnings += "<p>Incorrect Smiles structures (ignored):</p>" + @smiles_errors.join("<br/>") unless @smiles_errors.empty?
+        warnings += "<p>Irregular activities (ignored):</p>" + @activity_errors.join("<br/>") unless @activity_errors.empty?
+        duplicate_warnings = ''
+        @duplicates.each {|inchi,lines| duplicate_warnings << "<p>#{lines.join('<br/>')}</p>" if lines.size > 1 }
+        warnings += "<p>Duplicated structures (all structures/activities used for model building, please make sure, that the results were obtained from <em>independent</em> experiments):</p>" + duplicate_warnings unless duplicate_warnings.empty?
+
+        @dataset.metadata[OT.Warnings] = warnings
+
+      end
+
+      def add_features(row)
+        row.shift # get rid of smiles entry
+        row.each do |feature_name|
+          feature_uri = File.join(@dataset.uri,"feature",URI.encode(feature_name))
+          @feature_types[feature_uri] = []
+          @features << feature_uri
+          @dataset.add_feature(feature_uri,{DC.title => feature_name})
+        end
+      end
+
+      def add_values(row)
+
+        smiles = row.shift
+        compound = Compound.from_smiles(smiles)
+        if compound.nil? or compound.inchi.nil? or compound.inchi == ""
+          @smiles_errors << smiles+", "+row.join(", ")
+          return false
+        end
+        @duplicates[compound.inchi] = [] unless @duplicates[compound.inchi]
+        @duplicates[compound.inchi] << smiles+", "+row.join(", ")
+
+        row.each_index do |i|
+          value = row[i]
+          feature = @features[i]
+          type = feature_type(value)
+
+          @feature_types[feature] << type
+
+          case type
+          when OT.NominalFeature
+            case value.to_s
+            when TRUE_REGEXP
+              @dataset.add(compound.uri, feature, true )
+            when FALSE_REGEXP
+              @dataset.add(compound.uri, feature, false )
+            end
+          when OT.NumericFeature
+            @dataset.add compound.uri, feature, value.to_f
+          when OT.StringFeature
+            @dataset.add compound.uri, feature, value.to_s
+            @activity_errors << smiles+", "+row.join(", ")
+          end
+        end
+      end
+
+      def numeric?(value)
+        true if Float(value) rescue false
+      end
+
+      def classification?(value)
+        !value.to_s.strip.match(TRUE_REGEXP).nil? or !value.to_s.strip.match(FALSE_REGEXP).nil?
+      end
+
+      def feature_type(value)
+        if classification? value
+          return OT.NominalFeature
+        elsif numeric? value
+          return OT.NumericFeature
+        else
+          return OT.StringFeature
+        end
+      end
+
+      def split_row(row)
+        row.chomp.gsub(/["']/,'').split(/\s*[,;]\s*/) # remove quotes
+      end
+
+    end
+  end
+end
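The Spreadsheets parser above carries no usage example in its comments. A minimal sketch of driving it with a CSV string (the CSV content is hypothetical, and it assumes a configured dataset service so that OpenTox::Dataset.create returns an object with a URI):

    # hedged sketch, not part of the gem
    require 'opentox-ruby'

    parser = OpenTox::Parser::Spreadsheets.new
    parser.dataset = OpenTox::Dataset.create        # target dataset; add_features/add_values write into it
    csv = "SMILES,Hamster Carcinogenicity\nCC(=O)Nc1ccc(O)cc1,false"   # hypothetical two-column input
    dataset = parser.load_csv(csv)                  # returns the populated OpenTox::Dataset
    dataset.save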
data/lib/rest_client_wrapper.rb
CHANGED
@@ -1,5 +1,3 @@
-
-
 module OpenTox
 
   #PENDING: implement ot error api, move to own file
@@ -60,14 +58,14 @@ module OpenTox
   def self.execute( rest_call, uri, headers, payload=nil, wait=true )
 
     do_halt 400,"uri is null",uri,headers,payload unless uri
-    do_halt 400,"not a uri",uri,headers,payload unless
+    do_halt 400,"not a uri",uri,headers,payload unless uri.to_s.uri?
     do_halt 400,"headers are no hash",uri,headers,payload unless headers==nil or headers.is_a?(Hash)
     do_halt 400,"nil headers for post not allowed, use {}",uri,headers,payload if rest_call=="post" and headers==nil
     headers.each{ |k,v| headers.delete(k) if v==nil } if headers #remove keys with empty values, as this can cause problems
 
     begin
       #LOGGER.debug "RestCall: "+rest_call.to_s+" "+uri.to_s+" "+headers.inspect
-      resource = RestClient::Resource.new(uri,{:timeout => 60})
+      resource = RestClient::Resource.new(uri,{:timeout => 60})
       if payload
         result = resource.send(rest_call, payload, headers)
       elsif headers
@@ -82,6 +80,7 @@ module OpenTox
       raise "content-type not set" unless res.content_type
       res.code = result.code
 
+      # TODO: Ambit returns task representation with 200 instead of result URI
      return res if res.code==200 || !wait
 
      while (res.code==201 || res.code==202)
@@ -110,12 +109,13 @@ module OpenTox
 
     task = nil
     case res.content_type
-    when /application\/rdf\+xml
-      task = OpenTox::Task.
+    when /application\/rdf\+xml/
+      task = OpenTox::Task.from_rdfxml(res)
+    when /yaml/
+      task = OpenTox::Task.from_yaml(res)
     when /text\//
-      raise "uri list has more than one entry, should be a task" if res.content_type=~/text\/uri-list/ and
-
-      task = OpenTox::Task.find(res.to_s) if Utils.task_uri?(res)
+      raise "uri list has more than one entry, should be a task" if res.content_type=~/text\/uri-list/ and res.split("\n").size > 1 #if uri list contains more than one uri, it's not a task
+      task = OpenTox::Task.find(res.to_s) if res.to_s.uri?
     else
       raise "unknown content-type for task: '"+res.content_type.to_s+"'" #+"' content: "+res[0..200].to_s
     end
@@ -124,7 +124,7 @@ module OpenTox
     task.wait_for_completion
     raise task.description unless task.completed? # maybe task was cancelled / error
 
-    res = WrapperResult.new task.
+    res = WrapperResult.new task.result_uri
     res.code = task.http_code
     res.content_type = "text/uri-list"
     return res
@@ -154,8 +154,8 @@ module OpenTox
     # we are either in a task, or in sinatra
     # PENDING: always return yaml for now
 
-    if $self_task #this global var in Task.
-      raise error.to_yaml # the error is caught, logged, and task state is set to error in Task.
+    if $self_task #this global var in Task.create to mark that the current process is running in a task
+      raise error.to_yaml # the error is caught, logged, and task state is set to error in Task.create
     #elsif $sinatra #else halt sinatra
     #$sinatra.halt(502,error.to_yaml)
     elsif defined?(halt)
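In plain terms, the hunks above make the wrapper recognise asynchronous task responses in more formats: an RDF/XML or YAML body is parsed into an OpenTox::Task, a single-entry uri-list is looked up with Task.find, and the wrapper then blocks until the task finishes and hands back its result URI. A condensed sketch of that flow (method names taken from the diff; the surrounding helper and its name are hypothetical):

    # hedged sketch of the task-handling logic, not the gem's actual method
    def wait_for_task(res)
      task = case res.content_type
             when /application\/rdf\+xml/ then OpenTox::Task.from_rdfxml(res)
             when /yaml/                  then OpenTox::Task.from_yaml(res)
             when /text\//                then OpenTox::Task.find(res.to_s) if res.to_s.uri?
             else raise "unknown content-type for task: '#{res.content_type}'"
             end
      task.wait_for_completion
      raise task.description unless task.completed?  # task may have been cancelled or errored
      task.result_uri                                # the wrapper wraps this in a WrapperResult
    end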
data/lib/serializer.rb
ADDED
@@ -0,0 +1,340 @@
+require 'spreadsheet'
+require 'yajl'
+
+module OpenTox
+
+  # Serializer for various output formats
+  module Serializer
+
+    # OWL-DL Serializer, modelled according to http://n2.talis.com/wiki/RDF_JSON_Specification
+    class Owl
+
+      attr_accessor :object
+
+      def initialize
+
+        @object = {
+          # this should come from opentox.owl
+          OT.Compound => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
+          OT.Feature => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
+          OT.NominalFeature => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
+          OT.NumericFeature => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
+          OT.StringFeature => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
+          OT.Dataset => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
+          OT.DataEntry => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
+          OT.FeatureValue => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
+          OT.Algorithm => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
+          OT.Parameter => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
+          OT.Task => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
+
+          OT.compound => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
+          OT.feature => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
+          OT.dataEntry => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
+          OT.acceptValue => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
+          OT.values => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
+          OT.algorithm => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
+          OT.parameters => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
+
+          DC.title => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
+          DC.identifier => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
+          DC.contributor => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
+          DC.creator => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
+          DC.description => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
+          DC.date => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
+          OT.isA => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
+          OT.Warnings => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
+          XSD.anyURI => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
+          OT.hasStatus => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
+          OT.resultURI => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
+          OT.percentageCompleted => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
+
+          OT.hasSource => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } ,
+          OT.value => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } ,
+          OT.paramScope => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } ,
+          OT.paramValue => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } ,
+        }
+
+        @data_entries = {}
+        @values_id = 0
+        @parameter_id = 0
+
+        @classes = Set.new
+        @object_properties = Set.new
+        @annotation_properties = Set.new
+        @datatype_properties = Set.new
+
+        @objects = Set.new
+      end
+
+      # Add a compound
+      # @param [String] uri Compound URI
+      def add_compound(uri)
+        @object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Compound }] }
+      end
+
+      # Add a feature
+      # @param [String] uri Feature URI
+      def add_feature(uri,metadata)
+        @object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Feature }] }
+        add_metadata uri, metadata
+      end
+
+      # Add a dataset
+      # @param [String] uri Dataset URI
+      def add_dataset(dataset)
+
+        @dataset = dataset.uri
+
+        @object[dataset.uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Dataset }] }
+
+        add_metadata dataset.uri, dataset.metadata
+
+        dataset.compounds.each { |compound| add_compound compound }
+
+        dataset.features.each { |feature,metadata| add_feature feature,metadata }
+
+        dataset.data_entries.each do |compound,entry|
+          entry.each do |feature,values|
+            values.each { |value| add_data_entry compound,feature,value }
+          end
+        end
+
+      end
+
+      # Add an algorithm
+      # @param [String] uri Algorithm URI
+      def add_algorithm(uri,metadata)
+        @object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Algorithm }] }
+        add_metadata uri, metadata
+      end
+
+      # Add a model
+      # @param [String] uri Model URI
+      def add_model(uri,metadata)
+        @object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Model }] }
+        add_metadata uri, metadata
+      end
+
+      # Add a task
+      # @param [String] uri Task URI
+      def add_task(uri,metadata)
+        @object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Task }] }
+        add_metadata uri, metadata
+      end
+
+      # Add metadata
+      # @param [Hash] metadata
+      def add_metadata(uri,metadata)
+        id = 0
+        metadata.each do |u,v|
+          if v.is_a? Array and u == OT.parameters
+            @object[uri][u] = [] unless @object[uri][u]
+            v.each do |value|
+              id+=1
+              genid = "_:genid#{id}"
+              @object[uri][u] << {"type" => "bnode", "value" => genid}
+              @object[genid] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Parameter}] }
+              value.each do |name,entry|
+                @object[genid][name] = [{"type" => type(entry), "value" => entry }]
+              end
+            end
+          else # v.is_a? String
+            @object[uri] = {} unless @object[uri]
+            @object[uri][u] = [{"type" => type(v), "value" => v }]
+          end
+        end
+      end
+
+      # Add a data entry
+      # @param [String] compound Compound URI
+      # @param [String] feature Feature URI
+      # @param [Boolean,Float] value Feature value
+      def add_data_entry(compound,feature,value)
+        add_compound(compound) unless @object[compound]
+        add_feature(feature,{}) unless @object[feature]
+        unless data_entry = @data_entries[compound]
+          data_entry = "_:dataentry#{@data_entries.size}"
+          @data_entries[compound] = data_entry
+          @object[@dataset][OT.dataEntry] = [] unless @object[@dataset][OT.dataEntry]
+          @object[@dataset][OT.dataEntry] << {"type" => "bnode", "value" => data_entry}
+          @object[data_entry] = {
+            RDF["type"] => [{ "type" => "uri", "value" => OT.DataEntry }],
+            OT.compound => [{ "type" => "uri", "value" => compound }],
+            OT.values => [],
+          }
+        end
+        values = "_:values#{@values_id}"
+        @values_id += 1
+        @object[data_entry][OT.values] << {"type" => "bnode", "value" => values}
+        case type(value)
+        when "uri"
+          v = [{ "type" => "uri", "value" => value}]
+        when "literal"
+          v = [{ "type" => "literal", "value" => value, "datatype" => datatype(value) }]
+        else
+          raise "Illegal type #{type(value)} for #{value}."
+        end
+        @object[values] = {
+          RDF["type"] => [{ "type" => "uri", "value" => OT.FeatureValue }],
+          OT.feature => [{ "type" => "uri", "value" => feature }],
+          OT.value => v
+        }
+        @object[feature][RDF["type"]] << { "type" => "uri", "value" => featuretype(value) }
+      end
+
+      # Serializers
+
+      # Convert to N-Triples
+      # @return [text/plain] Object OWL-DL in N-Triples format
+      def to_ntriples
+
+        @triples = Set.new
+        @object.each do |s,entry|
+          s = url(s) if type(s) == "uri"
+          entry.each do |p,objects|
+            p = url(p)
+            objects.each do |o|
+              case o["type"]
+              when "uri"
+                o = url(o["value"])
+              when "literal"
+                o = literal(o["value"],datatype(o["value"]))
+              when "bnode"
+                o = o["value"]
+              end
+              @triples << [s,p,o]
+            end
+          end
+        end
+        @triples.sort.collect{ |s| s.join(' ').concat(" .") }.join("\n")+"\n"
+      end
+
+      # Convert to RDF/XML
+      # @return [text/plain] Object OWL-DL in RDF/XML format
+      def to_rdfxml
+        Tempfile.open("owl-serializer"){|f| f.write(self.to_ntriples); @path = f.path}
+        `rapper -i ntriples -f 'xmlns:ot="#{OT.uri}"' -f 'xmlns:dc="#{DC.uri}"' -f 'xmlns:rdf="#{RDF.uri}"' -f 'xmlns:owl="#{OWL.uri}"' -o rdfxml #{@path} 2>/dev/null`
+      end
+
+      # Convert to JSON as specified in http://n2.talis.com/wiki/RDF_JSON_Specification
+      # (Ambit services use a different JSON representation)
+      # @return [text/plain] Object OWL-DL in JSON format
+      def to_json
+        #rdf_types
+        Yajl::Encoder.encode(@object)
+      end
+
+      # Helpers for type detection
+      private
+
+      def datatype(value)
+        if value.is_a? TrueClass or value.is_a? FalseClass
+          XSD.boolean
+        elsif value.is_a? Float
+          XSD.float
+        else
+          XSD.string
+        end
+      end
+
+      def featuretype(value)
+        if value.is_a? TrueClass or value.is_a? FalseClass
+          datatype = OT.NominalFeature
+        elsif value.is_a? Float
+          datatype = OT.NumericFeature
+        else
+          datatype = OT.StringFeature
+        end
+      end
+
+      def type(value)
+        begin
+          uri = URI.parse(value)
+          if uri.class == URI::HTTP or uri.class == URI::HTTPS
+            "uri"
+          elsif value.match(/^_/)
+            "bnode"
+          else
+            "literal"
+          end
+        rescue
+          "literal"
+        end
+      end
+
+      def literal(value,type)
+        # concat and << are faster string concatenation operators than +
+        '"'.concat(value.to_s).concat('"^^<').concat(type).concat('>')
+      end
+
+      def url(uri)
+        # concat and << are faster string concatenation operators than +
+        '<'.concat(uri).concat('>')
+      end
+
+      def rdf_types
+        @classes.each { |c| @object[c] = { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } }
+        @object_properties.each { |p| @object[p] = { RDF["type"] => [{ "type" => "uri", "value" => OWL['ObjectProperty'] }] } }
+        @annotation_properties.each { |a| @object[a] = { RDF["type"] => [{ "type" => "uri", "value" => OWL['AnnotationProperty'] }] } }
+        @datatype_properties.each { |d| @object[d] = { RDF["type"] => [{ "type" => "uri", "value" => OWL['DatatypeProperty'] }] } }
+      end
+
+    end
+
+    # Serializer for spreadsheet formats
+    class Spreadsheets # to avoid name clash with Spreadsheet gem
+
+      # Create a new spreadsheet serializer
+      # @param [OpenTox::Dataset] dataset Dataset object
+      def initialize(dataset)
+        @rows = []
+        @rows << ["SMILES"]
+        features = dataset.features.keys
+        @rows.first << features
+        @rows.first.flatten!
+        dataset.data_entries.each do |compound,entries|
+          smiles = Compound.new(compound).to_smiles
+          row = Array.new(@rows.first.size)
+          row[0] = smiles
+          entries.each do |feature, values|
+            i = features.index(feature)+1
+            values.each do |value|
+              if row[i]
+                row[i] = "#{row[i]} #{value}" # multiple values
+              else
+                row[i] = value
+              end
+            end
+          end
+          @rows << row
+        end
+      end
+
+      # Convert to CSV string
+      # @return [String] CSV string
+      def to_csv
+        @rows.collect{|r| r.join(", ")}.join("\n")
+      end
+
+      # Convert to spreadsheet workbook
+      # @return [Spreadsheet::Workbook] Workbook object (use the spreadsheet gem to write a file)
+      def to_spreadsheet
+        Spreadsheet.client_encoding = 'UTF-8'
+        book = Spreadsheet::Workbook.new
+        sheet = book.create_worksheet(:name => '')
+        sheet.column(0).width = 100
+        i = 0
+        @rows.each do |row|
+          row.each do |c|
+            sheet.row(i).push c
+          end
+          i+=1
+        end
+        book
+      end
+
+    end
+
+
+  end
+end
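A rough usage sketch for the new serializers (assuming a dataset object has already been loaded and the rapper command-line tool is installed, which to_rdfxml shells out to):

    # hedged sketch, not part of the gem
    serializer = OpenTox::Serializer::Owl.new
    serializer.add_dataset dataset          # registers compounds, features and data entries
    ntriples = serializer.to_ntriples       # N-Triples string
    rdfxml   = serializer.to_rdfxml         # RDF/XML via the rapper tool

    csv  = OpenTox::Serializer::Spreadsheets.new(dataset).to_csv
    book = OpenTox::Serializer::Spreadsheets.new(dataset).to_spreadsheet   # Spreadsheet::Workbook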