opentox-ruby 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +4 -4
- data/Rakefile +35 -35
- data/VERSION +1 -1
- data/lib/algorithm.rb +220 -50
- data/lib/compound.rb +138 -73
- data/lib/dataset.rb +296 -192
- data/lib/environment.rb +44 -29
- data/lib/feature.rb +15 -0
- data/lib/model.rb +240 -112
- data/lib/opentox-ruby.rb +13 -0
- data/lib/opentox.rb +47 -0
- data/lib/overwrite.rb +72 -0
- data/lib/parser.rb +286 -0
- data/lib/rest_client_wrapper.rb +12 -12
- data/lib/serializer.rb +340 -0
- data/lib/task.rb +184 -101
- data/lib/validation.rb +58 -8
- metadata +41 -22
data/lib/parser.rb
ADDED
@@ -0,0 +1,286 @@
|
|
1
|
+
require 'shellwords'
require 'spreadsheet'
require 'roo'
|
3
|
+
|
4
|
+
class String

  # Split an N-Triples statement into its three components.
  # The trailing " ." statement terminator is removed and the delimiter
  # characters <, > and " are stripped from every component.
  # @return [Array] Array with [subject,predicate,object]
  def to_triple
    chomp.split(' ', 3).map do |part|
      # The dot is escaped so that only the " ." terminator is removed;
      # an unescaped dot would also eat "whitespace + any last character".
      part.sub(/\s+\.$/, '').gsub(/[<>"]/, '')
    end
  end

end
|
13
|
+
|
14
|
+
module OpenTox
|
15
|
+
|
16
|
+
# Parser for various input formats
|
17
|
+
module Parser
|
18
|
+
|
19
|
+
# OWL-DL parser
|
20
|
+
module Owl
|
21
|
+
|
22
|
+
# Create a new OWL-DL parser
|
23
|
+
# @param uri URI of OpenTox object
|
24
|
+
# @return [OpenTox::Parser::Owl] OWL-DL parser
|
25
|
+
def initialize(uri)
  # Remember the object URI and start with an empty metadata hash
  @uri, @metadata = uri, {}
end
|
29
|
+
|
30
|
+
# Read metadata from opentox service
|
31
|
+
# @return [Hash] Object metadata
|
32
|
+
def load_metadata
  # When @dataset is set the metadata is read from <uri>/metadata,
  # otherwise from the object URI itself.
  uri = @dataset ? File.join(@uri, "metadata") : @uri

  statements = []
  parameter_ids = []
  # The URI is shell-quoted: characters such as & ? [ ] ; would otherwise be
  # interpreted by the shell and break (or inject into) the rapper call.
  `rapper -i rdfxml -o ntriples #{Shellwords.escape(uri.to_s)} 2>/dev/null`.each_line do |line|
    triple = line.to_triple
    # Statements about the object itself become metadata (datatype suffix removed)
    if triple[0] == @uri && triple[1] != RDF['type']
      @metadata[triple[1]] = triple[2].split('^^').first
    end
    statements << triple
    parameter_ids << triple[2] if triple[1] == OT.parameters
  end

  # Collect the statements of every referenced parameter node into one hash each
  unless parameter_ids.empty?
    @metadata[OT.parameters] = []
    parameter_ids.each do |parameter_id|
      parameter = {}
      statements.each do |subject, predicate, object|
        parameter[predicate] = object if subject == parameter_id && predicate != RDF['type']
      end
      @metadata[OT.parameters] << parameter
    end
  end
  @metadata
end
|
58
|
+
|
59
|
+
# Generic parser for all OpenTox classes
|
60
|
+
class Generic
  # All behaviour comes from the Owl mixin
  include ::OpenTox::Parser::Owl
end
|
63
|
+
|
64
|
+
# OWL-DL parser for datasets
|
65
|
+
class Dataset
|
66
|
+
|
67
|
+
include Owl
|
68
|
+
|
69
|
+
attr_writer :uri
|
70
|
+
|
71
|
+
# Create a new OWL-DL dataset parser
|
72
|
+
# @param uri Dataset URI
|
73
|
+
# @return [OpenTox::Parser::Owl::Dataset] OWL-DL parser
|
74
|
+
def initialize(uri)
  # Owl#initialize stores @uri, which the dataset object is then built from
  super(uri)
  @dataset = ::OpenTox::Dataset.new(@uri)
end
|
78
|
+
|
79
|
+
# Read data from dataset service. Files can be parsed by setting #uri to a filename (after initialization with a real URI)
|
80
|
+
# @example Read data from an external service
|
81
|
+
#   parser = OpenTox::Parser::Owl::Dataset.new "http://webservices.in-silico.ch/dataset/1"
|
82
|
+
# dataset = parser.load_uri
|
83
|
+
# @example Create dataset from RDF/XML file
|
84
|
+
# dataset = OpenTox::Dataset.create
|
85
|
+
#   parser = OpenTox::Parser::Owl::Dataset.new dataset.uri
|
86
|
+
# parser.uri = "dataset.rdfxml" # insert your input file
|
87
|
+
# dataset = parser.load_uri
|
88
|
+
# dataset.save
|
89
|
+
# @return [Hash] Internal dataset representation
|
90
|
+
def load_uri
  data = {}            # data entry node => {:compound => uri, :values => [value node ids]}
  feature_values = {}  # value node => raw value (may carry a ^^datatype suffix)
  feature = {}         # value node => feature URI
  # The URI (or file name) is shell-quoted so that characters such as
  # & ? [ ] ; cannot be interpreted by the shell.
  `rapper -i rdfxml -o ntriples #{Shellwords.escape(@uri.to_s)} 2>/dev/null`.each_line do |line|
    subject, predicate, object = line.to_triple
    case predicate
    when /#{OT.values}/i # tested before OT.value; presumably the shorter pattern would match here too
      (data[subject] ||= {:compound => "", :values => []})[:values] << object
    when /#{OT.value}/i
      feature_values[subject] = object
    when /#{OT.compound}/i
      (data[subject] ||= {:compound => "", :values => []})[:compound] = object
    when /#{OT.feature}/i
      feature[subject] = object
    end
  end
  data.each_value do |entry|
    entry[:values].each do |value_id|
      value = feature_values[value_id].split(/\^\^/).first # remove XSD.type
      @dataset.add entry[:compound], feature[value_id], value
    end
  end
  load_features
  @dataset.metadata = load_metadata
  @dataset
end
|
122
|
+
|
123
|
+
# Read only features from a dataset service.
|
124
|
+
# @return [Hash] Internal features representation
|
125
|
+
def load_features
  uri = File.join(@uri, "features")
  statements = []
  features = Set.new
  # Shell-quote the URI so that & ? [ ] ; are not interpreted by the shell
  `rapper -i rdfxml -o ntriples #{Shellwords.escape(uri)} 2>/dev/null`.each_line do |line|
    # String#to_triple splits on the first two spaces only, so blank-node
    # subjects and literals containing "> " survive intact.
    triple = line.to_triple
    statements << triple
    features << triple[0] if triple[1] == RDF['type'] && triple[2] == OT.Feature
  end
  # Second pass: copy every statement about a known feature into the dataset
  statements.each do |subject, predicate, object|
    next unless features.include?(subject)
    @dataset.features[subject] ||= {}
    @dataset.features[subject][predicate] = object.split('^^').first
  end
  @dataset.features
end
|
142
|
+
|
143
|
+
end
|
144
|
+
|
145
|
+
end
|
146
|
+
|
147
|
+
# Parser for getting spreadsheet data into a dataset
|
148
|
+
class Spreadsheets
|
149
|
+
|
150
|
+
attr_accessor :dataset
|
151
|
+
|
152
|
+
# Set up empty containers for parsed data and for the problems found while parsing
def initialize
  @data, @features = [], []
  @feature_types = {}

  @format_errors = ""
  @smiles_errors, @activity_errors = [], []
  @duplicates = {}
end
|
162
|
+
|
163
|
+
# Load Spreadsheet book (created with roo gem http://roo.rubyforge.org/, excel format specification: http://toxcreate.org/help)
|
164
|
+
# @param [Excel] book Excel workbook object (created with roo gem)
|
165
|
+
# @return [OpenTox::Dataset] Dataset object with Excel data
|
166
|
+
def load_spreadsheet(book)
  book.default_sheet = 0
  # Row 1 holds the column titles, all following rows hold compounds
  header = book.row(1)
  add_features(header)
  2.upto(book.last_row) do |row_number|
    add_values(book.row(row_number))
  end
  warnings
  @dataset
end
|
173
|
+
|
174
|
+
# Load CSV string (format specification: http://toxcreate.org/help)
|
175
|
+
# @param [String] csv CSV representation of the dataset
|
176
|
+
# @return [OpenTox::Dataset] Dataset object with CSV data
|
177
|
+
def load_csv(csv)
  lines = csv.split("\n")
  # The first line holds the column titles
  add_features split_row(lines.shift)
  lines.each do |line|
    fields = split_row(line)
    # Blank lines (or lines made of separators only) carry no compound
    next if fields.empty?
    add_values fields
  end
  warnings
  @dataset
end
|
185
|
+
|
186
|
+
private
|
187
|
+
|
188
|
+
# Store the detected feature types in the dataset and build the OT.Info and
# OT.Warnings metadata entries from the problems collected while parsing.
# @return [String] the HTML warnings text stored under OT.Warnings
def warnings
  info = ''
  @feature_types.each do |feature, types|
    # A column without a single recorded value has no detectable type;
    # without this guard the type would be nil and nil.split would raise.
    next if types.empty?
    # Columns with mixed types are declared numeric
    type = types.uniq.size > 1 ? OT.NumericFeature : types.first
    @dataset.add_feature_metadata(feature, {OT.isA => type})
    info += "\"#{@dataset.feature_name(feature)}\" detected as #{type.split('#').last}."

    # TODO: rewrite feature values
    # TODO if value.to_f == 0 @activity_errors << "#{smiles} Zero values not allowed for regression datasets - entry ignored."
  end

  @dataset.metadata[OT.Info] = info

  report = ''
  report += "<p>Incorrect Smiles structures (ignored):</p>" + @smiles_errors.join("<br/>") unless @smiles_errors.empty?
  report += "<p>Irregular activities (ignored):</p>" + @activity_errors.join("<br/>") unless @activity_errors.empty?
  # Only structures that occur on more than one input line are reported
  duplicate_warnings = ''
  @duplicates.each_value do |lines|
    duplicate_warnings += "<p>#{lines.join('<br/>')}</p>" if lines.size > 1
  end
  report += "<p>Duplicated structures (all structures/activities used for model building, please make sure, that the results were obtained from <em>independent</em> experiments):</p>" + duplicate_warnings unless duplicate_warnings.empty?

  @dataset.metadata[OT.Warnings] = report
end
|
216
|
+
|
217
|
+
# Register one dataset feature per column title of the header row.
# Note: the first cell (the SMILES column title) is removed from +row+ in place.
def add_features(row)
  row.shift # get rid of smiles entry
  row.each do |title|
    uri = File.join(@dataset.uri, "feature", URI.encode(title))
    @feature_types[uri] = []
    @features.push(uri)
    @dataset.add_feature(uri, {DC.title => title})
  end
end
|
226
|
+
|
227
|
+
# Add the values of one input row (SMILES followed by one cell per feature)
# to the dataset and record problems for the warnings report.
# Note: the SMILES cell is removed from +row+ in place.
# @return [false, Array] false when the structure could not be parsed, otherwise the remaining row
def add_values(row)
  smiles = row.shift
  compound = Compound.from_smiles(smiles)
  # Read the InChI once instead of on every use
  inchi = compound.inchi unless compound.nil?
  # Interpolation instead of String#+ so that a nil SMILES cell is reported
  # as an error line rather than raising NoMethodError
  line = "#{smiles}, #{row.join(', ')}"
  if inchi.nil? || inchi == ""
    @smiles_errors << line
    return false
  end
  (@duplicates[inchi] ||= []) << line

  row.each_with_index do |value, i|
    feature = @features[i]
    type = feature_type(value)

    @feature_types[feature] << type

    case type
    when OT.NominalFeature
      case value.to_s
      when TRUE_REGEXP
        @dataset.add(compound.uri, feature, true)
      when FALSE_REGEXP
        @dataset.add(compound.uri, feature, false)
      end
    when OT.NumericFeature
      @dataset.add compound.uri, feature, value.to_f
    when OT.StringFeature
      # String values are stored, but the row is also listed as irregular
      @dataset.add compound.uri, feature, value.to_s
      @activity_errors << line
    end
  end
end
|
261
|
+
|
262
|
+
# true when Kernel#Float accepts the value, false when the conversion raises
def numeric?(value)
  Float(value)
  true
rescue
  false
end
|
265
|
+
|
266
|
+
# true when the stripped string form of the value matches TRUE_REGEXP or FALSE_REGEXP
def classification?(value)
  text = value.to_s.strip
  [TRUE_REGEXP, FALSE_REGEXP].any? { |pattern| text.match(pattern) }
end
|
269
|
+
|
270
|
+
# Feature type URI for a single cell: nominal is tested first, then numeric;
# everything else is a string feature
def feature_type(value)
  return OT.NominalFeature if classification?(value)
  numeric?(value) ? OT.NumericFeature : OT.StringFeature
end
|
279
|
+
|
280
|
+
# Split one CSV line into cells: quote characters are removed, cells are
# separated by "," or ";" including the whitespace around the separator
def split_row(row)
  unquoted = row.chomp.delete(%q("')) # remove quotes
  unquoted.split(/\s*[,;]\s*/)
end
|
283
|
+
|
284
|
+
end
|
285
|
+
end
|
286
|
+
end
|
data/lib/rest_client_wrapper.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
|
2
|
-
|
3
1
|
module OpenTox
|
4
2
|
|
5
3
|
#PENDING: implement ot error api, move to own file
|
@@ -60,14 +58,14 @@ module OpenTox
|
|
60
58
|
def self.execute( rest_call, uri, headers, payload=nil, wait=true )
|
61
59
|
|
62
60
|
do_halt 400,"uri is null",uri,headers,payload unless uri
|
63
|
-
do_halt 400,"not a uri",uri,headers,payload unless
|
61
|
+
do_halt 400,"not a uri",uri,headers,payload unless uri.to_s.uri?
|
64
62
|
do_halt 400,"headers are no hash",uri,headers,payload unless headers==nil or headers.is_a?(Hash)
|
65
63
|
do_halt 400,"nil headers for post not allowed, use {}",uri,headers,payload if rest_call=="post" and headers==nil
|
66
64
|
headers.each{ |k,v| headers.delete(k) if v==nil } if headers #remove keys with empty values, as this can cause problems
|
67
65
|
|
68
66
|
begin
|
69
67
|
#LOGGER.debug "RestCall: "+rest_call.to_s+" "+uri.to_s+" "+headers.inspect
|
70
|
-
resource = RestClient::Resource.new(uri,{:timeout => 60})
|
68
|
+
resource = RestClient::Resource.new(uri,{:timeout => 60})
|
71
69
|
if payload
|
72
70
|
result = resource.send(rest_call, payload, headers)
|
73
71
|
elsif headers
|
@@ -82,6 +80,7 @@ module OpenTox
|
|
82
80
|
raise "content-type not set" unless res.content_type
|
83
81
|
res.code = result.code
|
84
82
|
|
83
|
+
# TODO: Ambit returns task representation with 200 instead of result URI
|
85
84
|
return res if res.code==200 || !wait
|
86
85
|
|
87
86
|
while (res.code==201 || res.code==202)
|
@@ -110,12 +109,13 @@ module OpenTox
|
|
110
109
|
|
111
110
|
task = nil
|
112
111
|
case res.content_type
|
113
|
-
when /application\/rdf\+xml
|
114
|
-
task = OpenTox::Task.
|
112
|
+
when /application\/rdf\+xml/
|
113
|
+
task = OpenTox::Task.from_rdfxml(res)
|
114
|
+
when /yaml/
|
115
|
+
task = OpenTox::Task.from_yaml(res)
|
115
116
|
when /text\//
|
116
|
-
raise "uri list has more than one entry, should be a task" if res.content_type=~/text\/uri-list/ and
|
117
|
-
|
118
|
-
task = OpenTox::Task.find(res.to_s) if Utils.task_uri?(res)
|
117
|
+
raise "uri list has more than one entry, should be a task" if res.content_type=~/text\/uri-list/ and res.split("\n").size > 1 #if uri list contains more then one uri, its not a task
|
118
|
+
task = OpenTox::Task.find(res.to_s) if res.to_s.uri?
|
119
119
|
else
|
120
120
|
raise "unknown content-type for task: '"+res.content_type.to_s+"'" #+"' content: "+res[0..200].to_s
|
121
121
|
end
|
@@ -124,7 +124,7 @@ module OpenTox
|
|
124
124
|
task.wait_for_completion
|
125
125
|
raise task.description unless task.completed? # maybe task was cancelled / error
|
126
126
|
|
127
|
-
res = WrapperResult.new task.
|
127
|
+
res = WrapperResult.new task.result_uri
|
128
128
|
res.code = task.http_code
|
129
129
|
res.content_type = "text/uri-list"
|
130
130
|
return res
|
@@ -154,8 +154,8 @@ module OpenTox
|
|
154
154
|
# we are either in a task, or in sinatra
|
155
155
|
# PENDING: always return yaml for now
|
156
156
|
|
157
|
-
if $self_task #this global var in Task.
|
158
|
-
raise error.to_yaml # the error is caught, logged, and task state is set to error in Task.
|
157
|
+
if $self_task #this global var in Task.create to mark that the current process is running in a task
|
158
|
+
raise error.to_yaml # the error is caught, logged, and task state is set to error in Task.create
|
159
159
|
#elsif $sinatra #else halt sinatra
|
160
160
|
#$sinatra.halt(502,error.to_yaml)
|
161
161
|
elsif defined?(halt)
|
data/lib/serializer.rb
ADDED
@@ -0,0 +1,340 @@
|
|
1
|
+
require 'spreadsheet'
|
2
|
+
require 'yajl'
|
3
|
+
|
4
|
+
module OpenTox
|
5
|
+
|
6
|
+
# Serializer for various output formats
|
7
|
+
module Serializer
|
8
|
+
|
9
|
+
# OWL-DL Serializer, modelled according to http://n2.talis.com/wiki/RDF_JSON_Specification
|
10
|
+
class Owl
|
11
|
+
|
12
|
+
attr_accessor :object
|
13
|
+
|
14
|
+
# Create a serializer whose @object hash is pre-filled with the OWL type
# declarations of all classes and properties the serializer may emit.
def initialize
  # this should come from opentox.owl
  classes = [
    OT.Compound, OT.Feature, OT.NominalFeature, OT.NumericFeature, OT.StringFeature,
    OT.Dataset, OT.DataEntry, OT.FeatureValue, OT.Algorithm,
    OT.Model, # emitted by add_model, so it is declared like the other classes
    OT.Parameter, OT.Task
  ]
  object_properties = [
    OT.compound, OT.feature, OT.dataEntry, OT.acceptValue, OT.values, OT.algorithm, OT.parameters
  ]
  annotation_properties = [
    DC.title, DC.identifier, DC.contributor, DC.creator, DC.description, DC.date,
    OT.isA, OT.Warnings, XSD.anyURI, OT.hasStatus, OT.resultURI, OT.percentageCompleted
  ]
  datatype_properties = [OT.hasSource, OT.value, OT.paramScope, OT.paramValue]

  # Every entry gets its own hash and array; add_data_entry appends to these arrays
  @object = {}
  classes.each { |c| @object[c] = { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } }
  object_properties.each { |p| @object[p] = { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } }
  annotation_properties.each { |a| @object[a] = { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } }
  datatype_properties.each { |d| @object[d] = { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } }

  @data_entries = {}
  @values_id = 0
  @parameter_id = 0

  @classes = Set.new
  @object_properties = Set.new
  @annotation_properties = Set.new
  @datatype_properties = Set.new

  @objects = Set.new
end
|
68
|
+
|
69
|
+
# Add a compound
|
70
|
+
# @param [String] uri Compound URI
|
71
|
+
def add_compound(uri)
  # (Re)declare the URI as an OT.Compound; an existing entry is replaced
  declaration = { "type" => "uri", "value" => OT.Compound }
  @object[uri] = { RDF["type"] => [declaration] }
end
|
74
|
+
|
75
|
+
# Add a feature
|
76
|
+
# @param [String] uri Feature URI
|
77
|
+
def add_feature(uri, metadata)
  # Declare the URI as an OT.Feature, then attach its metadata
  declaration = { "type" => "uri", "value" => OT.Feature }
  @object[uri] = { RDF["type"] => [declaration] }
  add_metadata(uri, metadata)
end
|
81
|
+
|
82
|
+
# Add a dataset
|
83
|
+
# @param [OpenTox::Dataset] dataset Dataset object
|
84
|
+
def add_dataset(dataset)
  # Remembered so that add_data_entry can attach data entries to the dataset node
  @dataset = dataset.uri

  declaration = { "type" => "uri", "value" => OT.Dataset }
  @object[dataset.uri] = { RDF["type"] => [declaration] }
  add_metadata(dataset.uri, dataset.metadata)

  dataset.compounds.each { |compound_uri| add_compound(compound_uri) }
  dataset.features.each { |feature_uri, feature_metadata| add_feature(feature_uri, feature_metadata) }

  # data_entries maps compound => { feature => [values] }
  dataset.data_entries.each do |compound_uri, entry|
    entry.each do |feature_uri, values|
      values.each { |value| add_data_entry(compound_uri, feature_uri, value) }
    end
  end
end
|
103
|
+
|
104
|
+
# Add an algorithm
|
105
|
+
# @param [String] uri Algorithm URI
|
106
|
+
def add_algorithm(uri, metadata)
  # Declare the URI as an OT.Algorithm, then attach its metadata
  declaration = { "type" => "uri", "value" => OT.Algorithm }
  @object[uri] = { RDF["type"] => [declaration] }
  add_metadata(uri, metadata)
end
|
110
|
+
|
111
|
+
# Add a model
|
112
|
+
# @param [String] uri Model URI
|
113
|
+
def add_model(uri, metadata)
  # Declare the URI as an OT.Model, then attach its metadata
  declaration = { "type" => "uri", "value" => OT.Model }
  @object[uri] = { RDF["type"] => [declaration] }
  add_metadata(uri, metadata)
end
|
117
|
+
|
118
|
+
# Add a task
|
119
|
+
# @param [String] uri Task URI
|
120
|
+
def add_task(uri, metadata)
  # Declare the URI as an OT.Task, then attach its metadata
  declaration = { "type" => "uri", "value" => OT.Task }
  @object[uri] = { RDF["type"] => [declaration] }
  add_metadata(uri, metadata)
end
|
124
|
+
|
125
|
+
# Add metadata
|
126
|
+
# @param [Hash] metadata
|
127
|
+
def add_metadata(uri, metadata)
  metadata.each do |u, v|
    # Metadata may be added for a URI that has not been declared yet
    @object[uri] ||= {}
    if v.is_a?(Array) && u == OT.parameters
      # Each parameter hash becomes its own blank node of type OT.Parameter
      @object[uri][u] ||= []
      v.each do |value|
        # Serializer-wide counter: a per-call counter would hand out
        # "_:genid1" again on the next call and overwrite earlier parameters.
        @parameter_id = @parameter_id.to_i + 1
        genid = "_:genid#{@parameter_id}"
        @object[uri][u] << { "type" => "bnode", "value" => genid }
        @object[genid] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Parameter }] }
        value.each do |name, entry|
          @object[genid][name] = [{ "type" => type(entry), "value" => entry }]
        end
      end
    else # v.is_a? String
      @object[uri][u] = [{ "type" => type(v), "value" => v }]
    end
  end
end
|
147
|
+
|
148
|
+
# Add a data entry
|
149
|
+
# @param [String] compound Compound URI
|
150
|
+
# @param [String] feature Feature URI
|
151
|
+
# @param [Boolean,Float] value Feature value
|
152
|
+
def add_data_entry(compound, feature, value)
  add_compound(compound) unless @object[compound]
  add_feature(feature, {}) unless @object[feature]

  # One data entry blank node per compound, created on first use
  unless data_entry = @data_entries[compound]
    data_entry = "_:dataentry#{@data_entries.size}"
    @data_entries[compound] = data_entry
    @object[@dataset][OT.dataEntry] ||= []
    @object[@dataset][OT.dataEntry] << { "type" => "bnode", "value" => data_entry }
    @object[data_entry] = {
      RDF["type"] => [{ "type" => "uri", "value" => OT.DataEntry }],
      OT.compound => [{ "type" => "uri", "value" => compound }],
      OT.values => [],
    }
  end

  # One feature value blank node per value
  values = "_:values#{@values_id}"
  @values_id += 1
  @object[data_entry][OT.values] << { "type" => "bnode", "value" => values }
  case type(value)
  when "uri"
    v = [{ "type" => "uri", "value" => value }]
  when "literal"
    v = [{ "type" => "literal", "value" => value, "datatype" => datatype(value) }]
  else
    raise "Illegal type #{type(value)} for #{value}."
  end
  @object[values] = {
    RDF["type"] => [{ "type" => "uri", "value" => OT.FeatureValue }],
    OT.feature => [{ "type" => "uri", "value" => feature }],
    OT.value => v
  }

  # Record the feature type derived from the value only once; appending it
  # for every value would repeat the same rdf:type entry in the JSON output.
  feature_types = @object[feature][RDF["type"]]
  detected = { "type" => "uri", "value" => featuretype(value) }
  feature_types << detected unless feature_types.include?(detected)
  feature_types
end
|
184
|
+
|
185
|
+
# Serializers
|
186
|
+
|
187
|
+
# Convert to N-Triples
|
188
|
+
# @return [text/plain] Object OWL-DL in N-Triples format
|
189
|
+
def to_ntriples
  # A Set, so identical statements are emitted only once
  @triples = Set.new
  @object.each do |subject, entry|
    subject = url(subject) if type(subject) == "uri"
    entry.each do |predicate, objects|
      predicate = url(predicate)
      objects.each do |object|
        term = case object["type"]
               when "uri" then url(object["value"])
               when "literal" then literal(object["value"], datatype(object["value"]))
               when "bnode" then object["value"]
               else object
               end
        @triples << [subject, predicate, term]
      end
    end
  end
  # One statement per line, sorted, each terminated by " ."
  lines = @triples.sort.map { |triple| triple.join(' ').concat(" .") }
  lines.join("\n") + "\n"
end
|
211
|
+
|
212
|
+
# Convert to RDF/XML
|
213
|
+
# @return [text/plain] Object OWL-DL in RDF/XML format
|
214
|
+
def to_rdfxml
  rdfxml = nil
  Tempfile.open("owl-serializer") do |f|
    f.write(to_ntriples)
    f.flush # make sure rapper sees the complete file
    @path = f.path
    # rapper runs while the Tempfile object is still referenced, so the file
    # cannot be unlinked before it has been read.
    rdfxml = `rapper -i ntriples -f 'xmlns:ot="#{OT.uri}"' -f 'xmlns:dc="#{DC.uri}"' -f 'xmlns:rdf="#{RDF.uri}"' -f 'xmlns:owl="#{OWL.uri}"' -o rdfxml #{@path} 2>/dev/null`
  end
  rdfxml
end
|
218
|
+
|
219
|
+
# Convert to JSON as specified in http://n2.talis.com/wiki/RDF_JSON_Specification
|
220
|
+
# (Ambit services use a different JSON representation)
|
221
|
+
# @return [text/plain] Object OWL-DL in JSON format
|
222
|
+
def to_json
  #rdf_types
  # @object is already laid out as subject => predicate => [objects]
  encoder = Yajl::Encoder
  encoder.encode(@object)
end
|
226
|
+
|
227
|
+
# Helpers for type detection
|
228
|
+
private
|
229
|
+
|
230
|
+
# XSD datatype URI for a literal value: booleans and floats are mapped to
# their own type, everything else is a string
def datatype(value)
  case value
  when TrueClass, FalseClass then XSD.boolean
  when Float then XSD.float
  else XSD.string
  end
end
|
239
|
+
|
240
|
+
# OpenTox feature type URI for a value: booleans are nominal, floats are
# numeric, everything else is a string feature
def featuretype(value)
  case value
  when TrueClass, FalseClass then OT.NominalFeature
  when Float then OT.NumericFeature
  else OT.StringFeature
  end
end
|
249
|
+
|
250
|
+
# Classify a value as "uri" (parses as an http/https URI), "bnode" (parses as
# some other URI and starts with "_") or "literal" (everything else, including
# values URI.parse rejects)
def type(value)
  parsed = URI.parse(value)
  return "uri" if [URI::HTTP, URI::HTTPS].include?(parsed.class)
  value.match(/^_/) ? "bnode" : "literal"
rescue
  "literal"
end
|
264
|
+
|
265
|
+
# Build an N-Triples typed literal: "value"^^<type>
# Backslash, double quote, newline, carriage return and tab are escaped;
# left unescaped they would terminate or corrupt the quoted string.
def literal(value, type)
  escapes = { "\\" => "\\\\", '"' => '\\"', "\n" => "\\n", "\r" => "\\r", "\t" => "\\t" }
  escaped = value.to_s.gsub(/[\\"\n\r\t]/) { |char| escapes[char] }
  "\"#{escaped}\"^^<#{type}>"
end
|
269
|
+
|
270
|
+
# Wrap a URI in angle brackets (N-Triples URI reference)
def url(uri)
  # << appends in place, like the concat chain it replaces
  String.new('<') << uri << '>'
end
|
274
|
+
|
275
|
+
# Declare every URI collected in the four sets with its OWL type
# (classes first, datatype properties last, so later sets win on overlap)
def rdf_types
  [
    [@classes, 'Class'],
    [@object_properties, 'ObjectProperty'],
    [@annotation_properties, 'AnnotationProperty'],
    [@datatype_properties, 'DatatypeProperty']
  ].each do |uris, owl_type|
    uris.each do |uri|
      @object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OWL[owl_type] }] }
    end
  end
  @datatype_properties
end
|
281
|
+
|
282
|
+
end
|
283
|
+
|
284
|
+
# Serializer for spreadsheet formats
|
285
|
+
class Spreadsheets # to avoid nameclash with Spreadsheet gem
|
286
|
+
|
287
|
+
# Create a new spreadsheet serializer
|
288
|
+
# @param [OpenTox::Dataset] dataset Dataset object
|
289
|
+
def initialize(dataset)
  features = dataset.features.keys
  # Header row: SMILES followed by one column per feature
  @rows = [["SMILES", features].flatten]
  dataset.data_entries.each do |compound, entries|
    row = Array.new(@rows.first.size)
    row[0] = Compound.new(compound).to_smiles
    entries.each do |feature, values|
      column = features.index(feature) + 1
      values.each do |value|
        # several values for one cell are joined with a blank
        row[column] = row[column] ? "#{row[column]} #{value}" : value
      end
    end
    @rows << row
  end
end
|
312
|
+
|
313
|
+
# Convert to CSV string
|
314
|
+
# @return [String] CSV string
|
315
|
+
def to_csv
  # cells are joined with ", ", rows with a newline (no quoting is applied)
  lines = @rows.map { |cells| cells.join(", ") }
  lines.join("\n")
end
|
318
|
+
|
319
|
+
# Convert to spreadsheet workbook
|
320
|
+
# @return [Spreadsheet::Workbook] Workbook object (use the spreadsheet gem to write a file)
|
321
|
+
def to_spreadsheet
  Spreadsheet.client_encoding = 'UTF-8'
  book = Spreadsheet::Workbook.new
  sheet = book.create_worksheet(:name => '')
  sheet.column(0).width = 100
  # Copy the rows cell by cell into the worksheet
  @rows.each_with_index do |cells, index|
    cells.each { |cell| sheet.row(index).push cell }
  end
  book
end
|
335
|
+
|
336
|
+
end
|
337
|
+
|
338
|
+
|
339
|
+
end
|
340
|
+
end
|