opentox-client 0.0.1pre

Sign up to get free protection for your applications and to get access to all the features.
data/lib/dataset.rb ADDED
@@ -0,0 +1,318 @@
1
+ module OpenTox
2
+
3
+ # Ruby wrapper for OpenTox Dataset Webservices (http://opentox.org/dev/apis/api-1.2/dataset).
4
+ # TODO: fix API Doc
5
+ class Dataset
6
+
7
+ #include OpenTox
8
+
9
+ #attr_reader :features, :compounds, :data_entries, :metadata
10
+
11
# Build a dataset object, optionally bound to a dataset URI.
# No data is fetched here: the feature, compound and data entry containers
# start out empty and are filled later (e.g. by #data_entries or #add).
# @example Create an empty dataset
#   dataset = OpenTox::Dataset.new
# @example Create an empty dataset with URI
#   dataset = OpenTox::Dataset.new("http://webservices.in-silico.ch/dataset/1")
# @param [optional, String] uri Dataset URI
# @param [optional, String] subjectid passed on unchanged to the superclass constructor
# @return [OpenTox::Dataset] Dataset object
def initialize(uri=nil,subjectid=nil)
  super(uri, subjectid)
  @features, @compounds, @data_entries = {}, [], {}
end
24
+
25
+ =begin
26
+ # Load YAML representation into the dataset
27
+ # @param [String] yaml YAML representation of the dataset
28
+ # @return [OpenTox::Dataset] Dataset object with YAML data
29
+ def self.from_yaml service_uri, yaml, subjectid=nil
30
+ Dataset.create(service_uri, subjectid).post yaml, :content_type => "application/x-yaml"
31
+ end
32
+
33
+ # Load RDF/XML representation from a file
34
+ # @param [String] file File with RDF/XML representation of the dataset
35
+ # @return [OpenTox::Dataset] Dataset object with RDF/XML data
36
+ def self.from_rdfxml service_uri, rdfxml, subjectid=nil
37
+ Dataset.create(service_uri, subjectid).post rdfxml, :content_type => "application/rdf+xml"
38
+ end
39
+
40
+ # Load CSV string (format specification: http://toxcreate.org/help)
41
+ # - loads data_entries, compounds, features
42
+ # - sets metadata (warnings) for parser errors
43
+ # - you will have to set remaining metadata manually
44
+ # @param [String] csv CSV representation of the dataset
45
+ # @return [OpenTox::Dataset] Dataset object with CSV data
46
+ def self.from_csv service_uri, csv, subjectid=nil
47
+ Dataset.from_file(service_uri, csv, subjectid)
48
+ end
49
+
50
+ # Load Spreadsheet book (created with roo gem http://roo.rubyforge.org/, excel format specification: http://toxcreate.org/help)
51
+ # - loads data_entries, compounds, features
52
+ # - sets metadata (warnings) for parser errors
53
+ # - you will have to set remaining metadata manually
54
+ # @param [Excel] book Excel workbook object (created with roo gem)
55
+ # @return [OpenTox::Dataset] Dataset object with Excel data
56
+ def self.from_xls service_uri, xls, subjectid=nil
57
+ Dataset.create(service_uri, subjectid).post xls, :content_type => "application/vnd.ms-excel"
58
+ end
59
+
60
+ def self.from_sdf service_uri, sdf, subjectid=nil
61
+ Dataset.create(service_uri, subjectid).post sdf, :content_type => 'chemical/x-mdl-sdfile'
62
+ end
63
+ =end
64
+
65
# Load compounds, features and data entries from the dataset's RDF/XML representation.
#
# With reload (the default) the RDF/XML obtained through #get is written to a
# temporary file, converted to N-Triples by the external `rapper` command and the
# resulting triples are folded into @compounds, @features and @data_entries.
# Afterwards #features and #metadata are called with subjectid.
# TODO: move to opentox-server
# @param [Boolean] reload when false the cached @data_entries are returned and nothing is requested
# @return [Hash] @data_entries: compound URI => { feature URI => [values] }
def data_entries reload=true
  return @data_entries unless reload

  file = Tempfile.new("ot-rdfxml")
  begin
    file.puts get(:accept => "application/rdf+xml")
    file.close
    ntriples = `rapper -i rdfxml -o ntriples #{file.path} 2>/dev/null`
  ensure
    # close and unlink, so the temporary file is removed even when the request or conversion fails
    file.close!
  end

  # Predicate patterns are loop invariant: build them once instead of once per triple.
  # "values" has to be tested before "value", because the "value" pattern also matches "values".
  values_pattern       = /#{RDF::OT.values}|#{RDF::OT1.values}/i
  value_pattern        = /#{RDF::OT.value}|#{RDF::OT1.value}/i
  compound_pattern     = /#{RDF::OT.compound}|#{RDF::OT1.compound}/i
  feature_pattern      = /#{RDF::OT.feature}|#{RDF::OT1.feature}/i
  type_pattern         = /#{RDF.type}/i
  compound_class       = /#{RDF::OT.Compound}|#{RDF::OT1.Compound}/i
  accept_value_pattern = /#{RDF::OT.acceptValue}|#{RDF::OT1.acceptValue}/i

  data = {}                  # data entry node => {:compound => uri, :values => [value node ids]}
  feature_values = {}        # value node id => raw literal (still carrying its ^^datatype suffix)
  feature = {}               # value node id => feature URI
  feature_accept_values = {} # feature URI => accept values (only used by the disabled code below)

  ntriples.each_line do |line|
    # split into subject, predicate, object; strip the trailing " ." and the <>" delimiters
    subject, predicate, object = line.chomp.split(' ',3).collect { |part| part.sub(/\s+.$/,'').gsub(/[<>"]/,'') }
    case predicate
    when values_pattern
      (data[subject] ||= {:compound => "", :values => []})[:values] << object
    when value_pattern
      feature_values[subject] = object
    when compound_pattern
      (data[subject] ||= {:compound => "", :values => []})[:compound] = object
    when feature_pattern
      feature[subject] = object
    when type_pattern
      # compounds that are only declared by type (no data entry) are kept as plain compounds
      if object =~ compound_class && !data[subject]
        data[subject] = {:compound => subject, :values => []}
      end
    when accept_value_pattern
      # acceptValue in ambit datasets is only provided in dataset/<id>, not in dataset/<id>/features
      (feature_accept_values[subject] ||= []) << object
    end
  end

  data.each_value do |entry|
    compound = entry[:compound]
    @compounds << compound unless @compounds.include? compound
    entry[:values].each do |value_id|
      feature_uri = feature[value_id]
      # a value node without a feature statement cannot be stored under any feature
      next unless feature_uri

      value = nil
      if feature_values[value_id]
        literal = feature_values[value_id].split(/\^\^/)
        value = case literal[-1]
                when RDF::XSD.double, RDF::XSD.float
                  literal.first.to_f
                when RDF::XSD.boolean
                  literal.first =~ /(?i)true/ ? true : false
                else
                  literal.first
                end
      end

      # the feature is registered under its own URI
      @features[feature_uri] ||= {}
      compound_entries = (@data_entries[compound.to_s] ||= {})
      values = (compound_entries[feature_uri] ||= [])
      values << value unless value.nil?
    end
  end

  # NOTE(review): subjectid is received by #features as its reload flag; when it is
  # truthy @features is replaced by the result of Feature.all - confirm this is intended.
  features subjectid
  #feature_accept_values.each do |feature, values|
    #self.features[feature][OT.acceptValue] = values
  #end
  self.metadata = metadata(subjectid)
  @data_entries
end
137
+
138
# Compound URIs of the dataset.
# @param [Boolean] reload when true (default) the list is fetched again with
#   Compound.all from "<dataset uri>/compounds" and cached in @compounds;
#   when false the cached list is returned as it is
# @return [Array] Compound URIs in the dataset
def compounds reload=true
  return @compounds unless reload
  @compounds = Compound.all(File.join(@uri,"compounds"))
end
143
+
144
# Features of the dataset.
# @param [Boolean] reload when true (default) the features are fetched again with
#   Feature.all from "<dataset uri>/features" and cached in @features;
#   when false the cached value is returned as it is
# @return [Hash] Features of the dataset
def features reload=true
  return @features unless reload
  @features = Feature.all(File.join(@uri,"features"))
end
149
+
150
+ =begin
151
+ # returns the accept_values of a feature, i.e. the classification domain / all possible feature values
152
+ # @param [String] feature the URI of the feature
153
+ # @return [Array] return array with strings, nil if value is not set (e.g. when feature is numeric)
154
+ def accept_values(feature)
155
+ load_features
156
+ accept_values = features[feature][OT.acceptValue]
157
+ accept_values.sort if accept_values
158
+ accept_values
159
+ end
160
+
161
+ # Detect feature type(s) in the dataset
162
+ # @return [String] "classification", "regression", "mixed" or "unknown"
163
+ def feature_type
164
+ load_features
165
+ feature_types = @features.collect{|f,metadata| metadata[RDF.type]}.flatten.uniq
166
+ if feature_types.include?(OT.NominalFeature)
167
+ "classification"
168
+ elsif feature_types.include?(OT.NumericFeature)
169
+ "regression"
170
+ else
171
+ "unknown"
172
+ end
173
+ end
174
+ =end
175
+
176
# Request the Excel representation of the dataset.
# @return the result of #get for the "application/vnd.ms-excel" media type
def to_xls
  media_type = "application/vnd.ms-excel"
  get(:accept => media_type)
end
181
+
182
# Request the CSV representation of the dataset.
# @return the result of #get for the "text/csv" media type
def to_csv
  media_type = "text/csv"
  get(:accept => media_type)
end
187
+
188
# Request the SD file representation of the dataset.
# @return the result of #get for the 'chemical/x-mdl-sdfile' media type
def to_sdf
  media_type = 'chemical/x-mdl-sdfile'
  get(:accept => media_type)
end
191
+
192
+
193
# Get OWL-DL in N-Triples format
# N-Triples is requested as "text/plain"; the previous Accept header
# ("application/rdf+xml") made this method return the same representation
# as #to_rdfxml.
# NOTE(review): confirm the dataset service serves N-Triples for "text/plain".
# @return [String] N-Triples representation
def to_ntriples
  get :accept => "text/plain"
end
198
+
199
# Request the RDF/XML (OWL-DL) representation of the dataset.
# @return the result of #get for the "application/rdf+xml" media type
def to_rdfxml
  media_type = "application/rdf+xml"
  get(:accept => media_type)
end
204
+
205
# Look up the title (DC.title) stored for a feature.
# The feature table is obtained through #features with its default argument.
# @param [String] feature Feature URI
# @return [String] Feature title
def feature_name(feature)
  feature_metadata = features[feature]
  feature_metadata[DC.title]
end
211
+
212
# Title (DC.title) taken from the dataset metadata.
# @return the value stored under DC.title in #metadata
def title
  dataset_metadata = metadata
  dataset_metadata[DC.title]
end
215
+
216
# Record a (compound_uri, feature_uri, value) statement in the local containers.
# The compound and the feature are registered when they are not known yet;
# a nil value registers both without storing a value.
# @example Insert a statement (compound_uri,feature_uri,value)
#   dataset.add "http://webservices.in-silico.ch/compound/InChI=1S/C6Cl6/c7-1-2(8)4(10)6(12)5(11)3(1)9", "http://webservices.in-silico.ch/dataset/1/feature/hamster_carcinogenicity", true
# @param [String] compound Compound URI
# @param [String] feature Feature URI
# @param [Boolean,Float] value Feature value
# @return [Array, nil] the values stored for the pair after appending, nil when value is nil
def add(compound,feature,value)
  @compounds << compound unless @compounds.include?(compound)
  @features[feature] ||= {}
  stored_values = ((@data_entries[compound] ||= {})[feature] ||= [])
  stored_values << value unless value.nil?
end
229
+
230
# Register a feature together with its metadata, replacing whatever was
# stored for that feature before.
# @param [String] feature Feature URI
# @param [Hash] metadata Hash with feature metadata (defaults to an empty Hash)
# @return [Hash] the metadata that was stored
def add_feature(feature, metadata = {})
  @features[feature] = metadata
end
236
+
237
# Add/modify metadata for a feature
# Existing keys are overwritten, other keys are left untouched. A feature that
# has not been registered yet is created first (previously this raised
# NoMethodError on nil).
# @param [String] feature Feature URI
# @param [Hash] metadata Hash with feature metadata
# @return [Hash] the metadata argument
def add_feature_metadata(feature,metadata)
  feature_metadata = (@features[feature] ||= {})
  metadata.each { |k,v| feature_metadata[k] = v }
end
243
+
244
# Append a compound to the local compound list unless it is already present.
# @param [String] compound Compound URI
# @return [Array, nil] the compound list after appending, nil when the compound was already known
def add_compound(compound)
  return if @compounds.include?(compound)
  @compounds << compound
end
249
+
250
# Creates a new dataset, by splitting the current dataset, i.e. using only a subset of compounds and features
#
# Changes compared to the previous implementation:
# - the "no new compounds selected" check runs before compounds is used for logging,
#   so nil or empty input fails with that message
# - nil features are treated like an empty feature list
# - the feature table of this dataset is obtained once instead of once per feature,
#   and feature metadata is written to the new dataset with #add_feature_metadata
#   (calling #features on the new dataset uses reload=true by default)
# @param [Array] compounds List of compound URIs
# @param [Array] features List of feature URIs
# @param [Hash] metadata Hash containing the metadata for the new dataset
# @return [OpenTox::Dataset] newly created dataset, already saved
# @raise [RuntimeError] when compounds is nil or empty
def split( compounds, features, metadata)
  raise "no new compounds selected" unless compounds and compounds.size>0
  features ||= []
  LOGGER.debug "split dataset using "+compounds.size.to_s+"/"+@compounds.size.to_s+" compounds"
  dataset = OpenTox::Dataset.create(CONFIG[:services]["opentox-dataset"],@subjectid)
  if features.empty?
    compounds.each { |c| dataset.add_compound(c) }
  else
    compounds.each do |c|
      features.each do |f|
        values = @data_entries[c] && @data_entries[c][f]
        if values.nil?
          # no value known: register compound and feature without a value
          dataset.add(c,f,nil)
        else
          values.each { |v| dataset.add(c,f,v) }
        end
      end
    end
  end
  unless features.empty?
    # set feature metadata in new dataset accordingly (including accept values)
    # self.features is required here because the parameter shadows the method name
    source_features = self.features
    features.each do |f|
      dataset.add_feature_metadata(f, source_features[f])
    end
  end
  dataset.add_metadata(metadata)
  dataset.save
  dataset
end
285
+
286
# Store the dataset at the dataset service.
# - when @uri is not set, an empty POST to the configured "opentox-dataset"
#   service is made first and its (chomped) response is assigned through uri=
# - the dataset is then POSTed to @uri: as YAML when the host of @uri is listed
#   in CONFIG[:yaml_hosts], otherwise as RDF/XML produced by Serializer::Owl
# @return [String] Dataset URI
def save
  @compounds.uniq!
  unless @uri
    # create dataset if uri is empty
    self.uri = RestClientWrapper.post(CONFIG[:services]["opentox-dataset"],{:subjectid => @subjectid}).to_s.chomp
  end
  if CONFIG[:yaml_hosts].include?(URI.parse(@uri).host)
    payload = self.to_yaml
    content_type = "application/x-yaml"
  else
    owl = Serializer::Owl.new
    owl.add_dataset(self)
    payload = owl.to_rdfxml
    content_type = "application/rdf+xml"
  end
  RestClientWrapper.post(@uri, payload, {:content_type => content_type, :subjectid => @subjectid})
  @uri
end
303
+
304
+ private
305
# Copy metadata, data entries, compounds and features from another dataset.
# When this object already has a URI it is re-assigned through uri=;
# otherwise the URI is read from the other dataset's metadata (XSD.anyURI).
# @param dataset object responding to metadata, data_entries, compounds and features
def copy(dataset)
  @metadata     = dataset.metadata
  @data_entries = dataset.data_entries
  @compounds    = dataset.compounds
  @features     = dataset.features
  return self.uri = @uri if @uri
  @uri = dataset.metadata[XSD.anyURI]
end
317
+ end
318
+ end
data/lib/error.rb ADDED
@@ -0,0 +1,106 @@
1
+ require 'open4'
2
+
3
# Reopens RuntimeError so that errors carry the fields needed to format them
# according to the OT-API: an HTTP status code and the URI they relate to.
# Every newly created error is also written to $logger as a Turtle report.
class RuntimeError
  attr_accessor :http_code, :uri

  # @param [String] message error message
  # @param [optional, String] uri URI stored with the error
  def initialize(message, uri=nil)
    super(message)
    @uri = uri
    # subclasses may have set a code before calling super; 500 is the fallback
    @http_code = 500 unless @http_code
    $logger.error("\n" + report.to_turtle)
  end

  # @return [OpenTox::ErrorReport] report built from the status code and this error
  def report
    # TODO: remove kludge for old task services
    OpenTox::ErrorReport.new(@http_code, self)
  end
end
18
+
19
+ module OpenTox
20
+
21
# Base class of the OpenTox errors: a RuntimeError with an explicit HTTP status code.
class Error < RuntimeError
  # @param [Integer] code HTTP status code, set before the superclass constructor runs
  # @param [String] message error message
  # @param [optional, String] uri URI stored with the error
  def initialize(code, message, uri=nil)
    @http_code = code
    super(message, uri)
  end
end
27
+
28
# OpenTox errors: class name => HTTP status code.
# For every entry an Error subclass is registered in the OpenTox namespace and a
# method named after the underscored class name (e.g. bad_request_error) is
# defined on Object to raise it.
{
  "BadRequestError" => 400,
  "NotAuthorizedError" => 401,
  "NotFoundError" => 404,
  "LockedError" => 423,
  "InternalServerError" => 500,
  "NotImplementedError" => 501,
  "ServiceUnavailableError" => 503,
  "TimeOutError" => 504,
}.each do |klass,code|
  # create error classes
  c = Class.new Error do
    define_method :initialize do |message, uri=nil|
      super code, message, uri
    end
  end
  OpenTox.const_set klass,c

  # define global methods for raising errors, eg. bad_request_error
  Object.send(:define_method, klass.underscore.to_sym) do |message|
    uri = defined?(@uri) ? @uri : nil
    # TODO: insert uri from sinatra
    # Build the exception explicitly: the third argument of Kernel#raise is the
    # backtrace, so "raise c, message, uri" never handed the uri to the
    # constructor and replaced the backtrace with it.
    raise c.new(message, uri)
  end
end
54
+
55
# Error raised for failed RestClientWrapper calls. It keeps the request and the
# response and reports HTTP status 502 with the request URL as error URI.
class RestCallError < Error
  attr_accessor :request, :response

  # @param request object responding to url
  # @param response response stored with the error
  # @param [String] message error message
  def initialize(request, response, message)
    @request, @response = request, response
    super(502, message, request.url)
  end
end
64
+
65
# TODO: create reports directly from errors, requires modified task service
# Collects the details of an error as RDF predicate => value pairs and
# serializes them in each of the RDF_FORMATS (to_rdfxml, to_ntriples, to_turtle).
class ErrorReport
  # @param http_code status code stored as RDF::OT.statusCode
  # @param error error providing uri and message; request and response are used when the error defines them
  def initialize(http_code, error)
    @http_code = http_code
    #@report = report#.to_yaml
    @report = {}
    @report[RDF::OT.actor] = error.uri
    @report[RDF::OT.message] = error.message
    @report[RDF::OT.statusCode] = @http_code
    @report[RDF::OT.errorCode] = error.class.to_s
    # backtrace without the frames located in this library's directory
    foreign_frames = caller.reject { |line| line =~ /#{File.dirname(__FILE__)}/ }
    @report[RDF::OT.errorDetails] = foreign_frames.join("\n")
    if defined? error.request
      @report[RDF::OT.errorDetails] += "REST paramenters:\n#{error.request.args.inspect}"
    end
    if defined? error.response
      @report[RDF::OT.message] += "\n" + error.response.body
    end
    # TODO fix Error cause
    #report[RDF::OT.errorCause] = @report if defined?(@report)
  end

  # define to_ and self.from_ methods for various rdf formats
  RDF_FORMATS.each do |format|

    # one blank node is the subject of all statements; entries with a nil/false value are skipped
    send :define_method, "to_#{format}".to_sym do
      RDF::Writer.for(format).buffer do |writer|
        subject = RDF::Node.new
        @report.each do |predicate,object|
          writer << [subject, predicate, object] if object
        end
      end
    end

    # disabled:
    # define_singleton_method "from_#{format}".to_sym do |rdf|
    #   report = ErrorReport.new
    #   RDF::Reader.for(format).new(rdf) do |reader|
    #     reader.each_statement{ |statement| report.rdf << statement }
    #   end
    #   report
    # end
  end
end
106
+ end
data/lib/model.rb ADDED
@@ -0,0 +1,14 @@
1
+ module OpenTox
2
+
3
class Model

  # Run a model with parameters.
  # The parameters are POSTed with an Accept header of 'text/uri-list'.
  # @param [Hash] params Parameters for OpenTox model
  # @return [text/uri-list] Task or resource URI
  def run(params=nil)
    headers = {:accept => 'text/uri-list'}
    post(params, headers)
  end

end
14
+ end
@@ -0,0 +1,32 @@
1
+ require 'rubygems'
2
+ require "bundler/setup"
3
+ require 'rdf'
4
+ require 'rdf/raptor'
5
+ require "rest-client"
6
+ require 'uri'
7
+ require 'yaml'
8
+ require 'logger'
9
+
10
# Vocabularies, class names, RDF formats and the default RDF media type shared by the library
#TODO: switch services to 1.2
RDF::OT  = RDF::Vocabulary.new('http://www.opentox.org/api/1.2#')
RDF::OT1 = RDF::Vocabulary.new('http://www.opentox.org/api/1.1#')
RDF::OTA = RDF::Vocabulary.new('http://www.opentox.org/algorithmTypes.owl#')

#CLASSES = ["Compound", "Feature", "Dataset", "Algorithm", "Model", "Validation", "Task", "ErrorReport", "Investigation"]
CLASSES = %w[Compound Feature Dataset Algorithm Model Validation Task Investigation]
RDF_FORMATS = [:rdfxml, :ntriples, :turtle]
$default_rdf = "application/rdf+xml"
20
+
21
# Regular expressions for parsing classification data (case insensitive).
# The dot in "1.0" / "0.0" is escaped so that it only matches a literal dot;
# unescaped it matched any character (e.g. "1x0").
TRUE_REGEXP = /^(true|active|1|1\.0|tox|activating|carcinogen|mutagenic)$/i
FALSE_REGEXP = /^(false|inactive|0|0\.0|low tox|deactivating|non-carcinogen|non-mutagenic)$/i
24
+
25
+ require File.join(File.dirname(__FILE__),"overwrite.rb")
26
+ require File.join(File.dirname(__FILE__),"error.rb")
27
+ require File.join(File.dirname(__FILE__),"rest-client-wrapper.rb")
28
+ require File.join(File.dirname(__FILE__),"otlogger.rb") # avoid require conflicts with logger
29
+ require File.join(File.dirname(__FILE__),"opentox.rb")
30
+ require File.join(File.dirname(__FILE__),"task.rb")
31
+ require File.join(File.dirname(__FILE__),"compound.rb")
32
+ #require File.join(File.dirname(__FILE__),"dataset.rb")