opentox-ruby 2.0.1 → 2.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +39 -46
- data/VERSION +1 -1
- data/lib/algorithm.rb +797 -80
- data/lib/compound.rb +40 -0
- data/lib/config/config_ru.rb +2 -0
- data/lib/dataset.rb +57 -18
- data/lib/environment.rb +3 -3
- data/lib/feature.rb +15 -13
- data/lib/helper.rb +1 -2
- data/lib/model.rb +185 -82
- data/lib/opentox-ruby.rb +1 -1
- data/lib/overwrite.rb +2 -1
- data/lib/parser.rb +247 -69
- data/lib/rest_client_wrapper.rb +3 -2
- data/lib/serializer.rb +24 -10
- data/lib/task.rb +10 -3
- data/lib/to-html.rb +66 -41
- data/lib/validation.rb +93 -29
- metadata +206 -117
data/lib/compound.rb
CHANGED
@@ -21,6 +21,17 @@ module OpenTox
|
|
21
21
|
else
|
22
22
|
@inchi = RestClientWrapper.get(@uri, :accept => 'chemical/x-inchi').to_s.chomp if @uri
|
23
23
|
end
|
24
|
+
|
25
|
+
if @uri and @inchi.to_s.size==0
|
26
|
+
LOGGER.warn "REMOVE ABMIT HACK: no inchi for compound "+@uri.to_s+", load via smiles"
|
27
|
+
@inchi = Compound.smiles2inchi(Compound.smiles(@uri))
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
# request smiles from compound service via accept header
|
32
|
+
# @return smiles as string
|
33
|
+
def self.smiles(uri)
|
34
|
+
RestClientWrapper.get(uri, :accept => 'chemical/x-daylight-smiles').to_s.chomp
|
24
35
|
end
|
25
36
|
|
26
37
|
# Create a compound from smiles string
|
@@ -153,6 +164,35 @@ module OpenTox
|
|
153
164
|
#smarts_array.collect { |s| s if match?(s)}.compact
|
154
165
|
end
|
155
166
|
|
167
|
+
# Match an array of smarts strings, returns hash with matching smarts as key and number of non-unique hits as value
|
168
|
+
# @example
|
169
|
+
# compound = OpenTox::Compound.from_name("Benzene")
|
170
|
+
# compound.match_hits(['cc','cN']) # returns a Hash such as {'cc' => n}, where n is the number of non-unique hits
|
171
|
+
# @param [Array] smarts_array Array with Smarts strings
|
172
|
+
# @return [Hash] Hash with matching smarts as key and number of non-unique hits as value
|
173
|
+
def match_hits(smarts_array)
|
174
|
+
# avoid recreation of OpenBabel objects
|
175
|
+
obconversion = OpenBabel::OBConversion.new
|
176
|
+
obmol = OpenBabel::OBMol.new
|
177
|
+
obconversion.set_in_format('inchi')
|
178
|
+
obconversion.read_string(obmol,@inchi)
|
179
|
+
smarts_pattern = OpenBabel::OBSmartsPattern.new
|
180
|
+
smarts_hits = {}
|
181
|
+
#LOGGER.debug "dv ----------- obmol #{Compound.new(@inchi).to_smiles}"
|
182
|
+
smarts_array.collect do |smarts|
|
183
|
+
#LOGGER.debug "dv ----------- all smarts #{smarts}"
|
184
|
+
smarts_pattern.init(smarts)
|
185
|
+
if smarts_pattern.match(obmol)
|
186
|
+
hits = smarts_pattern.get_map_list
|
187
|
+
smarts_hits[smarts] = hits.size
|
188
|
+
end
|
189
|
+
end
|
190
|
+
#LOGGER.debug "dv ----------- smarts => hits #{smarts_hits}"
|
191
|
+
return smarts_hits
|
192
|
+
#smarts_array.collect { |s| s if match?(s)}.compact
|
193
|
+
end
|
194
|
+
|
195
|
+
|
156
196
|
# Get URI of compound image with highlighted fragments
|
157
197
|
#
|
158
198
|
# @param [Array] activating Array with activating Smarts strings
|
data/lib/config/config_ru.rb
CHANGED
data/lib/dataset.rb
CHANGED
@@ -102,6 +102,13 @@ module OpenTox
|
|
102
102
|
copy parser.load_uri(subjectid)
|
103
103
|
end
|
104
104
|
|
105
|
+
def load_sdf(sdf,subjectid=nil)
|
106
|
+
save(subjectid) unless @uri # get a uri for creating features
|
107
|
+
parser = Parser::Sdf.new
|
108
|
+
parser.dataset = self
|
109
|
+
parser.load_sdf(sdf)
|
110
|
+
end
|
111
|
+
|
105
112
|
# Load CSV string (format specification: http://toxcreate.org/help)
|
106
113
|
# - loads data_entries, compounds, features
|
107
114
|
# - sets metadata (warnings) for parser errors
|
@@ -149,7 +156,11 @@ module OpenTox
|
|
149
156
|
# Load and return only compound URIs from the dataset service
|
150
157
|
# @return [Array] Compound URIs in the dataset
|
151
158
|
def load_compounds(subjectid=nil)
|
152
|
-
|
159
|
+
# fix for datasets like http://apps.ideaconsult.net:8080/ambit2/dataset/272?max=50
|
160
|
+
u = URI::parse(uri)
|
161
|
+
u.path = File.join(u.path,"compounds")
|
162
|
+
u = u.to_s
|
163
|
+
RestClientWrapper.get(u,{:accept=> "text/uri-list", :subjectid => subjectid}).to_s.each_line do |compound_uri|
|
153
164
|
@compounds << compound_uri.chomp
|
154
165
|
end
|
155
166
|
@compounds.uniq!
|
@@ -167,19 +178,15 @@ module OpenTox
|
|
167
178
|
@features
|
168
179
|
end
|
169
180
|
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
else
|
178
|
-
nil
|
179
|
-
end
|
181
|
+
# returns the accept_values of a feature, i.e. the classification domain / all possible feature values
|
182
|
+
# @param [String] feature the URI of the feature
|
183
|
+
# @return [Array] return array with strings, nil if value is not set (e.g. when feature is numeric)
|
184
|
+
def accept_values(feature)
|
185
|
+
accept_values = features[feature][OT.acceptValue]
|
186
|
+
accept_values.sort if accept_values
|
187
|
+
accept_values
|
180
188
|
end
|
181
189
|
|
182
|
-
=begin
|
183
190
|
# Detect feature type(s) in the dataset
|
184
191
|
# @return [String] "classification", "regression", "mixed" or "unknown"
|
185
192
|
def feature_type(subjectid=nil)
|
@@ -193,6 +200,7 @@ module OpenTox
|
|
193
200
|
"unknown"
|
194
201
|
end
|
195
202
|
end
|
203
|
+
=begin
|
196
204
|
=end
|
197
205
|
|
198
206
|
# Get Spreadsheet representation
|
@@ -229,6 +237,30 @@ module OpenTox
|
|
229
237
|
s.to_rdfxml
|
230
238
|
end
|
231
239
|
|
240
|
+
# Get SDF representation of compounds
|
241
|
+
# @return [String] SDF representation
|
242
|
+
def to_sdf
|
243
|
+
sum=""
|
244
|
+
@compounds.each{ |c|
|
245
|
+
sum << OpenTox::Compound.new(c).to_inchi
|
246
|
+
sum << OpenTox::Compound.new(c).to_sdf.sub(/\n\$\$\$\$/,'')
|
247
|
+
@data_entries[c].each{ |f,v|
|
248
|
+
sum << "> <\"#{f}\">\n"
|
249
|
+
sum << v.join(", ")
|
250
|
+
sum << "\n\n"
|
251
|
+
}
|
252
|
+
sum << "$$$$\n"
|
253
|
+
}
|
254
|
+
sum
|
255
|
+
end
|
256
|
+
|
257
|
+
def to_urilist
|
258
|
+
@compounds.inject { |sum, c|
|
259
|
+
sum << OpenTox::Compound.new(c).uri
|
260
|
+
sum + "\n"
|
261
|
+
}
|
262
|
+
end
|
263
|
+
|
232
264
|
# Get name (DC.title) of a feature
|
233
265
|
# @param [String] feature Feature URI
|
234
266
|
# @return [String] Feature title
|
@@ -307,6 +339,12 @@ module OpenTox
|
|
307
339
|
end
|
308
340
|
end
|
309
341
|
end
|
342
|
+
# set feature metadata in new dataset accordingly (including accept values)
|
343
|
+
features.each do |f|
|
344
|
+
self.features[f].each do |k,v|
|
345
|
+
dataset.features[f][k] = v
|
346
|
+
end
|
347
|
+
end
|
310
348
|
dataset.add_metadata(metadata)
|
311
349
|
dataset.save(subjectid)
|
312
350
|
dataset
|
@@ -369,12 +407,14 @@ module OpenTox
|
|
369
407
|
end
|
370
408
|
|
371
409
|
def value(compound)
|
372
|
-
|
410
|
+
v = nil
|
411
|
+
v = @data_entries[compound.uri].collect{|f,v| v.first if f.match(/value/)}.compact.first if @data_entries[compound.uri]
|
412
|
+
v = nil if v.is_a? Array and v.empty?
|
413
|
+
v
|
373
414
|
end
|
374
415
|
|
375
416
|
def confidence(compound)
|
376
|
-
|
377
|
-
@features[feature_uri][OT.confidence]
|
417
|
+
@data_entries[compound.uri].collect{|f,v| v.first if f.match(/confidence/)}.compact.first if @data_entries[compound.uri]
|
378
418
|
end
|
379
419
|
|
380
420
|
def descriptors(compound)
|
@@ -382,12 +422,11 @@ module OpenTox
|
|
382
422
|
end
|
383
423
|
|
384
424
|
def measured_activities(compound)
|
385
|
-
|
386
|
-
@data_entries[compound.uri].collect{|f,v| v if f.match(/#{source}/)}.compact.flatten
|
425
|
+
@data_entries[compound.uri].collect{|f,v| v if f.match(/#{@metadata[OT.hasSource]}/)}.compact.flatten if @data_entries[compound.uri]
|
387
426
|
end
|
388
427
|
|
389
428
|
def neighbors(compound)
|
390
|
-
@data_entries[compound.uri].collect{|f,v| @features[f] if f.match(/neighbor/)}.compact
|
429
|
+
@data_entries[compound.uri].collect{|f,v| @features[f] if f.match(/neighbor/)}.compact if @data_entries[compound.uri]
|
391
430
|
end
|
392
431
|
|
393
432
|
# def errors(compound)
|
data/lib/environment.rb
CHANGED
@@ -27,7 +27,7 @@ end
|
|
27
27
|
Ohm.connect :thread_safe => true
|
28
28
|
|
29
29
|
# load mail settings for error messages
|
30
|
-
load File.join config_dir,"mail.rb" if File.exists?(File.join config_dir,"mail.rb")
|
30
|
+
#load File.join config_dir,"mail.rb" if File.exists?(File.join config_dir,"mail.rb")
|
31
31
|
|
32
32
|
logfile = "#{LOG_DIR}/#{ENV["RACK_ENV"]}.log"
|
33
33
|
#LOGGER = OTLogger.new(logfile,'daily') # daily rotation
|
@@ -40,8 +40,8 @@ else
|
|
40
40
|
end
|
41
41
|
|
42
42
|
# Regular expressions for parsing classification data
|
43
|
-
TRUE_REGEXP = /^(true|active|1|1.0|tox)$/i
|
44
|
-
FALSE_REGEXP = /^(false|inactive|0|0.0|low tox)$/i
|
43
|
+
TRUE_REGEXP = /^(true|active|1|1.0|tox|activating)$/i
|
44
|
+
FALSE_REGEXP = /^(false|inactive|0|0.0|low tox|deactivating)$/i
|
45
45
|
|
46
46
|
# Task durations
|
47
47
|
DEFAULT_TASK_MAX_DURATION = 36000
|
data/lib/feature.rb
CHANGED
@@ -2,6 +2,8 @@ module OpenTox
|
|
2
2
|
class Feature
|
3
3
|
include OpenTox
|
4
4
|
|
5
|
+
attr_accessor :subjectid
|
6
|
+
|
5
7
|
# Find a feature
|
6
8
|
# @param [String] uri Feature URI
|
7
9
|
# @return [OpenTox::Feature] Feature object
|
@@ -13,31 +15,31 @@ module OpenTox
|
|
13
15
|
else
|
14
16
|
feature.add_metadata Parser::Owl::Dataset.new(uri).load_metadata
|
15
17
|
end
|
18
|
+
feature.subjectid = subjectid
|
16
19
|
feature
|
17
20
|
end
|
18
|
-
|
21
|
+
|
19
22
|
# provides feature type, possible types are "regression" or "classification"
|
20
23
|
# @return [String] feature type, "unknown" if it cannot be derived from the RDF.type or OWL.sameAs metadata
|
21
24
|
def feature_type
|
25
|
+
raise OpenTox::BadRequestError.new("rdf type of feature '"+uri.to_s+"' not set") unless metadata[RDF.type]
|
22
26
|
if metadata[RDF.type].flatten.include?(OT.NominalFeature)
|
23
27
|
"classification"
|
24
28
|
elsif metadata[RDF.type].flatten.include?(OT.NumericFeature)
|
25
29
|
"regression"
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
"
|
30
|
+
elsif metadata[OWL.sameAs]
|
31
|
+
metadata[OWL.sameAs].each do |f|
|
32
|
+
begin
|
33
|
+
type = Feature.find(f, subjectid).feature_type
|
34
|
+
return type unless type=="unknown"
|
35
|
+
rescue => ex
|
36
|
+
LOGGER.warn "could not load same-as-feature '"+f.to_s+"' for feature '"+uri.to_s+"' : "+ex.message.to_s
|
37
|
+
end
|
38
|
+
end
|
39
|
+
"unknown"
|
36
40
|
else
|
37
41
|
"unknown"
|
38
42
|
end
|
39
|
-
=end
|
40
43
|
end
|
41
|
-
|
42
44
|
end
|
43
45
|
end
|
data/lib/helper.rb
CHANGED
@@ -81,7 +81,7 @@ helpers do
|
|
81
81
|
when "css"
|
82
82
|
@accept = 'text/css'
|
83
83
|
else
|
84
|
-
#
|
84
|
+
# raise OpenTox::NotFoundError.new "File format #{extension} not supported."
|
85
85
|
end
|
86
86
|
end
|
87
87
|
end
|
@@ -94,4 +94,3 @@ before do
|
|
94
94
|
protected!(@subjectid)
|
95
95
|
end
|
96
96
|
end
|
97
|
-
|
data/lib/model.rb
CHANGED
@@ -23,7 +23,7 @@ module OpenTox
|
|
23
23
|
# Generic OpenTox model class for all API compliant services
|
24
24
|
class Generic
|
25
25
|
include Model
|
26
|
-
|
26
|
+
|
27
27
|
# Find Generic Opentox Model via URI, and loads metadata, could raise NotFound/NotAuthorized error
|
28
28
|
# @param [String] uri Model URI
|
29
29
|
# @return [OpenTox::Model::Generic] Model instance
|
@@ -34,42 +34,75 @@ module OpenTox
|
|
34
34
|
raise "could not load model metadata '"+uri.to_s+"'" if model.metadata==nil or model.metadata.size==0
|
35
35
|
model
|
36
36
|
end
|
37
|
-
|
38
|
-
|
39
|
-
|
37
|
+
|
38
|
+
# provides feature type, possible types are "regression" or "classification"
|
39
|
+
# @return [String] feature type, "unknown" if type could not be estimated
|
40
40
|
def feature_type(subjectid=nil)
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
load_metadata(subjectid) if @metadata==nil or @metadata.size==0 or (@metadata.size==1 && @metadata.values[0]==@uri)
|
45
|
-
algorithm = OpenTox::Algorithm::Generic.find(@metadata[OT.algorithm], subjectid)
|
46
|
-
algorithm_title = algorithm ? algorithm.metadata[DC.title] : nil
|
47
|
-
algorithm_type = algorithm ? algorithm.metadata[RDF.type] : nil
|
48
|
-
dependent_variable = OpenTox::Feature.find( @metadata[OT.dependentVariables],subjectid )
|
49
|
-
dependent_variable_type = dependent_variable ? dependent_variable.feature_type : nil
|
50
|
-
type_indicators = [dependent_variable_type, @metadata[RDF.type], @metadata[DC.title], @uri, algorithm_type, algorithm_title].flatten
|
51
|
-
type_indicators.each do |type|
|
52
|
-
case type
|
53
|
-
when /(?i)classification/
|
54
|
-
@feature_type = "classification"
|
55
|
-
break
|
56
|
-
when /(?i)regression/
|
57
|
-
@feature_type = "regression"
|
58
|
-
end
|
41
|
+
unless @feature_type
|
42
|
+
load_predicted_variables( subjectid ) unless @predicted_variable
|
43
|
+
@feature_type = OpenTox::Feature.find( @predicted_variable, subjectid ).feature_type
|
59
44
|
end
|
60
|
-
raise "unknown model "+type_indicators.inspect unless @feature_type
|
61
45
|
@feature_type
|
62
46
|
end
|
63
|
-
|
64
|
-
end
|
65
47
|
|
48
|
+
def predicted_variable( subjectid )
|
49
|
+
load_predicted_variables( subjectid ) unless @predicted_variable
|
50
|
+
@predicted_variable
|
51
|
+
end
|
52
|
+
|
53
|
+
def predicted_variables( subjectid )
|
54
|
+
load_predicted_variables( subjectid, false ) unless @predicted_variables
|
55
|
+
@predicted_variables
|
56
|
+
end
|
57
|
+
|
58
|
+
def predicted_confidence( subjectid )
|
59
|
+
load_predicted_variables( subjectid ) unless @predicted_confidence
|
60
|
+
@predicted_confidence
|
61
|
+
end
|
62
|
+
|
63
|
+
private
|
64
|
+
def load_predicted_variables( subjectid=nil, use_confidence=true )
|
65
|
+
load_metadata(subjectid) if @metadata==nil or @metadata.size==0 or (@metadata.size==1 && @metadata.values[0]==@uri)
|
66
|
+
if @metadata[OT.predictedVariables]
|
67
|
+
predictedVariables = @metadata[OT.predictedVariables]
|
68
|
+
if predictedVariables.is_a?(Array)
|
69
|
+
if (predictedVariables.size==1)
|
70
|
+
@predicted_variable = predictedVariables[0]
|
71
|
+
elsif (predictedVariables.size>=2)
|
72
|
+
# PENDING identify confidence
|
73
|
+
if use_confidence
|
74
|
+
conf_index = -1
|
75
|
+
predictedVariables.size.times do |i|
|
76
|
+
f = OpenTox::Feature.find(predictedVariables[i], subjectid)
|
77
|
+
conf_index = i if f.metadata[DC.title]=~/(?i)confidence/
|
78
|
+
end
|
79
|
+
raise "could not estimate predicted variable from model: '"+uri.to_s+
|
80
|
+
"', number of predicted-variables==2, but no confidence found" if conf_index==-1
|
81
|
+
end
|
82
|
+
if (predictedVariables.size==2) && use_confidence
|
83
|
+
@predicted_variable = predictedVariables[1-conf_index]
|
84
|
+
@predicted_confidence = predictedVariables[conf_index]
|
85
|
+
else
|
86
|
+
@predicted_variables = predictedVariables
|
87
|
+
end
|
88
|
+
else
|
89
|
+
raise "could not estimate predicted variable from model: '"+uri.to_s+"', number of predicted-variables == 0"
|
90
|
+
end
|
91
|
+
else
|
92
|
+
raise "could not estimate predicted variable from model: '"+uri.to_s+"', predicted-variables is no array"
|
93
|
+
end
|
94
|
+
end
|
95
|
+
raise "could not estimate predicted variable from model: '"+uri.to_s+"'" unless (@predicted_variable || @predicted_variables)
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
66
99
|
# Lazy Structure Activity Relationship class
|
67
100
|
class Lazar
|
68
101
|
|
69
|
-
include Model
|
70
102
|
include Algorithm
|
103
|
+
include Model
|
71
104
|
|
72
|
-
attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid
|
105
|
+
attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel, :value_map, :nr_hits, :transform, :conf_stdev, :prediction_min_max
|
73
106
|
|
74
107
|
def initialize(uri=nil)
|
75
108
|
|
@@ -78,7 +111,7 @@ module OpenTox
|
|
78
111
|
else
|
79
112
|
super CONFIG[:services]["opentox-model"]
|
80
113
|
end
|
81
|
-
|
114
|
+
|
82
115
|
@metadata[OT.algorithm] = File.join(CONFIG[:services]["opentox-algorithm"],"lazar")
|
83
116
|
|
84
117
|
@features = []
|
@@ -86,12 +119,18 @@ module OpenTox
|
|
86
119
|
@activities = {}
|
87
120
|
@p_values = {}
|
88
121
|
@fingerprints = {}
|
122
|
+
@value_map = {}
|
123
|
+
@prediction_min_max = []
|
89
124
|
|
90
125
|
@feature_calculation_algorithm = "Substructure.match"
|
91
126
|
@similarity_algorithm = "Similarity.tanimoto"
|
92
127
|
@prediction_algorithm = "Neighbors.weighted_majority_vote"
|
93
|
-
|
128
|
+
|
129
|
+
@nr_hits = false
|
94
130
|
@min_sim = 0.3
|
131
|
+
@prop_kernel = false
|
132
|
+
@transform = { "class" => "NOP" }
|
133
|
+
@conf_stdev = false
|
95
134
|
|
96
135
|
end
|
97
136
|
|
@@ -111,13 +150,25 @@ module OpenTox
|
|
111
150
|
# Create a new lazar model
|
112
151
|
# @param [optional,Hash] params Parameters for the lazar algorithm (OpenTox::Algorithm::Lazar)
|
113
152
|
# @return [OpenTox::Model::Lazar] lazar model
|
114
|
-
def self.create(params)
|
153
|
+
def self.create(params, waiting_task=nil )
|
115
154
|
subjectid = params[:subjectid]
|
116
155
|
lazar_algorithm = OpenTox::Algorithm::Generic.new File.join( CONFIG[:services]["opentox-algorithm"],"lazar")
|
117
|
-
model_uri = lazar_algorithm.run(params)
|
156
|
+
model_uri = lazar_algorithm.run(params, waiting_task)
|
118
157
|
OpenTox::Model::Lazar.find(model_uri, subjectid)
|
119
158
|
end
|
120
159
|
|
160
|
+
def run( params, accept_header=nil, waiting_task=nil )
|
161
|
+
unless accept_header
|
162
|
+
if CONFIG[:yaml_hosts].include?(URI.parse(@uri).host)
|
163
|
+
accept_header = 'application/x-yaml'
|
164
|
+
else
|
165
|
+
accept_header = 'application/rdf+xml'
|
166
|
+
end
|
167
|
+
end
|
168
|
+
LOGGER.info "running model "+@uri.to_s+", params: "+params.inspect+", accept: "+accept_header.to_s
|
169
|
+
RestClientWrapper.post(@uri,params,{:accept => accept_header},waiting_task).to_s
|
170
|
+
end
|
171
|
+
|
121
172
|
# Get a parameter value
|
122
173
|
# @param [String] param Parameter name
|
123
174
|
# @return [String] Parameter value
|
@@ -131,6 +182,7 @@ module OpenTox
|
|
131
182
|
# @param [optional,OpenTox::Task] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly
|
132
183
|
# @return [OpenTox::Dataset] Dataset with predictions
|
133
184
|
def predict_dataset(dataset_uri, subjectid=nil, waiting_task=nil)
|
185
|
+
|
134
186
|
@prediction_dataset = Dataset.create(CONFIG[:services]["opentox-dataset"], subjectid)
|
135
187
|
@prediction_dataset.add_metadata({
|
136
188
|
OT.hasSource => @uri,
|
@@ -150,7 +202,7 @@ module OpenTox
|
|
150
202
|
LOGGER.warn "prediction for compound "+compound_uri.to_s+" failed: "+ex.message
|
151
203
|
end
|
152
204
|
end
|
153
|
-
|
205
|
+
#@prediction_dataset.save(subjectid)
|
154
206
|
@prediction_dataset
|
155
207
|
end
|
156
208
|
|
@@ -164,49 +216,52 @@ module OpenTox
|
|
164
216
|
features = {}
|
165
217
|
|
166
218
|
unless @prediction_dataset
|
167
|
-
#@prediction_dataset = cached_prediction
|
168
|
-
#return @prediction_dataset if cached_prediction
|
169
219
|
@prediction_dataset = Dataset.create(CONFIG[:services]["opentox-dataset"], subjectid)
|
170
220
|
@prediction_dataset.add_metadata( {
|
171
221
|
OT.hasSource => @uri,
|
172
222
|
DC.creator => @uri,
|
173
|
-
# TODO: fix dependentVariable
|
174
223
|
DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )),
|
175
224
|
OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}]
|
176
225
|
} )
|
177
226
|
end
|
178
227
|
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
# TODO: fix dependentVariable
|
186
|
-
@prediction_dataset.metadata[OT.dependentVariables] = prediction_feature_uri
|
187
|
-
|
188
|
-
if @neighbors.size == 0
|
189
|
-
@prediction_dataset.add_feature(prediction_feature_uri, {
|
190
|
-
RDF.type => [OT.MeasuredFeature],
|
191
|
-
OT.hasSource => @uri,
|
192
|
-
DC.creator => @uri,
|
193
|
-
DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )),
|
194
|
-
OT.error => "No similar compounds in training dataset.",
|
195
|
-
OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}]
|
196
|
-
})
|
197
|
-
@prediction_dataset.add @compound.uri, prediction_feature_uri, prediction[:prediction]
|
228
|
+
if OpenTox::Feature.find(metadata[OT.dependentVariables]).feature_type == "regression"
|
229
|
+
all_activities = []
|
230
|
+
all_activities = @activities.values.flatten.collect! { |i| i.to_f }
|
231
|
+
@prediction_min_max[0] = (all_activities.to_scale.min/2)
|
232
|
+
@prediction_min_max[1] = (all_activities.to_scale.max*2)
|
233
|
+
end
|
198
234
|
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
235
|
+
unless database_activity(subjectid) # adds database activity to @prediction_dataset
|
236
|
+
|
237
|
+
neighbors
|
238
|
+
prediction = eval("#{@prediction_algorithm} ( { :neighbors => @neighbors,
|
239
|
+
:compound => @compound,
|
240
|
+
:features => @features,
|
241
|
+
:p_values => @p_values,
|
242
|
+
:fingerprints => @fingerprints,
|
243
|
+
:similarity_algorithm => @similarity_algorithm,
|
244
|
+
:prop_kernel => @prop_kernel,
|
245
|
+
:value_map => @value_map,
|
246
|
+
:nr_hits => @nr_hits,
|
247
|
+
:conf_stdev => @conf_stdev,
|
248
|
+
:prediction_min_max => @prediction_min_max,
|
249
|
+
:transform => @transform } ) ")
|
250
|
+
|
251
|
+
value_feature_uri = File.join( @uri, "predicted", "value")
|
252
|
+
confidence_feature_uri = File.join( @uri, "predicted", "confidence")
|
253
|
+
|
254
|
+
@prediction_dataset.metadata[OT.dependentVariables] = @metadata[OT.dependentVariables] unless @prediction_dataset.metadata[OT.dependentVariables]
|
255
|
+
@prediction_dataset.metadata[OT.predictedVariables] = [value_feature_uri, confidence_feature_uri] unless @prediction_dataset.metadata[OT.predictedVariables]
|
256
|
+
|
257
|
+
if OpenTox::Feature.find(metadata[OT.dependentVariables], subjectid).feature_type == "classification"
|
258
|
+
@prediction_dataset.add @compound.uri, value_feature_uri, @value_map[prediction[:prediction]]
|
259
|
+
else
|
260
|
+
@prediction_dataset.add @compound.uri, value_feature_uri, prediction[:prediction]
|
261
|
+
end
|
262
|
+
@prediction_dataset.add @compound.uri, confidence_feature_uri, prediction[:confidence]
|
263
|
+
@prediction_dataset.features[value_feature_uri][DC.title] = @prediction_dataset.metadata[DC.title]
|
264
|
+
@prediction_dataset.features[confidence_feature_uri][DC.title] = "Confidence"
|
210
265
|
|
211
266
|
if verbose
|
212
267
|
if @feature_calculation_algorithm == "Substructure.match"
|
@@ -260,7 +315,6 @@ module OpenTox
|
|
260
315
|
end
|
261
316
|
n+=1
|
262
317
|
end
|
263
|
-
# what happens with dataset predictions?
|
264
318
|
end
|
265
319
|
end
|
266
320
|
|
@@ -268,33 +322,53 @@ module OpenTox
|
|
268
322
|
@prediction_dataset
|
269
323
|
end
|
270
324
|
|
271
|
-
|
272
|
-
def neighbors
|
325
|
+
|
273
326
|
|
327
|
+
# Find neighbors and store them as object variable, access all compounds for that.
|
328
|
+
def neighbors
|
274
329
|
@compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm
|
275
|
-
|
276
330
|
@neighbors = []
|
277
|
-
@fingerprints.each do |training_compound
|
278
|
-
|
279
|
-
if sim > @min_sim
|
280
|
-
@activities[training_compound].each do |act|
|
281
|
-
@neighbors << {
|
282
|
-
:compound => training_compound,
|
283
|
-
:similarity => sim,
|
284
|
-
:features => training_features,
|
285
|
-
:activity => act
|
286
|
-
}
|
287
|
-
end
|
288
|
-
end
|
331
|
+
@fingerprints.keys.each do |training_compound| # AM: access all compounds
|
332
|
+
add_neighbor @fingerprints[training_compound].keys, training_compound
|
289
333
|
end
|
334
|
+
end
|
290
335
|
|
336
|
+
# Adds a neighbor to @neighbors if it passes the similarity threshold.
|
337
|
+
def add_neighbor(training_features, training_compound)
|
338
|
+
compound_features_hits = {}
|
339
|
+
training_compound_features_hits = {}
|
340
|
+
if @nr_hits
|
341
|
+
compound_features_hits = @compound.match_hits(@compound_features)
|
342
|
+
training_compound_features_hits = @fingerprints[training_compound]
|
343
|
+
#LOGGER.debug "dv ------------ training_compound_features_hits:#{training_compound_features_hits.class} #{training_compound_features_hits}"
|
344
|
+
end
|
345
|
+
params = {}
|
346
|
+
params[:nr_hits] = @nr_hits
|
347
|
+
params[:compound_features_hits] = compound_features_hits
|
348
|
+
params[:training_compound_features_hits] = training_compound_features_hits
|
349
|
+
|
350
|
+
sim = eval("#{@similarity_algorithm}(training_features, @compound_features, @p_values, params)")
|
351
|
+
if sim > @min_sim
|
352
|
+
@activities[training_compound].each do |act|
|
353
|
+
@neighbors << {
|
354
|
+
:compound => training_compound,
|
355
|
+
:similarity => sim,
|
356
|
+
:features => training_features,
|
357
|
+
:activity => act
|
358
|
+
}
|
359
|
+
end
|
360
|
+
end
|
291
361
|
end
|
292
362
|
|
293
363
|
# Find database activities and store them in @prediction_dataset
|
294
364
|
# @return [Boolean] true if compound has database activities, false if not
|
295
365
|
def database_activity(subjectid)
|
296
366
|
if @activities[@compound.uri]
|
297
|
-
|
367
|
+
if OpenTox::Feature.find(metadata[OT.dependentVariables], subjectid).feature_type == "classification"
|
368
|
+
@activities[@compound.uri].each { |act| @prediction_dataset.add @compound.uri, @metadata[OT.dependentVariables], @value_map[act] }
|
369
|
+
else
|
370
|
+
@activities[@compound.uri].each { |act| @prediction_dataset.add @compound.uri, @metadata[OT.dependentVariables], act }
|
371
|
+
end
|
298
372
|
@prediction_dataset.add_metadata(OT.hasSource => @metadata[OT.trainingDataset])
|
299
373
|
@prediction_dataset.save(subjectid)
|
300
374
|
true
|
@@ -303,6 +377,35 @@ module OpenTox
|
|
303
377
|
end
|
304
378
|
end
|
305
379
|
|
380
|
+
def prediction_features
|
381
|
+
[prediction_value_feature,prediction_confidence_feature]
|
382
|
+
end
|
383
|
+
|
384
|
+
def prediction_value_feature
|
385
|
+
dependent_uri = @metadata[OT.dependentVariables].first
|
386
|
+
feature = OpenTox::Feature.new File.join( @uri, "predicted", "value")
|
387
|
+
feature.add_metadata( {
|
388
|
+
RDF.type => [OT.ModelPrediction],
|
389
|
+
OT.hasSource => @uri,
|
390
|
+
DC.creator => @uri,
|
391
|
+
DC.title => URI.decode(File.basename( dependent_uri )),
|
392
|
+
OWL.sameAs => dependent_uri
|
393
|
+
})
|
394
|
+
feature
|
395
|
+
end
|
396
|
+
|
397
|
+
def prediction_confidence_feature
|
398
|
+
dependent_uri = @metadata[OT.dependentVariables].first
|
399
|
+
feature = OpenTox::Feature.new File.join( @uri, "predicted", "confidence")
|
400
|
+
feature.add_metadata( {
|
401
|
+
RDF.type => [OT.ModelPrediction],
|
402
|
+
OT.hasSource => @uri,
|
403
|
+
DC.creator => @uri,
|
404
|
+
DC.title => "#{URI.decode(File.basename( dependent_uri ))} confidence"
|
405
|
+
})
|
406
|
+
feature
|
407
|
+
end
|
408
|
+
|
306
409
|
# Save model at model service
|
307
410
|
def save(subjectid)
|
308
411
|
self.uri = RestClientWrapper.post(@uri,self.to_yaml,{:content_type => "application/x-yaml", :subjectid => subjectid})
|